Update example with possible yarn context extension
Browse files
README.md
CHANGED
|
@@ -452,7 +452,7 @@ numactl -N "$SOCKET" -m "$SOCKET" \
|
|
| 452 |
./build/bin/llama-server \
|
| 453 |
--model "$model"\
|
| 454 |
--alias ubergarm/Ling-1T-GGUF \
|
| 455 |
-
--ctx-size
|
| 456 |
-fa -fmoe -ger \
|
| 457 |
-ctk q8_0 -ctv q8_0 \
|
| 458 |
-ub 4096 -b 4096 \
|
|
@@ -471,6 +471,15 @@ numactl -N "$SOCKET" -m "$SOCKET" \
|
|
| 471 |
|
| 472 |
# optional use this once after downloading to confirm good files
|
| 473 |
--validate-quants
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
```
|
| 475 |
|
| 476 |
## References
|
|
|
|
| 452 |
./build/bin/llama-server \
|
| 453 |
--model "$model"\
|
| 454 |
--alias ubergarm/Ling-1T-GGUF \
|
| 455 |
+
--ctx-size 32768 \
|
| 456 |
-fa -fmoe -ger \
|
| 457 |
-ctk q8_0 -ctv q8_0 \
|
| 458 |
-ub 4096 -b 4096 \
|
|
|
|
| 471 |
|
| 472 |
# optional use this once after downloading to confirm good files
|
| 473 |
--validate-quants
|
| 474 |
+
|
| 475 |
+
# NOTE: if you *really* want more than the officially supported 32k context, consider these options:
|
| 476 |
+
# 64k
|
| 477 |
+
--ctx-size 65536 --rope-scaling yarn --rope-scale 2 --yarn-orig-ctx 32768 --override-kv bailingmoe2.context_length=int:65536
|
| 478 |
+
# 128k (a longer extension likely reduces output quality; always use the minimum context required)
|
| 479 |
+
--ctx-size 131072 --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768 --override-kv bailingmoe2.context_length=int:131072
|
| 480 |
+
# Details:
|
| 481 |
+
# * https://github.com/ikawrakow/ik_llama.cpp/discussions/839#discussioncomment-14745117
|
| 482 |
+
# * https://github.com/ikawrakow/ik_llama.cpp/issues/873
|
| 483 |
```
|
| 484 |
|
| 485 |
## References
|