ubergarm committed on
Commit
ddb0e0b
·
1 Parent(s): ebf35e1

Update example with possible yarn context extension

Browse files
Files changed (1) hide show
  1. README.md +10 -1
README.md CHANGED
@@ -452,7 +452,7 @@ numactl -N "$SOCKET" -m "$SOCKET" \
452
  ./build/bin/llama-server \
453
  --model "$model"\
454
  --alias ubergarm/Ling-1T-GGUF \
455
- --ctx-size 65536 \
456
  -fa -fmoe -ger \
457
  -ctk q8_0 -ctv q8_0 \
458
  -ub 4096 -b 4096 \
@@ -471,6 +471,15 @@ numactl -N "$SOCKET" -m "$SOCKET" \
471
 
472
  # optional: use this once after downloading to confirm good files
473
  --validate-quants
 
 
 
 
 
 
 
 
 
474
  ```
475
 
476
  ## References
 
452
  ./build/bin/llama-server \
453
  --model "$model"\
454
  --alias ubergarm/Ling-1T-GGUF \
455
+ --ctx-size 32768 \
456
  -fa -fmoe -ger \
457
  -ctk q8_0 -ctv q8_0 \
458
  -ub 4096 -b 4096 \
 
471
 
472
  # optional: use this once after downloading to confirm good files
473
  --validate-quants
474
+
475
+ # NOTE: if you *really* want more than the officially supported 32k context, consider these options:
476
+ # 64k
477
+ --ctx-size 65536 --rope-scaling yarn --rope-scale 2 --yarn-orig-ctx 32768 --override-kv bailingmoe2.context_length=int:65536
478
+ # 128k (longer extension likely reduces output quality; always use the minimum context required)
479
+ --ctx-size 131072 --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768 --override-kv bailingmoe2.context_length=int:131072
480
+ # Details:
481
+ # * https://github.com/ikawrakow/ik_llama.cpp/discussions/839#discussioncomment-14745117
482
+ # * https://github.com/ikawrakow/ik_llama.cpp/issues/873
483
  ```
484
 
485
  ## References