add embedding model
This commit is contained in:
parent
3183b8e4fc
commit
4311b41375
|
@ -6,11 +6,11 @@ models:
|
||||||
checkEndpoint: /v1/audio/speech
|
checkEndpoint: /v1/audio/speech
|
||||||
unlisted: true
|
unlisted: true
|
||||||
|
|
||||||
Gemma-3-12B:
|
Gemma-3-4B:
|
||||||
cmd: >
|
cmd: >
|
||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf unsloth/gemma-3-12b-it-qat-GGUF
|
-hf unsloth/gemma-3-4b-it-qat-GGUF
|
||||||
-hff gemma-3-12b-it-qat-UD-Q4_K_XL.gguf
|
-hff gemma-3-4b-it-qat-UD-Q6_K_XL.gguf
|
||||||
--port ${PORT}
|
--port ${PORT}
|
||||||
--flash-attn
|
--flash-attn
|
||||||
--cache-type-k q8_0
|
--cache-type-k q8_0
|
||||||
|
@ -18,6 +18,21 @@ models:
|
||||||
--temp 1.0 --top-k 64 --min-p 0.00 --top-p 0.95 --repeat-penalty 1.0
|
--temp 1.0 --top-k 64 --min-p 0.00 --top-p 0.95 --repeat-penalty 1.0
|
||||||
--gpu-layers 49
|
--gpu-layers 49
|
||||||
|
|
||||||
|
Qwen3-Embedding-0.6B:
|
||||||
|
cmd: >
|
||||||
|
/app/llama-server
|
||||||
|
-hf Qwen/Qwen3-Embedding-0.6B-GGUF:Q8_0
|
||||||
|
--port ${PORT}
|
||||||
|
--embedding
|
||||||
|
--pooling last
|
||||||
|
-ub 8912
|
||||||
|
|
||||||
|
groups:
|
||||||
|
embedding:
|
||||||
|
persistent: true
|
||||||
|
swap: true
|
||||||
|
exclusive: false
|
||||||
|
members:
|
||||||
|
- Qwen3-Embedding-0.6B
|
||||||
# --no-kv-offload
|
# --no-kv-offload
|
||||||
# --ctx-size 16384
|
# --ctx-size 16384
|
Loading…
Reference in New Issue