1
0
Fork 0

add embedding model

This commit is contained in:
Massaki Archambault 2025-06-05 22:29:30 -04:00
parent 3183b8e4fc
commit 836e3a3cb4
1 changed file with 21 additions and 5 deletions

View File

@@ -6,18 +6,34 @@ models:
checkEndpoint: /v1/audio/speech
unlisted: true
Gemma-3-12B:
Gemma-3-4B:
cmd: >
/app/llama-server
-hf unsloth/gemma-3-12b-it-qat-GGUF
-hff gemma-3-12b-it-qat-UD-Q4_K_XL.gguf
-hf unsloth/gemma-3-4b-it-qat-GGUF
-hff gemma-3-4b-it-qat-UD-Q6_K_XL.gguf
--port ${PORT}
--flash-attn
--cache-type-k q8_0
--cache-type-v q8_0
--temp 1.0 --top-k 64 --min-p 0.00 --top-p 0.95 --repeat-penalty 1.0
--gpu-layers 49
Qwen3-Embedding-0.6B:
cmd: >
/app/llama-server
-hf Qwen/Qwen3-Embedding-0.6B-GGUF:Q8_0
--port ${PORT}
--embedding
--pooling last
-ub 8912
--gpu-layers 29
groups:
embedding:
persistent: true
swap: true
exclusive: false
members:
- Qwen3-Embedding-0.6B
# --no-kv-offload
# --ctx-size 16384
# --ctx-size 16384