---
# Model proxy configuration. Key names match the llama-swap config layout;
# confirm against the tool that actually consumes this file.

# Health-check timeout for a model backend (presumably seconds -- TODO confirm).
healthCheckTimeout: 60

models:
  # Backend reached through `proxy`; this entry defines no `cmd` of its own.
  kokoro-tts:
    proxy: http://kokoro-tts-server.llm.svc:8880
    checkEndpoint: /v1/audio/speech
    # NOTE(review): presumably hides this entry from model listings -- verify.
    unlisted: true

  gemma-3-12b-it-qat:
    # Folded scalar (`>`): the lines below are joined with single spaces into
    # one command line. `${PORT}` is left verbatim by YAML; it is presumably
    # substituted by the consuming tool at launch -- verify.
    cmd: >
      /app/llama-server
      -hf unsloth/gemma-3-12b-it-qat-GGUF
      -hff gemma-3-12b-it-qat-UD-Q4_K_XL.gguf
      --port ${PORT}
      --no-mmap
      --flash-attn
      --cache-type-k q8_0
      --cache-type-v q8_0
      --temp 1.0 --top-k 64 --min-p 0.00 --top-p 0.95 --repeat-penalty 1.0
      --gpu-layers 49

    # Disabled flags kept for reference. They sit outside the folded scalar so
    # they are plain YAML comments and never become part of the command string.
    # --no-kv-offload
    # --ctx-size 16384