diff --git a/kustomize/bases/llama-swap/configurations/config.yaml b/kustomize/bases/llama-swap/configurations/config.yaml
index a309c63..d361065 100644
--- a/kustomize/bases/llama-swap/configurations/config.yaml
+++ b/kustomize/bases/llama-swap/configurations/config.yaml
@@ -6,18 +6,34 @@ models:
     checkEndpoint: /v1/audio/speech
     unlisted: true
 
-  Gemma-3-12B:
+  Gemma-3-4B:  # NOTE(review): --gpu-layers 49 below is unchanged from the 12B entry; confirm for 4B
     cmd: >
       /app/llama-server
-      -hf unsloth/gemma-3-12b-it-qat-GGUF
-      -hff gemma-3-12b-it-qat-UD-Q4_K_XL.gguf
+      -hf unsloth/gemma-3-4b-it-qat-GGUF
+      -hff gemma-3-4b-it-qat-UD-Q6_K_XL.gguf
       --port ${PORT}
       --flash-attn
       --cache-type-k q8_0
       --cache-type-v q8_0
       --temp 1.0 --top-k 64 --min-p 0.00 --top-p 0.95 --repeat-penalty 1.0
       --gpu-layers 49
-      
 
+  Qwen3-Embedding-0.6B:  # serves embeddings (last-token pooling); -ub 8192 per the Qwen3-Embedding model card (verify)
+    cmd: >
+      /app/llama-server
+      -hf Qwen/Qwen3-Embedding-0.6B-GGUF:Q8_0
+      --port ${PORT}
+      --embedding
+      --pooling last
+      -ub 8192
+      --gpu-layers 29
+
+groups:
+  embedding:  # group whose only member is the embedding model
+    persistent: true  # NOTE(review): presumably stops other groups from unloading this one; confirm in llama-swap docs
+    swap: true  # NOTE(review): presumably one member loaded at a time; no effect while there is a single member
+    exclusive: false  # NOTE(review): presumably leaves models in other groups loaded; confirm in llama-swap docs
+    members:
+      - Qwen3-Embedding-0.6B
 # --no-kv-offload
-# --ctx-size 16384
\ No newline at end of file
+# --ctx-size 16384