tweak llm

2024-07-06 17:05:05 -04:00 · 2024-07-06 17:05:05 -04:00 · 598dd7da2b
parent 8788fac6d5
commit 598dd7da2b
3 changed files with 18 additions and 4 deletions
--- a/kustomize/bases/litellm/litellm-deployment.yaml
+++ b/kustomize/bases/litellm/litellm-deployment.yaml
@@ -13,6 +13,16 @@ spec:
      labels:
        app.kubernetes.io/component: proxy
    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  # Image only supports amd64
+                  - key: kubernetes.io/arch
+                    operator: In
+                    values:
+                      - amd64
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:main-latest
--- a/kustomize/bases/openwebui/openwebui-deployment.yaml
+++ b/kustomize/bases/openwebui/openwebui-deployment.yaml
@@ -26,10 +26,10 @@ spec:
              value: http://ollama-server.$(NAMESPACE).svc:11434
          resources:
            requests:
-              cpu: 500m
+              cpu: 250m
              memory: 1Gi
            limits:
-              cpu: 1000m
+              cpu: 500m
              memory: 1Gi
          ports:
            - containerPort: 8080
--- a/kustomize/env/prod/configurations/litellm/config.yaml
+++ b/kustomize/env/prod/configurations/litellm/config.yaml
@@ -1,9 +1,13 @@
 model_list:
-  - model_name: llama3
+  - model_name: llama3-8b
    litellm_params:
      model: ollama_chat/llama3
      api_base: http://ollama-server:11434
-  - model_name: gemma2
+  - model_name: gemma2-9b
    litellm_params:
      model: ollama_chat/gemma2
+      api_base: http://ollama-server:11434
+  - model_name: phi3-3b
+    litellm_params:
+      model: ollama_chat/phi3:mini
      api_base: http://ollama-server:11434