From 598dd7da2b988c091631bdb42b7069addb162ae3 Mon Sep 17 00:00:00 2001
From: Massaki Archambault
Date: Sat, 6 Jul 2024 17:05:05 -0400
Subject: [PATCH] tweak llm

---
 kustomize/bases/litellm/litellm-deployment.yaml       | 10 ++++++++++
 kustomize/bases/openwebui/openwebui-deployment.yaml   |  4 ++--
 kustomize/env/prod/configurations/litellm/config.yaml |  8 ++++++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/kustomize/bases/litellm/litellm-deployment.yaml b/kustomize/bases/litellm/litellm-deployment.yaml
index d4236c7..8b35975 100644
--- a/kustomize/bases/litellm/litellm-deployment.yaml
+++ b/kustomize/bases/litellm/litellm-deployment.yaml
@@ -13,6 +13,16 @@ spec:
       labels:
         app.kubernetes.io/component: proxy
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              # Image only supports amd64
+              - key: kubernetes.io/arch
+                operator: In
+                values:
+                - amd64
       containers:
       - name: litellm
         image: ghcr.io/berriai/litellm:main-latest
diff --git a/kustomize/bases/openwebui/openwebui-deployment.yaml b/kustomize/bases/openwebui/openwebui-deployment.yaml
index 4d7a010..42383c7 100644
--- a/kustomize/bases/openwebui/openwebui-deployment.yaml
+++ b/kustomize/bases/openwebui/openwebui-deployment.yaml
@@ -26,10 +26,10 @@ spec:
           value: http://ollama-server.$(NAMESPACE).svc:11434
         resources:
           requests:
-            cpu: 500m
+            cpu: 250m
             memory: 1Gi
           limits:
-            cpu: 1000m
+            cpu: 500m
             memory: 1Gi
         ports:
         - containerPort: 8080
diff --git a/kustomize/env/prod/configurations/litellm/config.yaml b/kustomize/env/prod/configurations/litellm/config.yaml
index 70332db..74ef45a 100644
--- a/kustomize/env/prod/configurations/litellm/config.yaml
+++ b/kustomize/env/prod/configurations/litellm/config.yaml
@@ -1,9 +1,13 @@
 model_list:
-  - model_name: llama3
+  - model_name: llama3-8b
     litellm_params:
       model: ollama_chat/llama3
       api_base: http://ollama-server:11434
-  - model_name: gemma2
+  - model_name: gemma2-9b
     litellm_params:
       model: ollama_chat/gemma2
+      api_base: http://ollama-server:11434
+  - model_name: phi3-3b
+    litellm_params:
+      model: ollama_chat/phi3:mini
       api_base: http://ollama-server:11434
\ No newline at end of file