1
0
Fork 0

tweak llm

This commit is contained in:
Massaki Archambault 2024-07-06 17:05:05 -04:00
parent 8788fac6d5
commit 598dd7da2b
3 changed files with 18 additions and 4 deletions

View File

@@ -13,6 +13,16 @@ spec:
       labels:
         app.kubernetes.io/component: proxy
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  # Image only supports amd64
+                  - key: kubernetes.io/arch
+                    operator: In
+                    values:
+                      - amd64
       containers:
         - name: litellm
           image: ghcr.io/berriai/litellm:main-latest

View File

@@ -26,10 +26,10 @@ spec:
               value: http://ollama-server.$(NAMESPACE).svc:11434
           resources:
             requests:
-              cpu: 500m
+              cpu: 250m
               memory: 1Gi
             limits:
-              cpu: 1000m
+              cpu: 500m
               memory: 1Gi
           ports:
             - containerPort: 8080

View File

@@ -1,9 +1,13 @@
 model_list:
-  - model_name: llama3
+  - model_name: llama3-8b
     litellm_params:
       model: ollama_chat/llama3
       api_base: http://ollama-server:11434
-  - model_name: gemma2
+  - model_name: gemma2-9b
     litellm_params:
       model: ollama_chat/gemma2
       api_base: http://ollama-server:11434
+  - model_name: phi3-3b
+    litellm_params:
+      model: ollama_chat/phi3:mini
+      api_base: http://ollama-server:11434