tweak llm
This commit is contained in:
parent
8788fac6d5
commit
598dd7da2b
|
@ -13,6 +13,16 @@ spec:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: proxy
|
app.kubernetes.io/component: proxy
|
||||||
spec:
|
spec:
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
nodeSelectorTerms:
|
||||||
|
- matchExpressions:
|
||||||
|
# Image only supports amd64
|
||||||
|
- key: kubernetes.io/arch
|
||||||
|
operator: In
|
||||||
|
values:
|
||||||
|
- amd64
|
||||||
containers:
|
containers:
|
||||||
- name: litellm
|
- name: litellm
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
image: ghcr.io/berriai/litellm:main-latest
|
||||||
|
|
|
@ -26,10 +26,10 @@ spec:
|
||||||
value: http://ollama-server.$(NAMESPACE).svc:11434
|
value: http://ollama-server.$(NAMESPACE).svc:11434
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 500m
|
cpu: 250m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
limits:
|
limits:
|
||||||
cpu: 1000m
|
cpu: 500m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
|
|
@ -1,9 +1,13 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: llama3
|
- model_name: llama3-8b
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: ollama_chat/llama3
|
model: ollama_chat/llama3
|
||||||
api_base: http://ollama-server:11434
|
api_base: http://ollama-server:11434
|
||||||
- model_name: gemma2
|
- model_name: gemma2-9b
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: ollama_chat/gemma2
|
model: ollama_chat/gemma2
|
||||||
|
api_base: http://ollama-server:11434
|
||||||
|
- model_name: phi3-3b
|
||||||
|
litellm_params:
|
||||||
|
model: ollama_chat/phi3:mini
|
||||||
api_base: http://ollama-server:11434
|
api_base: http://ollama-server:11434
|
Loading…
Reference in New Issue