tweak llm
This commit is contained in:
parent
8788fac6d5
commit
598dd7da2b
|
@ -13,6 +13,16 @@ spec:
|
|||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
spec:
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
# Image only supports amd64
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values:
|
||||
- amd64
|
||||
containers:
|
||||
- name: litellm
|
||||
image: ghcr.io/berriai/litellm:main-latest
|
||||
|
|
|
@ -26,10 +26,10 @@ spec:
|
|||
value: http://ollama-server.$(NAMESPACE).svc:11434
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 1000m
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
model_list:
|
||||
- model_name: llama3
|
||||
- model_name: llama3-8b
|
||||
litellm_params:
|
||||
model: ollama_chat/llama3
|
||||
api_base: http://ollama-server:11434
|
||||
- model_name: gemma2
|
||||
- model_name: gemma2-9b
|
||||
litellm_params:
|
||||
model: ollama_chat/gemma2
|
||||
api_base: http://ollama-server:11434
|
||||
- model_name: phi3-3b
|
||||
litellm_params:
|
||||
model: ollama_chat/phi3:mini
|
||||
api_base: http://ollama-server:11434
|
Loading…
Reference in New Issue