1
0
Fork 0

add llm configs

This commit is contained in:
Massaki Archambault 2024-07-06 12:37:26 -04:00
parent e5ef004238
commit 8788fac6d5
18 changed files with 396 additions and 124 deletions

View File

@ -1,10 +1,12 @@
resources:
- litellm-deployment.yaml
namePrefix: litellm-
commonLabels:
app.kubernetes.io/component: litellm
app.kubernetes.io/name: litellm
configMapGenerator:
- name: litellm-config
- name: proxy-config
literals:
- config.yml=

View File

@ -1,59 +1,51 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: litellm
name: proxy
labels:
app.kubernetes.io/component: proxy
spec:
selector:
matchLabels:
app.kubernetes.io/component: litellm
app.kubernetes.io/component: proxy
template:
metadata:
labels:
app.kubernetes.io/component: litellm
app.kubernetes.io/component: proxy
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
# Image only supports amd64
- key: kubernetes.io/arch
operator: In
values:
- amd64
containers:
- name: litellm
image: ghcr.io/berriai/litellm:main-latest
args: ['--config', '/config/config.yml']
env: []
args: ['--config', '/app/config.yaml']
ports:
- name: http
containerPort: 4000
resources:
requests:
memory: 200Mi
cpu: 200m
memory: 200Mi
limits:
memory: 200Mi
cpu: 200m
ports:
- containerPort: 8000
name: http
memory: 200Mi
volumeMounts:
- name: litellm-config
mountPath: /config
- name: proxy-config
mountPath: /app/config.yaml
subPath: config.yaml
volumes:
- name: litellm-config
- name: proxy-config
configMap:
name: litellm-config
name: proxy-config
---
apiVersion: v1
kind: Service
metadata:
name: litellm
name: proxy
labels:
app.kubernetes.io/component: litellm
app.kubernetes.io/component: proxy
spec:
selector:
app.kubernetes.io/component: litellm
app.kubernetes.io/component: proxy
ports:
- name: http
port: 8000
port: 4000
targetPort: http

View File

@ -0,0 +1,4 @@
# Ollama server image with ROCm support, patched for gfx1010 GPUs.
FROM ollama/ollama:0.1.48-rocm
# Workaround: let unsupported gfx1010 (RX 5700-series) GPUs reuse the gfx1030
# rocBLAS tensile library.
# https://github.com/ollama/ollama/issues/2503#issuecomment-2159672925
# NOTE: the original used bash brace expansion (gfx{1030,1010}), but Docker's
# shell-form RUN executes via /bin/sh -c; on dash-based images the braces are
# NOT expanded and a broken, literally-named link is created. Both paths are
# spelled out explicitly instead.
RUN ln -s /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1030.dat \
    /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1010.dat

View File

@ -0,0 +1,7 @@
resources:
- ollama-deployment.yaml
namePrefix: ollama-
commonLabels:
app.kubernetes.io/name: ollama

View File

@ -0,0 +1,77 @@
# Ollama model server. Single replica pinned to an amd64 node with an AMD GPU;
# model data lives on the node's disk via a hostPath volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    # Recreate: the hostPath model store must not be mounted by two pods at once.
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      # Defined below; gives this pod higher scheduling priority than
      # default-priority workloads.
      priorityClassName: high-priority
      containers:
        - name: server
          imagePullPolicy: Always
          image: ollama/ollama:rocm
          # image: badjware/ollama-tweak
          # env:
          # - name: HSA_OVERRIDE_GFX_VERSION
          #   value: 10.1.0
          # - name: HSA_ENABLE_SDMA
          #   value: "0"
          resources:
            requests:
              memory: 8Gi
              # cpu: 2000m
            limits:
              memory: 8Gi
              # cpu: 3000m
              # Requires the AMD GPU device plugin DaemonSet to be installed.
              amd.com/gpu: 1
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
# Well above the default of 0, below system-cluster-critical.
value: 1000000
globalDefault: false
---
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 11434
      targetPort: http

View File

@ -0,0 +1,26 @@
resources:
- openwebui-deployment.yaml
- openwebui-ingress.yaml
namePrefix: openwebui-
commonLabels:
app.kubernetes.io/name: openwebui
configMapGenerator:
- name: kustomize-generated-config
literals:
- OPENWEBUI_EXTERNAL_HOST=chat.badjware.dev
- OPENWEBUI_EXTERNAL_URL=https://chat.badjware.dev
replacements:
- source:
kind: ConfigMap
name: kustomize-generated-config
fieldPath: data.OPENWEBUI_EXTERNAL_HOST
targets:
- select:
kind: Ingress
name: server
fieldPaths:
- spec.rules.0.host

View File

@ -0,0 +1,70 @@
# Open WebUI chat frontend, backed by the ollama server service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    # Recreate: the data PVC is ReadWriteOnce, so only one pod may mount it.
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      containers:
        - name: server
          image: ghcr.io/open-webui/open-webui:main
          env:
            # Expose the pod's namespace so the ollama URL below stays
            # namespace-agnostic.
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: OLLAMA_BASE_URL
              # $(NAMESPACE) is expanded by the kubelet from the env var above.
              value: http://ollama-server.$(NAMESPACE).svc:11434
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 1000m
              memory: 1Gi
          ports:
            - containerPort: 8080
              name: http
          volumeMounts:
            - name: server-data-pv
              mountPath: /app/backend/data
      volumes:
        - name: server-data-pv
          persistentVolumeClaim:
            claimName: server-data-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-data-pvc
  labels:
    # NOTE(review): "server" here differs from the kustomization's commonLabels
    # (app.kubernetes.io/name: openwebui), which will override it — confirm
    # this explicit label is intentional.
    app.kubernetes.io/name: server
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 8080
      targetPort: http

View File

@ -0,0 +1,19 @@
# Ingress for the Open WebUI frontend.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: server
  labels:
    # Fixed: was "ollama", a copy-paste from the ollama base; this resource
    # belongs to the openwebui base (matches its kustomization commonLabels).
    app.kubernetes.io/name: openwebui
    probe: blackbox-http
spec:
  rules:
    # Placeholder substituted by the kustomize replacement sourced from
    # the kustomize-generated-config ConfigMap (OPENWEBUI_EXTERNAL_HOST).
    - host: ${OPENWEBUI_EXTERNAL_HOST}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: server
                port:
                  name: http

View File

@ -0,0 +1,26 @@
resources:
- sillytavern-deployment.yaml
- sillytavern-ingress.yaml
namePrefix: sillytavern-
commonLabels:
app.kubernetes.io/name: sillytavern
configMapGenerator:
- name: kustomize-generated-config
literals:
- SILLYTAVERN_EXTERNAL_HOST=tavern.badjware.dev
- SILLYTAVERN_EXTERNAL_URL=https://tavern.badjware.dev
replacements:
- source:
kind: ConfigMap
name: kustomize-generated-config
fieldPath: data.SILLYTAVERN_EXTERNAL_HOST
targets:
- select:
kind: Ingress
name: server
fieldPaths:
- spec.rules.0.host

View File

@ -0,0 +1,102 @@
# SillyTavern chat frontend. Config, user data, and third-party extensions
# are persisted on separate PVCs.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  strategy:
    # Recreate: the PVCs are ReadWriteOnce, so only one pod may mount them.
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      containers:
        - name: sillytavern
          image: ghcr.io/sillytavern/sillytavern:latest
          resources:
            requests:
              memory: 500Mi
              cpu: 200m
            limits:
              memory: 500Mi
              cpu: 200m
          ports:
            - containerPort: 8000
              name: http
          volumeMounts:
            - name: sillytavern-extensions
              mountPath: /home/node/app/public/scripts/extensions/third-party
            - name: sillytavern-config
              mountPath: /home/node/app/config
            - name: sillytavern-user
              mountPath: /home/node/app/public/user
      volumes:
        - name: sillytavern-extensions
          persistentVolumeClaim:
            claimName: server-extensions
        - name: sillytavern-config
          persistentVolumeClaim:
            claimName: server-config
        - name: sillytavern-user
          persistentVolumeClaim:
            claimName: server-user
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-extensions
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-config
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-user
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 8000
      targetPort: http

View File

@ -0,0 +1,19 @@
# Ingress for the SillyTavern frontend.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: server
  labels:
    app.kubernetes.io/name: sillytavern
    probe: blackbox-http
spec:
  rules:
    # Placeholder substituted by the kustomize replacement sourced from
    # the kustomize-generated-config ConfigMap (SILLYTAVERN_EXTERNAL_HOST).
    - host: ${SILLYTAVERN_EXTERNAL_HOST}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: server
                port:
                  name: http

View File

@ -1,18 +0,0 @@
version: 1.0.1
cache: true
endpoints:
custom:
- name: "LiteLLM"
iconURL: https://ollama.ai/public/icon.png
apiKey: "${LITELLM_MASTER_KEY}"
baseURL: "http://librechat-litellm.llm.svc:8000"
models:
default: ["openhermes", "solar"]
fetch: true
titleConvo: true
titleModel: "solar"
summarize: false
summaryModel: "solar"
forcePrompt: false
modelDisplayLabel: "LiteLLM"
dropParams: ["stop", "frequency_penalty", "presence_penalty"]

View File

@ -0,0 +1,9 @@
# LiteLLM proxy routing: public model names mapped to the ollama backend.
model_list:
  - model_name: llama3
    litellm_params:
      # The ollama_chat/ prefix routes through ollama's chat endpoint
      # (recommended by LiteLLM over plain ollama/ for chat models).
      model: ollama_chat/llama3
      api_base: http://ollama-server:11434
  - model_name: gemma2
    litellm_params:
      model: ollama_chat/gemma2
      api_base: http://ollama-server:11434

View File

@ -1,32 +0,0 @@
model_list:
- model_name: mistral
litellm_params:
model: ollama/mistral:7b-instruct-v0.2-q5_K_M
api_base: http://192.168.30.20:11434
stream: True
- model_name: openhermes
litellm_params:
model: ollama/openhermes:7b-mistral-v2.5-q5_K_M
api_base: http://192.168.30.20:11434
# stream: True
# - model_name: dolphin-mistral
# litellm_params:
# model: ollama/dolphin-mistral
# api_base: http://192.168.30.20:11434
# # stream: True
- model_name: solar
litellm_params:
model: ollama/solar:10.7b-instruct-v1-q5_K_M
api_base: http://192.168.30.20:11434
stream: True
- model_name: deepseek-coder-6.7b
litellm_params:
model: ollama/deepseek-coder:6.7b-instruct-q8_0
api_base: http://192.168.30.20:11434
stream: True
litellm_settings:
drop_params: True
general_settings:
master_key: "os.environ/MASTER_KEY"

View File

@ -13,8 +13,6 @@ resources:
- ../../overlays/actual
- ../../overlays/llm
- probes/snmp-exporter.yaml
- resources/litellm-externalsecret.yaml
- resources/litellm-stripprefix.yaml
# resources:
# - probes/external-services-bobcat-miner.yaml
@ -54,32 +52,23 @@ images:
newTag: "7.2"
- name: bitnami/kubectl
newTag: "1.29"
# - name: ghcr.io/danny-avila/librechat
# newTag: v0.6.10
# - name: ghcr.io/berriai/litellm
# newTag: main-v1.24.6
configMapGenerator:
- name: librechat-server-config
- name: litellm-proxy-config
namespace: llm
behavior: replace
files:
- librechat.yaml=configurations/librechat/librechat.yaml
- name: librechat-litellm-config
namespace: llm
behavior: replace
files:
- config.yml=configurations/litellm/config.yml
- config.yaml=./configurations/litellm/config.yaml
# - name: home-assistant-server-config
# namespace: home-assistant
# behavior: replace
# files:
# - configuration.yaml=configurations/home-assistant/configuration.yaml
# - name: ecommerce-exporter-config
# namespace: monitoring
# behavior: replace
# files:
# - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml
# - name: ecommerce-exporter-config
# namespace: monitoring
# behavior: replace
# files:
# - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml
secretGenerator:
- name: prometheus-additional-scrape-configs
@ -109,21 +98,6 @@ patches:
kind: Deployment
name: deluge-server
path: patches/deluge-deployment-patch.yaml
- target:
version: v1
kind: Deployment
name: librechat-server
path: patches/librechat-deployment-patch.yaml
- target:
version: v1
kind: Ingress
name: librechat-server
path: patches/librechat-ingress-patch.yaml
- target:
version: v1
kind: Deployment
name: librechat-litellm
path: patches/litellm-deployment-patch.yaml
# - target:
# version: v1
# kind: Prometheus

View File

@ -1,9 +0,0 @@
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
name: litellm-stripprefix
namespace: llm
spec:
stripPrefix:
prefixes:
- /backend

View File

@ -1,5 +1,8 @@
resources:
- namespace.yaml
- ../../bases/librechat
- ../../bases/ollama
- ../../bases/litellm
- ../../bases/openwebui
# - ../../bases/sillytavern
namespace: llm

View File

@ -2,4 +2,5 @@ resources:
- ../../bases/longhorn
- ../../bases/traefik
- ../../bases/external-secrets
- https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml
- clustersecretstore.yaml