add llm configs
This commit is contained in:
parent
e5ef004238
commit
8788fac6d5
|
@ -1,10 +1,12 @@
|
|||
resources:
|
||||
- litellm-deployment.yaml
|
||||
|
||||
namePrefix: litellm-
|
||||
|
||||
commonLabels:
|
||||
app.kubernetes.io/component: litellm
|
||||
app.kubernetes.io/name: litellm
|
||||
|
||||
configMapGenerator:
|
||||
- name: litellm-config
|
||||
- name: proxy-config
|
||||
literals:
|
||||
- config.yml=
|
|
@ -1,59 +1,51 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: litellm
|
||||
name: proxy
|
||||
labels:
|
||||
app.kubernetes.io/component: proxy
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: litellm
|
||||
app.kubernetes.io/component: proxy
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: litellm
|
||||
app.kubernetes.io/component: proxy
|
||||
spec:
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
# Image only supports amd64
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values:
|
||||
- amd64
|
||||
containers:
|
||||
- name: litellm
|
||||
image: ghcr.io/berriai/litellm:main-latest
|
||||
args: ['--config', '/config/config.yml']
|
||||
env: []
|
||||
args: ['--config', '/app/config.yaml']
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 4000
|
||||
resources:
|
||||
requests:
|
||||
memory: 200Mi
|
||||
cpu: 200m
|
||||
memory: 200Mi
|
||||
limits:
|
||||
memory: 200Mi
|
||||
cpu: 200m
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http
|
||||
memory: 200Mi
|
||||
volumeMounts:
|
||||
- name: litellm-config
|
||||
mountPath: /config
|
||||
- name: proxy-config
|
||||
mountPath: /app/config.yaml
|
||||
subPath: config.yaml
|
||||
volumes:
|
||||
- name: litellm-config
|
||||
- name: proxy-config
|
||||
configMap:
|
||||
name: litellm-config
|
||||
name: proxy-config
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: litellm
|
||||
name: proxy
|
||||
labels:
|
||||
app.kubernetes.io/component: litellm
|
||||
app.kubernetes.io/component: proxy
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/component: litellm
|
||||
app.kubernetes.io/component: proxy
|
||||
ports:
|
||||
- name: http
|
||||
port: 8000
|
||||
targetPort: http
|
||||
port: 4000
|
||||
targetPort: http
|
|
@ -0,0 +1,4 @@
|
|||
FROM ollama/ollama:0.1.48-rocm

# Expose the gfx1030 Tensile library under the gfx1010 file name, see
# https://github.com/ollama/ollama/issues/2503#issuecomment-2159672925
# Both paths are written out in full: brace expansion ({1030,1010}) is a bash
# feature and is not guaranteed by the /bin/sh that the shell form of RUN uses.
RUN ln -s /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1030.dat \
          /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1010.dat
|
|
@ -0,0 +1,7 @@
|
|||
# Kustomize base for ollama: every resource name gets the "ollama-" prefix and
# the app.kubernetes.io/name label below.
namePrefix: ollama-

commonLabels:
  app.kubernetes.io/name: ollama

resources:
  - ollama-deployment.yaml
|
|
@ -0,0 +1,77 @@
|
|||
---
# ollama server Deployment: one replica, restricted to amd64 nodes, requesting
# one amd.com/gpu and keeping /root/.ollama on the node under /var/lib/ollama.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      priorityClassName: high-priority
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values: [amd64]
      containers:
        - name: server
          image: ollama/ollama:rocm
          # image: badjware/ollama-tweak
          imagePullPolicy: Always
          # env:
          #   - name: HSA_OVERRIDE_GFX_VERSION
          #     value: 10.1.0
          #   - name: HSA_ENABLE_SDMA
          #     value: "0"
          ports:
            - name: http
              containerPort: 11434
          resources:
            requests:
              memory: 8Gi
              # cpu: 2000m
            limits:
              memory: 8Gi
              # cpu: 3000m
              amd.com/gpu: 1
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        # Model data lives on the node's filesystem, not in a PVC.
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
|
||||
---
|
||||
# PriorityClass named by the server Deployment's priorityClassName.
# It is not the cluster-wide default (globalDefault: false).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
globalDefault: false
value: 1000000
|
||||
---
|
||||
# Service in front of the pods labelled component=server; port 11434 forwards
# to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  ports:
    - name: http
      port: 11434
      targetPort: http
  selector:
    app.kubernetes.io/component: server
|
|
@ -0,0 +1,26 @@
|
|||
# Kustomize base for Open WebUI. Resource names get the "openwebui-" prefix and
# the external host name is injected into the Ingress from a generated ConfigMap.
namePrefix: openwebui-

commonLabels:
  app.kubernetes.io/name: openwebui

resources:
  - openwebui-deployment.yaml
  - openwebui-ingress.yaml

configMapGenerator:
  - name: kustomize-generated-config
    literals:
      - OPENWEBUI_EXTERNAL_HOST=chat.badjware.dev
      - OPENWEBUI_EXTERNAL_URL=https://chat.badjware.dev

replacements:
  # Copy OPENWEBUI_EXTERNAL_HOST into the host of the Ingress' first rule.
  - source:
      kind: ConfigMap
      name: kustomize-generated-config
      fieldPath: data.OPENWEBUI_EXTERNAL_HOST
    targets:
      - select:
          kind: Ingress
          name: server
        fieldPaths:
          - spec.rules.0.host
|
|
@ -0,0 +1,70 @@
|
|||
---
# Open WebUI Deployment: a single replica that reaches ollama through
# OLLAMA_BASE_URL and keeps /app/backend/data on the server-data-pvc claim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      containers:
        - name: server
          image: ghcr.io/open-webui/open-webui:main
          env:
            # NAMESPACE has to stay ahead of OLLAMA_BASE_URL: $(NAMESPACE) is
            # only expanded from variables defined earlier in this list.
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: OLLAMA_BASE_URL
              value: http://ollama-server.$(NAMESPACE).svc:11434
          ports:
            - name: http
              containerPort: 8080
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 1000m
              memory: 1Gi
          volumeMounts:
            - name: server-data-pv
              mountPath: /app/backend/data
      volumes:
        - name: server-data-pv
          persistentVolumeClaim:
            claimName: server-data-pvc
|
||||
---
|
||||
# Claim backing the server Deployment's /app/backend/data mount.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-data-pvc
  labels:
    # Matches the value the openwebui kustomization's commonLabels applies;
    # the previous value ("server") was overwritten at build time anyway.
    app.kubernetes.io/name: openwebui
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
|
||||
---
|
||||
# Service in front of the pods labelled component=server; port 8080 forwards
# to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  ports:
    - name: http
      port: 8080
      targetPort: http
  selector:
    app.kubernetes.io/component: server
|
|
@ -0,0 +1,19 @@
|
|||
# Ingress sending every path of the external host to the "server" Service's
# "http" port. The ${OPENWEBUI_EXTERNAL_HOST} placeholder is overwritten by the
# kustomization's replacement targeting spec.rules.0.host.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: server
  labels:
    # Was "ollama" (copy-paste); this resource belongs to openwebui.
    app.kubernetes.io/name: openwebui
    probe: blackbox-http
spec:
  rules:
    - host: ${OPENWEBUI_EXTERNAL_HOST}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: server
                port:
                  name: http
|
|
@ -0,0 +1,26 @@
|
|||
# Kustomize base for SillyTavern. Resource names get the "sillytavern-" prefix
# and the external host name is injected into the Ingress from a generated
# ConfigMap.
namePrefix: sillytavern-

commonLabels:
  app.kubernetes.io/name: sillytavern

resources:
  - sillytavern-deployment.yaml
  - sillytavern-ingress.yaml

configMapGenerator:
  - name: kustomize-generated-config
    literals:
      - SILLYTAVERN_EXTERNAL_HOST=tavern.badjware.dev
      - SILLYTAVERN_EXTERNAL_URL=https://tavern.badjware.dev

replacements:
  # Copy SILLYTAVERN_EXTERNAL_HOST into the host of the Ingress' first rule.
  - source:
      kind: ConfigMap
      name: kustomize-generated-config
      fieldPath: data.SILLYTAVERN_EXTERNAL_HOST
    targets:
      - select:
          kind: Ingress
          name: server
        fieldPaths:
          - spec.rules.0.host
|
|
@ -0,0 +1,102 @@
|
|||
---
# SillyTavern Deployment: amd64-only pod with three PVC-backed mounts
# (third-party extensions, config, user data).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values: [amd64]
      containers:
        - name: sillytavern
          image: ghcr.io/sillytavern/sillytavern:latest
          ports:
            - name: http
              containerPort: 8000
          resources:
            requests:
              cpu: 200m
              memory: 500Mi
            limits:
              cpu: 200m
              memory: 500Mi
          volumeMounts:
            - name: sillytavern-extensions
              mountPath: /home/node/app/public/scripts/extensions/third-party
            - name: sillytavern-config
              mountPath: /home/node/app/config
            - name: sillytavern-user
              mountPath: /home/node/app/public/user
      volumes:
        - name: sillytavern-extensions
          persistentVolumeClaim:
            claimName: server-extensions
        - name: sillytavern-config
          persistentVolumeClaim:
            claimName: server-config
        - name: sillytavern-user
          persistentVolumeClaim:
            claimName: server-user
|
||||
---
|
||||
# Claim mounted at /home/node/app/public/scripts/extensions/third-party.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-extensions
spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
|
||||
---
|
||||
# Claim mounted at /home/node/app/config.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-config
spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
|
||||
---
|
||||
# Claim mounted at /home/node/app/public/user.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-user
spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
|
||||
---
|
||||
# Service in front of the pods labelled component=server; port 8000 forwards
# to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  ports:
    - name: http
      port: 8000
      targetPort: http
  selector:
    app.kubernetes.io/component: server
|
|
@ -0,0 +1,19 @@
|
|||
# Ingress sending every path of the external host to the "server" Service's
# "http" port. The ${SILLYTAVERN_EXTERNAL_HOST} placeholder is overwritten by
# the kustomization's replacement targeting spec.rules.0.host.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: server
  labels:
    probe: blackbox-http
    app.kubernetes.io/name: sillytavern
spec:
  rules:
    - host: ${SILLYTAVERN_EXTERNAL_HOST}
      http:
        paths:
          - backend:
              service:
                name: server
                port:
                  name: http
            path: /
            pathType: Prefix
|
|
@ -1,18 +0,0 @@
|
|||
# LibreChat configuration declaring one custom endpoint ("LiteLLM") served by
# the librechat-litellm service in the llm namespace.
version: 1.0.1
cache: true
endpoints:
  custom:
    - name: "LiteLLM"
      modelDisplayLabel: "LiteLLM"
      iconURL: https://ollama.ai/public/icon.png
      baseURL: "http://librechat-litellm.llm.svc:8000"
      apiKey: "${LITELLM_MASTER_KEY}"
      models:
        default:
          - "openhermes"
          - "solar"
        fetch: true
      titleConvo: true
      titleModel: "solar"
      summarize: false
      summaryModel: "solar"
      forcePrompt: false
      dropParams:
        - "stop"
        - "frequency_penalty"
        - "presence_penalty"
|
|
@ -0,0 +1,9 @@
|
|||
# LiteLLM model list: both models are served through the ollama_chat provider
# at the ollama-server service on port 11434.
model_list:
  - model_name: llama3
    litellm_params:
      api_base: http://ollama-server:11434
      model: ollama_chat/llama3
  - model_name: gemma2
    litellm_params:
      api_base: http://ollama-server:11434
      model: ollama_chat/gemma2
|
|
@ -1,32 +0,0 @@
|
|||
# LiteLLM proxy configuration: ollama-backed models on 192.168.30.20:11434.
# Booleans are written in canonical lowercase form (true), which parses to the
# same value as the previous "True" spelling.
model_list:
  - model_name: mistral
    litellm_params:
      model: ollama/mistral:7b-instruct-v0.2-q5_K_M
      api_base: http://192.168.30.20:11434
      stream: true
  - model_name: openhermes
    litellm_params:
      model: ollama/openhermes:7b-mistral-v2.5-q5_K_M
      api_base: http://192.168.30.20:11434
      # stream: true
  # - model_name: dolphin-mistral
  #   litellm_params:
  #     model: ollama/dolphin-mistral
  #     api_base: http://192.168.30.20:11434
  #     # stream: true
  - model_name: solar
    litellm_params:
      model: ollama/solar:10.7b-instruct-v1-q5_K_M
      api_base: http://192.168.30.20:11434
      stream: true
  - model_name: deepseek-coder-6.7b
    litellm_params:
      model: ollama/deepseek-coder:6.7b-instruct-q8_0
      api_base: http://192.168.30.20:11434
      stream: true

litellm_settings:
  drop_params: true

general_settings:
  # The master key is read from the MASTER_KEY environment variable.
  master_key: "os.environ/MASTER_KEY"
|
|
@ -13,8 +13,6 @@ resources:
|
|||
- ../../overlays/actual
|
||||
- ../../overlays/llm
|
||||
- probes/snmp-exporter.yaml
|
||||
- resources/litellm-externalsecret.yaml
|
||||
- resources/litellm-stripprefix.yaml
|
||||
|
||||
# resources:
|
||||
# - probes/external-services-bobcat-miner.yaml
|
||||
|
@ -54,32 +52,23 @@ images:
|
|||
newTag: "7.2"
|
||||
- name: bitnami/kubectl
|
||||
newTag: "1.29"
|
||||
# - name: ghcr.io/danny-avila/librechat
|
||||
# newTag: v0.6.10
|
||||
# - name: ghcr.io/berriai/litellm
|
||||
# newTag: main-v1.24.6
|
||||
|
||||
configMapGenerator:
|
||||
- name: librechat-server-config
|
||||
- name: litellm-proxy-config
|
||||
namespace: llm
|
||||
behavior: replace
|
||||
files:
|
||||
- librechat.yaml=configurations/librechat/librechat.yaml
|
||||
- name: librechat-litellm-config
|
||||
namespace: llm
|
||||
behavior: replace
|
||||
files:
|
||||
- config.yml=configurations/litellm/config.yml
|
||||
- config.yaml=./configurations/litellm/config.yaml
|
||||
# - name: home-assistant-server-config
|
||||
# namespace: home-assistant
|
||||
# behavior: replace
|
||||
# files:
|
||||
# - configuration.yaml=configurations/home-assistant/configuration.yaml
|
||||
# - name: ecommerce-exporter-config
|
||||
# namespace: monitoring
|
||||
# behavior: replace
|
||||
# files:
|
||||
# - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml
|
||||
# - name: ecommerce-exporter-config
|
||||
# namespace: monitoring
|
||||
# behavior: replace
|
||||
# files:
|
||||
# - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml
|
||||
|
||||
secretGenerator:
|
||||
- name: prometheus-additional-scrape-configs
|
||||
|
@ -109,21 +98,6 @@ patches:
|
|||
kind: Deployment
|
||||
name: deluge-server
|
||||
path: patches/deluge-deployment-patch.yaml
|
||||
- target:
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: librechat-server
|
||||
path: patches/librechat-deployment-patch.yaml
|
||||
- target:
|
||||
version: v1
|
||||
kind: Ingress
|
||||
name: librechat-server
|
||||
path: patches/librechat-ingress-patch.yaml
|
||||
- target:
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: librechat-litellm
|
||||
path: patches/litellm-deployment-patch.yaml
|
||||
# - target:
|
||||
# version: v1
|
||||
# kind: Prometheus
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
# Traefik middleware in the llm namespace that strips the /backend prefix
# from request paths.
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  namespace: llm
  name: litellm-stripprefix
spec:
  stripPrefix:
    prefixes: [/backend]
|
|
@ -1,5 +1,8 @@
|
|||
resources:
|
||||
- namespace.yaml
|
||||
- ../../bases/librechat
|
||||
- ../../bases/ollama
|
||||
- ../../bases/litellm
|
||||
- ../../bases/openwebui
|
||||
# - ../../bases/sillytavern
|
||||
|
||||
namespace: llm
|
||||
|
|
|
@ -2,4 +2,5 @@ resources:
|
|||
- ../../bases/longhorn
|
||||
- ../../bases/traefik
|
||||
- ../../bases/external-secrets
|
||||
- https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml
|
||||
- clustersecretstore.yaml
|
Loading…
Reference in New Issue