1
0
Fork 0

Compare commits

..

2 Commits

Author SHA1 Message Date
Massaki Archambault 8788fac6d5 add llm configs 2024-07-06 12:37:26 -04:00
Massaki Archambault e5ef004238 upgrade longhorn 2024-07-05 20:35:58 -04:00
19 changed files with 397 additions and 125 deletions

View File

@ -1,10 +1,12 @@
resources: resources:
- litellm-deployment.yaml - litellm-deployment.yaml
namePrefix: litellm-
commonLabels: commonLabels:
app.kubernetes.io/component: litellm app.kubernetes.io/name: litellm
configMapGenerator: configMapGenerator:
- name: litellm-config - name: proxy-config
literals: literals:
- config.yml= - config.yml=

View File

@ -1,59 +1,51 @@
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: Deployment
metadata: metadata:
name: litellm name: proxy
labels:
app.kubernetes.io/component: proxy
spec: spec:
selector: selector:
matchLabels: matchLabels:
app.kubernetes.io/component: litellm app.kubernetes.io/component: proxy
template: template:
metadata: metadata:
labels: labels:
app.kubernetes.io/component: litellm app.kubernetes.io/component: proxy
spec: spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
# Image only supports amd64
- key: kubernetes.io/arch
operator: In
values:
- amd64
containers: containers:
- name: litellm - name: litellm
image: ghcr.io/berriai/litellm:main-latest image: ghcr.io/berriai/litellm:main-latest
args: ['--config', '/config/config.yml'] args: ['--config', '/app/config.yaml']
env: [] ports:
- name: http
containerPort: 4000
resources: resources:
requests: requests:
memory: 200Mi
cpu: 200m cpu: 200m
memory: 200Mi
limits: limits:
memory: 200Mi
cpu: 200m cpu: 200m
ports: memory: 200Mi
- containerPort: 8000
name: http
volumeMounts: volumeMounts:
- name: litellm-config - name: proxy-config
mountPath: /config mountPath: /app/config.yaml
subPath: config.yaml
volumes: volumes:
- name: litellm-config - name: proxy-config
configMap: configMap:
name: litellm-config name: proxy-config
--- ---
apiVersion: v1 apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: litellm name: proxy
labels: labels:
app.kubernetes.io/component: litellm app.kubernetes.io/component: proxy
spec: spec:
selector: selector:
app.kubernetes.io/component: litellm app.kubernetes.io/component: proxy
ports: ports:
- name: http - name: http
port: 8000 port: 4000
targetPort: http targetPort: http

View File

@ -6,7 +6,7 @@ metadata:
spec: spec:
repo: https://charts.longhorn.io repo: https://charts.longhorn.io
chart: longhorn chart: longhorn
version: 1.4.2 version: 1.5.5
targetNamespace: longhorn-system targetNamespace: longhorn-system
set: set:
backupTargetCredentialSecret: s3-backupstore-credentials backupTargetCredentialSecret: s3-backupstore-credentials

View File

@ -0,0 +1,4 @@
# Ollama ROCm image that additionally exposes the gfx1030 rocBLAS Tensile
# library under the gfx1010 file name.
FROM ollama/ollama:0.1.48-rocm
# https://github.com/ollama/ollama/issues/2503#issuecomment-2159672925
# Source and link paths are spelled out rather than written as
# `..._gfx{1030,1010}.dat`: brace expansion is a bash feature, and shell-form
# RUN executes under `/bin/sh -c`, where a POSIX shell would pass the braces
# through literally and create a wrongly named link.
RUN ln -s /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1030.dat \
          /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1010.dat

View File

@ -0,0 +1,7 @@
---
# Kustomize base for Ollama: resource names receive the `ollama-` prefix and
# every object is labelled with the shared application name.
namePrefix: ollama-
commonLabels:
  app.kubernetes.io/name: ollama
resources:
  - ollama-deployment.yaml

View File

@ -0,0 +1,77 @@
---
# Ollama server: a single-replica Deployment restricted to amd64 nodes that
# requests one AMD GPU, the PriorityClass it references, and a Service
# exposing the HTTP API on port 11434.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      priorityClassName: high-priority
      containers:
        - name: server
          imagePullPolicy: Always
          # Pinned to a release tag instead of the floating `rocm` tag: with
          # `imagePullPolicy: Always`, a floating tag lets any pod restart
          # silently switch the server version. This is the same release the
          # ollama-tweak Dockerfile is built from.
          image: ollama/ollama:0.1.48-rocm
          # image: badjware/ollama-tweak
          # env:
          #   - name: HSA_OVERRIDE_GFX_VERSION
          #     value: 10.1.0
          #   - name: HSA_ENABLE_SDMA
          #     value: "0"
          resources:
            requests:
              memory: 8Gi
              # cpu: 2000m
            limits:
              memory: 8Gi
              # cpu: 3000m
              amd.com/gpu: 1
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            # Ollama state is kept on the node's filesystem (hostPath below).
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
---
# Priority class referenced by the Deployment's `priorityClassName`.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
value: 1000000
globalDefault: false
---
# Service routing to the container's named `http` port.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 11434
      targetPort: http

View File

@ -0,0 +1,26 @@
---
# Kustomize base for Open WebUI: prefixes resource names with `openwebui-`,
# applies the shared application label, and injects the external hostname
# into the Ingress from a generated ConfigMap.
namePrefix: openwebui-
commonLabels:
  app.kubernetes.io/name: openwebui
resources:
  - openwebui-deployment.yaml
  - openwebui-ingress.yaml
configMapGenerator:
  - name: kustomize-generated-config
    literals:
      - OPENWEBUI_EXTERNAL_HOST=chat.badjware.dev
      - OPENWEBUI_EXTERNAL_URL=https://chat.badjware.dev
replacements:
  # Copy OPENWEBUI_EXTERNAL_HOST into the host of the first rule of the
  # Ingress named `server`.
  - source:
      kind: ConfigMap
      name: kustomize-generated-config
      fieldPath: data.OPENWEBUI_EXTERNAL_HOST
    targets:
      - select:
          kind: Ingress
          name: server
        fieldPaths:
          - spec.rules.0.host

View File

@ -0,0 +1,70 @@
---
# Open WebUI: a single-replica Deployment with its data directory on a
# PersistentVolumeClaim, and a Service exposing port 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      containers:
        - name: server
          image: ghcr.io/open-webui/open-webui:main
          env:
            # NAMESPACE must be declared before OLLAMA_BASE_URL so that the
            # $(NAMESPACE) reference below can be expanded.
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: OLLAMA_BASE_URL
              value: http://ollama-server.$(NAMESPACE).svc:11434
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 1000m
              memory: 1Gi
          ports:
            - containerPort: 8080
              name: http
          volumeMounts:
            - name: server-data-pv
              mountPath: /app/backend/data
      volumes:
        - name: server-data-pv
          persistentVolumeClaim:
            claimName: server-data-pvc
---
# Claim backing /app/backend/data in the Deployment above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: server-data-pvc
  labels:
    # Component label, matching the Service below. The previous
    # `app.kubernetes.io/name: server` put a component value under the
    # application-name key.
    app.kubernetes.io/component: server
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
# Service routing to the container's named `http` port.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 8080
      targetPort: http

View File

@ -0,0 +1,19 @@
---
# Ingress for Open WebUI: routes every path on the external host to the
# `server` Service's `http` port.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: server
  labels:
    # Was `ollama`, a copy-paste slip: this Ingress belongs to Open WebUI.
    app.kubernetes.io/name: openwebui
    probe: blackbox-http
spec:
  rules:
    # Placeholder; quoted so it is always read as a plain string.
    # NOTE(review): presumably overwritten by the `replacements` entry of the
    # openwebui kustomization (targets spec.rules.0.host) - confirm.
    - host: '${OPENWEBUI_EXTERNAL_HOST}'
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: server
                port:
                  name: http

View File

@ -0,0 +1,26 @@
---
# Kustomize base for SillyTavern: prefixes resource names with
# `sillytavern-`, applies the shared application label, and injects the
# external hostname into the Ingress from a generated ConfigMap.
namePrefix: sillytavern-
commonLabels:
  app.kubernetes.io/name: sillytavern
resources:
  - sillytavern-deployment.yaml
  - sillytavern-ingress.yaml
configMapGenerator:
  - name: kustomize-generated-config
    literals:
      - SILLYTAVERN_EXTERNAL_HOST=tavern.badjware.dev
      - SILLYTAVERN_EXTERNAL_URL=https://tavern.badjware.dev
replacements:
  # Copy SILLYTAVERN_EXTERNAL_HOST into the host of the first rule of the
  # Ingress named `server`.
  - source:
      kind: ConfigMap
      name: kustomize-generated-config
      fieldPath: data.SILLYTAVERN_EXTERNAL_HOST
    targets:
      - select:
          kind: Ingress
          name: server
        fieldPaths:
          - spec.rules.0.host

View File

@ -0,0 +1,102 @@
---
# SillyTavern: one Deployment restricted to amd64 nodes, three
# PersistentVolumeClaims (extensions, config, user data) mounted into it, and
# a Service exposing port 8000.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: server
spec:
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values: [amd64]
      containers:
        - name: sillytavern
          image: ghcr.io/sillytavern/sillytavern:latest
          ports:
            - name: http
              containerPort: 8000
          resources:
            # Requests and limits are identical.
            limits:
              cpu: 200m
              memory: 500Mi
            requests:
              cpu: 200m
              memory: 500Mi
          volumeMounts:
            - mountPath: /home/node/app/public/scripts/extensions/third-party
              name: sillytavern-extensions
            - mountPath: /home/node/app/config
              name: sillytavern-config
            - mountPath: /home/node/app/public/user
              name: sillytavern-user
      volumes:
        # One claim per mounted directory; the claims are declared below.
        - name: sillytavern-extensions
          persistentVolumeClaim:
            claimName: server-extensions
        - name: sillytavern-config
          persistentVolumeClaim:
            claimName: server-config
        - name: sillytavern-user
          persistentVolumeClaim:
            claimName: server-user
---
# Claim for third-party extensions.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: server-extensions
spec:
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 1Gi
---
# Claim for the application config directory.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: server-config
spec:
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 1Gi
---
# Claim for the public user directory.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: server-user
spec:
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 1Gi
---
# Service routing to the container's named `http` port.
kind: Service
apiVersion: v1
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 8000
      targetPort: http

View File

@ -0,0 +1,19 @@
---
# Ingress for SillyTavern: routes every path on the external host to the
# `server` Service's `http` port.
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
  name: server
  labels:
    probe: blackbox-http
    app.kubernetes.io/name: sillytavern
spec:
  rules:
    # NOTE(review): placeholder, presumably overwritten by the `replacements`
    # entry of the sillytavern kustomization (spec.rules.0.host) - confirm.
    - host: ${SILLYTAVERN_EXTERNAL_HOST}
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: server
                port:
                  name: http

View File

@ -1,18 +0,0 @@
---
# LibreChat configuration declaring one custom endpoint, "LiteLLM".
version: 1.0.1
cache: true
endpoints:
  custom:
    - name: "LiteLLM"
      modelDisplayLabel: "LiteLLM"
      iconURL: https://ollama.ai/public/icon.png
      baseURL: "http://librechat-litellm.llm.svc:8000"
      apiKey: "${LITELLM_MASTER_KEY}"
      models:
        fetch: true
        default:
          - "openhermes"
          - "solar"
      # The same model is used for titles and summaries.
      titleConvo: true
      titleModel: "solar"
      summarize: false
      summaryModel: "solar"
      forcePrompt: false
      dropParams:
        - "stop"
        - "frequency_penalty"
        - "presence_penalty"

View File

@ -0,0 +1,9 @@
---
# LiteLLM model list: both entries point at the same `ollama-server`
# endpoint on port 11434.
model_list:
  - model_name: llama3
    litellm_params:
      api_base: http://ollama-server:11434
      model: ollama_chat/llama3
  - model_name: gemma2
    litellm_params:
      api_base: http://ollama-server:11434
      model: ollama_chat/gemma2

View File

@ -1,32 +0,0 @@
---
# LiteLLM configuration: four Ollama-hosted models served from
# 192.168.30.20:11434, plus proxy-wide settings. Booleans are written as
# canonical lowercase `true` rather than `True`.
model_list:
  - model_name: mistral
    litellm_params:
      model: ollama/mistral:7b-instruct-v0.2-q5_K_M
      api_base: http://192.168.30.20:11434
      stream: true
  - model_name: openhermes
    litellm_params:
      model: ollama/openhermes:7b-mistral-v2.5-q5_K_M
      api_base: http://192.168.30.20:11434
      # stream: true
  # - model_name: dolphin-mistral
  #   litellm_params:
  #     model: ollama/dolphin-mistral
  #     api_base: http://192.168.30.20:11434
  #     # stream: true
  - model_name: solar
    litellm_params:
      model: ollama/solar:10.7b-instruct-v1-q5_K_M
      api_base: http://192.168.30.20:11434
      stream: true
  - model_name: deepseek-coder-6.7b
    litellm_params:
      model: ollama/deepseek-coder:6.7b-instruct-q8_0
      api_base: http://192.168.30.20:11434
      stream: true
litellm_settings:
  drop_params: true
general_settings:
  # The key is referenced via the MASTER_KEY environment variable rather
  # than being stored in this file.
  master_key: "os.environ/MASTER_KEY"

View File

@ -13,8 +13,6 @@ resources:
- ../../overlays/actual - ../../overlays/actual
- ../../overlays/llm - ../../overlays/llm
- probes/snmp-exporter.yaml - probes/snmp-exporter.yaml
- resources/litellm-externalsecret.yaml
- resources/litellm-stripprefix.yaml
# resources: # resources:
# - probes/external-services-bobcat-miner.yaml # - probes/external-services-bobcat-miner.yaml
@ -54,22 +52,13 @@ images:
newTag: "7.2" newTag: "7.2"
- name: bitnami/kubectl - name: bitnami/kubectl
newTag: "1.29" newTag: "1.29"
# - name: ghcr.io/danny-avila/librechat
# newTag: v0.6.10
# - name: ghcr.io/berriai/litellm
# newTag: main-v1.24.6
configMapGenerator: configMapGenerator:
- name: librechat-server-config - name: litellm-proxy-config
namespace: llm namespace: llm
behavior: replace behavior: replace
files: files:
- librechat.yaml=configurations/librechat/librechat.yaml - config.yaml=./configurations/litellm/config.yaml
- name: librechat-litellm-config
namespace: llm
behavior: replace
files:
- config.yml=configurations/litellm/config.yml
# - name: home-assistant-server-config # - name: home-assistant-server-config
# namespace: home-assistant # namespace: home-assistant
# behavior: replace # behavior: replace
@ -109,21 +98,6 @@ patches:
kind: Deployment kind: Deployment
name: deluge-server name: deluge-server
path: patches/deluge-deployment-patch.yaml path: patches/deluge-deployment-patch.yaml
- target:
version: v1
kind: Deployment
name: librechat-server
path: patches/librechat-deployment-patch.yaml
- target:
version: v1
kind: Ingress
name: librechat-server
path: patches/librechat-ingress-patch.yaml
- target:
version: v1
kind: Deployment
name: librechat-litellm
path: patches/litellm-deployment-patch.yaml
# - target: # - target:
# version: v1 # version: v1
# kind: Prometheus # kind: Prometheus

View File

@ -1,9 +0,0 @@
---
# Traefik middleware in the `llm` namespace that strips the `/backend`
# path prefix.
kind: Middleware
apiVersion: traefik.io/v1alpha1
metadata:
  namespace: llm
  name: litellm-stripprefix
spec:
  stripPrefix:
    prefixes: [/backend]

View File

@ -1,5 +1,8 @@
resources: resources:
- namespace.yaml - namespace.yaml
- ../../bases/librechat - ../../bases/ollama
- ../../bases/litellm
- ../../bases/openwebui
# - ../../bases/sillytavern
namespace: llm namespace: llm

View File

@ -2,4 +2,5 @@ resources:
- ../../bases/longhorn - ../../bases/longhorn
- ../../bases/traefik - ../../bases/traefik
- ../../bases/external-secrets - ../../bases/external-secrets
- https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml
- clustersecretstore.yaml - clustersecretstore.yaml