From 8788fac6d517c6acaf5d3a4345428e2d31404f91 Mon Sep 17 00:00:00 2001 From: Massaki Archambault Date: Sat, 6 Jul 2024 12:37:26 -0400 Subject: [PATCH] add llm configs --- kustomize/bases/litellm/kustomization.yaml | 6 +- .../bases/litellm/litellm-deployment.yaml | 50 ++++----- kustomize/bases/ollama/docker/Dockerfile | 4 + kustomize/bases/ollama/kustomization.yaml | 7 ++ kustomize/bases/ollama/ollama-deployment.yaml | 77 +++++++++++++ kustomize/bases/openwebui/kustomization.yaml | 26 +++++ .../bases/openwebui/openwebui-deployment.yaml | 70 ++++++++++++ .../bases/openwebui/openwebui-ingress.yaml | 19 ++++ .../bases/sillytavern/kustomization.yaml | 26 +++++ .../sillytavern/sillytavern-deployment.yaml | 102 ++++++++++++++++++ .../sillytavern/sillytavern-ingress.yaml | 19 ++++ .../configurations/librechat/librechat.yaml | 18 ---- .../prod/configurations/litellm/config.yaml | 9 ++ .../prod/configurations/litellm/config.yml | 32 ------ kustomize/env/prod/kustomization.yaml | 40 ++----- .../prod/resources/litellm-stripprefix.yaml | 9 -- kustomize/overlays/llm/kustomization.yaml | 5 +- kustomize/overlays/system/kustomization.yaml | 1 + 18 files changed, 396 insertions(+), 124 deletions(-) create mode 100644 kustomize/bases/ollama/docker/Dockerfile create mode 100644 kustomize/bases/ollama/kustomization.yaml create mode 100644 kustomize/bases/ollama/ollama-deployment.yaml create mode 100644 kustomize/bases/openwebui/kustomization.yaml create mode 100644 kustomize/bases/openwebui/openwebui-deployment.yaml create mode 100644 kustomize/bases/openwebui/openwebui-ingress.yaml create mode 100644 kustomize/bases/sillytavern/kustomization.yaml create mode 100644 kustomize/bases/sillytavern/sillytavern-deployment.yaml create mode 100644 kustomize/bases/sillytavern/sillytavern-ingress.yaml delete mode 100644 kustomize/env/prod/configurations/librechat/librechat.yaml create mode 100644 kustomize/env/prod/configurations/litellm/config.yaml delete mode 100644 kustomize/env/prod/configurations/litellm/config.yml delete mode 100644 kustomize/env/prod/resources/litellm-stripprefix.yaml diff --git a/kustomize/bases/litellm/kustomization.yaml b/kustomize/bases/litellm/kustomization.yaml index 4110727..0e7ff7e 100644 --- a/kustomize/bases/litellm/kustomization.yaml +++ b/kustomize/bases/litellm/kustomization.yaml @@ -1,10 +1,12 @@ resources: - litellm-deployment.yaml +namePrefix: litellm- + commonLabels: - app.kubernetes.io/component: litellm + app.kubernetes.io/name: litellm configMapGenerator: - - name: litellm-config + - name: proxy-config literals: - config.yml= \ No newline at end of file diff --git a/kustomize/bases/litellm/litellm-deployment.yaml b/kustomize/bases/litellm/litellm-deployment.yaml index 1f038e2..d4236c7 100644 --- a/kustomize/bases/litellm/litellm-deployment.yaml +++ b/kustomize/bases/litellm/litellm-deployment.yaml @@ -1,59 +1,51 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: litellm + name: proxy + labels: + app.kubernetes.io/component: proxy spec: selector: matchLabels: - app.kubernetes.io/component: litellm + app.kubernetes.io/component: proxy template: metadata: labels: - app.kubernetes.io/component: litellm + app.kubernetes.io/component: proxy spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - # Image only supports amd64 - - key: kubernetes.io/arch - operator: In - values: - - amd64 containers: - name: litellm image: ghcr.io/berriai/litellm:main-latest - args: ['--config', '/config/config.yml'] - env: [] + args: ['--config', '/app/config.yaml'] + ports: + - name: http + containerPort: 4000 resources: requests: - memory: 200Mi cpu: 200m + memory: 200Mi limits: - memory: 200Mi cpu: 200m - ports: - - containerPort: 8000 - name: http + memory: 200Mi volumeMounts: - - name: litellm-config - mountPath: /config + - name: proxy-config + mountPath: /app/config.yaml + subPath: config.yaml volumes: - - name: litellm-config + - name: proxy-config configMap: - name: litellm-config + name: proxy-config --- apiVersion: v1 kind: Service metadata: - name: litellm + name: proxy labels: - app.kubernetes.io/component: litellm + app.kubernetes.io/component: proxy spec: selector: - app.kubernetes.io/component: litellm + app.kubernetes.io/component: proxy ports: - name: http - port: 8000 - targetPort: http + port: 4000 + targetPort: http \ No newline at end of file diff --git a/kustomize/bases/ollama/docker/Dockerfile b/kustomize/bases/ollama/docker/Dockerfile new file mode 100644 index 0000000..669d0d8 --- /dev/null +++ b/kustomize/bases/ollama/docker/Dockerfile @@ -0,0 +1,4 @@ +FROM ollama/ollama:0.1.48-rocm + +# https://github.com/ollama/ollama/issues/2503#issuecomment-2159672925 +RUN ln -s /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx{1030,1010}.dat \ No newline at end of file diff --git a/kustomize/bases/ollama/kustomization.yaml b/kustomize/bases/ollama/kustomization.yaml new file mode 100644 index 0000000..f61f906 --- /dev/null +++ b/kustomize/bases/ollama/kustomization.yaml @@ -0,0 +1,7 @@ +resources: + - ollama-deployment.yaml + +namePrefix: ollama- + +commonLabels: + app.kubernetes.io/name: ollama diff --git a/kustomize/bases/ollama/ollama-deployment.yaml b/kustomize/bases/ollama/ollama-deployment.yaml new file mode 100644 index 0000000..c3f76ee --- /dev/null +++ b/kustomize/bases/ollama/ollama-deployment.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: server +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/component: server + template: + metadata: + labels: + app.kubernetes.io/component: server + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + # Image only supports amd64 + - key: kubernetes.io/arch + operator: In + values: + - amd64 + priorityClassName: high-priority + containers: + - name: server + imagePullPolicy: Always + image: ollama/ollama:rocm + # image: badjware/ollama-tweak + # env: + # - name: HSA_OVERRIDE_GFX_VERSION + # value: 10.1.0 + # - name: HSA_ENABLE_SDMA + # value: "0" + resources: + requests: + memory: 8Gi + # cpu: 2000m + limits: + memory: 8Gi + # cpu: 3000m + amd.com/gpu: 1 + ports: + - containerPort: 11434 + name: http + volumeMounts: + - name: server-data + mountPath: /root/.ollama + volumes: + - name: server-data + hostPath: + path: /var/lib/ollama + type: DirectoryOrCreate +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: high-priority +value: 1000000 +globalDefault: false +--- +apiVersion: v1 +kind: Service +metadata: + name: server + labels: + app.kubernetes.io/component: server +spec: + selector: + app.kubernetes.io/component: server + ports: + - name: http + port: 11434 + targetPort: http \ No newline at end of file diff --git a/kustomize/bases/openwebui/kustomization.yaml b/kustomize/bases/openwebui/kustomization.yaml new file mode 100644 index 0000000..34aaf4c --- /dev/null +++ b/kustomize/bases/openwebui/kustomization.yaml @@ -0,0 +1,26 @@ +resources: + - openwebui-deployment.yaml + - openwebui-ingress.yaml + +namePrefix: openwebui- + +commonLabels: + app.kubernetes.io/name: openwebui + +configMapGenerator: + - name: kustomize-generated-config + literals: + - OPENWEBUI_EXTERNAL_HOST=chat.badjware.dev + - OPENWEBUI_EXTERNAL_URL=https://chat.badjware.dev + +replacements: + - source: + kind: ConfigMap + name: kustomize-generated-config + fieldPath: data.OPENWEBUI_EXTERNAL_HOST + targets: + - select: + kind: Ingress + name: server + fieldPaths: + - spec.rules.0.host diff --git a/kustomize/bases/openwebui/openwebui-deployment.yaml b/kustomize/bases/openwebui/openwebui-deployment.yaml new file mode 100644 index 0000000..4d7a010 --- /dev/null +++ b/kustomize/bases/openwebui/openwebui-deployment.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: server +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/component: server + template: + metadata: + labels: + app.kubernetes.io/component: server + spec: + containers: + - name: server + image: ghcr.io/open-webui/open-webui:main + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: OLLAMA_BASE_URL + value: http://ollama-server.$(NAMESPACE).svc:11434 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 1000m + memory: 1Gi + ports: + - containerPort: 8080 + name: http + volumeMounts: + - name: server-data-pv + mountPath: /app/backend/data + volumes: + - name: server-data-pv + persistentVolumeClaim: + claimName: server-data-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: server-data-pvc + labels: + app.kubernetes.io/name: server +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: server + labels: + app.kubernetes.io/component: server +spec: + selector: + app.kubernetes.io/component: server + ports: + - name: http + port: 8080 + targetPort: http diff --git a/kustomize/bases/openwebui/openwebui-ingress.yaml b/kustomize/bases/openwebui/openwebui-ingress.yaml new file mode 100644 index 0000000..dcccbd0 --- /dev/null +++ b/kustomize/bases/openwebui/openwebui-ingress.yaml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: server + labels: + app.kubernetes.io/name: ollama + probe: blackbox-http +spec: + rules: + - host: ${OPENWEBUI_EXTERNAL_HOST} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: server + port: + name: http diff --git a/kustomize/bases/sillytavern/kustomization.yaml b/kustomize/bases/sillytavern/kustomization.yaml new file mode 100644 index 0000000..eb1cf59 --- /dev/null +++ b/kustomize/bases/sillytavern/kustomization.yaml @@ -0,0 +1,26 @@ +resources: + - sillytavern-deployment.yaml + - sillytavern-ingress.yaml + +namePrefix: sillytavern- + +commonLabels: + app.kubernetes.io/name: sillytavern + +configMapGenerator: + - name: kustomize-generated-config + literals: + - SILLYTAVERN_EXTERNAL_HOST=tavern.badjware.dev + - SILLYTAVERN_EXTERNAL_URL=https://tavern.badjware.dev + +replacements: + - source: + kind: ConfigMap + name: kustomize-generated-config + fieldPath: data.SILLYTAVERN_EXTERNAL_HOST + targets: + - select: + kind: Ingress + name: server + fieldPaths: + - spec.rules.0.host diff --git a/kustomize/bases/sillytavern/sillytavern-deployment.yaml b/kustomize/bases/sillytavern/sillytavern-deployment.yaml new file mode 100644 index 0000000..3785feb --- /dev/null +++ b/kustomize/bases/sillytavern/sillytavern-deployment.yaml @@ -0,0 +1,102 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: server +spec: + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/component: server + template: + metadata: + labels: + app.kubernetes.io/component: server + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + # Image only supports amd64 + - key: kubernetes.io/arch + operator: In + values: + - amd64 + containers: + - name: sillytavern + image: ghcr.io/sillytavern/sillytavern:latest + resources: + requests: + memory: 500Mi + cpu: 200m + limits: + memory: 500Mi + cpu: 200m + ports: + - containerPort: 8000 + name: http + volumeMounts: + - name: sillytavern-extensions + mountPath: /home/node/app/public/scripts/extensions/third-party + - name: sillytavern-config + mountPath: /home/node/app/config + - name: sillytavern-user + mountPath: /home/node/app/public/user + volumes: + - name: sillytavern-extensions + persistentVolumeClaim: + claimName: server-extensions + - name: sillytavern-config + persistentVolumeClaim: + claimName: server-config + - name: sillytavern-user + persistentVolumeClaim: + claimName: server-user +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: server-extensions +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: server-config +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: server-user +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: server + labels: + app.kubernetes.io/component: server +spec: + selector: + app.kubernetes.io/component: server + ports: + - name: http + port: 8000 + targetPort: http \ No newline at end of file diff --git a/kustomize/bases/sillytavern/sillytavern-ingress.yaml b/kustomize/bases/sillytavern/sillytavern-ingress.yaml new file mode 100644 index 0000000..fb5eec1 --- /dev/null +++ b/kustomize/bases/sillytavern/sillytavern-ingress.yaml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: server + labels: + app.kubernetes.io/name: sillytavern + probe: blackbox-http +spec: + rules: + - host: ${SILLYTAVERN_EXTERNAL_HOST} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: server + port: + name: http diff --git a/kustomize/env/prod/configurations/librechat/librechat.yaml b/kustomize/env/prod/configurations/librechat/librechat.yaml deleted file mode 100644 index 2dcb408..0000000 --- a/kustomize/env/prod/configurations/librechat/librechat.yaml +++ /dev/null @@ -1,18 +0,0 @@ -version: 1.0.1 -cache: true -endpoints: - custom: - - name: "LiteLLM" - iconURL: https://ollama.ai/public/icon.png - apiKey: "${LITELLM_MASTER_KEY}" - baseURL: "http://librechat-litellm.llm.svc:8000" - models: - default: ["openhermes", "solar"] - fetch: true - titleConvo: true - titleModel: "solar" - summarize: false - summaryModel: "solar" - forcePrompt: false - modelDisplayLabel: "LiteLLM" - dropParams: ["stop", "frequency_penalty", "presence_penalty"] \ No newline at end of file diff --git a/kustomize/env/prod/configurations/litellm/config.yaml b/kustomize/env/prod/configurations/litellm/config.yaml new file mode 100644 index 0000000..70332db --- /dev/null +++ b/kustomize/env/prod/configurations/litellm/config.yaml @@ -0,0 +1,9 @@ +model_list: + - model_name: llama3 + litellm_params: + model: ollama_chat/llama3 + api_base: http://ollama-server:11434 + - model_name: gemma2 + litellm_params: + model: ollama_chat/gemma2 + api_base: http://ollama-server:11434 \ No newline at end of file diff --git a/kustomize/env/prod/configurations/litellm/config.yml b/kustomize/env/prod/configurations/litellm/config.yml deleted file mode 100644 index 2024726..0000000 --- a/kustomize/env/prod/configurations/litellm/config.yml +++ /dev/null @@ -1,32 +0,0 @@ -model_list: - - model_name: mistral - litellm_params: - model: ollama/mistral:7b-instruct-v0.2-q5_K_M - api_base: http://192.168.30.20:11434 - stream: True - - model_name: openhermes - litellm_params: - model: ollama/openhermes:7b-mistral-v2.5-q5_K_M - api_base: http://192.168.30.20:11434 - # stream: True - # - model_name: dolphin-mistral - # litellm_params: - # model: ollama/dolphin-mistral - # api_base: http://192.168.30.20:11434 - # # stream: True - - model_name: solar - litellm_params: - model: ollama/solar:10.7b-instruct-v1-q5_K_M - api_base: http://192.168.30.20:11434 - stream: True - - model_name: deepseek-coder-6.7b - litellm_params: - model: ollama/deepseek-coder:6.7b-instruct-q8_0 - api_base: http://192.168.30.20:11434 - stream: True - -litellm_settings: - drop_params: True - -general_settings: - master_key: "os.environ/MASTER_KEY" \ No newline at end of file diff --git a/kustomize/env/prod/kustomization.yaml b/kustomize/env/prod/kustomization.yaml index d3d1855..f38ccd4 100644 --- a/kustomize/env/prod/kustomization.yaml +++ b/kustomize/env/prod/kustomization.yaml @@ -13,8 +13,6 @@ resources: - ../../overlays/actual - ../../overlays/llm - probes/snmp-exporter.yaml - - resources/litellm-externalsecret.yaml - - resources/litellm-stripprefix.yaml # resources: # - probes/external-services-bobcat-miner.yaml @@ -54,32 +52,23 @@ images: newTag: "7.2" - name: bitnami/kubectl newTag: "1.29" - # - name: ghcr.io/danny-avila/librechat - # newTag: v0.6.10 - # - name: ghcr.io/berriai/litellm - # newTag: main-v1.24.6 configMapGenerator: - - name: librechat-server-config + - name: litellm-proxy-config namespace: llm behavior: replace files: - - librechat.yaml=configurations/librechat/librechat.yaml - - name: librechat-litellm-config - namespace: llm - behavior: replace - files: - - config.yml=configurations/litellm/config.yml + - config.yaml=./configurations/litellm/config.yaml # - name: home-assistant-server-config # namespace: home-assistant # behavior: replace # files: # - configuration.yaml=configurations/home-assistant/configuration.yaml - # - name: ecommerce-exporter-config - # namespace: monitoring - # behavior: replace - # files: - # - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml +# - name: ecommerce-exporter-config +# namespace: monitoring +# behavior: replace +# files: +# - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml secretGenerator: - name: prometheus-additional-scrape-configs @@ -109,21 +98,6 @@ patches: kind: Deployment name: deluge-server path: patches/deluge-deployment-patch.yaml - - target: - version: v1 - kind: Deployment - name: librechat-server - path: patches/librechat-deployment-patch.yaml - - target: - version: v1 - kind: Ingress - name: librechat-server - path: patches/librechat-ingress-patch.yaml - - target: - version: v1 - kind: Deployment - name: librechat-litellm - path: patches/litellm-deployment-patch.yaml # - target: # version: v1 # kind: Prometheus diff --git a/kustomize/env/prod/resources/litellm-stripprefix.yaml b/kustomize/env/prod/resources/litellm-stripprefix.yaml deleted file mode 100644 index 8a4ce09..0000000 --- a/kustomize/env/prod/resources/litellm-stripprefix.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: litellm-stripprefix - namespace: llm -spec: - stripPrefix: - prefixes: - - /backend \ No newline at end of file diff --git a/kustomize/overlays/llm/kustomization.yaml b/kustomize/overlays/llm/kustomization.yaml index d85b6a7..5a7ca84 100644 --- a/kustomize/overlays/llm/kustomization.yaml +++ b/kustomize/overlays/llm/kustomization.yaml @@ -1,5 +1,8 @@ resources: - namespace.yaml - - ../../bases/librechat + - ../../bases/ollama + - ../../bases/litellm + - ../../bases/openwebui + # - ../../bases/sillytavern namespace: llm diff --git a/kustomize/overlays/system/kustomization.yaml b/kustomize/overlays/system/kustomization.yaml index cb3882d..a4f53ee 100644 --- a/kustomize/overlays/system/kustomization.yaml +++ b/kustomize/overlays/system/kustomization.yaml @@ -2,4 +2,5 @@ resources: - ../../bases/longhorn - ../../bases/traefik - ../../bases/external-secrets + - https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml - clustersecretstore.yaml \ No newline at end of file