
add llm configs

Massaki Archambault 2024-07-06 12:37:26 -04:00
parent e5ef004238
commit 8788fac6d5
18 changed files with 396 additions and 124 deletions

View File

@@ -1,10 +1,12 @@
 resources:
 - litellm-deployment.yaml
+namePrefix: litellm-
 commonLabels:
-  app.kubernetes.io/component: litellm
+  app.kubernetes.io/name: litellm
 configMapGenerator:
-- name: litellm-config
+- name: proxy-config
   literals:
   - config.yml=
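
For reference, a rough sketch of what kustomize build would emit for the generated ConfigMap after this rename, assuming standard generator behaviour: the namePrefix and commonLabels above are applied on top of the generator name, and the Deployment's configMap reference is rewritten to match (the hash suffix below is made up for illustration).

apiVersion: v1
kind: ConfigMap
metadata:
  name: litellm-proxy-config-abc123xyz   # namePrefix + generator name + content hash (hash illustrative)
  labels:
    app.kubernetes.io/name: litellm      # added by commonLabels
data:
  config.yml: ""                         # empty placeholder; the root overlay replaces this generator's content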

View File

@@ -1,59 +1,51 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: litellm
+  name: proxy
+  labels:
+    app.kubernetes.io/component: proxy
 spec:
   selector:
     matchLabels:
-      app.kubernetes.io/component: litellm
+      app.kubernetes.io/component: proxy
   template:
     metadata:
       labels:
-        app.kubernetes.io/component: litellm
+        app.kubernetes.io/component: proxy
     spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              # Image only supports amd64
-              - key: kubernetes.io/arch
-                operator: In
-                values:
-                - amd64
       containers:
       - name: litellm
         image: ghcr.io/berriai/litellm:main-latest
-        args: ['--config', '/config/config.yml']
-        env: []
+        args: ['--config', '/app/config.yaml']
+        ports:
+        - name: http
+          containerPort: 4000
         resources:
           requests:
-            memory: 200Mi
             cpu: 200m
+            memory: 200Mi
           limits:
-            memory: 200Mi
             cpu: 200m
-        ports:
-        - containerPort: 8000
-          name: http
+            memory: 200Mi
         volumeMounts:
-        - name: litellm-config
-          mountPath: /config
+        - name: proxy-config
+          mountPath: /app/config.yaml
+          subPath: config.yaml
       volumes:
-      - name: litellm-config
+      - name: proxy-config
         configMap:
-          name: litellm-config
+          name: proxy-config
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  name: litellm
+  name: proxy
   labels:
-    app.kubernetes.io/component: litellm
+    app.kubernetes.io/component: proxy
 spec:
   selector:
-    app.kubernetes.io/component: litellm
+    app.kubernetes.io/component: proxy
   ports:
   - name: http
-    port: 8000
+    port: 4000
     targetPort: http

View File

@@ -0,0 +1,4 @@
FROM ollama/ollama:0.1.48-rocm
# https://github.com/ollama/ollama/issues/2503#issuecomment-2159672925
# Expose the gfx1030 Tensile library under the gfx1010 name so rocBLAS can load it
# (braces spelled out so the command does not depend on bash brace expansion in RUN)
RUN ln -s /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1030.dat /opt/rocm/lib/rocblas/library/TensileLibrary_lazy_gfx1010.dat

View File

@@ -0,0 +1,7 @@
resources:
- ollama-deployment.yaml
namePrefix: ollama-
commonLabels:
app.kubernetes.io/name: ollama

View File

@@ -0,0 +1,77 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: server
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/component: server
template:
metadata:
labels:
app.kubernetes.io/component: server
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
# Image only supports amd64
- key: kubernetes.io/arch
operator: In
values:
- amd64
priorityClassName: high-priority
containers:
- name: server
imagePullPolicy: Always
image: ollama/ollama:rocm
# image: badjware/ollama-tweak
# env:
# - name: HSA_OVERRIDE_GFX_VERSION
# value: 10.1.0
# - name: HSA_ENABLE_SDMA
# value: "0"
resources:
requests:
memory: 8Gi
# cpu: 2000m
limits:
memory: 8Gi
# cpu: 3000m
amd.com/gpu: 1
ports:
- containerPort: 11434
name: http
volumeMounts:
- name: server-data
mountPath: /root/.ollama
volumes:
- name: server-data
hostPath:
path: /var/lib/ollama
type: DirectoryOrCreate
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: high-priority
value: 1000000
globalDefault: false
---
apiVersion: v1
kind: Service
metadata:
name: server
labels:
app.kubernetes.io/component: server
spec:
selector:
app.kubernetes.io/component: server
ports:
- name: http
port: 11434
targetPort: http

View File

@@ -0,0 +1,26 @@
resources:
- openwebui-deployment.yaml
- openwebui-ingress.yaml
namePrefix: openwebui-
commonLabels:
app.kubernetes.io/name: openwebui
configMapGenerator:
- name: kustomize-generated-config
literals:
- OPENWEBUI_EXTERNAL_HOST=chat.badjware.dev
- OPENWEBUI_EXTERNAL_URL=https://chat.badjware.dev
replacements:
- source:
kind: ConfigMap
name: kustomize-generated-config
fieldPath: data.OPENWEBUI_EXTERNAL_HOST
targets:
- select:
kind: Ingress
name: server
fieldPaths:
- spec.rules.0.host
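
For clarity, a hedged sketch of the Ingress rule after kustomize applies the replacement above: the ${OPENWEBUI_EXTERNAL_HOST} placeholder in the Ingress (defined in the next file) is overwritten with the OPENWEBUI_EXTERNAL_HOST literal from kustomize-generated-config.

# Approximate rendered Ingress spec (illustrative only)
spec:
  rules:
  - host: chat.badjware.dev          # substituted by the replacement
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: openwebui-server   # "server" after the openwebui- namePrefix
            port:
              name: http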

View File

@@ -0,0 +1,70 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: server
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/component: server
template:
metadata:
labels:
app.kubernetes.io/component: server
spec:
containers:
- name: server
image: ghcr.io/open-webui/open-webui:main
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: OLLAMA_BASE_URL
value: http://ollama-server.$(NAMESPACE).svc:11434
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: 1000m
memory: 1Gi
ports:
- containerPort: 8080
name: http
volumeMounts:
- name: server-data-pv
mountPath: /app/backend/data
volumes:
- name: server-data-pv
persistentVolumeClaim:
claimName: server-data-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: server-data-pvc
labels:
app.kubernetes.io/name: server
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
---
apiVersion: v1
kind: Service
metadata:
name: server
labels:
app.kubernetes.io/component: server
spec:
selector:
app.kubernetes.io/component: server
ports:
- name: http
port: 8080
targetPort: http
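
The $(NAMESPACE) reference above relies on Kubernetes dependent environment variable expansion: OLLAMA_BASE_URL is resolved at container start from the NAMESPACE variable declared just before it. A rough sketch of the effective values once this is deployed to the llm namespace (set by the overlay further down):

# Illustrative resolved environment inside the open-webui container
env:
- name: NAMESPACE
  value: llm                                  # from the downward API (metadata.namespace)
- name: OLLAMA_BASE_URL
  value: http://ollama-server.llm.svc:11434   # $(NAMESPACE) expanded by the kubelet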

View File

@@ -0,0 +1,19 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: server
labels:
app.kubernetes.io/name: ollama
probe: blackbox-http
spec:
rules:
- host: ${OPENWEBUI_EXTERNAL_HOST}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: server
port:
name: http

View File

@@ -0,0 +1,26 @@
resources:
- sillytavern-deployment.yaml
- sillytavern-ingress.yaml
namePrefix: sillytavern-
commonLabels:
app.kubernetes.io/name: sillytavern
configMapGenerator:
- name: kustomize-generated-config
literals:
- SILLYTAVERN_EXTERNAL_HOST=tavern.badjware.dev
- SILLYTAVERN_EXTERNAL_URL=https://tavern.badjware.dev
replacements:
- source:
kind: ConfigMap
name: kustomize-generated-config
fieldPath: data.SILLYTAVERN_EXTERNAL_HOST
targets:
- select:
kind: Ingress
name: server
fieldPaths:
- spec.rules.0.host

View File

@@ -0,0 +1,102 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: server
spec:
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/component: server
template:
metadata:
labels:
app.kubernetes.io/component: server
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
# Image only supports amd64
- key: kubernetes.io/arch
operator: In
values:
- amd64
containers:
- name: sillytavern
image: ghcr.io/sillytavern/sillytavern:latest
resources:
requests:
memory: 500Mi
cpu: 200m
limits:
memory: 500Mi
cpu: 200m
ports:
- containerPort: 8000
name: http
volumeMounts:
- name: sillytavern-extensions
mountPath: /home/node/app/public/scripts/extensions/third-party
- name: sillytavern-config
mountPath: /home/node/app/config
- name: sillytavern-user
mountPath: /home/node/app/public/user
volumes:
- name: sillytavern-extensions
persistentVolumeClaim:
claimName: server-extensions
- name: sillytavern-config
persistentVolumeClaim:
claimName: server-config
- name: sillytavern-user
persistentVolumeClaim:
claimName: server-user
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: server-extensions
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: server-config
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: server-user
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
---
apiVersion: v1
kind: Service
metadata:
name: server
labels:
app.kubernetes.io/component: server
spec:
selector:
app.kubernetes.io/component: server
ports:
- name: http
port: 8000
targetPort: http

View File

@@ -0,0 +1,19 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: server
labels:
app.kubernetes.io/name: sillytavern
probe: blackbox-http
spec:
rules:
- host: ${SILLYTAVERN_EXTERNAL_HOST}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: server
port:
name: http

View File

@@ -1,18 +0,0 @@
version: 1.0.1
cache: true
endpoints:
custom:
- name: "LiteLLM"
iconURL: https://ollama.ai/public/icon.png
apiKey: "${LITELLM_MASTER_KEY}"
baseURL: "http://librechat-litellm.llm.svc:8000"
models:
default: ["openhermes", "solar"]
fetch: true
titleConvo: true
titleModel: "solar"
summarize: false
summaryModel: "solar"
forcePrompt: false
modelDisplayLabel: "LiteLLM"
dropParams: ["stop", "frequency_penalty", "presence_penalty"]

View File

@@ -0,0 +1,9 @@
model_list:
- model_name: llama3
litellm_params:
model: ollama_chat/llama3
api_base: http://ollama-server:11434
- model_name: gemma2
litellm_params:
model: ollama_chat/gemma2
api_base: http://ollama-server:11434
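
If the proxy should keep the authentication and parameter-dropping behaviour of the old config.yml (removed further down), the same keys can be carried over into this file; a hedged sketch:

# Optional settings mirroring the removed config.yml below
litellm_settings:
  drop_params: True                       # drop request params the backend does not support
general_settings:
  master_key: "os.environ/MASTER_KEY"     # proxy API key read from the environment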

View File

@@ -1,32 +0,0 @@
model_list:
- model_name: mistral
litellm_params:
model: ollama/mistral:7b-instruct-v0.2-q5_K_M
api_base: http://192.168.30.20:11434
stream: True
- model_name: openhermes
litellm_params:
model: ollama/openhermes:7b-mistral-v2.5-q5_K_M
api_base: http://192.168.30.20:11434
# stream: True
# - model_name: dolphin-mistral
# litellm_params:
# model: ollama/dolphin-mistral
# api_base: http://192.168.30.20:11434
# # stream: True
- model_name: solar
litellm_params:
model: ollama/solar:10.7b-instruct-v1-q5_K_M
api_base: http://192.168.30.20:11434
stream: True
- model_name: deepseek-coder-6.7b
litellm_params:
model: ollama/deepseek-coder:6.7b-instruct-q8_0
api_base: http://192.168.30.20:11434
stream: True
litellm_settings:
drop_params: True
general_settings:
master_key: "os.environ/MASTER_KEY"

View File

@@ -13,8 +13,6 @@ resources:
 - ../../overlays/actual
 - ../../overlays/llm
 - probes/snmp-exporter.yaml
-- resources/litellm-externalsecret.yaml
-- resources/litellm-stripprefix.yaml
 # resources:
 # - probes/external-services-bobcat-miner.yaml
@@ -54,32 +52,23 @@ images:
   newTag: "7.2"
 - name: bitnami/kubectl
   newTag: "1.29"
-# - name: ghcr.io/danny-avila/librechat
-#   newTag: v0.6.10
-# - name: ghcr.io/berriai/litellm
-#   newTag: main-v1.24.6
 configMapGenerator:
-- name: librechat-server-config
+- name: litellm-proxy-config
   namespace: llm
   behavior: replace
   files:
-  - librechat.yaml=configurations/librechat/librechat.yaml
-- name: librechat-litellm-config
-  namespace: llm
-  behavior: replace
-  files:
-  - config.yml=configurations/litellm/config.yml
+  - config.yaml=./configurations/litellm/config.yaml
 # - name: home-assistant-server-config
 #   namespace: home-assistant
 #   behavior: replace
 #   files:
 #   - configuration.yaml=configurations/home-assistant/configuration.yaml
 # - name: ecommerce-exporter-config
 #   namespace: monitoring
 #   behavior: replace
 #   files:
 #   - ecommerce-exporter.yml=configurations/ecommerce-exporter/ecommerce-exporter.yml
 secretGenerator:
 - name: prometheus-additional-scrape-configs
@@ -109,21 +98,6 @@ patches:
     kind: Deployment
     name: deluge-server
   path: patches/deluge-deployment-patch.yaml
-- target:
-    version: v1
-    kind: Deployment
-    name: librechat-server
-  path: patches/librechat-deployment-patch.yaml
-- target:
-    version: v1
-    kind: Ingress
-    name: librechat-server
-  path: patches/librechat-ingress-patch.yaml
-- target:
-    version: v1
-    kind: Deployment
-    name: librechat-litellm
-  path: patches/litellm-deployment-patch.yaml
 # - target:
 #     version: v1
 #     kind: Prometheus

View File

@@ -1,9 +0,0 @@
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
name: litellm-stripprefix
namespace: llm
spec:
stripPrefix:
prefixes:
- /backend

View File

@@ -1,5 +1,8 @@
 resources:
 - namespace.yaml
-- ../../bases/librechat
+- ../../bases/ollama
+- ../../bases/litellm
+- ../../bases/openwebui
+# - ../../bases/sillytavern
 namespace: llm

View File

@@ -2,4 +2,5 @@ resources:
 - ../../bases/longhorn
 - ../../bases/traefik
 - ../../bases/external-secrets
+- https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml
 - clustersecretstore.yaml
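
The amdgpu device-plugin DaemonSet pulled in here is what advertises the amd.com/gpu resource that the ollama Deployment above consumes. A minimal, illustrative pod for verifying the plugin is working (the name and image are assumptions, not part of this commit):

apiVersion: v1
kind: Pod
metadata:
  name: rocm-smoke-test              # hypothetical name
spec:
  restartPolicy: Never
  containers:
  - name: rocm-smi
    image: rocm/dev-ubuntu-22.04     # any ROCm-capable image should do
    command: ["rocm-smi"]            # prints the GPUs visible to the container
    resources:
      limits:
        amd.com/gpu: 1               # resource exposed by the device plugin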