From 09b05de4482931ce567fdb3a3ac5406df81c4b7d Mon Sep 17 00:00:00 2001 From: Massaki Archambault Date: Sat, 28 Dec 2024 16:17:37 -0500 Subject: [PATCH] enable gpu acceleration for openedai-speech --- kustomize/bases/ollama/ollama-deployment.yaml | 2 +- .../bases/openedai-speech/openedai-speech-deployment.yaml | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/kustomize/bases/ollama/ollama-deployment.yaml b/kustomize/bases/ollama/ollama-deployment.yaml index 5de254a..4e95a8f 100644 --- a/kustomize/bases/ollama/ollama-deployment.yaml +++ b/kustomize/bases/ollama/ollama-deployment.yaml @@ -43,7 +43,7 @@ spec: limits: memory: 4Gi cpu: 3000m - nvidia.com/gpu: "2" + nvidia.com/gpu: "1" # amd.com/gpu: "1" ports: - name: http diff --git a/kustomize/bases/openedai-speech/openedai-speech-deployment.yaml b/kustomize/bases/openedai-speech/openedai-speech-deployment.yaml index b36ced9..ffea627 100644 --- a/kustomize/bases/openedai-speech/openedai-speech-deployment.yaml +++ b/kustomize/bases/openedai-speech/openedai-speech-deployment.yaml @@ -26,9 +26,13 @@ spec: operator: In values: - amd64 + runtimeClassName: nvidia containers: - name: openedai-speech image: ghcr.io/matatonic/openedai-speech + env: + - name: EXTRA_ARGS + value: --preload xtts_v2.0.2 ports: - name: http containerPort: 8000 @@ -38,7 +42,7 @@ spec: memory: 1Gi limits: cpu: 1000m - memory: 1Gi + nvidia.com/gpu: "1" volumeMounts: - name: server-voices-pv mountPath: /app/voices