1
0
Fork 0

add nvidia gpu support

This commit is contained in:
Massaki Archambault 2024-08-07 00:50:31 -04:00
parent 7339200fc6
commit c175883985
8 changed files with 59 additions and 8 deletions

View File

@ -0,0 +1,2 @@
---
# Kustomization that pulls the AMD GPU device-plugin manifest directly from
# the upstream ROCm repository.
# NOTE(review): the URL tracks the mutable `master` branch, so the fetched
# manifest can change between builds — consider pinning a tag or commit.
resources:
  - https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml

View File

@ -0,0 +1,9 @@
---
# NVIDIA device plugin configuration file (schema version v1).
# Every option lives under `flags`; plugin-specific options are nested one
# level deeper under `flags.plugin`.
version: v1
flags:
  migStrategy: "none"
  failOnInitError: true
  nvidiaDriverRoot: "/"
  plugin:
    passDeviceSpecs: false
    deviceListStrategy: envvar
    deviceIDStrategy: uuid

View File

@ -0,0 +1,13 @@
---
# Kustomization for the NVIDIA device plugin, deployed through a HelmChart
# resource into the kube-system namespace.
resources:
  # Static upstream manifest, kept for reference; the HelmChart below is used
  # instead.
  # - https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.16.1/deployments/static/nvidia-device-plugin.yml
  - nvidia-device-plugin-helmchart.yaml

namespace: kube-system

# Builds the ConfigMap `nvidia-device-plugin-config`, storing the contents of
# config/config0.yaml under the key `config0`.
configMapGenerator:
  - name: nvidia-device-plugin-config
    files:
      - config0=config/config0.yaml

# Extra transformer configuration loaded by kustomize.
configurations:
  - kustomizeconfig/namereference.yaml

View File

@ -0,0 +1,5 @@
---
# Kustomize name-reference configuration: declares that the `config.name`
# entry under `spec.set` of a HelmChart refers to a ConfigMap by name, so
# kustomize can rewrite that value when the ConfigMap's name is transformed
# (e.g. by a generator-added suffix).
# Path segments are separated by `/`, so `config.name` is a single key.
nameReference:
  - kind: ConfigMap
    fieldSpecs:
      - kind: HelmChart
        path: spec/set/config.name

View File

@ -0,0 +1,18 @@
---
# HelmChart resource (helm.cattle.io/v1) that installs the NVIDIA device
# plugin chart into kube-system.
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: nvidia-device-plugin
  namespace: kube-system
spec:
  repo: https://nvidia.github.io/k8s-device-plugin
  chart: nvidia-device-plugin
  # Quoted so the chart version is always read as a string.
  version: "0.16.1"
  targetNamespace: kube-system
  # Simple chart values passed as individual settings.
  set:
    # Key inside the ConfigMap to use as the default plugin configuration.
    config.default: config0
    # Name of the ConfigMap holding the plugin configuration.
    config.name: nvidia-device-plugin-config
    # Kept as a quoted string, exactly as in the original manifest.
    gfd.enabled: "true"
  # Structured chart values, embedded as a YAML document.
  valuesContent: |-
    nodeSelector:
      kubernetes.io/arch: amd64
    runtimeClassName: nvidia

View File

@ -25,10 +25,10 @@ spec:
values:
- amd64
priorityClassName: high-priority
runtimeClassName: nvidia
containers:
- name: server
imagePullPolicy: Always
image: ollama/ollama:rocm
image: ollama/ollama
# image: badjware/ollama-tweak
# env:
# - name: HSA_OVERRIDE_GFX_VERSION
@ -37,12 +37,12 @@ spec:
# value: "0"
resources:
requests:
memory: 8Gi
# cpu: 2000m
memory: 4Gi
cpu: 2000m
limits:
memory: 8Gi
# cpu: 3000m
amd.com/gpu: 1
memory: 4Gi
cpu: 3000m
nvidia.com/gpu: 1
ports:
- containerPort: 11434
name: http

View File

@ -52,6 +52,10 @@ images:
newTag: "7.2"
- name: bitnami/kubectl
newTag: "1.29"
- name: ollama/ollama
newTag: 0.3.3
# newTag: 0.3.3-rocm
configMapGenerator:
- name: litellm-proxy-config

View File

@ -2,5 +2,5 @@ resources:
- ../../bases/longhorn
- ../../bases/traefik
- ../../bases/external-secrets
- https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml
- ../../bases/k8s-device-plugin-nvidia
- clustersecretstore.yaml