1
0
Fork 0
home-stack-kustomize/kustomize/bases/ollama/ollama-deployment.yaml

77 lines
1.8 KiB
YAML

apiVersion: apps/v1
kind: Deployment
metadata:
name: server
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/component: server
template:
metadata:
labels:
app.kubernetes.io/component: server
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
# Image only supports amd64
- key: kubernetes.io/arch
operator: In
values:
- amd64
priorityClassName: high-priority
containers:
- name: server
imagePullPolicy: Always
image: ollama/ollama:rocm
# image: badjware/ollama-tweak
# env:
# - name: HSA_OVERRIDE_GFX_VERSION
# value: 10.1.0
# - name: HSA_ENABLE_SDMA
# value: "0"
resources:
requests:
memory: 8Gi
# cpu: 2000m
limits:
memory: 8Gi
# cpu: 3000m
amd.com/gpu: 1
ports:
- containerPort: 11434
name: http
volumeMounts:
- name: server-data
mountPath: /root/.ollama
volumes:
- name: server-data
hostPath:
path: /var/lib/ollama
type: DirectoryOrCreate
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: high-priority
value: 1000000
globalDefault: false
---
apiVersion: v1
kind: Service
metadata:
name: server
labels:
app.kubernetes.io/component: server
spec:
selector:
app.kubernetes.io/component: server
ports:
- name: http
port: 11434
targetPort: http