# home-stack-kustomize/kustomize/bases/ollama/ollama-deployment.yaml
---
# Deployment "server": a single-replica Ollama server (image ollama/ollama)
# that requests one NVIDIA GPU and keeps its data on the node's filesystem.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  # Recreate terminates the existing pod before the replacement is created.
  # NOTE(review): presumably chosen because the single GPU and the hostPath
  # data directory cannot be shared by two pods during a rollout - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      # Refers to the PriorityClass named "high-priority".
      priorityClassName: high-priority
      # Requires a RuntimeClass named "nvidia" to exist in the cluster.
      runtimeClassName: nvidia
      containers:
        - name: server
          # NOTE(review): no tag is given, so this resolves to ":latest";
          # consider pinning a specific version for reproducible rollouts.
          image: ollama/ollama
          # Disabled alternative image and environment, kept for reference.
          # NOTE(review): the HSA_* variables look like AMD/ROCm settings -
          # confirm before re-enabling.
          # image: badjware/ollama-tweak
          # env:
          #   - name: HSA_OVERRIDE_GFX_VERSION
          #     value: "10.1.0"
          #   - name: HSA_ENABLE_SDMA
          #     value: "0"
          resources:
            requests:
              memory: 4Gi
              cpu: 2000m
            limits:
              memory: 4Gi
              cpu: 3000m
              # Extended resource; only set under limits here.
              nvidia.com/gpu: 1
          ports:
            # Named port; the Service targets it by the name "http".
            - containerPort: 11434
              name: http
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        # Data lives on the node's own disk, so it stays tied to that node.
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
---
# PriorityClass "high-priority": the scheduling priority referenced by the
# server Deployment through priorityClassName.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
# Priority value assigned to pods that use this class.
value: 1000000
# Not applied to pods that omit priorityClassName.
globalDefault: false
---
# Service "server": exposes the pods labelled
# app.kubernetes.io/component=server on port 11434.
# No spec.type is set, so the Kubernetes default (ClusterIP) applies.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 11434
      # Resolved by name against the container port called "http".
      targetPort: http