1
0
Fork 0
home-stack-kustomize/kustomize/bases/ollama/ollama-deployment.yaml

95 lines
2.1 KiB
YAML

# Ollama server Deployment: one replica, restricted to amd64 nodes, run under
# the "nvidia" RuntimeClass with two NVIDIA GPUs in its resource limits.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  # Recreate tears the old pod down before starting its replacement.
  # Presumably chosen because the hostPort and the GPUs cannot be held by two
  # pods on the same node at once -- confirm before switching to RollingUpdate.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      # Must match the name of an existing PriorityClass.
      priorityClassName: high-priority
      runtimeClassName: nvidia
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      containers:
        - name: server
          # NOTE(review): no tag is given, so this resolves to ":latest";
          # consider pinning a version for reproducible rollouts.
          image: ollama/ollama
          ports:
            # Exposed on the node itself (hostPort) as well as inside the pod.
            - name: http
              containerPort: 11434
              hostPort: 11434
          env:
            - name: OLLAMA_KEEP_ALIVE
              value: "12h"
            # - name: HSA_OVERRIDE_GFX_VERSION
            #   value: 10.1.0
            # - name: HSA_ENABLE_SDMA
            #   value: "0"
          resources:
            requests:
              cpu: 2000m
              memory: 4Gi
            limits:
              cpu: 3000m
              memory: 4Gi
              nvidia.com/gpu: "2"
              # amd.com/gpu: "1"
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        # Node-local directory backing /root/.ollama in the container;
        # DirectoryOrCreate creates it on the node if it does not exist.
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
---
# PriorityClass named "high-priority" with value 1000000.
# globalDefault is false, so only pods that set priorityClassName to this
# name receive the priority.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
globalDefault: false
value: 1000000
---
# Service "server": forwards port 11434 to the port named "http" on pods
# labelled app.kubernetes.io/component=server. No type is set, so the
# Kubernetes default (ClusterIP) applies.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  ports:
    - name: http
      port: 11434
      # Resolved by name against the selected pods' container ports.
      targetPort: http
  selector:
    app.kubernetes.io/component: server
---
# Service "nodeport": publishes the server pods on every node at port 31002.
# It selects pods labelled app.kubernetes.io/component=server and forwards to
# their container port named "http".
apiVersion: v1
kind: Service
metadata:
  name: nodeport
  labels:
    app.kubernetes.io/component: nodeport
spec:
  type: NodePort
  selector:
    app.kubernetes.io/component: server
  ports:
    # The port is named to match the "http" port name used by the "server"
    # Service; names become mandatory once a Service exposes several ports.
    - name: http
      port: 11434
      targetPort: http
      # Must lie inside the cluster's NodePort range (30000-32767 by default).
      nodePort: 31002