# Extracted from a file listing (original file: 79 lines, 1.8 KiB, YAML).
# Deployment "server": runs a single ollama/ollama pod on an amd64 node using
# the "nvidia" runtime class, with model data kept on the node via hostPath.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    # Recreate terminates the existing pod before starting its replacement.
    # NOTE(review): presumably chosen because the pod holds the node's GPU and
    # the hostPath data directory, so two copies cannot overlap - confirm.
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      # Refers to the PriorityClass named "high-priority" declared in this file.
      priorityClassName: high-priority
      # A RuntimeClass named "nvidia" is not defined in this file and must
      # already exist in the cluster.
      runtimeClassName: nvidia
      containers:
        - name: server
          # NOTE(review): no tag is given, so this resolves to ":latest";
          # consider pinning a version for reproducible rollouts.
          image: ollama/ollama
          # image: badjware/ollama-tweak
          env:
            # Ollama setting for how long a model stays loaded after use.
            - name: OLLAMA_KEEP_ALIVE
              value: 36h
            # Disabled overrides; presumably for AMD/ROCm GPUs - confirm before
            # re-enabling.
            # - name: HSA_OVERRIDE_GFX_VERSION
            #   value: 10.1.0
            # - name: HSA_ENABLE_SDMA
            #   value: "0"
          resources:
            requests:
              memory: 4Gi
              cpu: 2000m
            limits:
              # Memory limit equals the request; CPU may burst up to 3 cores.
              memory: 4Gi
              cpu: 3000m
              nvidia.com/gpu: 1
          ports:
            # Named port; the Service in this file targets it by name ("http").
            - containerPort: 11434
              name: http
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        # Node-local storage: data lives on whichever node runs the pod.
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            # Creates the directory on the node if it does not already exist.
            type: DirectoryOrCreate
---
# PriorityClass "high-priority": referenced by the "server" Deployment in this
# file through priorityClassName.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
# Higher values mean higher scheduling priority.
value: 1000000
# Not the cluster default: only pods that name this class explicitly get it.
globalDefault: false
---
# Service "server": exposes port 11434 of the pods labelled
# app.kubernetes.io/component=server. No spec.type is set, so the Kubernetes
# default Service type applies.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 11434
      # Resolved by name against the container port called "http".
      targetPort: http