# Listing metadata from the original file view: 77 lines, 1.8 KiB, YAML
---
# Deployment running the Ollama server image (ROCm build) as a single pod.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  # Recreate stops the old pod before starting the new one.
  # NOTE(review): presumably chosen because the pod claims the GPU and a
  # hostPath directory that two pods could not share during a rolling
  # update - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      # Refers to a PriorityClass named high-priority, which must exist in
      # the cluster.
      priorityClassName: high-priority
      containers:
        - name: server
          imagePullPolicy: Always
          image: ollama/ollama:rocm
          # image: badjware/ollama-tweak
          # env:
          #   - name: HSA_OVERRIDE_GFX_VERSION
          #     value: "10.1.0"
          #   - name: HSA_ENABLE_SDMA
          #     value: "0"
          resources:
            requests:
              memory: 8Gi
              # cpu: 2000m
            limits:
              memory: 8Gi
              # cpu: 3000m
              # Extended resources such as GPUs are declared under limits;
              # Kubernetes uses the limit as the request when none is given.
              amd.com/gpu: 1
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        # Data lives on the node's filesystem; the directory is created on
        # the node if it does not exist yet.
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
---
# Cluster-scoped priority class. Pods opt in by setting
# priorityClassName: high-priority; it is not applied by default
# (globalDefault is false).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
value: 1000000
globalDefault: false
---
# Service exposing pods labelled app.kubernetes.io/component=server on
# port 11434.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 11434
      # Resolved by name against the container port called "http" on the
      # selected pods.
      targetPort: http