# home-stack-kustomize/kustomize/bases/ollama/ollama-deployment.yaml
# Ollama server Deployment: one replica, pinned to amd64 nodes, run under the
# "nvidia" RuntimeClass with two NVIDIA GPUs, and persisting its data
# directory on the node via a hostPath volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  # Recreate terminates the existing pod before the replacement is created.
  # NOTE(review): presumably chosen because the hostPort and the GPUs cannot
  # be held by an old and a new pod at the same time - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      priorityClassName: high-priority
      # Requires a RuntimeClass named "nvidia" to exist in the cluster.
      runtimeClassName: nvidia
      containers:
        - name: server
          # NOTE(review): no tag is given, so this resolves to ":latest".
          # Consider pinning a version here or via a kustomize `images:`
          # override so rollouts are reproducible.
          image: ollama/ollama
          env:
            - name: OLLAMA_KEEP_ALIVE
              value: 12h
            # AMD/ROCm settings, kept for reference while the NVIDIA runtime
            # is in use.
            # - name: HSA_OVERRIDE_GFX_VERSION
            #   value: 10.1.0
            # - name: HSA_ENABLE_SDMA
            #   value: "0"
          resources:
            requests:
              memory: 4Gi
              cpu: 2000m
            limits:
              memory: 4Gi
              cpu: 3000m
              nvidia.com/gpu: "2"
              # amd.com/gpu: "1"
          ports:
            # The port name "http" is what the Services' `targetPort: http`
            # resolves against.
            - name: http
              hostPort: 11434
              containerPort: 11434
          volumeMounts:
            - name: server-data
              mountPath: /root/.ollama
      volumes:
        # Data lives on the node's filesystem; the directory is created on
        # first use if it does not exist.
        - name: server-data
          hostPath:
            path: /var/lib/ollama
            type: DirectoryOrCreate
---
# Priority class referenced by the server Deployment's `priorityClassName`.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
# Higher values mean higher scheduling priority.
value: 1000000
# Not the cluster default: only pods that name this class explicitly get it.
globalDefault: false
---
# In-cluster Service for the pods labelled component=server, on port 11434.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 11434
      # Resolved by name against the container port called "http".
      targetPort: http
---
# NodePort Service exposing the component=server pods on port 31002 of every
# node.
apiVersion: v1
kind: Service
metadata:
  name: nodeport
  labels:
    app.kubernetes.io/component: nodeport
spec:
  type: NodePort
  selector:
    app.kubernetes.io/component: server
  ports:
    # Resolved by name against the container port called "http".
    - targetPort: http
      port: 11434
      nodePort: 31002