---
# Single-replica llama-swap server. Recreate strategy avoids two pods
# contending for the same hostPath cache / GPU during a rollout.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      priorityClassName: high-priority
      containers:
        - name: server
          image: ghcr.io/mostlygeek/llama-swap:vulkan
          imagePullPolicy: Always
          args: ["--config", "/config/config.yaml"]
          resources:
            requests:
              memory: 4Gi
              cpu: 1000m
            limits:
              memory: 4Gi
              # nvidia.com/gpu: "1"
              amd.com/gpu: "1"
          ports:
            - name: http
              # hostPort exposes 8080 directly on the node's network
              # (single-replica only — two pods can't share the port).
              hostPort: 8080
              containerPort: 8080
          volumeMounts:
            - name: server-data
              mountPath: /root/.cache/llama.cpp
            - name: llama-swap-config
              mountPath: /config
      volumes:
        # Model cache persists on the node across pod restarts.
        - name: server-data
          hostPath:
            path: /var/lib/llama.cpp
            type: DirectoryOrCreate
        - name: llama-swap-config
          configMap:
            name: llama-swap-config
---
# High-but-not-default priority so the server preempts lower-priority
# pods for node resources; referenced by the Deployment's
# priorityClassName.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
value: 1000000
globalDefault: false
---
# ClusterIP (default type) service routing port 8080 to the pod's
# named "http" container port.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  selector:
    app.kubernetes.io/component: server
  ports:
    - name: http
      port: 8080
      targetPort: http