1
0
Fork 0
home-stack-kustomize/kustomize/bases/llama-swap/llama-swap-deployment.yaml

78 lines
1.9 KiB
YAML
Raw Normal View History

# Deployment "server": runs a single llama-swap container on an amd64 node,
# with one AMD GPU, a host-local model cache and a ConfigMap-provided config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: server
spec:
  replicas: 1
  # Recreate stops the old pod before the new one starts.
  # NOTE(review): presumably required because the pod claims hostPort 8080 and
  # a whole GPU, which a second pod on the same node could not also hold
  # during a rolling update - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Image only supports amd64
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - amd64
      # Refers to the PriorityClass named "high-priority".
      priorityClassName: high-priority
      containers:
        - name: server
          image: ghcr.io/mostlygeek/llama-swap:vulkan
          # The image is referenced by a tag rather than a digest, so always
          # pull to pick up whatever the tag currently points at.
          imagePullPolicy: Always
          # Configuration file is supplied by the llama-swap-config volume
          # mounted at /config below.
          args: ["--config", "/config/config.yaml"]
          resources:
            requests:
              memory: 4Gi
              cpu: 1000m
            limits:
              memory: 4Gi
              # nvidia.com/gpu: "1"
              amd.com/gpu: "1"
          ports:
            - name: http
              hostPort: 8080
              containerPort: 8080
          # Keep the pod out of the Service endpoints until something is
          # actually accepting connections on the "http" port. A TCP check is
          # used so the probe does not depend on any application URL.
          readinessProbe:
            tcpSocket:
              port: http
            periodSeconds: 10
            failureThreshold: 3
          volumeMounts:
            # Host-backed cache directory (see the server-data volume).
            - name: server-data
              mountPath: /root/.cache/llama.cpp
            - name: llama-swap-config
              mountPath: /config
              readOnly: true
      volumes:
        # Stored on the node's filesystem; the directory is created on the
        # node if it does not exist yet.
        - name: server-data
          hostPath:
            path: /var/lib/llama.cpp
            type: DirectoryOrCreate
        - name: llama-swap-config
          configMap:
            name: llama-swap-config
---
# PriorityClass "high-priority": the class selected by the server
# Deployment's priorityClassName. It is opt-in only (not the default class
# for pods that omit priorityClassName).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
globalDefault: false
value: 1000000
---
# Service "server": exposes port 8080 and forwards it to the container port
# named "http" on pods labelled app.kubernetes.io/component=server.
apiVersion: v1
kind: Service
metadata:
  name: server
  labels:
    app.kubernetes.io/component: server
spec:
  ports:
    - name: http
      # Resolved by port name, so the container port number can change
      # without touching this Service.
      targetPort: http
      port: 8080
  selector:
    app.kubernetes.io/component: server