1# Default values for ollama-helm.
2# This is a YAML-formatted file.
3# Declare variables to be passed into your templates.
9 # -- Enable Knative integration
11 # -- Knative service container concurrency
12 containerConcurrency: 0
13 # -- Knative service timeout seconds
15 # -- Knative service response start timeout seconds
16 responseStartTimeoutSeconds: 300
17 # -- Knative service idle timeout seconds
18 idleTimeoutSeconds: 300
19 # -- Knative service annotations
22 # -- Time to keep completed Knative model bootstrap Jobs before cleanup. Set to null to disable TTL-based cleanup.
23 ttlSecondsAfterFinished: 300
26 # -- Docker image registry
27 repository: cgr.dev/chainguard-private/ollama
28 # -- Docker pull policy
29 pullPolicy: IfNotPresent
30 # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
31 tag: latest@sha256:0ad9cb1ab46bb6adae37a2492ece660068a1bea567d8e4177b3af0a4c680d31b
32# -- Docker registry secret names as an array
34# -- String to partially override template (will maintain the release name)
36# -- String to fully override template
38# -- String to fully override namespace
42 # Port Ollama is listening on
45 # -- Enable GPU integration
47 # -- Enable DRA GPU integration
48 # If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters
50 # -- DRA GPU DriverClass
51 draDriverClass: "gpu.nvidia.com"
52 # -- Existing DRA GPU ResourceClaim Template
53 draExistingClaimTemplate: ""
54 # -- GPU type: 'nvidia' or 'amd'
55 # If 'ollama.gpu.enabled', default value is nvidia
56 # If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden
57 # This is because AMD and CPU/CUDA use different images
59 # -- Specify the number of GPUs
60 # If you use the MIG section below, this parameter is ignored
62 # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice
63 nvidiaResource: "nvidia.com/gpu"
64 # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
65 # If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)
68 # -- Enable multiple mig devices
69 # If enabled you will have to specify the mig devices
70 # If enabled is set to false this section is ignored
72 # -- Specify the mig devices and the corresponding number
77 # -- List of models to pull at container startup
78 # The more you add, the longer the container will take to start if models are not present
83 # -- List of models to load in memory at container startup
88 # -- List of models to create at container startup, there are two options
89 # 1. Create a raw model
90 # 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.
92 # - name: llama3.1-ctx32768
93 # configMapRef: my-configmap
94 # configMapKeyRef: configmap-key
95 # - name: llama3.1-ctx32768
98 # PARAMETER num_ctx 32768
100 # -- Automatically remove models present on the disk but not specified in the values file
102 # -- Add insecure flag for pulling at container startup
104 # -- Override ollama-data volume mount path, default: "/root/.ollama"
107# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
109 # -- Specifies whether a service account should be created
111 # -- Automatically mount a ServiceAccount's API credentials?
113 # -- Annotations to add to the service account
115 # -- The name of the service account to use.
116 # If not set and create is true, a name is generated using the fullname template
118# -- Map of annotations to add to the pods
120# -- Map of labels to add to the pods
122# -- Pod Security Context (defaults to an empty mapping, i.e. nothing is set)
123podSecurityContext: {}
126# -- Priority Class Name
128# -- Container Security Context
133# readOnlyRootFilesystem: true
137# -- Specify runtime class
145 # -- Service node port when service type is 'NodePort'
147 # -- Load Balancer IP address
149 # -- Annotations to add to the service
151 # -- Labels to add to the service
153 # -- IP Families for the service
158 # -- IP Family Policy for the service
163# Configure Deployment
165 # -- Labels to add to the deployment
169 # -- Create Gateway if gateway.enabled = true. Otherwise, httpRoute will need parentRefs to attach to an existing Gateway.
171 # -- Name of the Gateway. Defaults to the chart's full name if empty
173 # -- Name of an existing Gateway Class. Mandatory non-empty field
175 # -- Labels to add to the Gateway
177 # -- Annotations to add to the Gateway
179 # -- Configure the listener
181 # -- Listener network port. May depend on implementation (eg. Traefik)
183 # -- Define which Routes may be attached to this Listener
188 # -- TLS configuration
192 # -- Reference to valid certificate(s)
193 # -- See https://gateway-api.sigs.k8s.io/reference/spec/#secretobjectreference
195 # - name: ollama-certificate
200 # -- Enable HttpRoute
202 # -- Labels to add to the HTTPRoute
204 # -- Hostnames to match for this HTTPRoute
207 # -- References to the existing Gateway(s) this route should attach to.
208 # -- Ignored if gateway.enabled is true. It will automatically attach to the created gateway.
209 # -- See https://gateway-api.sigs.k8s.io/reference/spec/#parentreference
214 # -- Routing rules. If empty, a default rule routing '/' to the Ollama service is created.
215 # -- See https://gateway-api.sigs.k8s.io/reference/spec/#httprouterule
221# Configure the ingress resource that allows you to access the
223 # -- Enable ingress controller resource
225 # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
227 # -- Additional annotations for the Ingress resource.
229 # kubernetes.io/ingress.class: traefik
230 # kubernetes.io/ingress.class: nginx
231 # kubernetes.io/tls-acme: "true"
233 # The list of hostnames to be covered with this ingress record.
239 # -- The tls configuration for hostnames to be covered with this ingress record.
241 # - secretName: chart-example-tls
243 # - chart-example.local
244# Configure resource requests and limits
245# ref: http://kubernetes.io/docs/user-guide/compute-resources/
262# Configure extra options for liveness probe
263# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
265 # -- Enable livenessProbe
267 # -- Request path for livenessProbe
269 # -- Initial delay seconds for livenessProbe
270 initialDelaySeconds: 60
271 # -- Period seconds for livenessProbe
273 # -- Timeout seconds for livenessProbe
275 # -- Failure threshold for livenessProbe
277 # -- Success threshold for livenessProbe
279# Configure extra options for readiness probe
280# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
282 # -- Enable readinessProbe
284 # -- Request path for readinessProbe
286 # -- Initial delay seconds for readinessProbe
287 initialDelaySeconds: 30
288 # -- Period seconds for readinessProbe
290 # -- Timeout seconds for readinessProbe
292 # -- Failure threshold for readinessProbe
294 # -- Success threshold for readinessProbe
296# Configure autoscaling
298 # -- Enable autoscaling
300 # -- Number of minimum replicas
302 # -- Number of maximum replicas
304 # -- Target CPU utilization percentage for autoscaling
305 targetCPUUtilizationPercentage: 80
306 # targetMemoryUtilizationPercentage: 80
307# -- Additional volumes on the output Deployment definition.
311# secretName: mysecret
314# -- Additional volumeMounts on the output Deployment definition.
317# mountPath: "/etc/foo"
320# -- Additional arguments on the output Deployment definition.
322# -- Additional environment variables on the output Deployment definition.
323# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
325# - name: OLLAMA_DEBUG
328# -- Additional environment variables from external sources (like ConfigMap)
331# name: my-env-configmap
333# Enable persistence using Persistent Volume Claims
334# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
336 # -- Enable persistence using PVC
338 # -- Ollama server data Persistent Volume access modes
339 # Must match those of existing PV or dynamic provisioner
340 # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
343 # -- Ollama server data Persistent Volume annotations
345 # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
346 # created + ready PVC here. If set, this Chart will not create the default PVC.
347 # Requires server.persistentVolume.enabled: true
349 # -- Ollama server data Persistent Volume size
351 # -- Ollama server data Persistent Volume Storage Class
352 # If defined, storageClassName: <storageClass>
353 # If set to "-", storageClassName: "", which disables dynamic provisioning
354 # If undefined (the default) or set to null, no storageClassName spec is
355 # set, choosing the default provisioner. (gp2 on AWS, standard on
356 # GKE, AWS & OpenStack)
358 # -- Ollama server data Persistent Volume mode
359 # If defined, volumeMode: <volumeMode>
360 # If empty (the default) or set to null, no volumeMode spec is
361 # set, choosing the default mode.
363 # -- Subdirectory of Ollama server data Persistent Volume to mount
364 # Useful if the volume's root directory is not empty
366 # -- Pre-existing PV to attach this claim to
367 # Useful if a CSI auto-provisions a PV for you and you want to always
368 # reference the PV moving forward
370# -- Node labels for pod assignment.
372# -- Tolerations for pod assignment
374# -- Affinity for pod assignment
376# -- Lifecycle for pod assignment (override ollama.models startup pull/run)
378# How to replace existing pods
380 # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
382# -- Topology Spread Constraints for pod assignment (a list of constraint objects)
383topologySpreadConstraints: []
384# -- Termination grace period in seconds (presumably applied to the pod spec; confirm in the templates)
385terminationGracePeriodSeconds: 120
386# -- Init containers to add to the pod
388# - name: startup-tool
394# -- Use the host's IPC namespace.
396# -- Use the host's PID namespace.
398# -- Use the host's network namespace.
400# -- Extra K8s manifests to deploy
403# kind: PersistentVolume
408# - apiVersion: scheduling.k8s.io/v1
413# globalDefault: false
414# description: "This priority class should be used for XYZ service pods only."
416# Test connection pods
419 # -- Labels to add to the tests
421 # -- Annotations to add to the tests