ollama

Helm chart

Last changed

Request a free trial

Contact our team to test out this Helm chart and related images for free. Please also indicate any other images you would like to evaluate.

Tag:

# Default values for ollama-helm.

# This is a YAML-formatted file.

# Declare variables to be passed into your templates.

# -- Number of replicas

replicaCount: 1

# Knative configuration

knative:

# -- Enable Knative integration

enabled: false

# -- Knative service container concurrency

containerConcurrency: 0

# -- Knative service timeout seconds

timeoutSeconds: 300

# -- Knative service response start timeout seconds

responseStartTimeoutSeconds: 300

# -- Knative service idle timeout seconds

idleTimeoutSeconds: 300

# -- Knative service annotations

annotations: {}

modelBootstrap:

# -- Time to keep completed Knative model bootstrap Jobs before cleanup. Set to null to disable TTL-based cleanup.

ttlSecondsAfterFinished: 300

# Docker image

image:

# -- Docker image registry

repository: cgr.dev/chainguard-private/ollama

# -- Docker pull policy

pullPolicy: IfNotPresent

# -- Docker image tag, overrides the image tag whose default is the chart appVersion.

tag: latest@sha256:0ad9cb1ab46bb6adae37a2492ece660068a1bea567d8e4177b3af0a4c680d31b

# -- Docker registry secret names as an array

imagePullSecrets: []

# -- String to partially override template (will maintain the release name)

nameOverride: ""

# -- String to fully override template

fullnameOverride: ""

# -- String to fully override namespace

namespaceOverride: ""

# Ollama parameters

ollama:

# Port Ollama is listening on

port: 11434

gpu:

# -- Enable GPU integration

enabled: false

# -- Enable DRA GPU integration

# If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters

draEnabled: false

# -- DRA GPU DriverClass

draDriverClass: "gpu.nvidia.com"

# -- Existing DRA GPU ResourceClaim Template

draExistingClaimTemplate: ""

# -- GPU type: 'nvidia' or 'amd'

# If 'ollama.gpu.enabled', default value is nvidia

# If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden

# This is because AMD and CPU/CUDA use different images

type: 'nvidia'

# -- Specify the number of GPUs

# If you use MIG section below then this parameter is ignored

number: 1

# -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice

nvidiaResource: "nvidia.com/gpu"

# nvidiaResource: "nvidia.com/mig-1g.10gb" # example

# If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)

mig:

# -- Enable multiple mig devices

# If enabled you will have to specify the mig devices

# If enabled is set to false this section is ignored

enabled: false

# -- Specify the mig devices and the corresponding number

devices: {}

# 1g.10gb: 1

# 3g.40gb: 1

models:

# -- List of models to pull at container startup

# The more you add, the longer the container will take to start if models are not present

# pull:

# - llama2

# - mistral

pull: []

# -- List of models to load in memory at container startup

# run:

# - llama2

# - mistral

run: []

# -- List of models to create at container startup, there are two options

# 1. Create a raw model

# 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.

# create:

# - name: llama3.1-ctx32768

# configMapRef: my-configmap

# configMapKeyRef: configmap-key

# - name: llama3.1-ctx32768

# template: |

# FROM llama3.1

# PARAMETER num_ctx 32768

create: []

100

# -- Automatically remove models present on the disk but not specified in the values file

101

clean: false

102

# -- Add insecure flag for pulling at container startup

103

insecure: false

104

# -- Override ollama-data volume mount path, default: "/root/.ollama"

105

mountPath: ""

106

# Service account

107

# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/

108

serviceAccount:

109

# -- Specifies whether a service account should be created

110

create: true

111

# -- Automatically mount a ServiceAccount's API credentials?

112

automount: true

113

# -- Annotations to add to the service account

114

annotations: {}

115

# -- The name of the service account to use.

116

# If not set and create is true, a name is generated using the fullname template

117

name: ""

118

# -- Map of annotations to add to the pods

119

podAnnotations: {}

120

# -- Map of labels to add to the pods

121

podLabels: {}

122

# -- Pod Security Context

123

podSecurityContext: {}

124

# fsGroup: 2000

125

126

# -- Priority Class Name

127

priorityClassName: ""

128

# -- Container Security Context

129

securityContext: {}

130

# capabilities:

131

# drop:

132

# - ALL

133

# readOnlyRootFilesystem: true

134

# runAsNonRoot: true

135

# runAsUser: 1000

136

137

# -- Specify runtime class

138

runtimeClassName: ""

139

# Configure Service

140

service:

141

# -- Service type

142

type: ClusterIP

143

# -- Service port

144

port: 11434

145

# -- Service node port when service type is 'NodePort'

146

nodePort: 31434

147

# -- Load Balancer IP address

148

loadBalancerIP:

149

# -- Annotations to add to the service

150

annotations: {}

151

# -- Labels to add to the service

152

labels: {}

153

# -- IP Families for the service

154

ipFamilies: []

155

# - IPv4

156

# - IPv6

157

158

# -- IP Family Policy for the service

159

ipFamilyPolicy: ""

160

# SingleStack

161

# PreferDualStack

162

# RequireDualStack

163

# Configure Deployment

164

deployment:

165

# -- Labels to add to the deployment

166

labels: {}

167

# Configure Gateway

168

gateway:

169

# -- Create Gateway if gateway.enabled = true. Otherwise, httpRoute will need parentRefs to attach to an existing Gateway.

170

enabled: false

171

# -- Name of the Gateway. Defaults to the chart's full name if empty

172

name: ""

173

# -- Name of an existing Gateway Class. Mandatory non-empty field

174

className: ""

175

# -- Labels to add to the Gateway

176

labels: {}

177

# -- Annotations to add to the Gateway

178

annotations: {}

179

# -- Configure the listener

180

listener:

181

# -- Listener network port. May depend on implementation (eg. Traefik)

182

port: 80

183

# -- Define which Routes may be attached to this Listener

184

allowedRoutes: {}

185

# namespaces:

186

# from: Same

187

188

# -- TLS configuration

189

tls:

190

# -- Enable TLS

191

enabled: false

192

# -- Reference to valid certificate(s)

193

# -- See https://gateway-api.sigs.k8s.io/reference/spec/#secretobjectreference

194

certificateRefs: []

195

# - name: ollama-certificate

196

# kind: Secret

197

# group: ""

198

# Configure HTTPRoute

199

httpRoute:

200

# -- Enable HttpRoute

201

enabled: false

202

# -- Labels to add to the HTTPRoute

203

labels: {}

204

# -- Hostnames to match for this HTTPRoute

205

hostnames:

206

- ollama.local

207

# -- References to the existing Gateway(s) this route should attach to.

208

# -- Ignored if gateway.enabled is true. It will automatically attach to the created gateway.

209

# -- See https://gateway-api.sigs.k8s.io/reference/spec/#parentreference

210

parentRefs: []

211

# - name: ollama

212

# namespace: default

213

214

# -- Routing rules. If empty, a default rule routing '/' to the Ollama service is created.

215

# -- See https://gateway-api.sigs.k8s.io/reference/spec/#httprouterule

216

rules: []

217

# - matches:

218

# - path:

219

# type: PathPrefix

220

# value: /api

221

# Configure the ingress resource that allows you to access the Ollama service

222

ingress:

223

# -- Enable ingress controller resource

224

enabled: false

225

# -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)

226

className: ""

227

# -- Additional annotations for the Ingress resource.

228

annotations: {}

229

# kubernetes.io/ingress.class: traefik

230

# kubernetes.io/ingress.class: nginx

231

# kubernetes.io/tls-acme: "true"

232

233

# The list of hostnames to be covered with this ingress record.

234

hosts:

235

- host: ollama.local

236

paths:

237

- path: /

238

pathType: Prefix

239

# -- The tls configuration for hostnames to be covered with this ingress record.

240

tls: []

241

# - secretName: chart-example-tls

242

# hosts:

243

# - chart-example.local

244

# Configure resource requests and limits

245

# ref: http://kubernetes.io/docs/user-guide/compute-resources/

246

resources:

247

# -- Pod requests

248

requests: {}

249

# Memory request

250

# memory: 4096Mi

251

252

# CPU request

253

# cpu: 2000m

254

255

# -- Pod limit

256

limits: {}

257

# Memory limit

258

# memory: 8192Mi

259

# CPU limit

260

# cpu: 4000m

261

262

# Configure extra options for liveness probe

263

# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes

264

livenessProbe:

265

# -- Enable livenessProbe

266

enabled: true

267

# -- Request path for livenessProbe

268

path: /

269

# -- Initial delay seconds for livenessProbe

270

initialDelaySeconds: 60

271

# -- Period seconds for livenessProbe

272

periodSeconds: 10

273

# -- Timeout seconds for livenessProbe

274

timeoutSeconds: 5

275

# -- Failure threshold for livenessProbe

276

failureThreshold: 6

277

# -- Success threshold for livenessProbe

278

successThreshold: 1

279

# Configure extra options for readiness probe

280

# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes

281

readinessProbe:

282

# -- Enable readinessProbe

283

enabled: true

284

# -- Request path for readinessProbe

285

path: /

286

# -- Initial delay seconds for readinessProbe

287

initialDelaySeconds: 30

288

# -- Period seconds for readinessProbe

289

periodSeconds: 5

290

# -- Timeout seconds for readinessProbe

291

timeoutSeconds: 3

292

# -- Failure threshold for readinessProbe

293

failureThreshold: 6

294

# -- Success threshold for readinessProbe

295

successThreshold: 1

296

# Configure autoscaling

297

autoscaling:

298

# -- Enable autoscaling

299

enabled: false

300

# -- Number of minimum replicas

301

minReplicas: 1

302

# -- Number of maximum replicas

303

maxReplicas: 100

304

# -- CPU usage to target replica

305

targetCPUUtilizationPercentage: 80

306

# -- targetMemoryUtilizationPercentage: 80

307

# -- Additional volumes on the output Deployment definition.

308

volumes: []

309

# -- - name: foo

310

# secret:

311

# secretName: mysecret

312

# optional: false

313

314

# -- Additional volumeMounts on the output Deployment definition.

315

volumeMounts: []

316

# -- - name: foo

317

# mountPath: "/etc/foo"

318

# readOnly: true

319

320

# -- Additional arguments on the output Deployment definition.

321

extraArgs: []

322

# -- Additional environment variables on the output Deployment definition.

323

# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go

324

extraEnv: []

325

# - name: OLLAMA_DEBUG

326

# value: "1"

327

328

# -- Additional environment variables from external sources (like ConfigMap)

329

extraEnvFrom: []

330

# - configMapRef:

331

# name: my-env-configmap

332

333

# Enable persistence using Persistent Volume Claims

334

# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/

335

persistentVolume:

336

# -- Enable persistence using PVC

337

enabled: false

338

# -- Ollama server data Persistent Volume access modes

339

# Must match those of existing PV or dynamic provisioner

340

# Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/

341

accessModes:

342

- ReadWriteOnce

343

# -- Ollama server data Persistent Volume annotations

344

annotations: {}

345

# -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the

346

# created + ready PVC here. If set, this Chart will not create the default PVC.

347

# Requires persistentVolume.enabled: true

348

existingClaim: ""

349

# -- Ollama server data Persistent Volume size

350

size: 30Gi

351

# -- Ollama server data Persistent Volume Storage Class

352

# If defined, storageClassName: <storageClass>

353

# If set to "-", storageClassName: "", which disables dynamic provisioning

354

# If undefined (the default) or set to null, no storageClassName spec is

355

# set, choosing the default provisioner. (gp2 on AWS, standard on

356

# GKE, AWS & OpenStack)

357

storageClass: ""

358

# -- Ollama server data Persistent Volume Mode

359

# If defined, volumeMode: <volumeMode>

360

# If empty (the default) or set to null, no volumeMode spec is

361

# set, choosing the default mode.

362

volumeMode: ""

363

# -- Subdirectory of Ollama server data Persistent Volume to mount

364

# Useful if the volume's root directory is not empty

365

subPath: ""

366

# -- Pre-existing PV to attach this claim to

367

# Useful if a CSI auto-provisions a PV for you and you want to always

368

# reference the PV moving forward

369

volumeName: ""

370

# -- Node labels for pod assignment.

371

nodeSelector: {}

372

# -- Tolerations for pod assignment

373

tolerations: []

374

# -- Affinity for pod assignment

375

affinity: {}

376

# -- Lifecycle for pod assignment (override ollama.models startup pull/run)

377

lifecycle: {}

378

# How to replace existing pods

379

updateStrategy:

380

# -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate

381

type: "Recreate"

382

# -- Topology Spread Constraints for pod assignment

383

topologySpreadConstraints: {}

384

# -- Wait for a grace period

385

terminationGracePeriodSeconds: 120

386

# -- Init containers to add to the pod

387

initContainers: []

388

# - name: startup-tool

389

# image: alpine:3

390

# command: [sh, -c]

391

# args:

392

# - echo init

393

394

# -- Use the host’s ipc namespace.

395

hostIPC: false

396

# -- Use the host’s pid namespace

397

hostPID: false

398

# -- Use the host's network namespace.

399

hostNetwork: false

400

# -- Extra K8s manifests to deploy

401

extraObjects: []

402

# - apiVersion: v1

403

# kind: PersistentVolume

404

# metadata:

405

# name: aws-efs

406

# data:

407

# key: "value"

408

# - apiVersion: scheduling.k8s.io/v1

409

# kind: PriorityClass

410

# metadata:

411

# name: high-priority

412

# value: 1000000

413

# globalDefault: false

414

# description: "This priority class should be used for XYZ service pods only."

415

416

# Test connection pods

417

tests:

418

enabled: true

419

# -- Labels to add to the tests

420

labels: {}

421

# -- Annotations to add to the tests

422

annotations: {}

423

podSchedulerName: ""

424

The trusted source for open source

Talk to an expert

Privacy

Terms

© 2026 Chainguard, Inc. All Rights Reserved.
Chainguard® and the Chainguard logo are registered trademarks of Chainguard, Inc. in the United States and/or other countries.
The other respective trademarks mentioned on this page are owned by the respective companies and use of them does not imply any affiliation or endorsement.

ollama

The trusted source for open source

Product

Solutions

Customers

Resources

Company