DirectorySecurity AdvisoriesPricing
Sign in
Directory
kuberay-operator logoHELM

kuberay-operator

Helm chart
Last changed
Request a free trial

Contact our team to test out this Helm chart and related images for free. Please also indicate any other images you would like to evaluate.

Overview
Chart versions
Default values
Chart metadata
Images

Tag:

1
# Default values for kuberay-operator.
2
# This is a YAML-formatted file.
3
# Declare variables to be passed into your templates.
4
5
# -- String to partially override release name.
6
nameOverride: kuberay-operator
7
# -- String to fully override release name.
8
fullnameOverride: kuberay-operator
9
# -- String to override component name.
10
componentOverride: kuberay-operator
11
# -- Number of replicas for the KubeRay operator Deployment.
12
replicas: 1
13
image:
14
# -- Image repository.
15
repository: cgr.dev/chainguard-private/kuberay-operator-fips
16
# -- Image tag.
17
tag: latest@sha256:b4101b949fd66a1cf8847d4b0c935e090370bde5d9fd5f85d1a1b2cf399433b6
18
# -- Image pull policy.
19
pullPolicy: IfNotPresent
20
# -- Secrets with credentials to pull images from a private registry
21
imagePullSecrets: []
22
# -- Restrict to run on particular nodes.
23
nodeSelector: {}
24
# -- Pod priorityClassName
25
priorityClassName: ""
26
# -- Extra labels.
27
labels: {}
28
# -- Extra annotations.
29
annotations: {}
30
# -- Pod affinity
31
affinity: {}
32
# -- Pod tolerations
33
tolerations: []
34
serviceAccount:
35
# -- Specifies whether a service account should be created.
36
create: true
37
# -- The name of the service account to use.
38
# If not set and create is true, a name is generated using the fullname template.
39
name: kuberay-operator
40
logging:
41
# -- Log encoder to use for stdout (one of `json` or `console`).
42
stdoutEncoder: json
43
# -- Log encoder to use for file logging (one of `json` or `console`).
44
fileEncoder: json
45
# -- Directory for kuberay-operator log file.
46
baseDir: ""
47
# -- File name for kuberay-operator log file.
48
fileName: ""
49
# -- EmptyDir volume size limit for kuberay-operator log file.
50
sizeLimit: ""
51
# Enable customized Kubernetes scheduler integration. If enabled, Ray workloads will be scheduled
52
# by the customized scheduler.
53
# * "enabled" is the legacy option and will be deprecated soon.
54
# * "name" is the standard option, expecting a scheduler name, supported values are
55
# "default", "volcano", "yunikorn", and "scheduler-plugins".
56
#
57
# Note: "enabled" and "name" should not be set at the same time. If both are set, an error will be thrown.
58
#
59
# Examples:
60
# 1. Use volcano (deprecated)
61
# batchScheduler:
62
# enabled: true
63
#
64
# 2. Use volcano
65
# batchScheduler:
66
# name: volcano
67
#
68
# 3. Use yunikorn
69
# batchScheduler:
70
# name: yunikorn
71
#
72
# 4. Use PodGroup
73
# batchScheduler:
74
# name: scheduler-plugins
75
76
# 5. Use Kai Scheduler
77
# batchScheduler:
78
# name: kai-scheduler
79
batchScheduler:
80
# Deprecated. This option will be removed in the future.
81
# Note, for backwards compatibility. When it is set to true, it enables volcano scheduler integration.
82
enabled: false
83
# Set the customized scheduler name, supported values are "volcano", "yunikorn", "kai-scheduler" or "scheduler-plugins"; do not set
84
# "batchScheduler.enabled=true" at the same time as it will override this option.
85
name: ""
86
# Configuration for the KubeRay operator.
87
configuration:
88
# -- Whether to enable the configuration feature. If enabled, a ConfigMap will be created and mounted to the operator.
89
# When enabled, flag-based configuration values (leaderElectionEnabled, metrics.enabled, kubeClient.qps, etc.)
90
# will be injected into the ConfigMap. The operator will use the ConfigMap and ignore command-line flags.
91
enabled: false
92
# -- Default environment variables to inject into all Ray containers in all RayCluster CRs.
93
# This allows user to set feature flags across all Ray pods.
94
# Example:
95
# defaultContainerEnvs:
96
# - name: RAY_enable_open_telemetry
97
# value: "true"
98
# - name: RAY_metric_cardinality_level
99
# value: "recommended"
100
defaultContainerEnvs: []
101
# -- Sidecar containers to inject into every Ray head pod.
102
# Example:
103
# headSidecarContainers:
104
# - name: fluentbit
105
# image: fluent/fluent-bit:1.9
106
headSidecarContainers: []
107
# -- Sidecar containers to inject into every Ray worker pod.
108
# Example:
109
# workerSidecarContainers:
110
# - name: fluentbit
111
# image: fluent/fluent-bit:1.9
112
workerSidecarContainers: []
113
featureGates:
114
- name: RayClusterStatusConditions
115
enabled: true
116
- name: RayJobDeletionPolicy
117
enabled: true
118
- name: RayMultiHostIndexing
119
enabled: true
120
- name: RayServiceIncrementalUpgrade
121
enabled: false
122
- name: RayCronJob
123
enabled: false
124
# Configurations for KubeRay operator metrics.
125
metrics:
126
# -- Whether KubeRay operator should emit control plane metrics.
127
enabled: true
128
serviceMonitor:
129
# -- Enable a prometheus ServiceMonitor
130
enabled: false
131
# -- Prometheus ServiceMonitor interval
132
interval: 30s
133
# -- When true, honorLabels preserves the metric's labels when they collide with the target's labels.
134
honorLabels: true
135
# -- Prometheus ServiceMonitor selector
136
selector: {}
137
# release: prometheus
138
# -- Prometheus ServiceMonitor namespace
139
namespace: "" # "monitoring"
140
# -- Path to the operator binary
141
operatorCommand: /manager
142
# if useKubernetesProxy is set to true, the KubeRay operator will be configured with the --use-kubernetes-proxy flag.
143
# Use this option to configure kuberay-operator to communicate with Ray head pods by proxying through the Kubernetes API Server.
144
# useKubernetesProxy: true
145
146
# -- If leaderElectionEnabled is set to true, the KubeRay operator will use leader election for high availability.
147
leaderElectionEnabled: true
148
# -- The maximum number of reconcile operations that can be performed simultaneously.
149
# This setting controls the concurrency of the controller reconciliation loops.
150
# Higher values can improve throughput in clusters with many resources, but may increase resource consumption.
151
reconcileConcurrency: 1
152
# -- Kube Client configuration for QPS and burst settings.
153
# This setting controls the QPS and burst rate of the kube client when sending requests to the Kubernetes API server.
154
# If the QPS and burst values are too low, we may easily hit rate limits on the API server and slow down the controller reconciliation loops.
155
kubeClient:
156
# -- The QPS value for the client communicating with the Kubernetes API server.
157
# Must be a float number.
158
qps: 100.0
159
# -- The maximum burst for throttling requests from this client to the Kubernetes API server.
160
# Must be a non-negative integer.
161
burst: 200
162
# -- If rbacEnable is set to false, no RBAC resources will be created, including the Role for leader election, the Role for Pods and Services, and so on.
163
rbacEnable: true
164
# -- When crNamespacedRbacEnable is set to true, the KubeRay operator will create a Role for RayCluster preparation (e.g., Pods, Services)
165
# and a corresponding RoleBinding for each namespace listed in the "watchNamespace" parameter. Please note that even if crNamespacedRbacEnable
166
# is set to false, the Role and RoleBinding for leader election will still be created.
167
#
168
# Note:
169
# (1) This variable is only effective when rbacEnable and singleNamespaceInstall are both set to true.
170
# (2) In most cases, it should be set to true, unless you are using a Kubernetes cluster managed by GitOps tools such as ArgoCD.
171
crNamespacedRbacEnable: true
172
# -- When singleNamespaceInstall is true:
173
# - Install namespaced RBAC resources such as Role and RoleBinding instead of cluster-scoped ones like ClusterRole and ClusterRoleBinding so that
174
# the chart can be installed by users with permissions restricted to a single namespace.
175
# (Please note that this excludes the CRDs, which can only be installed at the cluster scope.)
176
# - If "watchNamespace" is not set, the KubeRay operator will, by default, only listen
177
# to resource events within its own namespace.
178
singleNamespaceInstall: false
179
# The KubeRay operator will watch the custom resources in the namespaces listed in the "watchNamespace" parameter.
180
# watchNamespace:
181
# - n1
182
# - n2
183
184
# -- Environment variables.
185
env:
186
# If not set or set to true, kuberay auto injects an init container waiting for ray GCS.
187
# If false, you will need to inject your own init container to ensure ray GCS is up before the ray workers start.
188
# Warning: we highly recommend setting this to true and letting kuberay handle it for you.
189
# - name: ENABLE_INIT_CONTAINER_INJECTION
190
# value: "true"
191
# If set to true, kuberay creates a normal ClusterIP service for a Ray Head instead of a Headless service. Default to false.
192
# - name: ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE
193
# value: "false"
194
# If not set or set to "", kuberay will pick up the default k8s cluster domain `cluster.local`
195
# Otherwise, kuberay will use your custom domain
196
# - name: CLUSTER_DOMAIN
197
# value: ""
198
# If not set or set to false, when running on OpenShift with Ingress creation enabled, kuberay will create OpenShift route
199
# Otherwise, regardless of the type of cluster with Ingress creation enabled, kuberay will create Ingress
200
# - name: USE_INGRESS_ON_OPENSHIFT
201
# value: "true"
202
# Unconditionally requeue after the number of seconds specified in the
203
# environment variable RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV. If the
204
# environment variable is not set, requeue after the default value (300).
205
# - name: RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV
206
# value: 300
207
# If not set or set to "true", KubeRay will clean up the Redis storage namespace when a GCS FT-enabled RayCluster is deleted.
208
# - name: ENABLE_GCS_FT_REDIS_CLEANUP
209
# value: "true"
210
# For LLM serving, some users might not have sufficient GPU resources to run two RayClusters simultaneously.
211
# Therefore, KubeRay offers ENABLE_ZERO_DOWNTIME as a feature flag for zero-downtime upgrades.
212
# - name: ENABLE_ZERO_DOWNTIME
213
# value: "true"
214
# This environment variable for the KubeRay operator is used to determine whether to enable
215
# the injection of readiness and liveness probes into Ray head and worker containers.
216
# Enabling this feature contributes to the robustness of Ray clusters.
217
# - name: ENABLE_PROBES_INJECTION
218
# value: "true"
219
# If set to true, the RayJob CR itself will be deleted if shutdownAfterJobFinishes is set to true. Note that all resources created by the RayJob CR will be deleted, including the K8s Job. Otherwise, only the RayCluster CR will be deleted. Default is false.
220
# - name: DELETE_RAYJOB_CR_AFTER_JOB_FINISHES
221
# value: "false"
222
# If set to true, we will use deterministic name for head pod. Otherwise, the non-deterministic name is used.
223
# - name: ENABLE_DETERMINISTIC_HEAD_POD_NAME
224
# value: "false"
225
# This environment variable determines whether to enable a login shell by passing the -l option to the container command /bin/bash.
226
# The -l flag was added by default before KubeRay v1.4.0, but it is no longer added by default starting with v1.4.0.
227
# - name: ENABLE_LOGIN_SHELL
228
# value: "true"
229
# This KubeRay operator environment variable is used to determine if random Pod
230
# deletion should be enabled. Note that this only takes effect when autoscaling
231
# is enabled for the RayCluster.
232
# - name: ENABLE_RANDOM_POD_DELETE
233
# value: "false"
234
# If JobDeploymentStatus does not transition to Complete or Failed within
235
# this grace period seconds after JobStatus reaches a terminal state,
236
# KubeRay will update JobDeploymentStatus directly.
237
# - name: RAYJOB_DEPLOYMENT_STATUS_TRANSITION_GRACE_PERIOD_SECONDS
238
# value: "300"
239
240
# -- Resource requests and limits for containers.
241
resources:
242
limits:
243
cpu: 100m
244
# Anecdotally, managing 500 Ray pods requires roughly 500MB memory.
245
# Monitor memory usage and adjust as needed.
246
memory: 512Mi
247
# requests:
248
# cpu: 100m
249
# memory: 512Mi
250
# @Ignore -- Pod liveness probe configuration.
251
livenessProbe:
252
initialDelaySeconds: 10
253
periodSeconds: 5
254
failureThreshold: 5
255
# @Ignore -- Pod readiness probe configuration.
256
readinessProbe:
257
initialDelaySeconds: 10
258
periodSeconds: 5
259
failureThreshold: 5
260
# -- Set up `securityContext` to improve Pod security.
261
podSecurityContext: {}
262
# @ignore -- Set up `securityContext` to improve container security.
263
securityContext:
264
allowPrivilegeEscalation: false
265
readOnlyRootFilesystem: true
266
capabilities:
267
drop:
268
- ALL
269
runAsNonRoot: true
270
seccompProfile:
271
type: RuntimeDefault
272
service:
273
# -- Service type.
274
type: ClusterIP
275
# -- Service port.
276
port: 8080
277

The trusted source for open source

Talk to an expert
PrivacyTerms

Product

Chainguard ContainersChainguard LibrariesChainguard VMsChainguard OS PackagesChainguard ActionsChainguard Agent SkillsIntegrationsPricing
© 2026 Chainguard, Inc. All Rights Reserved.
Chainguard® and the Chainguard logo are registered trademarks of Chainguard, Inc. in the United States and/or other countries.
The other respective trademarks mentioned on this page are owned by the respective companies and use of them does not imply any affiliation or endorsement.