From 43abc2f75ab543cc9dff22cbd3e22f37b03b4aa2 Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:31:02 -0700 Subject: [PATCH 01/37] support agent-sandbox service (#281) Made-with: Cursor --- charts/retool/Chart.lock | 4 +- charts/retool/Chart.yaml | 4 +- charts/retool/files/gvisor-seccomp.json | 447 +++++++++++ charts/retool/templates/_helpers.tpl | 166 ++++ charts/retool/templates/_workers.tpl | 22 +- .../agent_sandbox_device_plugin.yaml | 91 +++ .../agent_sandbox_networkpolicy.yaml | 216 ++++++ .../templates/agent_sandbox_prepuller.yaml | 84 +++ .../templates/agent_sandbox_seccomp.yaml | 91 +++ .../templates/deployment_agent_sandbox.yaml | 714 ++++++++++++++++++ .../retool/templates/deployment_backend.yaml | 1 + .../templates/deployment_workflows.yaml | 1 + charts/retool/values.yaml | 249 ++++++ values.yaml | 249 ++++++ 14 files changed, 2331 insertions(+), 8 deletions(-) create mode 100644 charts/retool/files/gvisor-seccomp.json create mode 100644 charts/retool/templates/agent_sandbox_device_plugin.yaml create mode 100644 charts/retool/templates/agent_sandbox_networkpolicy.yaml create mode 100644 charts/retool/templates/agent_sandbox_prepuller.yaml create mode 100644 charts/retool/templates/agent_sandbox_seccomp.yaml create mode 100644 charts/retool/templates/deployment_agent_sandbox.yaml diff --git a/charts/retool/Chart.lock b/charts/retool/Chart.lock index 832b6f5..7a8aeec 100644 --- a/charts/retool/Chart.lock +++ b/charts/retool/Chart.lock @@ -5,5 +5,5 @@ dependencies: - name: retool-temporal-services-helm repository: "" version: 1.1.5 -digest: sha256:6b027cb2d661c436127fe34c4a5e14c820c691d4ec9e0c08609f416e6fe5af21 -generated: "2024-03-26T15:39:11.463027-04:00" +digest: sha256:7b9440db4914c56407c98faace390fd00374820b0f87987903912de7ac899ce8 +generated: "2026-04-22T17:14:51.109299-07:00" diff --git a/charts/retool/Chart.yaml b/charts/retool/Chart.yaml index 771605b..dbedb80 100644 --- 
a/charts/retool/Chart.yaml +++ b/charts/retool/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: retool description: A Helm chart for Kubernetes type: application -version: 6.10.6 +version: 6.11.0 maintainers: - name: Retool Engineering email: engineering+helm@retool.com @@ -13,4 +13,4 @@ dependencies: condition: postgresql.enabled - name: retool-temporal-services-helm version: 1.1.5 - condition: retool-temporal-services-helm.enabled,workflows.enabled + condition: retool-temporal-services-helm.enabled diff --git a/charts/retool/files/gvisor-seccomp.json b/charts/retool/files/gvisor-seccomp.json new file mode 100644 index 0000000..9b2a1de --- /dev/null +++ b/charts/retool/files/gvisor-seccomp.json @@ -0,0 +1,447 @@ +{ + "comment": "Docker default seccomp profile extended with syscalls required by gVisor runsc (systrap platform, rootless mode). Use with: docker run --security-opt seccomp=gvisor-seccomp.json", + "defaultAction": "SCMP_ACT_ERRNO", + "defaultErrnoRet": 1, + "archMap": [ + { + "architecture": "SCMP_ARCH_X86_64", + "subArchitectures": ["SCMP_ARCH_X86", "SCMP_ARCH_X32"] + }, + { + "architecture": "SCMP_ARCH_AARCH64", + "subArchitectures": [] + } + ], + "syscalls": [ + { + "comment": "Docker default allowlist (Docker 27.x, x86_64 + aarch64)", + "names": [ + "_llseek", + "_newselect", + "accept", + "accept4", + "access", + "acct", + "adjtimex", + "alarm", + "arch_prctl", + "bind", + "bpf", + "brk", + "cachestat", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "chroot", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "clock_settime", + "clock_settime64", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "delete_module", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_pwait2", + "epoll_wait", + "epoll_wait_old", 
+ "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_init", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + "fchmodat2", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "finit_module", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsconfig", + "fsetxattr", + "fsmount", + "fsopen", + "fspick", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_requeue", + "futex_time64", + "futex_wait", + "futex_waitv", + "futex_wake", + "futimesat", + "get_mempolicy", + "get_robust_list", + "get_thread_area", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "init_module", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "io_destroy", + "io_getevents", + "io_pgetevents", + "io_pgetevents_time64", + "io_setup", + "io_submit", + "io_uring_enter", + "io_uring_register", + "io_uring_setup", + "ioctl", + "ioperm", + "iopl", + "ioprio_get", + "ioprio_set", + "ipc", + "kcmp", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listxattr", + "llistxattr", + "lookup_dcookie", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "map_shadow_stack", + "mbind", + "membarrier", + "memfd_create", + 
"memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + "modify_ldt", + "mount_setattr", + "move_mount", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "name_to_handle_at", + "nanosleep", + "newfstatat", + "open", + "open_by_handle_at", + "open_tree", + "openat", + "openat2", + "pause", + "perf_event_open", + "pidfd_getfd", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_madvise", + "process_mrelease", + "process_vm_readv", + "process_vm_writev", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "quotactl", + "quotactl_fd", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "reboot", + "recv", + "recvfrom", + "recvmmsg", + "recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + 
"set_mempolicy", + "set_mempolicy_home_node", + "set_robust_list", + "set_thread_area", + "set_tid_address", + "set_tls", + "setdomainname", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "setsid", + "setsockopt", + "settimeofday", + "setuid", + "setuid32", + "setxattr", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + "socket", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "statx", + "stime", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "sync_file_range2", + "syncfs", + "sysinfo", + "syslog", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "umount", + "uname", + "unlink", + "unlinkat", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "vhangup", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev" + ], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor + pasta: namespace creation and entry (clone/unshare with CLONE_NEW* flags, setns to join namespaces)", + "names": ["clone", "clone3", "unshare", "setns"], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "pasta: set hostname inside namespace (cosmetic, avoids warning)", + "names": ["sethostname"], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor: sandbox filesystem setup (tmpfs, proc, bind mounts)", + "names": ["mount", "umount2"], + "action": 
"SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor: filesystem root isolation for sentry and gofer", + "names": ["pivot_root"], + "action": "SCMP_ACT_ALLOW" + }, + { + "comment": "gVisor systrap platform: workload executor thread initialization", + "names": ["ptrace"], + "action": "SCMP_ACT_ALLOW" + } + ] +} diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index 9ee5e45..fd28f91 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -289,6 +289,18 @@ Usage: (include "retool.agents.enabled" .) {{- $output -}} {{- end -}} +{{/* +Set R2 agent enabled +Usage: (include "retool.r2Agent.enabled" .) +*/}} +{{- define "retool.r2Agent.enabled" -}} +{{- $output := "" -}} +{{- if (eq (toString .Values.r2Agent.enabled) "true") -}} + {{- $output = "1" -}} +{{- end -}} +{{- $output -}} +{{- end -}} + {{/* Global Temporal configuration */}} {{- define "retool.temporalConfig" -}} {{- .Values.workflows.temporal | default .Values.temporal | toYaml -}} @@ -379,6 +391,160 @@ Set agent eval worker service name {{ template "retool.fullname" . }}-agent-eval-worker {{- end -}} +{{/* +Set R2 agent worker service name +*/}} +{{- define "retool.r2AgentWorker.name" -}} +{{ template "retool.fullname" . }}-r2-agent-worker +{{- end -}} + +{{/* +Selector labels for R2 agent worker. Note changes here will require manual +deployment recreation and incur downtime, so should be avoided. +*/}} +{{- define "retool.r2AgentWorker.selectorLabels" -}} +retoolService: {{ include "retool.r2AgentWorker.name" . }} +{{- end }} + +{{/* +Extra (non-selector) labels for R2 agent worker. +*/}} +{{- define "retool.r2AgentWorker.labels" -}} +app.kubernetes.io/name: {{ include "retool.r2AgentWorker.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +telemetry.retool.com/service-name: r2-agent-worker +{{- end }} + +{{/* +Set agent sandbox base name +*/}} +{{- define "retool.agentSandbox.name" -}} +{{ template "retool.fullname" . 
}}-agent-sandbox +{{- end -}} + +{{/* +Set agent sandbox controller name +*/}} +{{- define "retool.agentSandbox.controller.name" -}} +{{ template "retool.fullname" . }}-agent-sandbox-controller +{{- end -}} + +{{/* +Set agent sandbox proxy name +*/}} +{{- define "retool.agentSandbox.proxy.name" -}} +{{ template "retool.fullname" . }}-agent-sandbox-proxy +{{- end -}} + +{{/* +Secret name for agent sandbox. +Uses externalSecret.name if set, otherwise the auto-generated name. +*/}} +{{- define "retool.agentSandbox.secretName" -}} +{{- if .Values.agentSandbox.externalSecret.name -}} +{{ .Values.agentSandbox.externalSecret.name }} +{{- else -}} +{{ template "retool.agentSandbox.name" . }} +{{- end -}} +{{- end -}} + +{{/* +Selector labels for agent sandbox (sandbox pods / headless service). +*/}} +{{- define "retool.agentSandbox.selectorLabels" -}} +retoolService: {{ include "retool.agentSandbox.name" . }} +{{- end -}} + +{{/* +Extra labels for agent sandbox. +*/}} +{{- define "retool.agentSandbox.labels" -}} +app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +telemetry.retool.com/service-name: agent-sandbox +{{- end -}} + +{{/* +Selector labels for agent sandbox controller. +*/}} +{{- define "retool.agentSandbox.controller.selectorLabels" -}} +retoolService: {{ include "retool.agentSandbox.controller.name" . }} +{{- end -}} + +{{/* +Extra labels for agent sandbox controller. +*/}} +{{- define "retool.agentSandbox.controller.labels" -}} +app.kubernetes.io/name: {{ include "retool.agentSandbox.controller.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: controller +telemetry.retool.com/service-name: agent-sandbox-controller +{{- end -}} + +{{/* +Selector labels for agent sandbox proxy. +*/}} +{{- define "retool.agentSandbox.proxy.selectorLabels" -}} +retoolService: {{ include "retool.agentSandbox.proxy.name" . 
}} +{{- end -}} + +{{/* +Extra labels for agent sandbox proxy. +*/}} +{{- define "retool.agentSandbox.proxy.labels" -}} +app.kubernetes.io/name: {{ include "retool.agentSandbox.proxy.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: proxy +telemetry.retool.com/service-name: agent-sandbox-proxy +{{- end -}} + +{{/* +Agent sandbox env vars for the Retool backend, workflow backend, and workers. +Outputs env entries that tell the backend how to reach the agent sandbox services. +Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} +*/}} +{{- define "retool.agentSandbox.backendEnvVars" -}} +{{- if .Values.agentSandbox.enabled }} +- name: AGENT_EXECUTOR_ENABLED + value: "true" +- name: RR_AGENT_PUBSUB_BACKEND + value: "postgres" +- name: AGENT_EXECUTOR_CONTROLLER_INGRESS_DOMAIN + value: {{ .Values.agentSandbox.controllerUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.controller.name" .) (toString .Values.agentSandbox.controller.port)) | quote }} +- name: AGENT_EXECUTOR_PROXY_INGRESS_DOMAIN + value: {{ .Values.agentSandbox.proxyUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.proxy.name" .) (toString .Values.agentSandbox.proxy.port)) | quote }} +{{- if .Values.agentSandbox.frontendWsProxyDomain }} +- name: AGENT_EXECUTOR_FRONTEND_WS_PROXY_DOMAIN + value: {{ .Values.agentSandbox.frontendWsProxyDomain | quote }} +{{- end }} +{{- if or .Values.agentSandbox.proxyDomain .Values.agentSandbox.frontendWsProxyDomain }} +- name: AGENT_EXECUTOR_PROXY_DOMAIN + value: {{ .Values.agentSandbox.proxyDomain | default .Values.agentSandbox.frontendWsProxyDomain | quote }} +{{- end }} +{{- if or .Values.agentSandbox.jwtPrivateKey .Values.agentSandbox.externalSecret.name }} +- name: AGENT_EXECUTOR_JWT_PRIVATE_KEY + valueFrom: + secretKeyRef: + name: {{ include "retool.agentSandbox.secretName" . 
}} + key: jwt-private-key +{{- end }} +{{- if or .Values.agentSandbox.jwtPublicKey .Values.agentSandbox.externalSecret.name }} +- name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ include "retool.agentSandbox.secretName" . }} + key: jwt-public-key +{{- end }} +{{- if or .Values.agentSandbox.encryptionKey .Values.agentSandbox.externalSecret.name }} +- name: AGENT_EXECUTOR_ENCRYPTION_KEY + valueFrom: + secretKeyRef: + name: {{ include "retool.agentSandbox.secretName" . }} + key: encryption-key +{{- end }} +{{- end }} +{{- end -}} + {{/* Set code executor image tag Usage: (template "retool.codeExecutor.image.tag" .) diff --git a/charts/retool/templates/_workers.tpl b/charts/retool/templates/_workers.tpl index 989bba9..79c214d 100644 --- a/charts/retool/templates/_workers.tpl +++ b/charts/retool/templates/_workers.tpl @@ -3,6 +3,8 @@ type: agent - parent: agents type: agentEval +- parent: r2Agent + type: r2Agent - parent: workflows type: workflow {{- end -}} @@ -36,9 +38,20 @@ {{- end }} {{- end -}} -{{- $healthcheckPort := ternary 3012 3005 (eq $workerType "agentEval") -}} -{{- $serviceType := ternary "AGENT_EVAL_TEMPORAL_WORKER" "WORKFLOW_TEMPORAL_WORKER" (eq $workerType "agentEval") -}} -{{- $taskqueue := ternary "agent-eval" (ternary "agent" "" (eq $workerType "agent")) (eq $workerType "agentEval") -}} +{{- $healthcheckPort := 3005 -}} +{{- $serviceType := "WORKFLOW_TEMPORAL_WORKER" -}} +{{- $taskqueue := "" -}} +{{- if eq $workerType "agentEval" -}} + {{- $healthcheckPort = 3012 -}} + {{- $serviceType = "AGENT_EVAL_TEMPORAL_WORKER" -}} + {{- $taskqueue = "agent-eval" -}} +{{- else if eq $workerType "r2Agent" -}} + {{- $healthcheckPort = 3016 -}} + {{- $serviceType = "R2_AGENT_TEMPORAL_WORKER" -}} + {{- $taskqueue = "r2-agent" -}} +{{- else if eq $workerType "agent" -}} + {{- $taskqueue = "agent" -}} +{{- end -}} {{/* yaml starts here */}} apiVersion: apps/v1 @@ -100,7 +113,7 @@ spec: {{- end }} {{- end }} containers: - - name: {{ if eq 
$workerType "agentEval" }}agent-eval-worker{{ else }}{{ $workerType }}-worker{{ end }} + - name: {{ if eq $workerType "agentEval" }}agent-eval-worker{{ else if eq $workerType "r2Agent" }}r2-agent-worker{{ else }}{{ $workerType }}-worker{{ end }} image: "{{ $.Values.image.repository }}:{{ required "Please set a value for .Values.image.tag" $.Values.image.tag }}" imagePullPolicy: {{ $.Values.image.pullPolicy }} args: @@ -200,6 +213,7 @@ spec: value: {{ template "retool.postgresql.ssl_enabled" $ }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" $ }} + {{- include "retool.agentSandbox.backendEnvVars" $ | nindent 10 }} {{- include "retool.telemetry.includeEnvVars" $ | nindent 10 }} diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml new file mode 100644 index 0000000..c936ae2 --- /dev/null +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -0,0 +1,91 @@ +{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.sandboxNetwork.devicePlugin }} +{{- $as := .Values.agentSandbox -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retool.agentSandbox.name" . }}-device-plugin + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: device-plugin +data: + conf.yaml: | + - devicematch: ^net/tun$ + nummaxdevices: {{ $as.devicePlugin.maxDevices | default 130 }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "retool.agentSandbox.name" . }}-device-plugin + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . 
| nindent 4 }} + app.kubernetes.io/component: device-plugin +spec: + selector: + matchLabels: + retoolService: {{ include "retool.agentSandbox.name" . }}-device-plugin + template: + metadata: + labels: + retoolService: {{ include "retool.agentSandbox.name" . }}-device-plugin + app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }}-device-plugin + app.kubernetes.io/instance: {{ .Release.Name }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false + priorityClassName: system-node-critical +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} + containers: + - name: smarter-device-manager + image: "{{ $as.devicePlugin.image.repository }}:{{ $as.devicePlugin.image.tag }}" + imagePullPolicy: IfNotPresent + terminationMessagePath: /tmp/termination-log + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: dev + mountPath: /dev + readOnly: true + - name: sys + mountPath: /sys + readOnly: true + - name: config + mountPath: /root/config + resources: + requests: + cpu: 10m + memory: 16Mi + limits: + cpu: 100m + memory: 32Mi + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev + hostPath: + path: /dev + - name: sys + hostPath: + path: /sys + - name: config + configMap: + name: {{ include "retool.agentSandbox.name" . 
}}-device-plugin +{{- end }} diff --git a/charts/retool/templates/agent_sandbox_networkpolicy.yaml b/charts/retool/templates/agent_sandbox_networkpolicy.yaml new file mode 100644 index 0000000..cc6d07e --- /dev/null +++ b/charts/retool/templates/agent_sandbox_networkpolicy.yaml @@ -0,0 +1,216 @@ +{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.networkPolicy.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- /* +======================================================================= + Sandbox Pod NetworkPolicy — restrict ingress/egress for executor Jobs +======================================================================= +*/}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "retool.agentSandbox.name" . }} + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - from: + {{- if $as.networkPolicy.ingressFrom }} + {{- toYaml $as.networkPolicy.ingressFrom | nindent 8 }} + {{- else }} + - podSelector: + matchLabels: + {{- include "retool.selectorLabels" . | nindent 14 }} + {{- end }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + - from: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + - from: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . 
| nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + egress: + {{- if $as.networkPolicy.dnsSelector }} + - to: + - namespaceSelector: + {{- toYaml $as.networkPolicy.dnsSelector.namespaceSelector | nindent 12 }} + podSelector: + {{- toYaml $as.networkPolicy.dnsSelector.podSelector | nindent 12 }} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + {{- end }} + - to: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + {{- with $as.networkPolicy.extraEgress }} + {{- toYaml . | nindent 4 }} + {{- end }} +--- +{{- /* +======================================================================= + Controller NetworkPolicy +======================================================================= +*/}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + {{- include "retool.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.controller.port }} + protocol: TCP + egress: + {{- if $as.networkPolicy.dnsSelector }} + - to: + - namespaceSelector: + {{- toYaml $as.networkPolicy.dnsSelector.namespaceSelector | nindent 12 }} + podSelector: + {{- toYaml $as.networkPolicy.dnsSelector.podSelector | nindent 12 }} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + {{- end }} + - to: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . 
| nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + - to: + - ipBlock: + cidr: 0.0.0.0/0 + ports: + - port: 443 + protocol: TCP + - port: 6443 + protocol: TCP +--- +{{- /* +======================================================================= + Proxy NetworkPolicy +======================================================================= +*/}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + - from: + - podSelector: + matchLabels: + {{- include "retool.selectorLabels" . | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + {{- if ($as.proxy.ingress).enabled }} + {{- if $as.proxy.ingress.networkPolicy }} + - from: + - podSelector: + matchLabels: + {{- toYaml $as.proxy.ingress.networkPolicy.podSelector | nindent 14 }} + ports: + - port: {{ $as.proxy.port }} + protocol: TCP + {{- end }} + {{- end }} + egress: + {{- if $as.networkPolicy.dnsSelector }} + - to: + - namespaceSelector: + {{- toYaml $as.networkPolicy.dnsSelector.namespaceSelector | nindent 12 }} + podSelector: + {{- toYaml $as.networkPolicy.dnsSelector.podSelector | nindent 12 }} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + {{- end }} + - to: + - podSelector: + matchLabels: + {{- include "retool.agentSandbox.selectorLabels" . 
| nindent 14 }} + ports: + - port: {{ $as.sandbox.port }} + protocol: TCP + {{- if $as.networkPolicy.backendAllowlist }} + - to: + {{- range $as.networkPolicy.backendAllowlist }} + - ipBlock: + cidr: {{ . }} + {{- end }} + {{- end }} + - to: + - ipBlock: + cidr: 0.0.0.0/0 + except: + {{- range $as.networkPolicy.blockedRanges }} + - {{ . }} + {{- end }} + {{- if $as.networkPolicy.blockedRanges6 }} + - to: + - ipBlock: + cidr: ::/0 + except: + {{- range $as.networkPolicy.blockedRanges6 }} + - {{ . }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/retool/templates/agent_sandbox_prepuller.yaml b/charts/retool/templates/agent_sandbox_prepuller.yaml new file mode 100644 index 0000000..d47072d --- /dev/null +++ b/charts/retool/templates/agent_sandbox_prepuller.yaml @@ -0,0 +1,84 @@ +{{- if .Values.agentSandbox.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "retool.agentSandbox.name" . }}-image-prepuller + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: image-prepuller +spec: + selector: + matchLabels: + retoolService: {{ include "retool.agentSandbox.name" . }}-image-prepuller + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 100% + template: + metadata: + labels: + retoolService: {{ include "retool.agentSandbox.name" . }}-image-prepuller + app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }}-image-prepuller + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: image-prepuller + {{- include "retool.labels" . 
| nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} + terminationGracePeriodSeconds: 5 + initContainers: + - name: pull-image + image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + imagePullPolicy: {{ $as.image.pullPolicy }} + command: ["true"] + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1001 + capabilities: + drop: ["ALL"] + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi + containers: + - name: pause + image: "{{ $as.initImage.repository }}:{{ $as.initImage.tag }}{{- if $as.initImage.digest }}@{{ $as.initImage.digest }}{{- end }}" + command: ["sleep", "infinity"] + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1001 + capabilities: + drop: ["ALL"] + seccompProfile: + type: RuntimeDefault + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/agent_sandbox_seccomp.yaml b/charts/retool/templates/agent_sandbox_seccomp.yaml new file mode 100644 index 0000000..c6149e6 --- /dev/null +++ b/charts/retool/templates/agent_sandbox_seccomp.yaml @@ -0,0 +1,91 @@ +{{- if .Values.agentSandbox.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retool.agentSandbox.name" . }}-seccomp + labels: + {{- include "retool.agentSandbox.labels" . 
| nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +data: + gvisor-seccomp.json: | + {{- .Files.Get "files/gvisor-seccomp.json" | nindent 4 }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "retool.agentSandbox.name" . }}-node-installer + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} + app.kubernetes.io/component: node-installer +spec: + selector: + matchLabels: + retoolService: {{ include "retool.agentSandbox.name" . }}-node-installer + template: + metadata: + labels: + retoolService: {{ include "retool.agentSandbox.name" . }}-node-installer + app.kubernetes.io/name: {{ include "retool.agentSandbox.name" . }}-node-installer + app.kubernetes.io/instance: {{ .Release.Name }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} + initContainers: + - name: install + image: "{{ $as.initImage.repository }}:{{ $as.initImage.tag }}{{- if $as.initImage.digest }}@{{ $as.initImage.digest }}{{- end }}" + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + command: + - /bin/sh + - -c + - | + DEST="/host-seccomp/{{ $as.seccompProfile }}" + mkdir -p "$(dirname "$DEST")" + cp /seccomp-profile/gvisor-seccomp.json "$DEST" + echo "seccomp profile installed at $DEST" + volumeMounts: + - name: seccomp-profile + mountPath: /seccomp-profile + - name: host-seccomp + mountPath: /host-seccomp + containers: + - name: pause + image: "{{ $as.initImage.repository }}:{{ $as.initImage.tag }}{{- if $as.initImage.digest }}@{{ $as.initImage.digest }}{{- end }}" + command: ["sleep", "infinity"] + securityContext: + allowPrivilegeEscalation: false + 
readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi + volumes: + - name: seccomp-profile + configMap: + name: {{ include "retool.agentSandbox.name" . }}-seccomp + - name: host-seccomp + hostPath: + path: /var/lib/kubelet/seccomp + type: DirectoryOrCreate +{{- end }} diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml new file mode 100644 index 0000000..00e06fa --- /dev/null +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -0,0 +1,714 @@ +{{- if .Values.agentSandbox.enabled }} +{{- $as := .Values.agentSandbox -}} +{{- $secretName := include "retool.agentSandbox.secretName" . -}} +{{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} +{{- $tolerations := $as.tolerations | default .Values.tolerations -}} +{{- /* +======================================================================= + Secret (skipped when externalSecret.name is set) +======================================================================= +*/}} +{{- if not $as.externalSecret.name }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "retool.agentSandbox.name" . }} + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . 
| nindent 4 }} +type: Opaque +data: + jwt-public-key: {{ $as.jwtPublicKey | default "" | b64enc | quote }} + jwt-private-key: {{ $as.jwtPrivateKey | default "" | b64enc | quote }} + encryption-key: {{ $as.encryptionKey | default "" | b64enc | quote }} + api-secret: {{ $as.apiSecret | default "" | b64enc | quote }} + postgres-url: {{ $as.postgres.url | default "" | b64enc | quote }} +--- +{{- end }} +{{- /* +======================================================================= + RBAC for the controller (needs to manage Jobs, Pods, ConfigMaps) +======================================================================= +*/}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +rules: + - apiGroups: ["apps"] + resources: ["deployments"] + resourceNames: ["{{ include "retool.agentSandbox.name" . }}"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["apps"] + resources: ["deployments/scale"] + resourceNames: ["{{ include "retool.agentSandbox.name" . }}"] + verbs: ["get", "patch"] + - apiGroups: ["apps"] + resources: ["daemonsets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . 
| nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "retool.agentSandbox.controller.name" . }} +subjects: + - kind: ServiceAccount + name: {{ include "retool.agentSandbox.controller.name" . }} + namespace: {{ .Release.Namespace }} +--- +{{- /* +======================================================================= + Job Template ConfigMap — defines the K8s Job spec the controller uses + to create sandbox pods. +======================================================================= +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retool.agentSandbox.name" . }}-job-template + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +data: + job-template.json: | + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "labels": { + "retoolService": "{{ include "retool.agentSandbox.name" . }}", + "app.kubernetes.io/name": "{{ include "retool.agentSandbox.name" . }}" + } + }, + "spec": { + "backoffLimit": 0, + "ttlSecondsAfterFinished": {{ $as.controller.scaling.jobRetentionSeconds }}, + "template": { + "metadata": { + "annotations": { + "karpenter.sh/do-not-disrupt": "true" + }, + "labels": { + "retoolService": "{{ include "retool.agentSandbox.name" . }}", + "app.kubernetes.io/name": "{{ include "retool.agentSandbox.name" . }}" + } + }, + "spec": { + "restartPolicy": "Never", + "subdomain": "{{ include "retool.agentSandbox.name" . }}-pods", + "automountServiceAccountToken": false, + {{- if $nodeSelector }} + "nodeSelector": {{ toJson $nodeSelector }}, + {{- end }} + {{- if $tolerations }} + "tolerations": {{ toJson $tolerations }}, + {{- end }} + "initContainers": [ + { + "name": "rootfs-etc-copy", + "image": "{{ $as.image.repository }}:__IMAGE_TAG__", + "command": ["/bin/sh", "-c", "cp -r /opt/sandbox-env/rootfs/etc/. 
/mnt/etc/"], + "securityContext": { + "runAsUser": 0, + "allowPrivilegeEscalation": false, + "readOnlyRootFilesystem": true, + "capabilities": {"drop": ["ALL"], "add": ["DAC_READ_SEARCH"]} + }, + "volumeMounts": [ + {"name": "rootfs-etc", "mountPath": "/mnt/etc"} + ], + "resources": { + "requests": {"cpu": "10m", "memory": "16Mi"}, + "limits": {"cpu": "100m", "memory": "32Mi"} + } + } + ], + "containers": [ + { + "name": "agent-sandbox", + "image": "{{ $as.image.repository }}:__IMAGE_TAG__", + "ports": [{"containerPort": {{ $as.sandbox.port }}, "protocol": "TCP"}], + "securityContext": { + "runAsUser": 1001, + "runAsGroup": 1001, + "allowPrivilegeEscalation": false, + "readOnlyRootFilesystem": true, + "capabilities": {"drop": ["ALL"]}, + "seccompProfile": {"type": "Localhost", "localhostProfile": "{{ $as.seccompProfile }}"} + }, + "env": [ + {"name": "NODE_ENV", "value": "production"}, + {"name": "EXECUTOR_PORT", "value": "{{ $as.sandbox.port }}"}, + {"name": "POD_NAME", "valueFrom": {"fieldRef": {"fieldPath": "metadata.name"}}}, + {"name": "POD_UID", "valueFrom": {"fieldRef": {"fieldPath": "metadata.uid"}}}, + {"name": "POD_IP", "valueFrom": {"fieldRef": {"fieldPath": "status.podIP"}}}, + {"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"}, + {"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"}, + {"name": "SANDBOX_MEMORY_LIMIT", "value": "{{ $as.sandbox.sandboxMemoryLimit }}"} + {{- if or $as.jwtPublicKey $as.externalSecret.name }} + ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $secretName }}", "key": "jwt-public-key"}}} + {{- end }} + {{- if $as.proxy.backendDomainSuffixes }} + ,{"name": "BACKEND_DOMAIN_SUFFIXES", "value": "{{ $as.proxy.backendDomainSuffixes }}"} + {{- end }} + {{- if $as.sandboxNetwork.enabled }} + ,{"name": "SANDBOX_HTTP_PROXY", "value": "{{ $as.sandboxNetwork.httpProxy | default (printf "http://%s:%s" (include 
"retool.agentSandbox.proxy.name" .) (toString $as.proxy.port)) }}"} + {{- end }} + {{- if $as.snapshotStorage.s3Bucket }} + ,{"name": "S3_BUCKET", "value": "{{ $as.snapshotStorage.s3Bucket }}"} + ,{"name": "S3_ENDPOINT", "value": "{{ $as.snapshotStorage.s3Endpoint }}"} + ,{"name": "S3_REGION", "value": "{{ $as.snapshotStorage.s3Region }}"} + ,{"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $secretName }}", "key": "awsAccessKeyId"}}} + ,{"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $secretName }}", "key": "awsSecretAccessKey"}}} + {{- end }} + {{- range $as.sandbox.extraEnv }} + ,{{ toJson . }} + {{- end }} + ], + "volumeMounts": [ + {{- if and $as.sandboxNetwork.enabled (not $as.sandboxNetwork.devicePlugin) }} + {"name": "dev-tun", "mountPath": "/dev/net/tun"}, + {{- end }} + {"name": "run", "mountPath": "/run"}, + {"name": "tmp", "mountPath": "/tmp"}, + {"name": "rootfs-appjob", "mountPath": "/opt/sandbox-env/rootfs/app/job"}, + {"name": "rootfs-etc", "mountPath": "/opt/sandbox-env/rootfs/etc"} + ], + {{- $res := deepCopy $as.sandbox.resources }} + {{- if $as.sandboxNetwork.devicePlugin }} + {{- $_ := set $res.limits "smarter-devices/net_tun" 1 }} + {{- end }} + "resources": {{ toJson $res }} + } + ], + "volumes": [ + {{- if and $as.sandboxNetwork.enabled (not $as.sandboxNetwork.devicePlugin) }} + {"name": "dev-tun", "hostPath": {"path": "/dev/net/tun", "type": "CharDevice"}}, + {{- end }} + {"name": "run", "emptyDir": {"medium": "Memory", "sizeLimit": "64Mi"}}, + {"name": "tmp", "emptyDir": {"sizeLimit": "{{ $as.sandbox.tmpDirSizeLimit | default "20Gi" }}"}}, + {"name": "rootfs-appjob", "emptyDir": {"sizeLimit": "{{ $as.sandbox.rootfsSizeLimit | default "2Gi" }}"}}, + {"name": "rootfs-etc", "emptyDir": {"medium": "Memory", "sizeLimit": "4Mi"}} + ] + } + } + } + } +--- +{{- /* 
+======================================================================= + Controller Deployment +======================================================================= +*/}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 4 }} + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- if .Values.deployment.labels }} +{{ toYaml .Values.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ $as.controller.replicaCount }} + selector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- if $as.annotations }} +{{ toYaml $as.annotations | indent 8 }} +{{- end }} + labels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 8 }} + {{- include "retool.agentSandbox.controller.labels" . | nindent 8 }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if $as.labels }} +{{ toYaml $as.labels | indent 8 }} +{{- end }} + spec: + serviceAccountName: {{ include "retool.agentSandbox.controller.name" . 
}} + automountServiceAccountToken: true + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} + containers: + - name: controller + image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + imagePullPolicy: {{ $as.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $as.controller.port }} + protocol: TCP + securityContext: + runAsUser: 1001 + runAsGroup: 1001 + runAsNonRoot: true + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + env: + - name: NODE_ENV + value: "production" + - name: AGENT_EXECUTOR_ROLE + value: "controller" + - name: CONTROLLER_PORT + value: {{ $as.controller.port | quote }} + - name: STATE_BACKEND + value: "postgres" + - name: AGENT_EXECUTOR_POSTGRES_URL + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: postgres-url + - name: AGENT_EXECUTOR_POSTGRES_SCHEMA + value: {{ $as.postgres.schema | quote }} + - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX + value: {{ $as.postgres.poolMax | quote }} + - name: STATE_SWEEPER_INTERVAL_MS + value: {{ $as.postgres.sweeperIntervalMs | quote }} + - name: K8S_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: JOB_NAME_PREFIX + value: {{ include "retool.agentSandbox.name" . }}-job + - name: JOB_APP_LABEL + value: {{ include "retool.agentSandbox.name" . }} + - name: EXECUTOR_DEPLOYMENT_NAME + value: {{ include "retool.agentSandbox.name" . }} + - name: EXECUTOR_SERVICE_NAME + value: {{ include "retool.agentSandbox.name" . 
}}-pods + - name: SLOTS_PER_POD + value: {{ $as.controller.scaling.slotsPerPod | quote }} + - name: EXECUTOR_MIN_REPLICAS + value: {{ $as.controller.scaling.minReplicas | quote }} + - name: EXECUTOR_MAX_REPLICAS + value: {{ $as.controller.scaling.maxReplicas | quote }} + - name: SCALE_UP_THRESHOLD + value: {{ $as.controller.scaling.scaleUpThreshold | quote }} + - name: SCALE_DOWN_THRESHOLD + value: {{ $as.controller.scaling.scaleDownThreshold | quote }} + - name: SCALE_DOWN_GRACE_PERIOD_MS + value: {{ $as.controller.scaling.scaleDownGracePeriodMs | quote }} + - name: PREWARM_POOL_SIZE + value: {{ $as.controller.scaling.prewarmPoolSize | quote }} + - name: MAX_TOTAL_JOBS + value: {{ $as.controller.scaling.maxTotalJobs | quote }} + - name: MAX_CONCURRENT_CREATES + value: {{ $as.controller.scaling.maxConcurrentCreates | quote }} + - name: JOB_RETENTION_SECONDS + value: {{ $as.controller.scaling.jobRetentionSeconds | quote }} + - name: ASSIGNED_SANDBOX_TTL_SECONDS + value: {{ $as.controller.scaling.assignedSandboxTtlSeconds | quote }} + - name: RECONCILE_INTERVAL_MS + value: {{ $as.controller.scaling.reconcileIntervalMs | quote }} + - name: LEADER_TTL_MS + value: {{ $as.controller.scaling.leaderTtlMs | quote }} + - name: LEADER_RENEW_MS + value: {{ $as.controller.scaling.leaderRenewMs | quote }} + - name: DEPLOYED_IMAGE_TAG + value: {{ $as.image.tag | quote }} + - name: JOB_TEMPLATE_CONFIGMAP + value: {{ include "retool.agentSandbox.name" . }}-job-template + - name: DAEMONSET_NAME + value: {{ include "retool.agentSandbox.name" . 
}}-image-prepuller + {{- if or $as.jwtPublicKey $as.externalSecret.name }} + - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: jwt-public-key + {{- end }} + livenessProbe: + httpGet: + path: /livez + port: http + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 1 + periodSeconds: 2 + timeoutSeconds: 3 + resources: + {{- toYaml $as.controller.resources | nindent 12 }} +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- if $as.affinity }} + affinity: +{{ toYaml $as.affinity | indent 8 }} +{{- end }} +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} +--- +{{- /* +======================================================================= + Controller Service (ClusterIP) +======================================================================= +*/}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: {{ $as.controller.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 4 }} +--- +{{- /* +======================================================================= + Proxy Deployment +======================================================================= +*/}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 4 }} + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . 
| nindent 4 }} +{{- if .Values.deployment.labels }} +{{ toYaml .Values.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ $as.proxy.replicaCount }} + selector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- if $as.annotations }} +{{ toYaml $as.annotations | indent 8 }} +{{- end }} + labels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . | nindent 8 }} + {{- include "retool.agentSandbox.proxy.labels" . | nindent 8 }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if $as.labels }} +{{ toYaml $as.labels | indent 8 }} +{{- end }} + spec: + automountServiceAccountToken: false + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} + containers: + - name: proxy + image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + imagePullPolicy: {{ $as.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $as.proxy.port }} + protocol: TCP + securityContext: + runAsUser: 1001 + runAsGroup: 1001 + runAsNonRoot: true + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + env: + - name: NODE_ENV + value: "production" + - name: AGENT_EXECUTOR_ROLE + value: "proxy" + - name: PROXY_PORT + value: {{ $as.proxy.port | quote }} + - name: STATE_BACKEND + value: "postgres" + - name: AGENT_EXECUTOR_POSTGRES_URL + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: postgres-url + - name: AGENT_EXECUTOR_POSTGRES_SCHEMA + value: {{ $as.postgres.schema | quote }} + - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX + value: {{ 
$as.postgres.poolMax | quote }} + - name: STATE_SWEEPER_INTERVAL_MS + value: {{ $as.postgres.sweeperIntervalMs | quote }} + {{- if $as.proxy.allowedDomains }} + - name: ALLOWED_DOMAINS + value: {{ $as.proxy.allowedDomains | quote }} + {{- end }} + - name: BACKEND_URL + value: {{ $as.proxy.backendUrl | default (printf "http://%s:%s" (include "retool.fullname" .) (toString .Values.service.internalPort)) | quote }} + {{- if $as.proxy.backendDomainSuffixes }} + - name: BACKEND_DOMAIN_SUFFIXES + value: {{ $as.proxy.backendDomainSuffixes | quote }} + {{- end }} + {{- if or $as.encryptionKey $as.externalSecret.name }} + - name: AGENT_EXECUTOR_ENCRYPTION_KEY + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: encryption-key + {{- end }} + {{- if or $as.jwtPublicKey $as.externalSecret.name }} + - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: {{ $secretName }} + key: jwt-public-key + {{- end }} + - name: EXECUTOR_PORT + value: {{ $as.sandbox.port | quote }} + - name: EXECUTOR_SERVICE_NAME + value: {{ include "retool.agentSandbox.name" . 
}}-pods + - name: K8S_NAMESPACE + value: {{ .Release.Namespace | quote }} + {{- if $as.proxy.sandboxProxyTimeoutMs }} + - name: SANDBOX_PROXY_TIMEOUT_MS + value: {{ $as.proxy.sandboxProxyTimeoutMs | quote }} + {{- end }} + livenessProbe: + httpGet: + path: /livez + port: http + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 1 + periodSeconds: 2 + timeoutSeconds: 3 + resources: + {{- toYaml $as.proxy.resources | nindent 12 }} +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- if $as.affinity }} + affinity: +{{ toYaml $as.affinity | indent 8 }} +{{- end }} +{{- if $nodeSelector }} + nodeSelector: +{{ toYaml $nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml $tolerations | indent 8 }} +--- +{{- /* +======================================================================= + Proxy Service +======================================================================= +*/}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- with ($as.proxy.service).annotations }} + annotations: + {{- toYaml . | nindent 4 }} +{{- end }} +spec: + type: {{ ($as.proxy.service).type | default "ClusterIP" }} + ports: + - port: {{ $as.proxy.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "retool.agentSandbox.proxy.selectorLabels" . 
| nindent 4 }} +--- +{{- /* +======================================================================= + Proxy Ingress (optional — exposes proxy to frontend for WebSocket) +======================================================================= +*/}} +{{- if ($as.proxy.ingress).enabled }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.Version }} +apiVersion: networking.k8s.io/v1 +{{- else }} +apiVersion: networking.k8s.io/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- with $as.proxy.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- if and $as.proxy.ingress.ingressClassName (semverCompare ">=1.18-0" .Capabilities.KubeVersion.Version) }} + ingressClassName: {{ $as.proxy.ingress.ingressClassName }} + {{- end }} + rules: + - host: {{ $as.proxy.ingress.host | quote }} + http: + paths: + - path: / + {{- if semverCompare ">=1.18-0" .Capabilities.KubeVersion.Version }} + pathType: Prefix + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.Version }} + service: + name: {{ include "retool.agentSandbox.proxy.name" . }} + port: + number: {{ $as.proxy.port }} + {{- else }} + serviceName: {{ include "retool.agentSandbox.proxy.name" . }} + servicePort: {{ $as.proxy.port }} + {{- end }} +{{- with $as.proxy.ingress.tls }} + tls: + {{- toYaml . | nindent 4 }} +{{- end }} +--- +{{- end }} +{{- /* +======================================================================= + Headless Service for direct pod addressing (sandbox routing). + Executor Job pods use subdomain to register DNS: + <pod-hostname>.<service>.<namespace>.svc.cluster.local:<port> +======================================================================= +*/}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retool.agentSandbox.name" . 
}}-pods + labels: + {{- include "retool.agentSandbox.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + clusterIP: None + ports: + - port: {{ $as.sandbox.port }} + targetPort: {{ $as.sandbox.port }} + protocol: TCP + name: http + selector: + {{- include "retool.agentSandbox.selectorLabels" . | nindent 4 }} +--- +{{- /* +======================================================================= + PodDisruptionBudget for controller (when replicas > 1) +======================================================================= +*/}} +{{- if gt (int $as.controller.replicaCount) 1 }} +{{- if semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ include "retool.agentSandbox.controller.name" . }} + labels: + {{- include "retool.agentSandbox.controller.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + maxUnavailable: 1 + selector: + matchLabels: + {{- include "retool.agentSandbox.controller.selectorLabels" . | nindent 6 }} +--- +{{- end }} +{{- if .Values.podDisruptionBudget }} +{{- if semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.agentSandbox.proxy.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +spec: + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} + selector: + matchLabels: + {{- include "retool.agentSandbox.proxy.selectorLabels" . 
| nindent 6 }} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index 9a1a6c4..dd987b7 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -161,6 +161,7 @@ spec: - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" . }} {{- end }} + {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} {{- if ($temporalConfig).sslEnabled }} - name: WORKFLOW_TEMPORAL_TLS_ENABLED value: "true" diff --git a/charts/retool/templates/deployment_workflows.yaml b/charts/retool/templates/deployment_workflows.yaml index 35b7484..0b8be82 100644 --- a/charts/retool/templates/deployment_workflows.yaml +++ b/charts/retool/templates/deployment_workflows.yaml @@ -176,6 +176,7 @@ spec: value: http://{{ include "retool.workflowBackend.name" . }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" . }} + {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} {{- if include "shouldIncludeConfigSecretsEnvVars" . }} - name: LICENSE_KEY valueFrom: diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 8347d2d..5149ff4 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -643,6 +643,255 @@ agents: # Annotations for agent worker pods annotations: {} +# R2 Agent: server-side agent loop worker (independent from agents above). +r2Agent: + enabled: false + + # Labels for R2 agent worker pods + labels: {} + + # R2 agent configuration + config: {} + + # Annotations for R2 agent worker pods + annotations: {} + + # R2 agent worker configuration + worker: + replicaCount: 1 + + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + +# Agent Sandbox Service: sandboxed code execution for AI agents. 
+# Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), +# and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. +agentSandbox: + enabled: false + + image: + repository: tryretool/agent-sandbox-service + tag: latest + pullPolicy: IfNotPresent + + # Lightweight init image used by the prepuller and seccomp DaemonSets. + # Pinning by digest is recommended for production. + initImage: + repository: busybox + tag: '1.37.0' + # Manifest list digest — set to '' in test environments where images are + # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). + digest: '' + + # Annotations for agent sandbox pods + annotations: {} + + # Labels for agent sandbox pods + labels: {} + + # Pre-existing K8s Secret. When set, the chart skips creating its own Secret + # and references this for keys: jwt-public-key, jwt-private-key, encryption-key, + # api-secret, postgres-url. + externalSecret: + name: '' + + # Secrets (ignored when externalSecret.name is set) + # JWT key pair (ES256) for sandbox token authentication. + jwtPublicKey: '' + jwtPrivateKey: '' + # Hex-encoded 256-bit key for encrypting credentials stored in state backend. + # Must match the backend's AGENT_EXECUTOR_ENCRYPTION_KEY. + encryptionKey: '' + # API secret for admin/test endpoints. + apiSecret: '' + + # Postgres state backend (shared by controller and proxy for state coordination). + # Connection string for the agent sandbox's state database. + postgres: + url: '' + schema: 'agent_executor' + poolMax: 10 + sweeperIntervalMs: 60000 + + # Sandbox network access via pasta userspace networking. + # When enabled, sandboxes get isolated outbound access with L7 filtering. + sandboxNetwork: + enabled: true + # Deploy smarter-device-manager to register /dev/net/tun with the kubelet. 
+ # Required because containerd's default device cgroup blocks /dev/net/tun; + # the device plugin's DeviceSpec path is the only reliable way to grant + # device cgroup access without privileged mode. + devicePlugin: true + # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster + # agent-sandbox-proxy service URL when empty. + httpProxy: '' + + # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox + # pods can request it via resources.limits. + devicePlugin: + image: + repository: ghcr.io/smarter-project/smarter-device-manager + tag: v1.20.12 + # Number of /dev/net/tun device slots to register. + # Set high enough to accommodate maxTotalJobs + prewarm pool. + maxDevices: 130 + + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. + # The seccomp node-installer DaemonSet copies the profile to this path + # on every node automatically. + seccompProfile: retool/gvisor-seccomp.json + + # S3-compatible snapshot storage. + # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. + snapshotStorage: + s3Bucket: '' + s3Endpoint: '' + s3Region: 'us-east-1' + # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. + # If empty, falls back to the main agent sandbox secret. + credentialsSecretName: '' + + # Sandbox (Job) configuration + sandbox: + port: 3017 + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: '2' + memory: 4Gi + # Per-sandbox cgroup memory.max limit in bytes. + sandboxMemoryLimit: '1610612736' # 1.5 GB + # Idle timeout (ms) before an unassigned sandbox self-terminates. + sandboxIdleTimeoutMs: 300000 + tmpDirSizeLimit: 20Gi + # Separate limit for the rootfs-appjob volume — the sandbox root filesystem + # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi + # allocated for /tmp. + rootfsSizeLimit: 2Gi + # Additional environment variables for sandbox containers. 
+ extraEnv: [] + + # Controller: tracks capacity, assigns sandbox pods, manages scaling + controller: + replicaCount: 1 + port: 3018 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + scaling: + slotsPerPod: 4 + minReplicas: 1 + maxReplicas: 10 + scaleUpThreshold: 2 + scaleDownThreshold: 8 + scaleDownGracePeriodMs: 300000 + prewarmPoolSize: 5 + maxTotalJobs: 50 + maxConcurrentCreates: 3 + jobRetentionSeconds: 300 + assignedSandboxTtlSeconds: 3600 + reconcileIntervalMs: 5000 + leaderTtlMs: 10000 + leaderRenewMs: 3000 + + # Proxy: HTTP proxy for sandbox egress with credential injection. + # The proxy must be reachable by frontend browsers for WebSocket connections. + proxy: + replicaCount: 1 + port: 3019 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + allowedDomains: '' + # URL the proxy uses to reach the Retool backend for token exchange. + # Defaults to http://<retool-fullname>:3000 (same-cluster backend service). + backendUrl: '' + backendDomainSuffixes: '' + sandboxProxyTimeoutMs: '' + service: + # Set to LoadBalancer or NodePort to expose the proxy externally. + type: ClusterIP + annotations: {} + # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. + # This is separate from the main Retool ingress since the proxy typically runs on its own domain. + ingress: + enabled: false + # ingressClassName: + annotations: {} + # kubernetes.io/ingress.class: nginx + # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: '' + # e.g. sandbox.yourdomain.com + tls: [] + # - secretName: sandbox-tls + # hosts: + # - sandbox.yourdomain.com + + # Backend integration: these tell the Retool backend how to reach agent executor. + # controllerUrl and proxyUrl default to internal service URLs when empty. 
+ controllerUrl: '' + proxyUrl: '' + # Required: public URL for frontend browsers to reach the proxy via WebSocket. + # e.g. https://sandbox.yourdomain.com + frontendWsProxyDomain: '' + # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. + proxyDomain: '' + + # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. + # Strongly recommended for production to isolate sandbox egress. + networkPolicy: + enabled: false + # CIDR ranges to block in proxy egress rules. Must stay in sync with + # DEFAULT_BLOCKED_CIDRS in the agent-executor source. + blockedRanges: + - 169.254.0.0/16 # link-local / cloud metadata + - 10.0.0.0/8 # private (RFC 1918) + - 172.16.0.0/12 # private (RFC 1918) + - 192.168.0.0/16 # private (RFC 1918) + - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) + - 127.0.0.0/8 # loopback + - 0.0.0.0/8 # "this network" (RFC 791) + blockedRanges6: + - fc00::/7 # IPv6 unique local addresses + - fe80::/10 # IPv6 link-local + - '::1/128' # IPv6 loopback + # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). + # Set to empty to allow DNS to any destination (not recommended). + dnsSelector: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + extraEgress: [] + # CIDRs allowed in proxy egress even if they fall within blockedRanges + # (e.g. private backend endpoint). + backendAllowlist: [] + # Override sandbox ingress source selector (defaults to retool backend pods). + ingressFrom: [] + + # Node placement overrides (falls back to global nodeSelector/tolerations if empty) + nodeSelector: {} + tolerations: [] + affinity: {} + # SHARED TEMPORAL CONFIGURATION # This configuration is shared between all workers. # In order to use workers, temporal must be configured. 
diff --git a/values.yaml b/values.yaml index 8347d2d..5149ff4 100644 --- a/values.yaml +++ b/values.yaml @@ -643,6 +643,255 @@ agents: # Annotations for agent worker pods annotations: {} +# R2 Agent: server-side agent loop worker (independent from agents above). +r2Agent: + enabled: false + + # Labels for R2 agent worker pods + labels: {} + + # R2 agent configuration + config: {} + + # Annotations for R2 agent worker pods + annotations: {} + + # R2 agent worker configuration + worker: + replicaCount: 1 + + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + +# Agent Sandbox Service: sandboxed code execution for AI agents. +# Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), +# and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. +agentSandbox: + enabled: false + + image: + repository: tryretool/agent-sandbox-service + tag: latest + pullPolicy: IfNotPresent + + # Lightweight init image used by the prepuller and seccomp DaemonSets. + # Pinning by digest is recommended for production. + initImage: + repository: busybox + tag: '1.37.0' + # Manifest list digest — set to '' in test environments where images are + # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). + digest: '' + + # Annotations for agent sandbox pods + annotations: {} + + # Labels for agent sandbox pods + labels: {} + + # Pre-existing K8s Secret. When set, the chart skips creating its own Secret + # and references this for keys: jwt-public-key, jwt-private-key, encryption-key, + # api-secret, postgres-url. + externalSecret: + name: '' + + # Secrets (ignored when externalSecret.name is set) + # JWT key pair (ES256) for sandbox token authentication. + jwtPublicKey: '' + jwtPrivateKey: '' + # Hex-encoded 256-bit key for encrypting credentials stored in state backend. + # Must match the backend's AGENT_EXECUTOR_ENCRYPTION_KEY. 
+ encryptionKey: '' + # API secret for admin/test endpoints. + apiSecret: '' + + # Postgres state backend (shared by controller and proxy for state coordination). + # Connection string for the agent sandbox's state database. + postgres: + url: '' + schema: 'agent_executor' + poolMax: 10 + sweeperIntervalMs: 60000 + + # Sandbox network access via pasta userspace networking. + # When enabled, sandboxes get isolated outbound access with L7 filtering. + sandboxNetwork: + enabled: true + # Deploy smarter-device-manager to register /dev/net/tun with the kubelet. + # Required because containerd's default device cgroup blocks /dev/net/tun; + # the device plugin's DeviceSpec path is the only reliable way to grant + # device cgroup access without privileged mode. + devicePlugin: true + # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster + # agent-sandbox-proxy service URL when empty. + httpProxy: '' + + # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox + # pods can request it via resources.limits. + devicePlugin: + image: + repository: ghcr.io/smarter-project/smarter-device-manager + tag: v1.20.12 + # Number of /dev/net/tun device slots to register. + # Set high enough to accommodate maxTotalJobs + prewarm pool. + maxDevices: 130 + + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. + # The seccomp node-installer DaemonSet copies the profile to this path + # on every node automatically. + seccompProfile: retool/gvisor-seccomp.json + + # S3-compatible snapshot storage. + # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. + snapshotStorage: + s3Bucket: '' + s3Endpoint: '' + s3Region: 'us-east-1' + # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. + # If empty, falls back to the main agent sandbox secret. 
+ credentialsSecretName: '' + + # Sandbox (Job) configuration + sandbox: + port: 3017 + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: '2' + memory: 4Gi + # Per-sandbox cgroup memory.max limit in bytes. + sandboxMemoryLimit: '1610612736' # 1.5 GB + # Idle timeout (ms) before an unassigned sandbox self-terminates. + sandboxIdleTimeoutMs: 300000 + tmpDirSizeLimit: 20Gi + # Separate limit for the rootfs-appjob volume — the sandbox root filesystem + # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi + # allocated for /tmp. + rootfsSizeLimit: 2Gi + # Additional environment variables for sandbox containers. + extraEnv: [] + + # Controller: tracks capacity, assigns sandbox pods, manages scaling + controller: + replicaCount: 1 + port: 3018 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + scaling: + slotsPerPod: 4 + minReplicas: 1 + maxReplicas: 10 + scaleUpThreshold: 2 + scaleDownThreshold: 8 + scaleDownGracePeriodMs: 300000 + prewarmPoolSize: 5 + maxTotalJobs: 50 + maxConcurrentCreates: 3 + jobRetentionSeconds: 300 + assignedSandboxTtlSeconds: 3600 + reconcileIntervalMs: 5000 + leaderTtlMs: 10000 + leaderRenewMs: 3000 + + # Proxy: HTTP proxy for sandbox egress with credential injection. + # The proxy must be reachable by frontend browsers for WebSocket connections. + proxy: + replicaCount: 1 + port: 3019 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + allowedDomains: '' + # URL the proxy uses to reach the Retool backend for token exchange. + # Defaults to http://:3000 (same-cluster backend service). + backendUrl: '' + backendDomainSuffixes: '' + sandboxProxyTimeoutMs: '' + service: + # Set to LoadBalancer or NodePort to expose the proxy externally. + type: ClusterIP + annotations: {} + # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. 
+ # This is separate from the main Retool ingress since the proxy typically runs on its own domain. + ingress: + enabled: false + # ingressClassName: + annotations: {} + # kubernetes.io/ingress.class: nginx + # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: '' + # e.g. sandbox.yourdomain.com + tls: [] + # - secretName: sandbox-tls + # hosts: + # - sandbox.yourdomain.com + + # Backend integration: these tell the Retool backend how to reach agent executor. + # controllerUrl and proxyUrl default to internal service URLs when empty. + controllerUrl: '' + proxyUrl: '' + # Required: public URL for frontend browsers to reach the proxy via WebSocket. + # e.g. https://sandbox.yourdomain.com + frontendWsProxyDomain: '' + # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. + proxyDomain: '' + + # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. + # Strongly recommended for production to isolate sandbox egress. + networkPolicy: + enabled: false + # CIDR ranges to block in proxy egress rules. Must stay in sync with + # DEFAULT_BLOCKED_CIDRS in the agent-executor source. + blockedRanges: + - 169.254.0.0/16 # link-local / cloud metadata + - 10.0.0.0/8 # private (RFC 1918) + - 172.16.0.0/12 # private (RFC 1918) + - 192.168.0.0/16 # private (RFC 1918) + - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) + - 127.0.0.0/8 # loopback + - 0.0.0.0/8 # "this network" (RFC 791) + blockedRanges6: + - fc00::/7 # IPv6 unique local addresses + - fe80::/10 # IPv6 link-local + - '::1/128' # IPv6 loopback + # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). + # Set to empty to allow DNS to any destination (not recommended). 
+ dnsSelector: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + extraEgress: [] + # CIDRs allowed in proxy egress even if they fall within blockedRanges + # (e.g. private backend endpoint). + backendAllowlist: [] + # Override sandbox ingress source selector (defaults to retool backend pods). + ingressFrom: [] + + # Node placement overrides (falls back to global nodeSelector/tolerations if empty) + nodeSelector: {} + tolerations: [] + affinity: {} + # SHARED TEMPORAL CONFIGURATION # This configuration is shared between all workers. # In order to use workers, temporal must be configured. From 8b9582f994b017fa7b6df64d5c1b03ad33a71319 Mon Sep 17 00:00:00 2001 From: Dan Merino Date: Thu, 30 Apr 2026 17:01:21 -0700 Subject: [PATCH 02/37] [INF-6675] add js executor (#275) --- charts/retool/files/nsjail-seccomp.json | 692 ++++++++++++++++++ charts/retool/templates/_helpers.tpl | 43 ++ charts/retool/templates/_workers.tpl | 2 + .../templates/configmap_js_executor.yaml | 7 + .../retool/templates/deployment_backend.yaml | 2 + .../templates/deployment_js_executor.yaml | 211 ++++++ .../templates/deployment_workflows.yaml | 2 + charts/retool/values.yaml | 31 + values.yaml | 31 + 9 files changed, 1021 insertions(+) create mode 100644 charts/retool/files/nsjail-seccomp.json create mode 100644 charts/retool/templates/configmap_js_executor.yaml create mode 100644 charts/retool/templates/deployment_js_executor.yaml diff --git a/charts/retool/files/nsjail-seccomp.json b/charts/retool/files/nsjail-seccomp.json new file mode 100644 index 0000000..42c9c5c --- /dev/null +++ b/charts/retool/files/nsjail-seccomp.json @@ -0,0 +1,692 @@ +{ + "defaultAction": "SCMP_ACT_ERRNO", + "defaultErrnoRet": 1, + "archMap": [ + { + "architecture": "SCMP_ARCH_X86_64", + "subArchitectures": ["SCMP_ARCH_X86", "SCMP_ARCH_X32"] + }, + { + "architecture": "SCMP_ARCH_AARCH64", + "subArchitectures": ["SCMP_ARCH_ARM"] + }, + { + 
"architecture": "SCMP_ARCH_MIPS64", + "subArchitectures": ["SCMP_ARCH_MIPS", "SCMP_ARCH_MIPS64N32"] + }, + { + "architecture": "SCMP_ARCH_MIPS64N32", + "subArchitectures": ["SCMP_ARCH_MIPS", "SCMP_ARCH_MIPS64"] + }, + { + "architecture": "SCMP_ARCH_MIPSEL64", + "subArchitectures": ["SCMP_ARCH_MIPSEL", "SCMP_ARCH_MIPSEL64N32"] + }, + { + "architecture": "SCMP_ARCH_MIPSEL64N32", + "subArchitectures": ["SCMP_ARCH_MIPSEL", "SCMP_ARCH_MIPSEL64"] + }, + { + "architecture": "SCMP_ARCH_S390X", + "subArchitectures": ["SCMP_ARCH_S390"] + }, + { + "architecture": "SCMP_ARCH_RISCV64", + "subArchitectures": null + } + ], + "syscalls": [ + { + "names": [ + "accept", + "accept4", + "access", + "adjtimex", + "alarm", + "bind", + "brk", + "cachestat", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_pwait2", + "epoll_wait", + "epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + "fchmodat2", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsetxattr", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_requeue", + "futex_time64", + "futex_wait", + "futex_waitv", + "futex_wake", + "futimesat", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + 
"getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "get_robust_list", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "get_thread_area", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "getxattrat", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "ioctl", + "io_destroy", + "io_getevents", + "io_pgetevents", + "io_pgetevents_time64", + "ioprio_get", + "ioprio_set", + "io_setup", + "io_submit", + "ipc", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listmount", + "listxattr", + "listxattrat", + "llistxattr", + "_llseek", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "map_shadow_stack", + "membarrier", + "memfd_create", + "memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "mseal", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "name_to_handle_at", + "nanosleep", + "newfstatat", + "_newselect", + "open", + "openat", + "openat2", + "pause", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_mrelease", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "recv", + "recvfrom", + "recvmmsg", + 
"recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "removexattrat", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "riscv_hwprobe", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "set_robust_list", + "setsid", + "setsockopt", + "set_thread_area", + "set_tid_address", + "setuid", + "setuid32", + "setxattr", + "setxattrat", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "statmount", + "statx", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "syncfs", + "sysinfo", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + 
"umask", + "uname", + "unlink", + "unlinkat", + "uretprobe", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev" + ], + "action": "SCMP_ACT_ALLOW" + }, + { + "names": ["process_vm_readv", "process_vm_writev", "ptrace"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "minKernel": "4.8" + } + }, + { + "names": ["socket"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 40, + "op": "SCMP_CMP_NE" + } + ] + }, + { + "names": ["personality"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["personality"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 8, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["personality"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131072, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["personality"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131080, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["personality"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 4294967295, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["sync_file_range2", "swapcontext"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": ["ppc64le"] + } + }, + { + "names": ["arm_fadvise64_64", "arm_sync_file_range", "sync_file_range2", "breakpoint", "cacheflush", "set_tls"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": ["arm", "arm64"] + } + }, + { + "names": ["arch_prctl"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": ["amd64", "x32"] + } + }, + { + "names": ["modify_ldt"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": ["amd64", "x32", "x86"] + } + }, + { + "names": ["s390_pci_mmio_read", "s390_pci_mmio_write", "s390_runtime_instr"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": ["s390", "s390x"] + } + }, + { + 
"names": ["riscv_flush_icache"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": ["riscv64"] + } + }, + { + "names": ["open_by_handle_at"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_DAC_READ_SEARCH"] + } + }, + { + "names": ["clone", "clone2", "mount", "pivot_root", "sethostname", "umount2"], + "action": "SCMP_ACT_ALLOW", + "comment": "Retool specific syscalls to enable nsjail sandboxing" + }, + { + "names": [ + "bpf", + "clone", + "clone3", + "fanotify_init", + "fsconfig", + "fsmount", + "fsopen", + "fspick", + "lookup_dcookie", + "lsm_get_self_attr", + "lsm_list_modules", + "lsm_set_self_attr", + "mount", + "mount_setattr", + "move_mount", + "open_tree", + "perf_event_open", + "quotactl", + "quotactl_fd", + "setdomainname", + "sethostname", + "setns", + "syslog", + "umount", + "umount2", + "unshare" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_ADMIN"] + } + }, + { + "names": ["clone"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 2114060288, + "op": "SCMP_CMP_MASKED_EQ" + } + ], + "excludes": { + "caps": ["CAP_SYS_ADMIN"], + "arches": ["s390", "s390x"] + } + }, + { + "names": ["clone"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 1, + "value": 2114060288, + "op": "SCMP_CMP_MASKED_EQ" + } + ], + "comment": "s390 parameter ordering for clone is different", + "includes": { + "arches": ["s390", "s390x"] + }, + "excludes": { + "caps": ["CAP_SYS_ADMIN"] + } + }, + { + "names": ["clone3"], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "excludes": { + "caps": ["CAP_SYS_ADMIN"] + } + }, + { + "names": ["reboot"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_BOOT"] + } + }, + { + "names": ["chroot"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_CHROOT"] + } + }, + { + "names": ["delete_module", "init_module", "finit_module"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_MODULE"] + } + }, + { + "names": ["acct"], + 
"action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_PACCT"] + } + }, + { + "names": ["kcmp", "pidfd_getfd", "process_madvise", "process_vm_readv", "process_vm_writev", "ptrace"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_PTRACE"] + } + }, + { + "names": ["iopl", "ioperm"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_RAWIO"] + } + }, + { + "names": ["settimeofday", "stime", "clock_settime", "clock_settime64"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_TIME"] + } + }, + { + "names": ["vhangup"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_TTY_CONFIG"] + } + }, + { + "names": ["get_mempolicy", "mbind", "set_mempolicy", "set_mempolicy_home_node"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYS_NICE"] + } + }, + { + "names": ["syslog"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_SYSLOG"] + } + }, + { + "names": ["bpf"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_BPF"] + } + }, + { + "names": ["perf_event_open"], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": ["CAP_PERFMON"] + } + } + ] +} diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index fd28f91..f6f77af 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -125,6 +125,23 @@ app.kubernetes.io/instance: {{ .Release.Name }} telemetry.retool.com/service-name: code-executor {{- end }} +{{/* +Selector labels for js executor. Note changes here will require manual +deployment recreation and incur downtime, so should be avoided. +*/}} +{{- define "retool.jsExecutor.selectorLabels" -}} +retoolService: {{ include "retool.jsExecutor.name" . }} +{{- end }} + +{{/* +Extra (non-selector) labels for js executor. +*/}} +{{- define "retool.jsExecutor.labels" -}} +app.kubernetes.io/name: {{ include "retool.jsExecutor.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +telemetry.retool.com/service-name: js-executor +{{- end }} + {{/* Selector labels for agent worker. Note changes here will require manual deployment recreation and incur downtime, so should be avoided. @@ -370,6 +387,13 @@ Set code executor service name {{ template "retool.fullname" . }}-code-executor {{- end -}} +{{/* +Set JS executor service name +*/}} +{{- define "retool.jsExecutor.name" -}} +{{ template "retool.fullname" . }}-js-executor +{{- end -}} + {{/* Set multiplayer service name */}} @@ -564,6 +588,25 @@ Usage: (template "retool.codeExecutor.image.tag" .) {{- end -}} {{- end -}} +{{/* +Set JS executor image tag +Usage: (template "retool.jsExecutor.image.tag" .) +*/}} +{{- define "retool.jsExecutor.image.tag" -}} +{{- if .Values.image.tag -}} + {{- $valid_retool_version_regexp := "([0-9]+\\.[0-9]+(\\.[0-9]+)?(-[a-zA-Z0-9]+)?)" }} + {{- $semver_version_regexp := "[0-9]+\\.[0-9]+(\\.[0-9]+)?" }} + {{- $retool_version_with_ce := ( and ( regexMatch $valid_retool_version_regexp $.Values.image.tag ) ( semverCompare ">= 3.20.15-0" ( regexFind $semver_version_regexp $.Values.image.tag ) ) ) }} + {{- if $retool_version_with_ce -}} + {{- .Values.image.tag -}} + {{- else -}} + {{- "1.1.0" -}} + {{- end -}} +{{- else -}} + {{- fail "Please set a value for .Values.image.tag" }} +{{- end -}} +{{- end -}} + {{- define "retool_version_with_java_dbconnector_opt_out" -}} {{- $output := "" -}} {{- $valid_retool_version_regexp := "([0-9]+\\.[0-9]+(\\.[0-9]+)?(-[a-zA-Z0-9]+)?)" }} diff --git a/charts/retool/templates/_workers.tpl b/charts/retool/templates/_workers.tpl index 79c214d..7dca091 100644 --- a/charts/retool/templates/_workers.tpl +++ b/charts/retool/templates/_workers.tpl @@ -213,6 +213,8 @@ spec: value: {{ template "retool.postgresql.ssl_enabled" $ }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" $ }} + - name: JS_EXECUTOR_INGRESS_DOMAIN + value: http://{{ template
"retool.jsExecutor.name" $ }} {{- include "retool.agentSandbox.backendEnvVars" $ | nindent 10 }} {{- include "retool.telemetry.includeEnvVars" $ | nindent 10 }} diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml new file mode 100644 index 0000000..60c45c8 --- /dev/null +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "retool.jsExecutor.name" . }}-seccomp # release-scoped so several releases can share a namespace; must match the seccomp-profile volume in deployment_js_executor.yaml +data: + nsjail-seccomp.json: | + {{- .Files.Get "files/nsjail-seccomp.json" | nindent 4 }} diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index dd987b7..171e1cf 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -180,6 +180,8 @@ spec: {{- end }} {{- end }} {{- end }} + - name: JS_EXECUTOR_INGRESS_DOMAIN + value: http://{{ template "retool.jsExecutor.name" . }} {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml new file mode 100644 index 0000000..b432b0f --- /dev/null +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -0,0 +1,211 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "retool.jsExecutor.name" . }} + labels: + {{- include "retool.jsExecutor.selectorLabels" . | nindent 4 }} + {{- include "retool.jsExecutor.labels" . | nindent 4 }} + {{- include "retool.labels" . | nindent 4 }} +{{- if .Values.deployment.labels }} +{{ toYaml .Values.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ .Values.jsExecutor.replicaCount }} + selector: + matchLabels: + {{- include "retool.jsExecutor.selectorLabels" .
| nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: + checksum/seccomp: {{ .Files.Get "files/nsjail-seccomp.json" | sha256sum }} +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- if .Values.jsExecutor.annotations }} +{{ toYaml .Values.jsExecutor.annotations | indent 8 }} +{{- end }} + labels: + {{- include "retool.jsExecutor.selectorLabels" . | nindent 8 }} + {{- include "retool.jsExecutor.labels" . | nindent 8 }} + {{- include "retool.labels" . | nindent 8 }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if .Values.jsExecutor.labels }} +{{ toYaml .Values.jsExecutor.labels | indent 8 }} +{{- end }} + spec: + serviceAccountName: {{ template "retool.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} + initContainers: + - name: install-seccomp + image: busybox:1.37.0@sha256:b3255e7dfbcd10cb367af0d409747d511aeb66dfac98cf30e97e87e4207dd76f + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi + command: + - /bin/sh + - -c + - | + DEST="/host-seccomp/{{ .Values.jsExecutor.seccompLocalhostProfile }}" + mkdir -p "$(dirname "$DEST")" + cp /seccomp-profile/nsjail-seccomp.json "$DEST" + echo "seccomp profile installed at $DEST" + volumeMounts: + - name: seccomp-profile + mountPath: /seccomp-profile + - name: host-seccomp + mountPath: /host-seccomp +{{- if .Values.initContainers }} +{{- range $key, $value := .Values.initContainers }} + - name: "{{ $key }}" +{{ toYaml $value | indent 10 }} +{{- end }} +{{- end }} + containers: + - name: js-executor + image: "{{ .Values.jsExecutor.image.repository }}:{{ include "retool.jsExecutor.image.tag" .
}}" + imagePullPolicy: {{ .Values.jsExecutor.image.pullPolicy | default .Values.image.pullPolicy }} # per-service override; falls back to the global image.pullPolicy + securityContext: + capabilities: + add: ["NET_ADMIN"] + seccompProfile: + type: Localhost + localhostProfile: {{ .Values.jsExecutor.seccompLocalhostProfile }} + env: + - name: DEPLOYMENT_TEMPLATE_TYPE + value: {{ template "retool.deploymentTemplateType" . }} + - name: DEPLOYMENT_TEMPLATE_VERSION + value: {{ template "retool.deploymentTemplateVersion" . }} + - name: NODE_ENV + value: production + {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} + {{- range $key, $value := .Values.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- range .Values.environmentSecrets }} + - name: {{ .name }} + valueFrom: + secretKeyRef: + name: {{ .secretKeyRef.name }} + key: {{ .secretKeyRef.key }} + {{- end }} + {{- with .Values.environmentVariables }} +{{ toYaml . | indent 10 }} + {{- end }} + ports: + - containerPort: 3000 + name: http-server + protocol: TCP + livenessProbe: + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + httpGet: + path: /api/readiness + port: 3000 + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + resources: +{{ toYaml .Values.jsExecutor.resources | indent 10 }} + volumeMounts: +{{- if .Values.jsExecutor.volumeMounts }} +{{ toYaml .Values.jsExecutor.volumeMounts | indent 10 }} +{{- end }} +{{- if .Values.extraVolumeMounts }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} +{{- end }} +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} +{{- end }} +{{- with .Values.extraContainers }} +{{
tpl . $ | indent 6 }} +{{- end }} + volumes: + - name: seccomp-profile + configMap: + name: "{{ template "retool.jsExecutor.name" . }}-seccomp" # must match metadata.name in configmap_js_executor.yaml + - name: host-seccomp + hostPath: + path: /var/lib/kubelet/seccomp + type: DirectoryOrCreate +{{- if .Values.jsExecutor.volumes }} +{{ toYaml .Values.jsExecutor.volumes | indent 8 }} +{{- end }} +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} +{{- end }} +{{- if .Values.extraVolumes }} +{{ toYaml .Values.extraVolumes | indent 8 }} +{{- end }} +{{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} +{{- end }} +{{- if .Values.jsExecutor.affinity }} + affinity: +{{ toYaml .Values.jsExecutor.affinity | indent 8 }} +{{- end }} +{{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} +{{- end }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "retool.jsExecutor.name" . }} +spec: + selector: + {{- include "retool.jsExecutor.selectorLabels" . | nindent 4 }} + ports: + - protocol: TCP + port: 80 + targetPort: 3000 + name: http-server +--- +{{- if .Values.podDisruptionBudget }} +{{- if semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ template "retool.jsExecutor.name" . }} +spec: + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} + selector: + matchLabels: + {{- include "retool.jsExecutor.selectorLabels" .
| nindent 6 }} +{{- end }} +--- diff --git a/charts/retool/templates/deployment_workflows.yaml b/charts/retool/templates/deployment_workflows.yaml index 0b8be82..bb2bae7 100644 --- a/charts/retool/templates/deployment_workflows.yaml +++ b/charts/retool/templates/deployment_workflows.yaml @@ -153,6 +153,8 @@ spec: {{- end }} {{- end }} {{- end }} + - name: JS_EXECUTOR_INGRESS_DOMAIN + value: http://{{ template "retool.jsExecutor.name" . }} {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 5149ff4..a4104c7 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -606,6 +606,37 @@ codeExecutor: securityContext: privileged: true +# JS Executor +jsExecutor: + image: + repository: tryretool/js-executor-service + pullPolicy: IfNotPresent + + replicaCount: 1 + + seccompLocalhostProfile: profiles/nsjail-seccomp.json + + # Annotations for JS executor pods + annotations: {} + + # Labels for JS executor pods + labels: {} + + volumes: [] # extra pod volumes (list); appended after the built-in seccomp volumes + volumeMounts: [] # extra volumeMounts (list) for the js-executor container + + # Config affinity and anti-affinity rules for the JS executor pods + affinity: {} + + # Resources for the JS executor + resources: + limits: + cpu: 2000m + memory: 2048Mi + requests: + cpu: 1000m + memory: 1024Mi + agents: # Enable AI Agents enabled: false diff --git a/values.yaml b/values.yaml index 5149ff4..a4104c7 100644 --- a/values.yaml +++ b/values.yaml @@ -606,6 +606,37 @@ codeExecutor: securityContext: privileged: true +# JS Executor +jsExecutor: + image: + repository: tryretool/js-executor-service + pullPolicy: IfNotPresent + + replicaCount: 1 + + seccompLocalhostProfile: profiles/nsjail-seccomp.json + + # Annotations for JS executor pods + annotations: {} + + # Labels for JS executor pods + labels: {} + + volumes: [] # extra pod volumes (list); appended after the built-in seccomp volumes + volumeMounts: [] # extra volumeMounts (list) for the js-executor container + + # Config affinity and anti-affinity rules for the JS executor pods + affinity: {} + + # Resources for the JS executor + resources: + limits: + cpu: 2000m + memory: 2048Mi +
requests: + cpu: 1000m + memory: 1024Mi + agents: # Enable AI Agents enabled: false From 7bb327ea1cbc4d019448dd0e84318e81b6d44cad Mon Sep 17 00:00:00 2001 From: lukefoster11 Date: Mon, 4 May 2026 18:12:47 -0700 Subject: [PATCH 03/37] fix jsExecutor image lookup --- charts/retool/templates/_helpers.tpl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index f6f77af..fb85f2e 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -593,7 +593,9 @@ Set JS executor image tag Usage: (template "retool.jsExecutor.image.tag" .) */}} {{- define "retool.jsExecutor.image.tag" -}} -{{- if .Values.image.tag -}} +{{- if .Values.jsExecutor.image.tag -}} + {{- .Values.jsExecutor.image.tag -}} +{{- else if .Values.image.tag -}} {{- $valid_retool_version_regexp := "([0-9]+\\.[0-9]+(\\.[0-9]+)?(-[a-zA-Z0-9]+)?)" }} {{- $semver_version_regexp := "[0-9]+\\.[0-9]+(\\.[0-9]+)?" }} {{- $retool_version_with_ce := ( and ( regexMatch $valid_retool_version_regexp $.Values.image.tag ) ( semverCompare ">= 3.20.15-0" ( regexFind $semver_version_regexp $.Values.image.tag ) ) ) }} @@ -603,7 +605,7 @@ Usage: (template "retool.jsExecutor.image.tag" .) 
{{- "1.1.0" -}} {{- end -}} {{- else -}} - {{- fail "Please set a value for .Values.image.tag" }} + {{- fail "Please set a value for .Values.image.tag or .Values.jsExecutor.image.tag" }} {{- end -}} {{- end -}} From 625173d7207f432c94572b3ed9a121f3636b101b Mon Sep 17 00:00:00 2001 From: lukefoster11 Date: Wed, 6 May 2026 13:07:48 -0700 Subject: [PATCH 04/37] fix gvisor seccomp errno ret --- charts/retool/files/gvisor-seccomp.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/retool/files/gvisor-seccomp.json b/charts/retool/files/gvisor-seccomp.json index 9b2a1de..120950f 100644 --- a/charts/retool/files/gvisor-seccomp.json +++ b/charts/retool/files/gvisor-seccomp.json @@ -1,7 +1,7 @@ { "comment": "Docker default seccomp profile extended with syscalls required by gVisor runsc (systrap platform, rootless mode). Use with: docker run --security-opt seccomp=gvisor-seccomp.json", "defaultAction": "SCMP_ACT_ERRNO", - "defaultErrnoRet": 1, + "defaultErrnoRet": 38, "archMap": [ { "architecture": "SCMP_ARCH_X86_64", From 414e1fe052ffd216becb84381f7feb8011a07127 Mon Sep 17 00:00:00 2001 From: arnold-retool Date: Thu, 14 May 2026 14:26:40 -0700 Subject: [PATCH 05/37] Add MCP server support to Retool Helm chart (#285) Adds optional MCP server support to the Retool Helm chart, disabled by default. Main changes: - Adds a new mcp values block in charts/retool/values.yaml and root values.yaml. - Adds a standalone MCP Service, Deployment, and optional PodDisruptionBudget. - Runs MCP using the backend image with SERVICE_TYPE=MCP_SERVER. - Supports MCP configuration for replicas, resources, env vars, toolsets, transport/session limits, service ports, affinity, node selectors, and tolerations. - Routes /mcp and /.well-known/oauth-protected-resource to the MCP service through both Ingress and HTTPRoute. - Adds MCP helper labels/naming in _helpers.tpl. - Adds CI render coverage via test-mcp-enabled-option.yaml. 
Validation performed: - Helm template render with MCP disabled - Helm template render with MCP enabled - Helm lint with MCP enabled - kubeconform validation during earlier verification --- charts/retool/Chart.yaml | 2 +- charts/retool/ci/test-mcp-enabled-option.yaml | 18 ++ charts/retool/templates/_helpers.tpl | 136 ++++++++-- charts/retool/templates/deployment_mcp.yaml | 239 ++++++++++++++++++ charts/retool/templates/httproute.yaml | 10 + charts/retool/templates/ingress.yaml | 17 ++ charts/retool/templates/service.yaml | 21 +- charts/retool/values.yaml | 142 +++++++++++ values.yaml | 142 +++++++++++ 9 files changed, 706 insertions(+), 21 deletions(-) create mode 100644 charts/retool/ci/test-mcp-enabled-option.yaml create mode 100644 charts/retool/templates/deployment_mcp.yaml diff --git a/charts/retool/Chart.yaml b/charts/retool/Chart.yaml index dbedb80..0ebe5b1 100644 --- a/charts/retool/Chart.yaml +++ b/charts/retool/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: retool description: A Helm chart for Kubernetes type: application -version: 6.11.0 +version: 6.11.1 maintainers: - name: Retool Engineering email: engineering+helm@retool.com diff --git a/charts/retool/ci/test-mcp-enabled-option.yaml b/charts/retool/ci/test-mcp-enabled-option.yaml new file mode 100644 index 0000000..5c51b40 --- /dev/null +++ b/charts/retool/ci/test-mcp-enabled-option.yaml @@ -0,0 +1,18 @@ +mcp: + enabled: true + replicaCount: 2 + config: + oauthIntrospectionAuthToken: test-oauth-introspection-token + enabledToolsets: + - apps + - resources + maxTransportSessions: 50 + sessionIdleTimeoutMs: 600000 + environmentVariables: + - name: MCP_TEST_OPTION + value: "true" + service: + internalPort: + +httpRoute: + enabled: true diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index fb85f2e..fac318c 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -23,6 +23,97 @@ If release name contains chart name it will be 
used as a full name. {{- end }} {{- end }} +{{/* +Whether MCP routing needs the main Retool Service to expose the backend API +listener in addition to the primary frontend-facing port. +*/}} +{{- define "retool.mcp.needsBackendApi" -}} +{{- $mcp := .Values.mcp | default dict -}} +{{- $mcpIngress := $mcp.ingress | default dict -}} +{{- $mcpHttpRoute := $mcp.httpRoute | default dict -}} +{{- $needsBackendApi := false -}} +{{- if and .Values.ingress.enabled $mcp.enabled $mcpIngress.enabled -}} +{{- range ($mcpIngress.paths | default list) -}} +{{- if eq (.target | default "mcp") "backendApi" -}} +{{- $needsBackendApi = true -}} +{{- end -}} +{{- end -}} +{{- end -}} +{{- if and .Values.httpRoute.enabled $mcp.enabled $mcpHttpRoute.enabled -}} +{{- range ($mcpHttpRoute.rules | default list) -}} +{{- if eq (.target | default "mcp") "backendApi" -}} +{{- $needsBackendApi = true -}} +{{- end -}} +{{- end -}} +{{- end -}} +{{- if $needsBackendApi -}}true{{- else -}}false{{- end -}} +{{- end }} + +{{/* +Render an MCP-related Ingress path. By default paths route to the MCP service; +target: backendApi routes to the main backend API listener instead. +*/}} +{{- define "retool.ingress.mcpPath" -}} +{{- $root := .root -}} +{{- $path := .path -}} +{{- $target := .target | default ($path.target | default "mcp") -}} +{{- if not (or (eq $target "mcp") (eq $target "backendApi")) -}} +{{- fail (printf "Invalid mcp.ingress.paths target %q for path %q. Valid targets are \"mcp\" and \"backendApi\"." 
$target $path.path) -}} +{{- end -}} +{{- $mcpService := (($root.Values.mcp).service) | default dict -}} +{{- $serviceName := include "retool.mcp.name" $root -}} +{{- $servicePort := $path.port | default ($mcpService.externalPort | default 4010) -}} +{{- $pathType := $path.pathType | default "ImplementationSpecific" -}} +{{- if eq $target "backendApi" -}} +{{- $serviceName = include "retool.fullname" $root -}} +{{- $servicePort = $path.port | default (.backendApiPort | default 3001) -}} +{{- $pathType = $path.pathType | default "Exact" -}} +{{- end -}} +- path: {{ $path.path }} + {{- if (semverCompare ">=1.18-0" $root.Capabilities.KubeVersion.Version) }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $root.Capabilities.KubeVersion.Version }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} +{{- end }} + +{{/* +Render an MCP-related HTTPRoute rule. By default rules route to the MCP service; +target: backendApi routes to the main backend API listener instead. +*/}} +{{- define "retool.httpRoute.mcpRule" -}} +{{- $root := .root -}} +{{- $rule := .rule -}} +{{- $target := .target | default ($rule.target | default "mcp") -}} +{{- if not (or (eq $target "mcp") (eq $target "backendApi")) -}} +{{- fail (printf "Invalid mcp.httpRoute.rules target %q for path %q. Valid targets are \"mcp\" and \"backendApi\"." 
$target $rule.path) -}} +{{- end -}} +{{- $mcpService := (($root.Values.mcp).service) | default dict -}} +{{- $serviceName := include "retool.mcp.name" $root -}} +{{- $servicePort := $rule.port | default ($mcpService.externalPort | default 4010) -}} +{{- $pathType := $rule.pathType | default "PathPrefix" -}} +{{- if eq $target "backendApi" -}} +{{- $serviceName = include "retool.fullname" $root -}} +{{- $servicePort = $rule.port | default (.backendApiPort | default 3001) -}} +{{- $pathType = $rule.pathType | default "Exact" -}} +{{- end -}} +- matches: + - path: + type: {{ $pathType }} + value: {{ $rule.path }} + backendRefs: + - name: {{ $serviceName }} + port: {{ $servicePort }} +{{- end }} + {{/* Create chart name and version as used by the chart label. */}} @@ -187,6 +278,28 @@ Create the name of the service account to use {{- end }} {{- end }} +{{/* +Render map-style env values as Kubernetes EnvVar entries. +Scalar values are always quoted so YAML booleans and numbers become strings. +Map values allow structured EnvVar fields such as valueFrom. +*/}} +{{- define "retool.env" -}} +{{- range $key, $value := . }} +- name: {{ $key | quote }} +{{- if kindIs "map" $value }} +{{- if hasKey $value "value" }} + value: {{ get $value "value" | quote }} +{{- end }} +{{- range $field, $fieldValue := omit $value "value" }} + {{ $field }}: +{{ toYaml $fieldValue | indent 4 }} +{{- end }} +{{- else }} + value: {{ $value | quote }} +{{- end }} +{{- end }} +{{- end }} + {{- define "retool.postgresql.fullname" -}} {{- $name := default "postgresql" .Values.postgresql.nameOverride -}} {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} @@ -569,6 +682,13 @@ Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} {{- end }} {{- end -}} +{{/* +Set MCP server service name +*/}} +{{- define "retool.mcp.name" -}} +{{ template "retool.fullname" . }}-mcp +{{- end -}} + {{/* Set code executor image tag Usage: (template "retool.codeExecutor.image.tag" .) 
@@ -626,22 +746,6 @@ Usage: (template "retool.jsExecutor.image.tag" .) {{/* Checks whether or not ExternalSecret definitions are enabled and can potentially clobber secrets or explicitly allow additional direct secret refs. */}} -{{/* -Render env vars from .Values.env, handling both string values and object values (e.g. valueFrom). -Usage: {{- include "retool.env" .Values.env | nindent 10 }} -*/}} -{{- define "retool.env" -}} -{{- range $key, $value := . }} -{{- if not (kindIs "map" $value) }} -- name: "{{ $key }}" - value: "{{ $value }}" -{{- else }} -- name: "{{ $key }}" -{{ toYaml $value | indent 2 }} -{{- end }} -{{- end }} -{{- end -}} - {{- define "shouldIncludeConfigSecretsEnvVars" -}} {{- $output := "" -}} {{- if or (not (or (.Values.externalSecrets.enabled) (.Values.externalSecrets.externalSecretsOperator.enabled))) .Values.externalSecrets.includeConfigSecrets -}} diff --git a/charts/retool/templates/deployment_mcp.yaml b/charts/retool/templates/deployment_mcp.yaml new file mode 100644 index 0000000..fdcd632 --- /dev/null +++ b/charts/retool/templates/deployment_mcp.yaml @@ -0,0 +1,239 @@ +{{- if .Values.mcp.enabled }} +{{- $mcpConfig := .Values.mcp.config | default dict }} +{{- $hasOAuthIntrospectionAuthTokenEnv := false }} +{{- range .Values.mcp.environmentVariables }} +{{- if eq .name "OAUTH_INTROSPECTION_AUTH_TOKEN" }} +{{- $hasOAuthIntrospectionAuthTokenEnv = true }} +{{- end }} +{{- end }} +{{- if not (or $mcpConfig.oauthIntrospectionAuthTokenSecretName $mcpConfig.oauthIntrospectionAuthToken $hasOAuthIntrospectionAuthTokenEnv) }} +{{- fail "Please set .Values.mcp.config.oauthIntrospectionAuthTokenSecretName, .Values.mcp.config.oauthIntrospectionAuthToken, or an OAUTH_INTROSPECTION_AUTH_TOKEN entry in .Values.mcp.environmentVariables when .Values.mcp.enabled is true" }} +{{- end }} +{{- $mcpInternalPort := .Values.mcp.service.internalPort | default 4010 }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "retool.mcp.name" . 
}} + labels: + {{- include "retool.labels" . | nindent 4 }} + {{- if .Values.mcp.service.labels }} + {{- range $key, $value := .Values.mcp.service.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + {{- if .Values.mcp.service.annotations }} + {{- with .Values.mcp.service.annotations }} + annotations: + {{- range $key, $value := . }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + {{- end }} +spec: + selector: + retoolService: {{ template "retool.mcp.name" . }} + ports: + - name: http-server + protocol: TCP + {{- if .Values.mcp.service.externalPort }} + port: {{ .Values.mcp.service.externalPort }} + {{- else }} + port: 4010 + {{- end }} + {{- if .Values.mcp.service.internalPort }} + targetPort: {{ $mcpInternalPort }} + {{- else }} + targetPort: 4010 + {{- end }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "retool.mcp.name" . }} + labels: +{{- include "retool.labels" . | nindent 4 }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ .Values.mcp.replicaCount }} + selector: + matchLabels: + retoolService: {{ template "retool.mcp.name" . }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- if .Values.mcp.annotations }} +{{ toYaml .Values.mcp.annotations | indent 8 }} +{{- end }} + labels: + retoolService: {{ template "retool.mcp.name" . }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if .Values.mcp.labels }} +{{ toYaml .Values.mcp.labels | indent 8 }} +{{- end }} + spec: + serviceAccountName: {{ template "retool.serviceAccountName" . 
}} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} +{{- if .Values.initContainers }} + initContainers: +{{- range $key, $value := .Values.initContainers }} + - name: "{{ $key }}" +{{ toYaml $value | indent 8 }} +{{- end }} +{{- end }} + containers: + - name: mcp + image: "{{ .Values.image.repository }}:{{ required "Please set a value for .Values.image.tag" .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - bash + - -c + - chmod -R +x ./docker_scripts; sync; ./docker_scripts/start_api.sh + {{- if .Values.commandline.args }} +{{ toYaml .Values.commandline.args | indent 10 }} + {{- end }} + env: + - name: DEPLOYMENT_TEMPLATE_TYPE + value: {{ template "retool.deploymentTemplateType" . }} + - name: DEPLOYMENT_TEMPLATE_VERSION + value: {{ template "retool.deploymentTemplateVersion" . }} + - name: NODE_ENV + value: production + - name: SERVICE_TYPE + value: MCP_SERVER + - name: MCP_PORT + value: {{ $mcpInternalPort | quote }} + - name: RETOOL_BACKEND_URL + value: {{ $mcpConfig.retoolBackendUrl | default (printf "http://%s:%v" (include "retool.fullname" .) .Values.service.externalPort) | quote }} + {{- if $mcpConfig.retoolUrl }} + - name: RETOOL_URL + value: {{ $mcpConfig.retoolUrl | quote }} + {{- end }} + {{- if $mcpConfig.oauthIntrospectionAuthTokenSecretName }} + - name: OAUTH_INTROSPECTION_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: {{ $mcpConfig.oauthIntrospectionAuthTokenSecretName }} + key: {{ $mcpConfig.oauthIntrospectionAuthTokenSecretKey | default "oauthIntrospectionAuthToken" }} + {{- else if $mcpConfig.oauthIntrospectionAuthToken }} + - name: OAUTH_INTROSPECTION_AUTH_TOKEN + value: {{ $mcpConfig.oauthIntrospectionAuthToken | quote }} + {{- end }} + {{- if $mcpConfig.nodeOptions }} + - name: NODE_OPTIONS + value: {{ $mcpConfig.nodeOptions | quote }} + {{- end }} + {{- with $mcpConfig.enabledToolsets }} + - name: RETOOL_TOOLSETS + value: {{ join "," . 
| quote }} + {{- end }} + {{- if hasKey $mcpConfig "maxTransportSessions" }} + - name: MCP_MAX_TRANSPORT_SESSIONS + value: {{ $mcpConfig.maxTransportSessions | quote }} + {{- end }} + {{- if hasKey $mcpConfig "sessionIdleTimeoutMs" }} + - name: MCP_SESSION_IDLE_TIMEOUT_MS + value: {{ $mcpConfig.sessionIdleTimeoutMs | quote }} + {{- end }} + {{- if hasKey $mcpConfig "sessionSweepIntervalMs" }} + - name: MCP_SESSION_SWEEP_INTERVAL_MS + value: {{ $mcpConfig.sessionSweepIntervalMs | quote }} + {{- end }} + {{- if hasKey $mcpConfig "sessionGaugeEmitIntervalMs" }} + - name: MCP_SESSION_GAUGE_EMIT_INTERVAL_MS + value: {{ $mcpConfig.sessionGaugeEmitIntervalMs | quote }} + {{- end }} + {{- with .Values.mcp.environmentVariables }} +{{ toYaml . | indent 10 }} + {{- end }} + ports: + - containerPort: {{ $mcpInternalPort }} + name: http-server + protocol: TCP + readinessProbe: + httpGet: + path: /healthcheck + port: {{ $mcpInternalPort }} + periodSeconds: 10 + livenessProbe: + httpGet: + path: /healthcheck + port: {{ $mcpInternalPort }} + initialDelaySeconds: 30 + failureThreshold: 10 + timeoutSeconds: 10 + periodSeconds: 20 + resources: +{{ toYaml .Values.mcp.resources | indent 10 }} + volumeMounts: + {{- range $configFile := (keys .Values.files) }} + - name: {{ template "retool.name" $ }} + mountPath: "/usr/share/retool/config/{{ $configFile }}" + subPath: {{ $configFile }} + {{- end }} + {{if and .Values.persistentVolumeClaim.enabled .Values.persistentVolumeClaim.mountPath }} + - name: retool-pv + mountPath: {{ .Values.persistentVolumeClaim.mountPath }} + {{- end }} +{{- if .Values.extraVolumeMounts }} +{{ toYaml .Values.extraVolumeMounts | indent 8 }} +{{- end }} +{{- if .Values.securityContext.extraContainerSecurityContext }} + securityContext: +{{ toYaml .Values.securityContext.extraContainerSecurityContext | indent 10 }} +{{- end }} +{{- with .Values.extraContainers }} +{{ tpl . 
$ | indent 6 }} +{{- end }} +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} +{{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} + {{- end }} + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} +{{- if .Values.securityContext.enabled }} + securityContext: + runAsUser: {{ .Values.securityContext.runAsUser }} + fsGroup: {{ .Values.securityContext.fsGroup }} +{{- if .Values.securityContext.extraSecurityContext }} +{{ toYaml .Values.securityContext.extraSecurityContext | indent 8 }} +{{- end }} +{{- end }} + volumes: +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} +{{- end }} + {{- if .Values.persistentVolumeClaim.enabled }} + - name: retool-pv + persistentVolumeClaim: + claimName: {{ default (include "retool.fullname" .) .Values.persistentVolumeClaim.existingClaim }} + {{- end }} +{{- if .Values.extraVolumes }} +{{ toYaml .Values.extraVolumes | indent 8 }} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/httproute.yaml b/charts/retool/templates/httproute.yaml index e880892..222b072 100644 --- a/charts/retool/templates/httproute.yaml +++ b/charts/retool/templates/httproute.yaml @@ -1,6 +1,11 @@ {{- if .Values.httpRoute.enabled }} {{- $fullName := include "retool.fullname" . 
-}} {{- $svcPort := .Values.service.externalPort -}} +{{- $mcp := .Values.mcp | default dict -}} +{{- $mcpBackendMetadata := $mcp.backendMetadata | default dict -}} +{{- $mcpHttpRoute := $mcp.httpRoute | default dict -}} +{{- $backendApiService := $mcpBackendMetadata.service | default dict -}} +{{- $backendApiPort := $backendApiService.externalPort | default 3001 -}} apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: @@ -35,6 +40,11 @@ spec: port: {{ .port }} {{- end }} {{- end }} + {{- if ( and ((.Values.mcp).enabled) $mcpHttpRoute.enabled ) }} + {{- range $mcpHttpRoute.rules }} + {{- include "retool.httpRoute.mcpRule" (dict "root" $ "rule" . "backendApiPort" $backendApiPort) | nindent 4 }} + {{- end }} + {{- end }} {{- if .Values.httpRoute.rules }} {{- toYaml .Values.httpRoute.rules | nindent 4 }} {{- else }} diff --git a/charts/retool/templates/ingress.yaml b/charts/retool/templates/ingress.yaml index d63f779..d99d8e6 100644 --- a/charts/retool/templates/ingress.yaml +++ b/charts/retool/templates/ingress.yaml @@ -1,6 +1,11 @@ {{- if .Values.ingress.enabled }} {{- $fullName := include "retool.fullname" . -}} {{- $svcPort := .Values.service.externalPort -}} +{{- $mcp := .Values.mcp | default dict -}} +{{- $mcpBackendMetadata := $mcp.backendMetadata | default dict -}} +{{- $mcpIngress := $mcp.ingress | default dict -}} +{{- $backendApiService := $mcpBackendMetadata.service | default dict -}} +{{- $backendApiPort := $backendApiService.externalPort | default 3001 -}} {{- $pathType := .Values.ingress.pathType -}} {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.Version -}} apiVersion: networking.k8s.io/v1 @@ -44,6 +49,12 @@ spec: number: {{ .port }} {{- end }} {{- end }} + # MCP-related paths must be added before the main path to avoid less specific paths being matched first. + {{- if ( and ((.Values.mcp).enabled) $mcpIngress.enabled ) }} + {{- range $mcpIngress.paths }} + {{- include "retool.ingress.mcpPath" (dict "root" $ "path" . 
"backendApiPort" $backendApiPort) | nindent 10 }} + {{- end }} + {{- end }} - path: {{- if and $pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.Version) }} pathType: {{ $pathType }} @@ -80,6 +91,12 @@ spec: number: {{ .port }} {{- end }} {{- end }} + # MCP-related paths must be added before the main path to avoid less specific paths being matched first. + {{- if ( and (($.Values.mcp).enabled) $mcpIngress.enabled ) }} + {{- range $mcpIngress.paths }} + {{- include "retool.ingress.mcpPath" (dict "root" $ "path" . "backendApiPort" $backendApiPort) | nindent 10 }} + {{- end }} + {{- end }} {{- range .paths }} - path: {{ .path }} {{- if and $pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.Version) }} diff --git a/charts/retool/templates/service.yaml b/charts/retool/templates/service.yaml index f7c5049..4c0b8bc 100644 --- a/charts/retool/templates/service.yaml +++ b/charts/retool/templates/service.yaml @@ -1,3 +1,10 @@ +{{- $mcpNeedsBackendApi := eq (include "retool.mcp.needsBackendApi" .) 
"true" -}} +{{- $backendApiService := (((.Values.mcp).backendMetadata).service) | default dict -}} +{{- $servicePortName := .Values.service.portName | default "http" -}} +{{- $backendApiPortName := $backendApiService.portName | default "http-api" -}} +{{- if and $mcpNeedsBackendApi (eq $servicePortName $backendApiPortName) -}} +{{- fail "When MCP backend API routing is enabled, .Values.service.portName and .Values.mcp.backendMetadata.service.portName must be different" -}} +{{- end -}} apiVersion: v1 kind: Service metadata: @@ -28,11 +35,17 @@ spec: - port: {{ .Values.service.externalPort }} targetPort: {{ .Values.service.internalPort }} protocol: TCP -{{ if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }} +{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }} nodePort: {{ .Values.service.nodePort }} -{{ end }} -{{- if .Values.service.portName }} - name: {{ .Values.service.portName }} +{{- end }} +{{- if or .Values.service.portName $mcpNeedsBackendApi }} + name: {{ $servicePortName }} +{{- end }} +{{- if $mcpNeedsBackendApi }} + - name: {{ $backendApiPortName }} + port: {{ $backendApiService.externalPort | default 3001 }} + targetPort: {{ $backendApiService.internalPort | default 3001 }} + protocol: TCP {{- end }} {{- if .Values.service.externalIPs }} externalIPs: diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index a4104c7..836e8cd 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -563,6 +563,148 @@ multiplayer: annotations: {} labels: {} +mcp: + # Enable this to run Retool's MCP server as a separate deployment. + enabled: false + + replicaCount: 1 + + # Annotations for MCP server pods + annotations: {} + + # Labels for MCP server pods + labels: {} + + # MCP-specific environment variables. Can include valueFrom entries. + # Use this for MCP-only env vars that are not exposed under mcp.config. 
+ environmentVariables: [] + + # MCP server configuration. + # The chart always sets RETOOL_BACKEND_URL, defaulting to the release's + # internal Retool service, e.g. http://<retool-fullname>:<service.externalPort>. + # For other config keys, the chart only emits environment variables when you set + # them. When unset, the MCP service uses its runtime defaults: + # enabledToolsets: all available toolsets + # apps, resources, workflows, folders, environments, users, + # organization, user_invites, feedback + # maxTransportSessions: 1000 + # sessionIdleTimeoutMs: 1800000 (30 minutes) + # sessionSweepIntervalMs: 60000 (1 minute) + # sessionGaugeEmitIntervalMs: 30000 (30 seconds) + config: {} + # Example overrides: + # config: + # # Internal URL used by the MCP server to call the Retool backend. + # # Defaults to the release's internal Retool service when unset. + # retoolBackendUrl: + # + # # Public Retool URL used for links. Set explicitly when the request + # # origin is not the right public URL. + # retoolUrl: + # + # # Secret-backed token used by MCP to call /api/oauth2/introspect. + # # Required when mcp.enabled is true unless OAUTH_INTROSPECTION_AUTH_TOKEN + # # is provided directly in mcp.environmentVariables. + # oauthIntrospectionAuthTokenSecretName: + # oauthIntrospectionAuthTokenSecretKey: oauthIntrospectionAuthToken + # + # # Literal token override for development/testing only. Prefer the + # # secret-backed setting above for real deployments. + # # Required when mcp.enabled is true unless OAUTH_INTROSPECTION_AUTH_TOKEN + # # is provided directly in mcp.environmentVariables. + # oauthIntrospectionAuthToken: + # + # # Optional Node.js options for the MCP server process. Unset by default. + # nodeOptions: --max_old_space_size=1024 + # + # # Optional MCP service configuration. If unset, all available toolsets + # # are enabled by default.
+ # enabledToolsets: + # - apps + # - resources + # maxTransportSessions: 1000 + # sessionIdleTimeoutMs: 1800000 + # sessionSweepIntervalMs: 60000 + # sessionGaugeEmitIntervalMs: 30000 + + # Resources for MCP server pods. MCP runs from the backend image and does not + # horizontally scale yet, so its default memory limit is higher than multiplayer. + resources: + requests: + cpu: "200m" + memory: "256Mi" + limits: + memory: "4096Mi" + + # MCP OAuth metadata routes are served by the main Retool backend, not the MCP + # pod. This config exposes the backend API listener as an additional port on + # the main Retool service when a route uses target: backendApi. + backendMetadata: + service: + # Service port that exposes the backend API listener for metadata paths + # that should not fall through to the static frontend server. + portName: http-api + externalPort: 3001 + internalPort: 3001 + + # Public MCP-related ingress paths. Paths are emitted in order before the main + # Retool route. Use target: backendApi for OAuth metadata routes that must hit + # the main backend API listener; use target: mcp for MCP server routes. + ingress: + # This conditional is dependent on mcp.enabled. + enabled: true + paths: + - path: /.well-known/oauth-authorization-server + pathType: Exact + target: backendApi + port: 3001 + - path: /.well-known/oauth-protected-resource/mcp + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp/.well-known/oauth-protected-resource + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp + target: mcp + port: 4010 + - path: /.well-known/oauth-protected-resource + pathType: Exact + target: mcp + port: 4010 + + # HTTPRoute rules for MCP when using Gateway API instead of ingress. + httpRoute: + # This conditional is dependent on mcp.enabled. 
+ enabled: true + rules: + - path: /.well-known/oauth-authorization-server + pathType: Exact + target: backendApi + port: 3001 + - path: /.well-known/oauth-protected-resource/mcp + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp/.well-known/oauth-protected-resource + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp + target: mcp + port: 4010 + - path: /.well-known/oauth-protected-resource + pathType: Exact + target: mcp + port: 4010 + + service: + externalPort: 4010 + internalPort: 4010 + annotations: {} + labels: {} + codeExecutor: # as of Chart version 6.7.0, code-executor image version must align with the top-level `image` parameters # explicitly set other fields as needed diff --git a/values.yaml b/values.yaml index a4104c7..836e8cd 100644 --- a/values.yaml +++ b/values.yaml @@ -563,6 +563,148 @@ multiplayer: annotations: {} labels: {} +mcp: + # Enable this to run Retool's MCP server as a separate deployment. + enabled: false + + replicaCount: 1 + + # Annotations for MCP server pods + annotations: {} + + # Labels for MCP server pods + labels: {} + + # MCP-specific environment variables. Can include valueFrom entries. + # Use this for MCP-only env vars that are not exposed under mcp.config. + environmentVariables: [] + + # MCP server configuration. + # The chart always sets RETOOL_BACKEND_URL, defaulting to the release's + # internal Retool service, e.g. http://<retool-fullname>:<service.externalPort>. + # For other config keys, the chart only emits environment variables when you set + # them.
When unset, the MCP service uses its runtime defaults: + # enabledToolsets: all available toolsets + # apps, resources, workflows, folders, environments, users, + # organization, user_invites, feedback + # maxTransportSessions: 1000 + # sessionIdleTimeoutMs: 1800000 (30 minutes) + # sessionSweepIntervalMs: 60000 (1 minute) + # sessionGaugeEmitIntervalMs: 30000 (30 seconds) + config: {} + # Example overrides: + # config: + # # Internal URL used by the MCP server to call the Retool backend. + # # Defaults to the release's internal Retool service when unset. + # retoolBackendUrl: + # + # # Public Retool URL used for links. Set explicitly when the request + # # origin is not the right public URL. + # retoolUrl: + # + # # Secret-backed token used by MCP to call /api/oauth2/introspect. + # # Required when mcp.enabled is true unless OAUTH_INTROSPECTION_AUTH_TOKEN + # # is provided directly in mcp.environmentVariables. + # oauthIntrospectionAuthTokenSecretName: + # oauthIntrospectionAuthTokenSecretKey: oauthIntrospectionAuthToken + # + # # Literal token override for development/testing only. Prefer the + # # secret-backed setting above for real deployments. + # # Required when mcp.enabled is true unless OAUTH_INTROSPECTION_AUTH_TOKEN + # # is provided directly in mcp.environmentVariables. + # oauthIntrospectionAuthToken: + # + # # Optional Node.js options for the MCP server process. Unset by default. + # nodeOptions: --max_old_space_size=1024 + # + # # Optional MCP service configuration. If unset, all available toolsets + # # are enabled by default. + # enabledToolsets: + # - apps + # - resources + # maxTransportSessions: 1000 + # sessionIdleTimeoutMs: 1800000 + # sessionSweepIntervalMs: 60000 + # sessionGaugeEmitIntervalMs: 30000 + + # Resources for MCP server pods. MCP runs from the backend image and does not + # horizontally scale yet, so its default memory limit is higher than multiplayer. 
+ resources: + requests: + cpu: "200m" + memory: "256Mi" + limits: + memory: "4096Mi" + + # MCP OAuth metadata routes are served by the main Retool backend, not the MCP + # pod. This config exposes the backend API listener as an additional port on + # the main Retool service when a route uses target: backendApi. + backendMetadata: + service: + # Service port that exposes the backend API listener for metadata paths + # that should not fall through to the static frontend server. + portName: http-api + externalPort: 3001 + internalPort: 3001 + + # Public MCP-related ingress paths. Paths are emitted in order before the main + # Retool route. Use target: backendApi for OAuth metadata routes that must hit + # the main backend API listener; use target: mcp for MCP server routes. + ingress: + # This conditional is dependent on mcp.enabled. + enabled: true + paths: + - path: /.well-known/oauth-authorization-server + pathType: Exact + target: backendApi + port: 3001 + - path: /.well-known/oauth-protected-resource/mcp + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp/.well-known/oauth-protected-resource + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp + target: mcp + port: 4010 + - path: /.well-known/oauth-protected-resource + pathType: Exact + target: mcp + port: 4010 + + # HTTPRoute rules for MCP when using Gateway API instead of ingress. + httpRoute: + # This conditional is dependent on mcp.enabled. 
+ enabled: true + rules: + - path: /.well-known/oauth-authorization-server + pathType: Exact + target: backendApi + port: 3001 + - path: /.well-known/oauth-protected-resource/mcp + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp/.well-known/oauth-protected-resource + pathType: Exact + target: backendApi + port: 3001 + - path: /mcp + target: mcp + port: 4010 + - path: /.well-known/oauth-protected-resource + pathType: Exact + target: mcp + port: 4010 + + service: + externalPort: 4010 + internalPort: 4010 + annotations: {} + labels: {} + codeExecutor: # as of Chart version 6.7.0, code-executor image version must align with the top-level `image` parameters # explicitly set other fields as needed From 386c23ba13c9b7bbde1c4587e22125bd29deead5 Mon Sep 17 00:00:00 2001 From: arnold-retool Date: Fri, 15 May 2026 11:37:14 -0700 Subject: [PATCH 06/37] revert accidental version bump (#286) --- charts/retool/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/retool/Chart.yaml b/charts/retool/Chart.yaml index 0ebe5b1..dbedb80 100644 --- a/charts/retool/Chart.yaml +++ b/charts/retool/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: retool description: A Helm chart for Kubernetes type: application -version: 6.11.1 +version: 6.11.0 maintainers: - name: Retool Engineering email: engineering+helm@retool.com From 63adf18c55c3389fd08bfd547ade57f7d9fb21bc Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Tue, 19 May 2026 13:48:03 -0700 Subject: [PATCH 07/37] [INF-6865] increase js executor mem (#289) * increase mem * update file --- charts/retool/values.yaml | 4 ++-- values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 836e8cd..fecd076 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -774,10 +774,10 @@ jsExecutor: resources: limits: cpu: 2000m - memory: 2048Mi + memory: 8192Mi 
requests: cpu: 1000m - memory: 1024Mi + memory: 4096Mi agents: # Enable AI Agents diff --git a/values.yaml b/values.yaml index 836e8cd..fecd076 100644 --- a/values.yaml +++ b/values.yaml @@ -774,10 +774,10 @@ jsExecutor: resources: limits: cpu: 2000m - memory: 2048Mi + memory: 8192Mi requests: cpu: 1000m - memory: 1024Mi + memory: 4096Mi agents: # Enable AI Agents From 45a04a83de56929327acfc44259f39a0dc806bf5 Mon Sep 17 00:00:00 2001 From: lukefoster11 Date: Tue, 19 May 2026 14:50:35 -0700 Subject: [PATCH 08/37] optional agentsandbox postgres secret --- charts/retool/templates/_helpers.tpl | 19 +++++++++++++++++++ .../templates/deployment_agent_sandbox.yaml | 8 ++++---- charts/retool/values.yaml | 6 ++++++ values.yaml | 6 ++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index fac318c..ffd3790 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -585,6 +585,25 @@ Uses externalSecret.name if set, otherwise the auto-generated name. {{- end -}} {{- end -}} +{{/* +Secret name for the agent sandbox Postgres connection string. +Falls back to the main agentSandbox secret when postgres.secretName is empty. +*/}} +{{- define "retool.agentSandbox.postgresSecretName" -}} +{{- if .Values.agentSandbox.postgres.secretName -}} +{{ .Values.agentSandbox.postgres.secretName }} +{{- else -}} +{{ include "retool.agentSandbox.secretName" . }} +{{- end -}} +{{- end -}} + +{{/* +Secret key for the agent sandbox Postgres connection string. +*/}} +{{- define "retool.agentSandbox.postgresSecretKey" -}} +{{ .Values.agentSandbox.postgres.secretKey | default "postgres-url" }} +{{- end -}} + {{/* Selector labels for agent sandbox (sandbox pods / headless service). 
*/}} diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 00e06fa..3f4ebd5 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -301,8 +301,8 @@ spec: - name: AGENT_EXECUTOR_POSTGRES_URL valueFrom: secretKeyRef: - name: {{ $secretName }} - key: postgres-url + name: {{ include "retool.agentSandbox.postgresSecretName" . }} + key: {{ include "retool.agentSandbox.postgresSecretKey" . }} - name: AGENT_EXECUTOR_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX @@ -497,8 +497,8 @@ spec: - name: AGENT_EXECUTOR_POSTGRES_URL valueFrom: secretKeyRef: - name: {{ $secretName }} - key: postgres-url + name: {{ include "retool.agentSandbox.postgresSecretName" . }} + key: {{ include "retool.agentSandbox.postgresSecretKey" . }} - name: AGENT_EXECUTOR_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index fecd076..37a07f4 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -887,6 +887,12 @@ agentSandbox: # Connection string for the agent sandbox's state database. postgres: url: '' + # Reference a separate K8s Secret for the Postgres connection string. + # When set, the postgres-url is read from this Secret instead of the + # main agentSandbox secret (useful when multiple instances share JWT/ + # encryption keys but connect to different Postgres databases). + secretName: '' + secretKey: 'postgres-url' schema: 'agent_executor' poolMax: 10 sweeperIntervalMs: 60000 diff --git a/values.yaml b/values.yaml index fecd076..37a07f4 100644 --- a/values.yaml +++ b/values.yaml @@ -887,6 +887,12 @@ agentSandbox: # Connection string for the agent sandbox's state database. postgres: url: '' + # Reference a separate K8s Secret for the Postgres connection string. 
+ # When set, the postgres-url is read from this Secret instead of the + # main agentSandbox secret (useful when multiple instances share JWT/ + # encryption keys but connect to different Postgres databases). + secretName: '' + secretKey: 'postgres-url' schema: 'agent_executor' poolMax: 10 sweeperIntervalMs: 60000 From 9dc36c95e83f446ff0aa6f0a33822ada9fc19890 Mon Sep 17 00:00:00 2001 From: lukefoster11 Date: Tue, 19 May 2026 14:53:13 -0700 Subject: [PATCH 09/37] Revert "optional agentsandbox postgres secret" This reverts commit 98ecae0fce79defbfc4f6dc2437f833b79131052. --- charts/retool/templates/_helpers.tpl | 19 ------------------- .../templates/deployment_agent_sandbox.yaml | 8 ++++---- charts/retool/values.yaml | 6 ------ values.yaml | 6 ------ 4 files changed, 4 insertions(+), 35 deletions(-) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index ffd3790..fac318c 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -585,25 +585,6 @@ Uses externalSecret.name if set, otherwise the auto-generated name. {{- end -}} {{- end -}} -{{/* -Secret name for the agent sandbox Postgres connection string. -Falls back to the main agentSandbox secret when postgres.secretName is empty. -*/}} -{{- define "retool.agentSandbox.postgresSecretName" -}} -{{- if .Values.agentSandbox.postgres.secretName -}} -{{ .Values.agentSandbox.postgres.secretName }} -{{- else -}} -{{ include "retool.agentSandbox.secretName" . }} -{{- end -}} -{{- end -}} - -{{/* -Secret key for the agent sandbox Postgres connection string. -*/}} -{{- define "retool.agentSandbox.postgresSecretKey" -}} -{{ .Values.agentSandbox.postgres.secretKey | default "postgres-url" }} -{{- end -}} - {{/* Selector labels for agent sandbox (sandbox pods / headless service). 
*/}} diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 3f4ebd5..00e06fa 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -301,8 +301,8 @@ spec: - name: AGENT_EXECUTOR_POSTGRES_URL valueFrom: secretKeyRef: - name: {{ include "retool.agentSandbox.postgresSecretName" . }} - key: {{ include "retool.agentSandbox.postgresSecretKey" . }} + name: {{ $secretName }} + key: postgres-url - name: AGENT_EXECUTOR_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX @@ -497,8 +497,8 @@ spec: - name: AGENT_EXECUTOR_POSTGRES_URL valueFrom: secretKeyRef: - name: {{ include "retool.agentSandbox.postgresSecretName" . }} - key: {{ include "retool.agentSandbox.postgresSecretKey" . }} + name: {{ $secretName }} + key: postgres-url - name: AGENT_EXECUTOR_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 37a07f4..fecd076 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -887,12 +887,6 @@ agentSandbox: # Connection string for the agent sandbox's state database. postgres: url: '' - # Reference a separate K8s Secret for the Postgres connection string. - # When set, the postgres-url is read from this Secret instead of the - # main agentSandbox secret (useful when multiple instances share JWT/ - # encryption keys but connect to different Postgres databases). - secretName: '' - secretKey: 'postgres-url' schema: 'agent_executor' poolMax: 10 sweeperIntervalMs: 60000 diff --git a/values.yaml b/values.yaml index 37a07f4..fecd076 100644 --- a/values.yaml +++ b/values.yaml @@ -887,12 +887,6 @@ agentSandbox: # Connection string for the agent sandbox's state database. postgres: url: '' - # Reference a separate K8s Secret for the Postgres connection string. 
- # When set, the postgres-url is read from this Secret instead of the - # main agentSandbox secret (useful when multiple instances share JWT/ - # encryption keys but connect to different Postgres databases). - secretName: '' - secretKey: 'postgres-url' schema: 'agent_executor' poolMax: 10 sweeperIntervalMs: 60000 From 21a2c40d07d42c2897491afabe347b0fc1000e10 Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Tue, 19 May 2026 15:46:18 -0700 Subject: [PATCH 10/37] new agent sandbox secret modularity (#290) --- charts/retool/templates/_helpers.tpl | 34 +++++++------- .../templates/deployment_agent_sandbox.yaml | 45 +++++++++++++------ charts/retool/values.yaml | 17 ++++--- values.yaml | 17 ++++--- 4 files changed, 72 insertions(+), 41 deletions(-) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index fac318c..6d8afd8 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -573,18 +573,6 @@ Set agent sandbox proxy name {{ template "retool.fullname" . }}-agent-sandbox-proxy {{- end -}} -{{/* -Secret name for agent sandbox. -Uses externalSecret.name if set, otherwise the auto-generated name. -*/}} -{{- define "retool.agentSandbox.secretName" -}} -{{- if .Values.agentSandbox.externalSecret.name -}} -{{ .Values.agentSandbox.externalSecret.name }} -{{- else -}} -{{ template "retool.agentSandbox.name" . }} -{{- end -}} -{{- end -}} - {{/* Selector labels for agent sandbox (sandbox pods / headless service). */}} @@ -642,6 +630,7 @@ Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} */}} {{- define "retool.agentSandbox.backendEnvVars" -}} {{- if .Values.agentSandbox.enabled }} +{{- $defaultSecretName := .Values.agentSandbox.externalSecret.name | default (include "retool.agentSandbox.name" .) 
-}} - name: AGENT_EXECUTOR_ENABLED value: "true" - name: RR_AGENT_PUBSUB_BACKEND @@ -658,25 +647,34 @@ Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} - name: AGENT_EXECUTOR_PROXY_DOMAIN value: {{ .Values.agentSandbox.proxyDomain | default .Values.agentSandbox.frontendWsProxyDomain | quote }} {{- end }} -{{- if or .Values.agentSandbox.jwtPrivateKey .Values.agentSandbox.externalSecret.name }} +{{- if .Values.agentSandbox.jwtPrivateKey }} +- name: AGENT_EXECUTOR_JWT_PRIVATE_KEY + value: {{ .Values.agentSandbox.jwtPrivateKey | quote }} +{{- else if .Values.agentSandbox.externalSecret.name }} - name: AGENT_EXECUTOR_JWT_PRIVATE_KEY valueFrom: secretKeyRef: - name: {{ include "retool.agentSandbox.secretName" . }} + name: {{ $defaultSecretName }} key: jwt-private-key {{- end }} -{{- if or .Values.agentSandbox.jwtPublicKey .Values.agentSandbox.externalSecret.name }} +{{- if .Values.agentSandbox.jwtPublicKey }} +- name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + value: {{ .Values.agentSandbox.jwtPublicKey | quote }} +{{- else if .Values.agentSandbox.externalSecret.name }} - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY valueFrom: secretKeyRef: - name: {{ include "retool.agentSandbox.secretName" . }} + name: {{ $defaultSecretName }} key: jwt-public-key {{- end }} -{{- if or .Values.agentSandbox.encryptionKey .Values.agentSandbox.externalSecret.name }} +{{- if .Values.agentSandbox.encryptionKey }} +- name: AGENT_EXECUTOR_ENCRYPTION_KEY + value: {{ .Values.agentSandbox.encryptionKey | quote }} +{{- else if .Values.agentSandbox.externalSecret.name }} - name: AGENT_EXECUTOR_ENCRYPTION_KEY valueFrom: secretKeyRef: - name: {{ include "retool.agentSandbox.secretName" . 
}} + name: {{ $defaultSecretName }} key: encryption-key {{- end }} {{- end }} diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 00e06fa..126ce5d 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -1,6 +1,6 @@ {{- if .Values.agentSandbox.enabled }} {{- $as := .Values.agentSandbox -}} -{{- $secretName := include "retool.agentSandbox.secretName" . -}} +{{- $defaultSecretName := $as.externalSecret.name | default (include "retool.agentSandbox.name" .) -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} {{- /* @@ -172,8 +172,10 @@ data: {"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"}, {"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"}, {"name": "SANDBOX_MEMORY_LIMIT", "value": "{{ $as.sandbox.sandboxMemoryLimit }}"} - {{- if or $as.jwtPublicKey $as.externalSecret.name }} - ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $secretName }}", "key": "jwt-public-key"}}} + {{- if $as.jwtPublicKey }} + ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "value": "{{ $as.jwtPublicKey }}"} + {{- else if $as.externalSecret.name }} + ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $defaultSecretName }}", "key": "jwt-public-key"}}} {{- end }} {{- if $as.proxy.backendDomainSuffixes }} ,{"name": "BACKEND_DOMAIN_SUFFIXES", "value": "{{ $as.proxy.backendDomainSuffixes }}"} @@ -185,8 +187,8 @@ data: ,{"name": "S3_BUCKET", "value": "{{ $as.snapshotStorage.s3Bucket }}"} ,{"name": "S3_ENDPOINT", "value": "{{ $as.snapshotStorage.s3Endpoint }}"} ,{"name": "S3_REGION", "value": "{{ $as.snapshotStorage.s3Region }}"} - ,{"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "{{ 
$as.snapshotStorage.credentialsSecretName | default $secretName }}", "key": "awsAccessKeyId"}}} - ,{"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $secretName }}", "key": "awsSecretAccessKey"}}} + ,{"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $defaultSecretName }}", "key": "awsAccessKeyId"}}} + ,{"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $as.snapshotStorage.credentialsSecretName | default $defaultSecretName }}", "key": "awsSecretAccessKey"}}} {{- end }} {{- range $as.sandbox.extraEnv }} ,{{ toJson . }} @@ -299,10 +301,14 @@ spec: - name: STATE_BACKEND value: "postgres" - name: AGENT_EXECUTOR_POSTGRES_URL + {{- if $as.postgres.url }} + value: {{ $as.postgres.url | quote }} + {{- else }} valueFrom: secretKeyRef: - name: {{ $secretName }} + name: {{ $defaultSecretName }} key: postgres-url + {{- end }} - name: AGENT_EXECUTOR_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX @@ -359,11 +365,14 @@ spec: value: {{ include "retool.agentSandbox.name" . }}-job-template - name: DAEMONSET_NAME value: {{ include "retool.agentSandbox.name" . 
}}-image-prepuller - {{- if or $as.jwtPublicKey $as.externalSecret.name }} + {{- if $as.jwtPublicKey }} + - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + value: {{ $as.jwtPublicKey | quote }} + {{- else if $as.externalSecret.name }} - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY valueFrom: secretKeyRef: - name: {{ $secretName }} + name: {{ $defaultSecretName }} key: jwt-public-key {{- end }} livenessProbe: @@ -495,10 +504,14 @@ spec: - name: STATE_BACKEND value: "postgres" - name: AGENT_EXECUTOR_POSTGRES_URL + {{- if $as.postgres.url }} + value: {{ $as.postgres.url | quote }} + {{- else }} valueFrom: secretKeyRef: - name: {{ $secretName }} + name: {{ $defaultSecretName }} key: postgres-url + {{- end }} - name: AGENT_EXECUTOR_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX @@ -515,18 +528,24 @@ spec: - name: BACKEND_DOMAIN_SUFFIXES value: {{ $as.proxy.backendDomainSuffixes | quote }} {{- end }} - {{- if or $as.encryptionKey $as.externalSecret.name }} + {{- if $as.encryptionKey }} + - name: AGENT_EXECUTOR_ENCRYPTION_KEY + value: {{ $as.encryptionKey | quote }} + {{- else if $as.externalSecret.name }} - name: AGENT_EXECUTOR_ENCRYPTION_KEY valueFrom: secretKeyRef: - name: {{ $secretName }} + name: {{ $defaultSecretName }} key: encryption-key {{- end }} - {{- if or $as.jwtPublicKey $as.externalSecret.name }} + {{- if $as.jwtPublicKey }} + - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + value: {{ $as.jwtPublicKey | quote }} + {{- else if $as.externalSecret.name }} - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY valueFrom: secretKeyRef: - name: {{ $secretName }} + name: {{ $defaultSecretName }} key: jwt-public-key {{- end }} - name: EXECUTOR_PORT diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index fecd076..dd3ee9e 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -867,13 +867,19 @@ agentSandbox: # Labels for agent sandbox pods labels: {} - # Pre-existing K8s Secret. 
When set, the chart skips creating its own Secret - # and references this for keys: jwt-public-key, jwt-private-key, encryption-key, - # api-secret, postgres-url. + # Pre-existing K8s Secret containing keys: jwt-public-key, jwt-private-key, + # encryption-key, api-secret, postgres-url. When set, the chart references + # this secret by default for all secret-backed env vars. + # + # Individual keys can still be overridden by setting the corresponding + # plaintext values below (e.g. jwtPublicKey, postgres.url). When a plaintext + # value is provided alongside externalSecret.name, the plaintext value takes + # precedence for that key and the external secret is used for the rest. externalSecret: name: '' - # Secrets (ignored when externalSecret.name is set) + # Secrets — used directly when externalSecret.name is not set, or as + # per-key overrides when externalSecret.name IS set. # JWT key pair (ES256) for sandbox token authentication. jwtPublicKey: '' jwtPrivateKey: '' @@ -884,7 +890,8 @@ agentSandbox: apiSecret: '' # Postgres state backend (shared by controller and proxy for state coordination). - # Connection string for the agent sandbox's state database. + # Connection string for the agent sandbox's state database. When set, takes + # precedence over the postgres-url key in externalSecret. postgres: url: '' schema: 'agent_executor' diff --git a/values.yaml b/values.yaml index fecd076..dd3ee9e 100644 --- a/values.yaml +++ b/values.yaml @@ -867,13 +867,19 @@ agentSandbox: # Labels for agent sandbox pods labels: {} - # Pre-existing K8s Secret. When set, the chart skips creating its own Secret - # and references this for keys: jwt-public-key, jwt-private-key, encryption-key, - # api-secret, postgres-url. + # Pre-existing K8s Secret containing keys: jwt-public-key, jwt-private-key, + # encryption-key, api-secret, postgres-url. When set, the chart references + # this secret by default for all secret-backed env vars. 
+ # + # Individual keys can still be overridden by setting the corresponding + # plaintext values below (e.g. jwtPublicKey, postgres.url). When a plaintext + # value is provided alongside externalSecret.name, the plaintext value takes + # precedence for that key and the external secret is used for the rest. externalSecret: name: '' - # Secrets (ignored when externalSecret.name is set) + # Secrets — used directly when externalSecret.name is not set, or as + # per-key overrides when externalSecret.name IS set. # JWT key pair (ES256) for sandbox token authentication. jwtPublicKey: '' jwtPrivateKey: '' @@ -884,7 +890,8 @@ agentSandbox: apiSecret: '' # Postgres state backend (shared by controller and proxy for state coordination). - # Connection string for the agent sandbox's state database. + # Connection string for the agent sandbox's state database. When set, takes + # precedence over the postgres-url key in externalSecret. postgres: url: '' schema: 'agent_executor' From c5ed71f262fe8134bea44d2eac0fb84a4795f02f Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Tue, 19 May 2026 21:09:51 -0700 Subject: [PATCH 11/37] disable (#292) --- charts/retool/templates/_workers.tpl | 2 ++ charts/retool/templates/configmap_js_executor.yaml | 2 ++ charts/retool/templates/deployment_backend.yaml | 2 ++ charts/retool/templates/deployment_js_executor.yaml | 3 ++- charts/retool/templates/deployment_workflows.yaml | 2 ++ charts/retool/values.yaml | 2 ++ values.yaml | 2 ++ 7 files changed, 14 insertions(+), 1 deletion(-) diff --git a/charts/retool/templates/_workers.tpl b/charts/retool/templates/_workers.tpl index 7dca091..64aa977 100644 --- a/charts/retool/templates/_workers.tpl +++ b/charts/retool/templates/_workers.tpl @@ -213,8 +213,10 @@ spec: value: {{ template "retool.postgresql.ssl_enabled" $ }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" $ }} + {{- if $.Values.jsExecutor.enabled }} - 
name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" $ }} + {{- end }} {{- include "retool.agentSandbox.backendEnvVars" $ | nindent 10 }} {{- include "retool.telemetry.includeEnvVars" $ | nindent 10 }} diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml index 60c45c8..68009e9 100644 --- a/charts/retool/templates/configmap_js_executor.yaml +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -1,3 +1,4 @@ +{{- if .Values.jsExecutor.enabled }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,3 +6,4 @@ metadata: data: nsjail-seccomp.json: | {{- .Files.Get "files/nsjail-seccomp.json" | nindent 4 }} +{{- end }} diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index 171e1cf..b3c2c8d 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -180,8 +180,10 @@ spec: {{- end }} {{- end }} {{- end }} + {{- if .Values.jsExecutor.enabled }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" . }} + {{- end }} {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index b432b0f..c70b9e3 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -1,3 +1,4 @@ +{{- if .Values.jsExecutor.enabled }} apiVersion: apps/v1 kind: Deployment metadata: @@ -208,4 +209,4 @@ spec: matchLabels: {{- include "retool.jsExecutor.selectorLabels" . 
| nindent 6 }} {{- end }} ---- +{{- end }} diff --git a/charts/retool/templates/deployment_workflows.yaml b/charts/retool/templates/deployment_workflows.yaml index bb2bae7..a589907 100644 --- a/charts/retool/templates/deployment_workflows.yaml +++ b/charts/retool/templates/deployment_workflows.yaml @@ -153,8 +153,10 @@ spec: {{- end }} {{- end }} {{- end }} + {{- if .Values.jsExecutor.enabled }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" . }} + {{- end }} {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index dd3ee9e..b183dde 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -750,6 +750,8 @@ codeExecutor: # JS Executor jsExecutor: + enabled: false + image: repository: tryretool/js-executor-service pullPolicy: IfNotPresent diff --git a/values.yaml b/values.yaml index dd3ee9e..b183dde 100644 --- a/values.yaml +++ b/values.yaml @@ -750,6 +750,8 @@ codeExecutor: # JS Executor jsExecutor: + enabled: false + image: repository: tryretool/js-executor-service pullPolicy: IfNotPresent From c16febeff46320a3721543ad8fff5c298b787f50 Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Tue, 19 May 2026 21:10:14 -0700 Subject: [PATCH 12/37] make retool.fullname prefixed (#291) --- charts/retool/templates/configmap_js_executor.yaml | 4 +++- charts/retool/templates/deployment_js_executor.yaml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml index 68009e9..0dedb4e 100644 --- a/charts/retool/templates/configmap_js_executor.yaml +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -2,7 +2,9 @@ apiVersion: v1 kind: ConfigMap metadata: - name: js-executor-seccomp + name: {{ template "retool.fullname" . }}-js-executor-seccomp + labels: + {{- include "retool.labels" . 
| nindent 4 }} data: nsjail-seccomp.json: | {{- .Files.Get "files/nsjail-seccomp.json" | nindent 4 }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index c70b9e3..787daae 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -150,7 +150,7 @@ spec: volumes: - name: seccomp-profile configMap: - name: "js-executor-seccomp" + name: {{ template "retool.fullname" . }}-js-executor-seccomp - name: host-seccomp hostPath: path: /var/lib/kubelet/seccomp From 180be6f8acc5ec6997d424e26b5dbbfc20582c27 Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Tue, 19 May 2026 21:10:35 -0700 Subject: [PATCH 13/37] Revert "make retool.fullname prefixed (#291)" (#293) This reverts commit f7410669d58c3be2827d793daf5f67b04c2f7406. --- charts/retool/templates/configmap_js_executor.yaml | 4 +--- charts/retool/templates/deployment_js_executor.yaml | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml index 0dedb4e..68009e9 100644 --- a/charts/retool/templates/configmap_js_executor.yaml +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -2,9 +2,7 @@ apiVersion: v1 kind: ConfigMap metadata: - name: {{ template "retool.fullname" . }}-js-executor-seccomp - labels: - {{- include "retool.labels" . 
| nindent 4 }} + name: js-executor-seccomp data: nsjail-seccomp.json: | {{- .Files.Get "files/nsjail-seccomp.json" | nindent 4 }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index 787daae..c70b9e3 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -150,7 +150,7 @@ spec: volumes: - name: seccomp-profile configMap: - name: {{ template "retool.fullname" . }}-js-executor-seccomp + name: "js-executor-seccomp" - name: host-seccomp hostPath: path: /var/lib/kubelet/seccomp From 4a58e2c9f5d88d6d928bef06f0356c79bb535144 Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Wed, 20 May 2026 11:38:30 -0700 Subject: [PATCH 14/37] make retool.fullname prefixed (#294) --- charts/retool/templates/configmap_js_executor.yaml | 4 +++- charts/retool/templates/deployment_js_executor.yaml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml index 68009e9..0dedb4e 100644 --- a/charts/retool/templates/configmap_js_executor.yaml +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -2,7 +2,9 @@ apiVersion: v1 kind: ConfigMap metadata: - name: js-executor-seccomp + name: {{ template "retool.fullname" . }}-js-executor-seccomp + labels: + {{- include "retool.labels" . | nindent 4 }} data: nsjail-seccomp.json: | {{- .Files.Get "files/nsjail-seccomp.json" | nindent 4 }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index c70b9e3..787daae 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -150,7 +150,7 @@ spec: volumes: - name: seccomp-profile configMap: - name: "js-executor-seccomp" + name: {{ template "retool.fullname" . 
}}-js-executor-seccomp - name: host-seccomp hostPath: path: /var/lib/kubelet/seccomp From ef2d5bbaa9651ac4ab4ebd3f7b1189242a42d1c9 Mon Sep 17 00:00:00 2001 From: lukefoster11 Date: Wed, 20 May 2026 20:53:33 -0700 Subject: [PATCH 15/37] separate deviceplugin use and deployment --- .../templates/agent_sandbox_device_plugin.yaml | 2 +- charts/retool/values.yaml | 12 ++++++++---- values.yaml | 12 ++++++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml index c936ae2..35a6535 100644 --- a/charts/retool/templates/agent_sandbox_device_plugin.yaml +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.sandboxNetwork.devicePlugin }} +{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.sandboxNetwork.deployDaemonSet }} {{- $as := .Values.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index b183dde..022b6ff 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -904,11 +904,15 @@ agentSandbox: # When enabled, sandboxes get isolated outbound access with L7 filtering. sandboxNetwork: enabled: true - # Deploy smarter-device-manager to register /dev/net/tun with the kubelet. - # Required because containerd's default device cgroup blocks /dev/net/tun; - # the device plugin's DeviceSpec path is the only reliable way to grant - # device cgroup access without privileged mode. + # Request smarter-devices/net_tun via resources.limits on sandbox pods. + # When true, the kubelet grants /dev/net/tun device cgroup access without + # privileged mode. Requires smarter-device-manager to be running on each + # node (see deployDaemonSet below). 
devicePlugin: true + # Deploy the smarter-device-manager DaemonSet from this Helm release. + # Set to false when another release (or external process) already manages + # the DaemonSet — only one instance should run per node. + deployDaemonSet: true # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster # agent-sandbox-proxy service URL when empty. httpProxy: '' diff --git a/values.yaml b/values.yaml index b183dde..022b6ff 100644 --- a/values.yaml +++ b/values.yaml @@ -904,11 +904,15 @@ agentSandbox: # When enabled, sandboxes get isolated outbound access with L7 filtering. sandboxNetwork: enabled: true - # Deploy smarter-device-manager to register /dev/net/tun with the kubelet. - # Required because containerd's default device cgroup blocks /dev/net/tun; - # the device plugin's DeviceSpec path is the only reliable way to grant - # device cgroup access without privileged mode. + # Request smarter-devices/net_tun via resources.limits on sandbox pods. + # When true, the kubelet grants /dev/net/tun device cgroup access without + # privileged mode. Requires smarter-device-manager to be running on each + # node (see deployDaemonSet below). devicePlugin: true + # Deploy the smarter-device-manager DaemonSet from this Helm release. + # Set to false when another release (or external process) already manages + # the DaemonSet — only one instance should run per node. + deployDaemonSet: true # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster # agent-sandbox-proxy service URL when empty. 
httpProxy: '' From 70c47c19abad2744095085a2715e4d7a0e775269 Mon Sep 17 00:00:00 2001 From: Ryan Artecona Date: Thu, 21 May 2026 11:14:55 -0700 Subject: [PATCH 16/37] minor r2 fixes (#287) * make agentSandbox.image.tag non-required * Make agentSandbox.devicePlugin.priorityClassName configurable for GKE support * try adding ingress support for agentsandbox proxy url * disable apparmor in sandbox jobs for gke/aks support * try adding httproute support for r2 agent-proxy * trim whitespace --- .../agent_sandbox_device_plugin.yaml | 2 +- .../templates/agent_sandbox_prepuller.yaml | 2 +- .../templates/deployment_agent_sandbox.yaml | 9 +++--- charts/retool/templates/httproute.yaml | 31 +++++++++++++++++++ charts/retool/values.yaml | 11 ++++++- values.yaml | 11 ++++++- 6 files changed, 58 insertions(+), 8 deletions(-) diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml index 35a6535..44c028f 100644 --- a/charts/retool/templates/agent_sandbox_device_plugin.yaml +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -39,7 +39,7 @@ spec: {{- end }} spec: automountServiceAccountToken: false - priorityClassName: system-node-critical + priorityClassName: {{ $as.devicePlugin.priorityClassName }} {{- if $nodeSelector }} nodeSelector: {{ toYaml $nodeSelector | indent 8 }} diff --git a/charts/retool/templates/agent_sandbox_prepuller.yaml b/charts/retool/templates/agent_sandbox_prepuller.yaml index d47072d..854d1eb 100644 --- a/charts/retool/templates/agent_sandbox_prepuller.yaml +++ b/charts/retool/templates/agent_sandbox_prepuller.yaml @@ -40,7 +40,7 @@ spec: terminationGracePeriodSeconds: 5 initContainers: - name: pull-image - image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + image: "{{ $as.image.repository }}:{{ $as.image.tag | default .Values.image.tag }}" imagePullPolicy: {{ $as.image.pullPolicy }} command: ["true"] securityContext: diff --git 
a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 126ce5d..6b0634a 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -161,7 +161,8 @@ data: "allowPrivilegeEscalation": false, "readOnlyRootFilesystem": true, "capabilities": {"drop": ["ALL"]}, - "seccompProfile": {"type": "Localhost", "localhostProfile": "{{ $as.seccompProfile }}"} + "seccompProfile": {"type": "Localhost", "localhostProfile": "{{ $as.seccompProfile }}"}, + "appArmorProfile": {"type": "Unconfined"} }, "env": [ {"name": "NODE_ENV", "value": "production"}, @@ -277,7 +278,7 @@ spec: {{- end }} containers: - name: controller - image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + image: "{{ $as.image.repository }}:{{ $as.image.tag | default .Values.image.tag }}" imagePullPolicy: {{ $as.image.pullPolicy }} ports: - name: http @@ -360,7 +361,7 @@ spec: - name: LEADER_RENEW_MS value: {{ $as.controller.scaling.leaderRenewMs | quote }} - name: DEPLOYED_IMAGE_TAG - value: {{ $as.image.tag | quote }} + value: {{ $as.image.tag | default .Values.image.tag | quote }} - name: JOB_TEMPLATE_CONFIGMAP value: {{ include "retool.agentSandbox.name" . 
}}-job-template - name: DAEMONSET_NAME @@ -480,7 +481,7 @@ spec: {{- end }} containers: - name: proxy - image: "{{ $as.image.repository }}:{{ $as.image.tag }}" + image: "{{ $as.image.repository }}:{{ $as.image.tag | default .Values.image.tag }}" imagePullPolicy: {{ $as.image.pullPolicy }} ports: - name: http diff --git a/charts/retool/templates/httproute.yaml b/charts/retool/templates/httproute.yaml index 222b072..a182088 100644 --- a/charts/retool/templates/httproute.yaml +++ b/charts/retool/templates/httproute.yaml @@ -56,4 +56,35 @@ spec: - name: {{ $fullName }} port: {{ $svcPort }} {{- end }} +{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.frontendWsProxyDomain }} +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "retool.agentSandbox.proxy.name" . }} + labels: + {{- include "retool.labels" . | nindent 4 }} + {{- with .Values.httpRoute.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.httpRoute.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.httpRoute.parentRefs }} + parentRefs: + {{- toYaml . | nindent 4 }} + {{- end }} + hostnames: + - {{ .Values.agentSandbox.frontendWsProxyDomain | trimPrefix "http://" | trimPrefix "https://" | quote }} + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: {{ include "retool.agentSandbox.proxy.name" . 
}} + port: {{ .Values.agentSandbox.proxy.port }} +{{- end }} {{- end }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 022b6ff..acd543a 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -754,6 +754,8 @@ jsExecutor: image: repository: tryretool/js-executor-service + # defaults to top level image.tag + tag: null pullPolicy: IfNotPresent replicaCount: 1 @@ -851,7 +853,8 @@ agentSandbox: image: repository: tryretool/agent-sandbox-service - tag: latest + # defaults to top level image.tag + tag: null pullPolicy: IfNotPresent # Lightweight init image used by the prepuller and seccomp DaemonSets. @@ -927,6 +930,12 @@ agentSandbox: # Set high enough to accommodate maxTotalJobs + prewarm pool. maxDevices: 130 + # When possible, we want the devicePlugin daemonset to preempt normal pods. + # Note: in some cases this is inconvenient or unsupported, i.e. in GKE which + # requires a custom ResourceQuota to use the `system-node-critical` + # PriorityClass in user namespaces. In those cases, set this to `null`. + priorityClassName: system-node-critical + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. # The seccomp node-installer DaemonSet copies the profile to this path # on every node automatically. diff --git a/values.yaml b/values.yaml index 022b6ff..acd543a 100644 --- a/values.yaml +++ b/values.yaml @@ -754,6 +754,8 @@ jsExecutor: image: repository: tryretool/js-executor-service + # defaults to top level image.tag + tag: null pullPolicy: IfNotPresent replicaCount: 1 @@ -851,7 +853,8 @@ agentSandbox: image: repository: tryretool/agent-sandbox-service - tag: latest + # defaults to top level image.tag + tag: null pullPolicy: IfNotPresent # Lightweight init image used by the prepuller and seccomp DaemonSets. @@ -927,6 +930,12 @@ agentSandbox: # Set high enough to accommodate maxTotalJobs + prewarm pool. maxDevices: 130 + # When possible, we want the devicePlugin daemonset to preempt normal pods. 
+ # Note: in some cases this is inconvenient or unsupported, i.e. in GKE which + # requires a custom ResourceQuota to use the `system-node-critical` + # PriorityClass in user namespaces. In those cases, set this to `null`. + priorityClassName: system-node-critical + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. # The seccomp node-installer DaemonSet copies the profile to this path # on every node automatically. From 710b822b58f9b1ca548724a22392f0619876b9c2 Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 21 May 2026 21:26:06 -0400 Subject: [PATCH 17/37] [feat][r2] enable git_server in-process with rrGitServer.enabled + blobStorage config (#296) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [chore][r2] add RR_GIT_SERVER to main backend's default SERVICE_TYPE Pairs with retool_development's RR_GIT_SERVER scaffold (commit 68162710ee0 on jatin/git-server-scaffold). The git-server runs in-process alongside MAIN_BACKEND rather than as a split-out deployment. Co-Authored-By: Claude Opus 4.7 (1M context) * [feat][r2] gate RR_GIT_SERVER on rrGitServer.enabled and add blobStorage config git_server needs an object store for repo blobs/packs (and snapshots use the same backend abstraction). The earlier commit unconditionally appended RR_GIT_SERVER to SERVICE_TYPE, which would have main backend crash at runtime on the first git op when blob storage isn't configured. 
Adds: - rrGitServer.enabled (default false) — gates the SERVICE_TYPE append - blobStorage block with s3 / gcs / azure sub-blocks (set exactly one) - {{ fail }} guard requiring exactly one provider when rrGitServer.enabled - Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT_<PROVIDER>_* env vars on the main backend deployment, with secretKeyRef support for the secret (S3 secret access key, Azure connection string, GCS credentials) - Optional rrGitServer.repackThreshold -> RR_GIT_REPACK_THRESHOLD blobStorage is a top-level block (not nested under rrGitServer) because the backend's RR_DEFAULT_* vars are shared with snapshots; this same config will feed them once they get wired up. Co-Authored-By: Claude Opus 4.7 (1M context) * [refactor][r2] extract rrGitServer blob storage provider check to a helper Co-Authored-By: Claude Opus 4.7 (1M context) * [chore][r2] allow blobStorage opt-out via direct env vars The rrGitServer.enabled fail-fast was blocking customers who'd rather plumb RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_*_* in directly via environmentVariables / environmentSecrets. Mirror the mcp pattern of detecting the env var and skipping the guard when present. Co-Authored-By: Claude Opus 4.7 (1M context) * [chore] sync top-level values.yaml with charts/retool/values.yaml Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- charts/retool/templates/_helpers.tpl | 33 +++++++++ .../retool/templates/deployment_backend.yaml | 69 +++++++++++++++++++ charts/retool/values.yaml | 48 +++++++++++++ values.yaml | 48 +++++++++++++ 4 files changed, 198 insertions(+) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index 6d8afd8..e2f71b1 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -687,6 +687,39 @@ Set MCP server service name {{ template "retool.fullname" .
}}-mcp {{- end -}} +{{/* +Validate that exactly one blob-storage provider is configured when rrGitServer +is enabled. Skipped when the user has plumbed the RR_BLOB_STORAGE_PROVIDER / +RR_DEFAULT_*_* env vars in directly via environmentVariables/environmentSecrets, +which is treated as an opt-out from the first-class blobStorage config. +No-op when rrGitServer is disabled. +*/}} +{{- define "retool.rrGitServer.validateBlobStorage" -}} +{{- if .Values.rrGitServer.enabled -}} +{{- $hasDirectEnv := false -}} +{{- range .Values.environmentVariables -}} +{{- if or (hasPrefix "RR_DEFAULT_" .name) (eq .name "RR_BLOB_STORAGE_PROVIDER") -}} +{{- $hasDirectEnv = true -}} +{{- end -}} +{{- end -}} +{{- range .Values.environmentSecrets -}} +{{- if or (hasPrefix "RR_DEFAULT_" .name) (eq .name "RR_BLOB_STORAGE_PROVIDER") -}} +{{- $hasDirectEnv = true -}} +{{- end -}} +{{- end -}} +{{- if not $hasDirectEnv -}} +{{- $bs := .Values.blobStorage | default dict -}} +{{- $providers := list -}} +{{- if $bs.s3 }}{{ $providers = append $providers "s3" }}{{ end -}} +{{- if $bs.gcs }}{{ $providers = append $providers "gcs" }}{{ end -}} +{{- if $bs.azure }}{{ $providers = append $providers "azure" }}{{ end -}} +{{- if ne (len $providers) 1 -}} +{{- fail "rrGitServer.enabled requires exactly one of blobStorage.s3, blobStorage.gcs, blobStorage.azure to be configured, or set RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* directly via environmentVariables / environmentSecrets" -}} +{{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} + {{/* Set code executor image tag Usage: (template "retool.codeExecutor.image.tag" .) diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index b3c2c8d..123cab2 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -1,3 +1,4 @@ +{{- include "retool.rrGitServer.validateBlobStorage" . 
}} apiVersion: apps/v1 kind: Deployment metadata: @@ -100,6 +101,9 @@ spec: {{- if not ( include "retool.jobRunner.enabled" . ) }} {{- $serviceType = append $serviceType "JOBS_RUNNER" }} {{- end }} + {{- if .Values.rrGitServer.enabled }} + {{- $serviceType = append $serviceType "RR_GIT_SERVER" }} + {{- end }} - name: SERVICE_TYPE value: {{ join "," $serviceType }} {{ if and ( not $.Values.dbconnector.enabled ) ( and ( include "retool_version_with_java_dbconnector_opt_out" . ) ( not $.Values.dbconnector.java.enabled ) ) }} @@ -252,6 +256,71 @@ spec: {{- end }} {{- end }} {{- end }} + {{- if .Values.rrGitServer.enabled }} + {{- $bs := .Values.blobStorage }} + {{- if $bs.s3 }} + - name: RR_BLOB_STORAGE_PROVIDER + value: "s3" + - name: RR_DEFAULT_S3_BUCKET + value: {{ $bs.s3.bucket | quote }} + {{- if $bs.s3.region }} + - name: RR_DEFAULT_S3_REGION + value: {{ $bs.s3.region | quote }} + {{- end }} + {{- if $bs.s3.endpoint }} + - name: RR_DEFAULT_S3_ENDPOINT + value: {{ $bs.s3.endpoint | quote }} + {{- end }} + {{- if $bs.s3.accessKeyId }} + - name: RR_DEFAULT_S3_ACCESS_KEY_ID + value: {{ $bs.s3.accessKeyId | quote }} + {{- end }} + {{- if $bs.s3.secretAccessKeySecretName }} + - name: RR_DEFAULT_S3_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ $bs.s3.secretAccessKeySecretName }} + key: {{ $bs.s3.secretAccessKeySecretKey | default "secret-access-key" }} + {{- else if $bs.s3.secretAccessKey }} + - name: RR_DEFAULT_S3_SECRET_ACCESS_KEY + value: {{ $bs.s3.secretAccessKey | quote }} + {{- end }} + {{- else if $bs.gcs }} + - name: RR_BLOB_STORAGE_PROVIDER + value: "gcs" + - name: RR_DEFAULT_GCS_BUCKET + value: {{ $bs.gcs.bucket | quote }} + {{- if $bs.gcs.credentialsSecretName }} + - name: RR_DEFAULT_GCS_CREDENTIALS + valueFrom: + secretKeyRef: + name: {{ $bs.gcs.credentialsSecretName }} + key: {{ $bs.gcs.credentialsSecretKey | default "credentials.json" }} + {{- else if $bs.gcs.credentials }} + - name: RR_DEFAULT_GCS_CREDENTIALS + value: {{ $bs.gcs.credentials | 
quote }} + {{- end }} + {{- else if $bs.azure }} + - name: RR_BLOB_STORAGE_PROVIDER + value: "azure" + - name: RR_DEFAULT_AZURE_CONTAINER + value: {{ $bs.azure.container | quote }} + {{- if $bs.azure.connectionStringSecretName }} + - name: RR_DEFAULT_AZURE_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: {{ $bs.azure.connectionStringSecretName }} + key: {{ $bs.azure.connectionStringSecretKey | default "connection-string" }} + {{- else if $bs.azure.connectionString }} + - name: RR_DEFAULT_AZURE_CONNECTION_STRING + value: {{ $bs.azure.connectionString | quote }} + {{- end }} + {{- end }} + {{- if .Values.rrGitServer.repackThreshold }} + - name: RR_GIT_REPACK_THRESHOLD + value: {{ .Values.rrGitServer.repackThreshold | quote }} + {{- end }} + {{- end }} {{- include "retool.env" .Values.env | nindent 10 }} {{- range .Values.environmentSecrets }} - name: {{ .name }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index acd543a..0391b9c 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -705,6 +705,54 @@ mcp: annotations: {} labels: {} +rrGitServer: + # Runs the React Retool Git Server in-process on the main backend pod + # (SERVICE_TYPE=...,RR_GIT_SERVER). The main backend internally proxies + # /api/ai/rr/git/v2/* to localhost:RR_GIT_SERVER_PORT, so no extra ingress + # routing is required. Required for the r2 / React Retool app pipeline. + # + # When enabled, exactly one of blobStorage.s3, blobStorage.gcs, or + # blobStorage.azure must be configured below — git_server stores all + # objects/packs in blob storage. + enabled: false + + # Optional: number of loose objects before git_server triggers a repack. + # Backend default is 100; unset to inherit it. + repackThreshold: ~ + +# Shared blob-storage config used by git_server (and other features that +# need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. +# Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT_<PROVIDER>_* env vars on +# the backend deployment.
+# +# This block can be omitted entirely if RR_BLOB_STORAGE_PROVIDER and the +# RR_DEFAULT_*_* env vars are provided directly via environmentVariables / +# environmentSecrets above — the chart detects that and skips this guard. +blobStorage: {} + # s3: + # bucket: my-rr-bucket + # region: us-east-1 + # endpoint: "" # optional, for S3-compatible (MinIO, R2, etc.) + # accessKeyId: AKIA... + # # Provide secretAccessKey OR the secretName/secretKey pair below. + # secretAccessKey: "" + # secretAccessKeySecretName: "" + # secretAccessKeySecretKey: secret-access-key + # + # gcs: + # bucket: my-rr-bucket + # # Provide credentials (JSON string) OR the secretName/secretKey pair below. + # credentials: "" + # credentialsSecretName: "" + # credentialsSecretKey: credentials.json + # + # azure: + # container: my-rr-container + # # Provide connectionString OR the secretName/secretKey pair below. + # connectionString: "" + # connectionStringSecretName: "" + # connectionStringSecretKey: connection-string + codeExecutor: # as of Chart version 6.7.0, code-executor image version must align with the top-level `image` parameters # explicitly set other fields as needed diff --git a/values.yaml b/values.yaml index acd543a..0391b9c 100644 --- a/values.yaml +++ b/values.yaml @@ -705,6 +705,54 @@ mcp: annotations: {} labels: {} +rrGitServer: + # Runs the React Retool Git Server in-process on the main backend pod + # (SERVICE_TYPE=...,RR_GIT_SERVER). The main backend internally proxies + # /api/ai/rr/git/v2/* to localhost:RR_GIT_SERVER_PORT, so no extra ingress + # routing is required. Required for the r2 / React Retool app pipeline. + # + # When enabled, exactly one of blobStorage.s3, blobStorage.gcs, or + # blobStorage.azure must be configured below — git_server stores all + # objects/packs in blob storage. + enabled: false + + # Optional: number of loose objects before git_server triggers a repack. + # Backend default is 100; unset to inherit it.
+ repackThreshold: ~ + +# Shared blob-storage config used by git_server (and other features that +# need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. +# Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT_<PROVIDER>_* env vars on +# the backend deployment. +# +# This block can be omitted entirely if RR_BLOB_STORAGE_PROVIDER and the +# RR_DEFAULT_*_* env vars are provided directly via environmentVariables / +# environmentSecrets above — the chart detects that and skips this guard. +blobStorage: {} + # s3: + # bucket: my-rr-bucket + # region: us-east-1 + # endpoint: "" # optional, for S3-compatible (MinIO, R2, etc.) + # accessKeyId: AKIA... + # # Provide secretAccessKey OR the secretName/secretKey pair below. + # secretAccessKey: "" + # secretAccessKeySecretName: "" + # secretAccessKeySecretKey: secret-access-key + # + # gcs: + # bucket: my-rr-bucket + # # Provide credentials (JSON string) OR the secretName/secretKey pair below. + # credentials: "" + # credentialsSecretName: "" + # credentialsSecretKey: credentials.json + # + # azure: + # container: my-rr-container + # # Provide connectionString OR the secretName/secretKey pair below.
+ # connectionString: "" + # connectionStringSecretName: "" + # connectionStringSecretKey: connection-string + codeExecutor: # as of Chart version 6.7.0, code-executor image version must align with the top-level `image` parameters # explicitly set other fields as needed From b61114514a285b86e1a2b0ec373be9e11ca80ec0 Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Fri, 22 May 2026 14:17:10 -0700 Subject: [PATCH 18/37] tune (#297) --- charts/retool/templates/deployment_agent_sandbox.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 6b0634a..f68ebd7 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -380,9 +380,10 @@ spec: httpGet: path: /livez port: http - initialDelaySeconds: 3 + initialDelaySeconds: 10 periodSeconds: 10 - timeoutSeconds: 3 + timeoutSeconds: 5 + failureThreshold: 5 readinessProbe: httpGet: path: /health @@ -563,9 +564,10 @@ spec: httpGet: path: /livez port: http - initialDelaySeconds: 3 + initialDelaySeconds: 10 periodSeconds: 10 - timeoutSeconds: 3 + timeoutSeconds: 5 + failureThreshold: 5 readinessProbe: httpGet: path: /health From be8d372727448ff795e1c0d0ff503b37097470d3 Mon Sep 17 00:00:00 2001 From: arnold-retool Date: Tue, 26 May 2026 13:33:28 -0700 Subject: [PATCH 19/37] Add OAUTH_MAIN_DOMAIN for consistent authorization server domain in MCP metadata (#298) Adds `mcp.config.oauthMainDomain`, which renders `OAUTH_MAIN_DOMAIN` into the MCP deployment for OAuth metadata base URL configuration. Documents the new MCP OAuth domain configuration in both chart values files. Updates the MCP render fixture so Helm rendering exercises the new environment variable. Validated with Helm rendering and linting. 
--- charts/retool/ci/test-mcp-enabled-option.yaml | 1 + charts/retool/templates/deployment_mcp.yaml | 4 ++++ charts/retool/values.yaml | 4 ++++ values.yaml | 4 ++++ 4 files changed, 13 insertions(+) diff --git a/charts/retool/ci/test-mcp-enabled-option.yaml b/charts/retool/ci/test-mcp-enabled-option.yaml index 5c51b40..95a3d38 100644 --- a/charts/retool/ci/test-mcp-enabled-option.yaml +++ b/charts/retool/ci/test-mcp-enabled-option.yaml @@ -2,6 +2,7 @@ mcp: enabled: true replicaCount: 2 config: + oauthMainDomain: https://oauth.example.com oauthIntrospectionAuthToken: test-oauth-introspection-token enabledToolsets: - apps diff --git a/charts/retool/templates/deployment_mcp.yaml b/charts/retool/templates/deployment_mcp.yaml index fdcd632..c23a468 100644 --- a/charts/retool/templates/deployment_mcp.yaml +++ b/charts/retool/templates/deployment_mcp.yaml @@ -119,6 +119,10 @@ spec: - name: RETOOL_URL value: {{ $mcpConfig.retoolUrl | quote }} {{- end }} + {{- if $mcpConfig.oauthMainDomain }} + - name: OAUTH_MAIN_DOMAIN + value: {{ $mcpConfig.oauthMainDomain | quote }} + {{- end }} {{- if $mcpConfig.oauthIntrospectionAuthTokenSecretName }} - name: OAUTH_INTROSPECTION_AUTH_TOKEN valueFrom: diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 0391b9c..3608320 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -602,6 +602,10 @@ mcp: # # origin is not the right public URL. # retoolUrl: # + # # Public OAuth domain used by MCP OAuth metadata routes. Set explicitly + # # when the request origin is not the right OAuth base URL. + # oauthMainDomain: + # # # Secret-backed token used by MCP to call /api/oauth2/introspect. # # Required when mcp.enabled is true unless OAUTH_INTROSPECTION_AUTH_TOKEN # # is provided directly in mcp.environmentVariables. diff --git a/values.yaml b/values.yaml index 0391b9c..3608320 100644 --- a/values.yaml +++ b/values.yaml @@ -602,6 +602,10 @@ mcp: # # origin is not the right public URL. 
# retoolUrl: # + # # Public OAuth domain used by MCP OAuth metadata routes. Set explicitly + # # when the request origin is not the right OAuth base URL. + # oauthMainDomain: + # # # Secret-backed token used by MCP to call /api/oauth2/introspect. # # Required when mcp.enabled is true unless OAUTH_INTROSPECTION_AUTH_TOKEN # # is provided directly in mcp.environmentVariables. From 5a2e0a0b37ca5a1975af118e52f2a199f8e38ad9 Mon Sep 17 00:00:00 2001 From: arnold-retool Date: Tue, 2 Jun 2026 16:25:58 -0700 Subject: [PATCH 20/37] Add optional MCP git server URL (#299) --- charts/retool/templates/deployment_mcp.yaml | 4 ++++ charts/retool/values.yaml | 4 ++++ values.yaml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/charts/retool/templates/deployment_mcp.yaml b/charts/retool/templates/deployment_mcp.yaml index c23a468..d53dd9d 100644 --- a/charts/retool/templates/deployment_mcp.yaml +++ b/charts/retool/templates/deployment_mcp.yaml @@ -115,6 +115,10 @@ spec: value: {{ $mcpInternalPort | quote }} - name: RETOOL_BACKEND_URL value: {{ $mcpConfig.retoolBackendUrl | default (printf "http://%s:%v" (include "retool.fullname" .) .Values.service.externalPort) | quote }} + {{- if $mcpConfig.retoolGitServerUrl }} + - name: RETOOL_GIT_SERVER_URL + value: {{ $mcpConfig.retoolGitServerUrl | quote }} + {{- end }} {{- if $mcpConfig.retoolUrl }} - name: RETOOL_URL value: {{ $mcpConfig.retoolUrl | quote }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 3608320..a77ebf8 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -598,6 +598,10 @@ mcp: # # Defaults to the release's internal Retool service when unset. # retoolBackendUrl: # + # # Internal URL used by the MCP server to call the Retool git server. + # # Unset by default. + # retoolGitServerUrl: + # # # Public Retool URL used for links. Set explicitly when the request # # origin is not the right public URL. 
# retoolUrl: diff --git a/values.yaml b/values.yaml index 3608320..a77ebf8 100644 --- a/values.yaml +++ b/values.yaml @@ -598,6 +598,10 @@ mcp: # # Defaults to the release's internal Retool service when unset. # retoolBackendUrl: # + # # Internal URL used by the MCP server to call the Retool git server. + # # Unset by default. + # retoolGitServerUrl: + # # # Public Retool URL used for links. Set explicitly when the request # # origin is not the right public URL. # retoolUrl: From 34bcf517f4b13e1b3f5a2e1e4048fdad01373e5d Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:57:21 -0700 Subject: [PATCH 21/37] rr_agent_pubsub_backend (#300) --- charts/retool/templates/deployment_jobs.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/charts/retool/templates/deployment_jobs.yaml b/charts/retool/templates/deployment_jobs.yaml index fa08b81..dee84dd 100644 --- a/charts/retool/templates/deployment_jobs.yaml +++ b/charts/retool/templates/deployment_jobs.yaml @@ -93,6 +93,11 @@ spec: {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} + {{- if .Values.agentSandbox.enabled }} + - name: RR_AGENT_PUBSUB_BACKEND + value: "postgres" + {{- end }} + {{- if include "shouldIncludeConfigSecretsEnvVars" . 
}} - name: LICENSE_KEY valueFrom: From ed33a82f3026779bbb6d1dde1c3eed94a0f90221 Mon Sep 17 00:00:00 2001 From: Matt Westrik Date: Wed, 3 Jun 2026 14:42:15 -0700 Subject: [PATCH 22/37] Rename sandbox env vars (#295) * Rename sandbox env vars Also remove stale unused env vars & update job resource requests * fix sandbox job template commas --------- Co-authored-by: Ryan Artecona --- charts/retool/templates/_helpers.tpl | 24 ++++------ .../templates/deployment_agent_sandbox.yaml | 47 +++++++++---------- charts/retool/values.yaml | 10 ++-- values.yaml | 10 ++-- 4 files changed, 40 insertions(+), 51 deletions(-) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index e2f71b1..660fe32 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -631,47 +631,41 @@ Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} {{- define "retool.agentSandbox.backendEnvVars" -}} {{- if .Values.agentSandbox.enabled }} {{- $defaultSecretName := .Values.agentSandbox.externalSecret.name | default (include "retool.agentSandbox.name" .) -}} -- name: AGENT_EXECUTOR_ENABLED - value: "true" - name: RR_AGENT_PUBSUB_BACKEND value: "postgres" -- name: AGENT_EXECUTOR_CONTROLLER_INGRESS_DOMAIN +- name: AGENT_SANDBOX_CONTROLLER_INGRESS_DOMAIN value: {{ .Values.agentSandbox.controllerUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.controller.name" .) (toString .Values.agentSandbox.controller.port)) | quote }} -- name: AGENT_EXECUTOR_PROXY_INGRESS_DOMAIN +- name: AGENT_SANDBOX_PROXY_INGRESS_DOMAIN value: {{ .Values.agentSandbox.proxyUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.proxy.name" .) 
(toString .Values.agentSandbox.proxy.port)) | quote }} {{- if .Values.agentSandbox.frontendWsProxyDomain }} -- name: AGENT_EXECUTOR_FRONTEND_WS_PROXY_DOMAIN +- name: AGENT_SANDBOX_FRONTEND_WS_PROXY_DOMAIN value: {{ .Values.agentSandbox.frontendWsProxyDomain | quote }} {{- end }} -{{- if or .Values.agentSandbox.proxyDomain .Values.agentSandbox.frontendWsProxyDomain }} -- name: AGENT_EXECUTOR_PROXY_DOMAIN - value: {{ .Values.agentSandbox.proxyDomain | default .Values.agentSandbox.frontendWsProxyDomain | quote }} -{{- end }} {{- if .Values.agentSandbox.jwtPrivateKey }} -- name: AGENT_EXECUTOR_JWT_PRIVATE_KEY +- name: AGENT_SANDBOX_JWT_PRIVATE_KEY value: {{ .Values.agentSandbox.jwtPrivateKey | quote }} {{- else if .Values.agentSandbox.externalSecret.name }} -- name: AGENT_EXECUTOR_JWT_PRIVATE_KEY +- name: AGENT_SANDBOX_JWT_PRIVATE_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} key: jwt-private-key {{- end }} {{- if .Values.agentSandbox.jwtPublicKey }} -- name: AGENT_EXECUTOR_JWT_PUBLIC_KEY +- name: AGENT_SANDBOX_JWT_PUBLIC_KEY value: {{ .Values.agentSandbox.jwtPublicKey | quote }} {{- else if .Values.agentSandbox.externalSecret.name }} -- name: AGENT_EXECUTOR_JWT_PUBLIC_KEY +- name: AGENT_SANDBOX_JWT_PUBLIC_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} key: jwt-public-key {{- end }} {{- if .Values.agentSandbox.encryptionKey }} -- name: AGENT_EXECUTOR_ENCRYPTION_KEY +- name: AGENT_SANDBOX_ENCRYPTION_KEY value: {{ .Values.agentSandbox.encryptionKey | quote }} {{- else if .Values.agentSandbox.externalSecret.name }} -- name: AGENT_EXECUTOR_ENCRYPTION_KEY +- name: AGENT_SANDBOX_ENCRYPTION_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index f68ebd7..9f7e54b 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -165,18 +165,17 @@ data: 
"appArmorProfile": {"type": "Unconfined"} }, "env": [ - {"name": "NODE_ENV", "value": "production"}, - {"name": "EXECUTOR_PORT", "value": "{{ $as.sandbox.port }}"}, - {"name": "POD_NAME", "valueFrom": {"fieldRef": {"fieldPath": "metadata.name"}}}, - {"name": "POD_UID", "valueFrom": {"fieldRef": {"fieldPath": "metadata.uid"}}}, - {"name": "POD_IP", "valueFrom": {"fieldRef": {"fieldPath": "status.podIP"}}}, - {"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"}, - {"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"}, - {"name": "SANDBOX_MEMORY_LIMIT", "value": "{{ $as.sandbox.sandboxMemoryLimit }}"} + {"name": "NODE_ENV", "value": "production"} + ,{"name": "EXECUTOR_PORT", "value": "{{ $as.sandbox.port }}"} + ,{"name": "POD_NAME", "valueFrom": {"fieldRef": {"fieldPath": "metadata.name"}}} + ,{"name": "POD_UID", "valueFrom": {"fieldRef": {"fieldPath": "metadata.uid"}}} + ,{"name": "POD_IP", "valueFrom": {"fieldRef": {"fieldPath": "status.podIP"}}} + ,{"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"} + ,{"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"} {{- if $as.jwtPublicKey }} - ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "value": "{{ $as.jwtPublicKey }}"} + ,{"name": "AGENT_SANDBOX_JWT_PUBLIC_KEY", "value": "{{ $as.jwtPublicKey }}"} {{- else if $as.externalSecret.name }} - ,{"name": "AGENT_EXECUTOR_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $defaultSecretName }}", "key": "jwt-public-key"}}} + ,{"name": "AGENT_SANDBOX_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $defaultSecretName }}", "key": "jwt-public-key"}}} {{- end }} {{- if $as.proxy.backendDomainSuffixes }} ,{"name": "BACKEND_DOMAIN_SUFFIXES", "value": "{{ $as.proxy.backendDomainSuffixes }}"} @@ -295,13 +294,13 @@ spec: env: - name: NODE_ENV value: "production" - - name: AGENT_EXECUTOR_ROLE + - name: AGENT_SANDBOX_ROLE value: "controller" - name: 
CONTROLLER_PORT value: {{ $as.controller.port | quote }} - name: STATE_BACKEND value: "postgres" - - name: AGENT_EXECUTOR_POSTGRES_URL + - name: AGENT_SANDBOX_POSTGRES_URL {{- if $as.postgres.url }} value: {{ $as.postgres.url | quote }} {{- else }} @@ -310,9 +309,9 @@ spec: name: {{ $defaultSecretName }} key: postgres-url {{- end }} - - name: AGENT_EXECUTOR_POSTGRES_SCHEMA + - name: AGENT_SANDBOX_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX + - name: AGENT_SANDBOX_POSTGRES_POOL_MAX value: {{ $as.postgres.poolMax | quote }} - name: STATE_SWEEPER_INTERVAL_MS value: {{ $as.postgres.sweeperIntervalMs | quote }} @@ -367,10 +366,10 @@ spec: - name: DAEMONSET_NAME value: {{ include "retool.agentSandbox.name" . }}-image-prepuller {{- if $as.jwtPublicKey }} - - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + - name: AGENT_SANDBOX_JWT_PUBLIC_KEY value: {{ $as.jwtPublicKey | quote }} {{- else if $as.externalSecret.name }} - - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + - name: AGENT_SANDBOX_JWT_PUBLIC_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} @@ -499,13 +498,13 @@ spec: env: - name: NODE_ENV value: "production" - - name: AGENT_EXECUTOR_ROLE + - name: AGENT_SANDBOX_ROLE value: "proxy" - name: PROXY_PORT value: {{ $as.proxy.port | quote }} - name: STATE_BACKEND value: "postgres" - - name: AGENT_EXECUTOR_POSTGRES_URL + - name: AGENT_SANDBOX_POSTGRES_URL {{- if $as.postgres.url }} value: {{ $as.postgres.url | quote }} {{- else }} @@ -514,9 +513,9 @@ spec: name: {{ $defaultSecretName }} key: postgres-url {{- end }} - - name: AGENT_EXECUTOR_POSTGRES_SCHEMA + - name: AGENT_SANDBOX_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - - name: AGENT_EXECUTOR_POSTGRES_POOL_MAX + - name: AGENT_SANDBOX_POSTGRES_POOL_MAX value: {{ $as.postgres.poolMax | quote }} - name: STATE_SWEEPER_INTERVAL_MS value: {{ $as.postgres.sweeperIntervalMs | quote }} @@ -531,20 +530,20 @@ spec: value: {{ $as.proxy.backendDomainSuffixes | quote }} 
{{- end }} {{- if $as.encryptionKey }} - - name: AGENT_EXECUTOR_ENCRYPTION_KEY + - name: AGENT_SANDBOX_ENCRYPTION_KEY value: {{ $as.encryptionKey | quote }} {{- else if $as.externalSecret.name }} - - name: AGENT_EXECUTOR_ENCRYPTION_KEY + - name: AGENT_SANDBOX_ENCRYPTION_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} key: encryption-key {{- end }} {{- if $as.jwtPublicKey }} - - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + - name: AGENT_SANDBOX_JWT_PUBLIC_KEY value: {{ $as.jwtPublicKey | quote }} {{- else if $as.externalSecret.name }} - - name: AGENT_EXECUTOR_JWT_PUBLIC_KEY + - name: AGENT_SANDBOX_JWT_PUBLIC_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index a77ebf8..2e89b5b 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -945,7 +945,7 @@ agentSandbox: jwtPublicKey: '' jwtPrivateKey: '' # Hex-encoded 256-bit key for encrypting credentials stored in state backend. - # Must match the backend's AGENT_EXECUTOR_ENCRYPTION_KEY. + # Must match the backend's AGENT_SANDBOX_ENCRYPTION_KEY. encryptionKey: '' # API secret for admin/test endpoints. apiSecret: '' @@ -1012,15 +1012,13 @@ agentSandbox: port: 3017 resources: requests: - cpu: 500m - memory: 512Mi + cpu: 1 + memory: 2Gi limits: cpu: '2' memory: 4Gi - # Per-sandbox cgroup memory.max limit in bytes. - sandboxMemoryLimit: '1610612736' # 1.5 GB # Idle timeout (ms) before an unassigned sandbox self-terminates. - sandboxIdleTimeoutMs: 300000 + sandboxIdleTimeoutMs: 600000 tmpDirSizeLimit: 20Gi # Separate limit for the rootfs-appjob volume — the sandbox root filesystem # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi diff --git a/values.yaml b/values.yaml index a77ebf8..2e89b5b 100644 --- a/values.yaml +++ b/values.yaml @@ -945,7 +945,7 @@ agentSandbox: jwtPublicKey: '' jwtPrivateKey: '' # Hex-encoded 256-bit key for encrypting credentials stored in state backend. 
- # Must match the backend's AGENT_EXECUTOR_ENCRYPTION_KEY. + # Must match the backend's AGENT_SANDBOX_ENCRYPTION_KEY. encryptionKey: '' # API secret for admin/test endpoints. apiSecret: '' @@ -1012,15 +1012,13 @@ agentSandbox: port: 3017 resources: requests: - cpu: 500m - memory: 512Mi + cpu: 1 + memory: 2Gi limits: cpu: '2' memory: 4Gi - # Per-sandbox cgroup memory.max limit in bytes. - sandboxMemoryLimit: '1610612736' # 1.5 GB # Idle timeout (ms) before an unassigned sandbox self-terminates. - sandboxIdleTimeoutMs: 300000 + sandboxIdleTimeoutMs: 600000 tmpDirSizeLimit: 20Gi # Separate limit for the rootfs-appjob volume — the sandbox root filesystem # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi From d83fa5ce0609465cbbfa08fae012a6f1ba4d858c Mon Sep 17 00:00:00 2001 From: Luke Foster <54965784+lukefoster11@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:23:38 -0700 Subject: [PATCH 23/37] add new env vars (#301) --- charts/retool/templates/deployment_jobs.yaml | 28 ++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/charts/retool/templates/deployment_jobs.yaml b/charts/retool/templates/deployment_jobs.yaml index dee84dd..009ade1 100644 --- a/charts/retool/templates/deployment_jobs.yaml +++ b/charts/retool/templates/deployment_jobs.yaml @@ -98,6 +98,34 @@ spec: value: "postgres" {{- end }} + {{- $temporalConfig := (include "retool.temporalConfig" . | fromYaml) }} + {{- if or (index .Values "retool-temporal-services-helm" "enabled") ($temporalConfig).enabled }} + - name: WORKFLOW_TEMPORAL_CLUSTER_FRONTEND_HOST + value: {{ template "retool.temporal.host" . }} + - name: WORKFLOW_TEMPORAL_CLUSTER_FRONTEND_PORT + value: {{ template "retool.temporal.port" . }} + - name: WORKFLOW_TEMPORAL_CLUSTER_NAMESPACE + value: {{ template "retool.temporal.namespace" . 
}} + {{- end }} + {{- if ($temporalConfig).sslEnabled }} + - name: WORKFLOW_TEMPORAL_TLS_ENABLED + value: "true" + {{- if (and ($temporalConfig).sslCert ($temporalConfig).sslKey) }} + - name: WORKFLOW_TEMPORAL_TLS_CRT + value: {{ $temporalConfig.sslCert }} + - name: WORKFLOW_TEMPORAL_TLS_KEY + valueFrom: + secretKeyRef: + {{- if ($temporalConfig).sslKeySecretName }} + name: {{ $temporalConfig.sslKeySecretName }} + key: {{ ($temporalConfig).sslKeySecretKey | default "temporal-tls-key" }} + {{- else }} + name: {{ template "retool.fullname" . }} + key: "temporal-tls-key" + {{- end }} + {{- end }} + {{- end }} + {{- if include "shouldIncludeConfigSecretsEnvVars" . }} - name: LICENSE_KEY valueFrom: From 09f00454aa9e5975b822487dd08b4a96e1e832db Mon Sep 17 00:00:00 2001 From: jamie Date: Mon, 8 Jun 2026 18:01:50 -0500 Subject: [PATCH 24/37] [fix][R2] Increase the AE proxy timeout to be in line with fix in retool-k8s (#310) * [fix][R2] Increase the AE proxy timeout to be in line with fix in retool-k8s * Update charts/retool/values.yaml Co-authored-by: Ryan Artecona * Update values.yaml Co-authored-by: Ryan Artecona * lint fix --------- Co-authored-by: Ryan Artecona --- charts/retool/values.yaml | 2 +- values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 2e89b5b..58c1411 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -1071,7 +1071,7 @@ agentSandbox: # Defaults to http://:3000 (same-cluster backend service). backendUrl: '' backendDomainSuffixes: '' - sandboxProxyTimeoutMs: '' + sandboxProxyTimeoutMs: '180000' # 3 minutes service: # Set to LoadBalancer or NodePort to expose the proxy externally. type: ClusterIP diff --git a/values.yaml b/values.yaml index 2e89b5b..58c1411 100644 --- a/values.yaml +++ b/values.yaml @@ -1071,7 +1071,7 @@ agentSandbox: # Defaults to http://:3000 (same-cluster backend service).
backendUrl: '' backendDomainSuffixes: '' - sandboxProxyTimeoutMs: '' + sandboxProxyTimeoutMs: '180000' # 3 minutes service: # Set to LoadBalancer or NodePort to expose the proxy externally. type: ClusterIP From 7da7f86b2e6dc63cc1b20867821aee6522d521ea Mon Sep 17 00:00:00 2001 From: jatin Date: Tue, 9 Jun 2026 17:32:48 -0400 Subject: [PATCH 25/37] =?UTF-8?q?r2-cleanup=20=E2=86=92=20r2:=20R2=20enabl?= =?UTF-8?q?ement=20polish=20(master=20switch,=20same-origin=20proxy,=20sec?= =?UTF-8?q?rets,=20git-server=20split)=20(#315)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * js-executor: drop backend-shared env inheritance + resize resources (#304) * js-executor: stop inheriting backend-shared env The js-executor deployment looped over the backend-shared .Values.env and .Values.environmentSecrets (and .Values.environmentVariables) unfiltered, injecting db creds, auth/encryption secrets, license key, and other backend config into a pod that needs none of it. This pollutes the workload and widens the blast radius of any change to shared env. js-executor is a standalone nsjail JS sandbox that reads none of the backend-shared env vars. Replace the inheritance with per-workload overrides: jsExecutor.env / jsExecutor.environmentSecrets / jsExecutor.environmentVariables (all default empty), matching the self-contained pattern already used by the mcp and agent_sandbox workloads. Co-Authored-By: Claude Opus 4.8 (1M context) * js-executor: bump CPU to 6000m, set memory 6Gi Bump js-executor CPU rather than shrinking memory. Set requests == limits at cpu: 6000m / memory: 6Gi (Guaranteed QoS). The memory request is kept equal to the limit because JSE reads its memory limit and rejects requests at 80% of it, so the request must reserve the full amount. 
Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) * rrGitServer: accept blob-storage env vars from .Values.env (#307) * rrGitServer: also accept blob-storage env vars from .Values.env validateBlobStorage only scanned environmentVariables and environmentSecrets for RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_*, so deployments that configure those via the .Values.env map had to duplicate them into environmentVariables to satisfy the check. Range over .Values.env (keyed by var name) as well, and mention env in the doc comment and failure message. Co-Authored-By: Claude Opus 4.8 (1M context) * rrGitServer: add skipBlobStorageValidation escape hatch The blob-storage guard can only inspect blobStorage / env / environmentVariables / environmentSecrets at template time. Env vars injected via envFrom (a Secret/ConfigMap splat) are invisible to it, so a valid configuration that supplies RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* that way would fail the check with no way out. Add rrGitServer.skipBlobStorageValidation (default false) to bypass the check entirely, and point at it from the failure message. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) * Document self-hosted same-origin agent-sandbox proxy (no extra ingress) (#302) Clarify that leaving agentSandbox.frontendWsProxyDomain empty makes the backend serve the sandbox proxy same-origin via the main ingress, so no dedicated proxy domain or ingress object is required for self-hosted. Co-authored-by: Claude Opus 4.8 (1M context) * re-sync values.yaml with chart copy after #302 PR #302 updated the agentSandbox.frontendWsProxyDomain comment in charts/retool/values.yaml but not the mirrored root values.yaml, leaving the two out of sync (and failing the values-yaml-synced check on PRs targeting this branch). Copy the richer comment into the root values.yaml. 
Co-Authored-By: Claude Opus 4.8 (1M context) * [feat][r2] optionally split rrGitServer into its own deployment (#309) Adds rrGitServer.separate.enabled to run the git server as a dedicated deployment + service instead of in-process on the main backend, mirroring how the workload is split in Retool Cloud (reached via normal k8s service discovery). When enabled: - a dedicated -git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER on RR_GIT_SERVER_PORT, with the Postgres connection, bootstrap secrets, blob-storage env, and telemetry - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT - the MCP server (if enabled) is auto-pointed at the service unless mcp.config.retoolGitServerUrl is set explicitly The blob-storage env block is extracted into a shared helper (retool.rrGitServer.commonEnv) so the in-process backend and the standalone deployment stay in sync. In-process mode (rrGitServer.enabled without separate) is unchanged. Adds ci/test-rr-git-server-separate-option.yaml exercising the split + S3 blob storage + MCP auto-wiring. Co-authored-by: Claude Opus 4.8 (1M context) * agent-sandbox: validate required secrets + existing-secret DSN ref (#308) * agent-sandbox: validate required secrets, flexible Postgres DSN sourcing The agent-sandbox secret story was under-validated and rigid: - An empty postgres.url silently base64-encoded to nothing ({{ $as.postgres.url | default "" | b64enc }}), so a misconfigured deploy installed cleanly and the controller/proxy crash-looped at runtime. - jwtPublicKey / jwtPrivateKey (required for the controller/proxy to boot and for the backend to sign sandbox tokens) had no guard when absent. - Postgres could only be supplied as a plaintext DSN; operators could not reuse an existing password-only secret (e.g. the backend's Postgres password). 
The agent-sandbox app consumes a single connection string (no split-field code path), so the chart now offers four ways to supply it, validated at install: 1. postgres.url -- plaintext DSN. 2. postgres.host (+ user + database) -- the chart assembles postgres://user@host:port/database and supplies the password out-of-band via the PGPASSWORD env var, from postgres.password or postgres.passwordSecretName. node-postgres reads PGPASSWORD when the DSN omits the password, so the password needs no URL escaping -- any characters are safe. This is what lets a password-only secret be reused. 3. postgres.urlSecretName -- existing secret holding the full DSN. 4. externalSecret.name -- catch-all secret, postgres-url key. user/database are embedded in the assembled DSN verbatim. Percent-encoding does not round-trip here (pg-connection-string decodes userinfo before splitting on ':' and runs the path through decodeURI), so validateSecrets instead rejects the characters that would break parsing -- ':' '/' '?' '#' / whitespace in user and '?' '#' / whitespace in database. '@' is allowed (Azure-style user@servername parses correctly, splitting on the last '@'); for other characters use options 1 or 3. Other changes: - Add retool.agentSandbox.validateSecrets: fail at install time when an enabled workload is missing a Postgres source, user/database for the assemble path, a JWT public key, or a JWT private key, or has unsafe characters in user/database. - Promote the controller/proxy URL block to retool.agentSandbox.postgresUrlEnv. - Only write postgres-url into the chart-managed secret when a plaintext url is set, so empty keys are never emitted. - Document the canonical shapes and the password-secret reuse path. Audit: mcp already fails on its missing required secret; js_executor has no secrets, so neither needs changes. 
Co-Authored-By: Claude Opus 4.8 (1M context) * agent-sandbox: inherit backend Postgres connection by default Enabling the agent sandbox on an existing deployment previously meant re-entering the Postgres host/database/user (and pointing at the password) under agentSandbox.postgres, even though the sandbox lives in the same database as the backend, just under a separate schema. Add inheritance as the default: when none of agentSandbox.postgres.url / .host / .urlSecretName / agentSandbox.externalSecret.name is set, the chart assembles the DSN from the backend's connection (config.postgresql or the postgresql subchart, via the retool.postgresql.* helpers) and sources PGPASSWORD from the same secret the backend uses (mirrors POSTGRES_PASSWORD in deployment_backend.yaml). So enabling r2 against the existing database needs no new Postgres values; the schema stays separate (postgres.schema, default agent_executor). Any explicit option still overrides. validateSecrets gates the one combination inheritance can't reach: when the backend password is supplied via external secrets (envFrom) with no discrete key, it fails with guidance to set an explicit option. The assembled URL defaults the port to 5432 when config.postgresql.port is unset. Co-Authored-By: Claude Opus 4.8 (1M context) * agent-sandbox: fix stale Option 4 postgres comment After adding default inheritance, "leave options 1-3 blank" no longer selects Option 4 -- it selects the default (inherit config.postgresql). Clarify that Option 4 is chosen by setting externalSecret.name (in the Secrets section), and that leaving options 1-4 all unset falls through to inheritance. Co-Authored-By: Claude Opus 4.8 (1M context) * agent-sandbox: guard host-assembly path with no password source When postgres.host was set without postgres.password or postgres.passwordSecretName, postgresUrlEnv emitted a DSN with no password and no PGPASSWORD, so the misconfiguration only surfaced at runtime. 
validateSecrets now fails at install in that case, pointing to postgres.url / urlSecretName for intentionally passwordless setups (IAM/trust auth). Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) * ci: test coverage for r2 workloads (js-executor, agent-sandbox, r2Agent) (#312) * ci: add test values for agent-sandbox and js-executor workloads The R2 js-executor and agent-sandbox workloads had no CI test values, so a values change could break their templates silently. Only agents and mcp were covered under charts/retool/ci/. Add test-js-executor-enabled-option.yaml and test-agent-sandbox-enabled-option.yaml enabling each workload with realistic config. These are auto-discovered by .github/kubeconform.sh (find -name '*option.yaml') and overlaid on every base values file across the kubeconform matrix — no workflow change needed. Both pass helm template + kubeconform against all base values files on k8s 1.27.16 through 1.31.6. Co-Authored-By: Claude Opus 4.8 (1M context) * ci: expand r2 workload coverage (secret/postgres matrix, ingress modes, r2Agent) Rebased onto the latest r2-cleanup, which merged #308 (agent-sandbox validateSecrets + flexible Postgres sourcing) and #309 (split rrGitServer). 
Adds test values exercising the full new surface: agent-sandbox — one option file per secret/Postgres precedence path so every branch of postgresUrlEnv/validateSecrets is templated: - existing externalSecret.name file → Postgres option 4 + dedicated proxy domain WITH ingress + TLS + networkPolicy + device plugin + both PDBs - inline secrets (chart-rendered Secret) + plaintext DSN (option 1) + same-origin proxy / NO ingress + hostPath /dev/net/tun (devicePlugin off) - assemble DSN from fields + PGPASSWORD secretKeyRef (option 2), Azure-style user@server username, external device-manager (deployDaemonSet off) - full DSN from an existing Secret via urlSecretName (option 3) - zero-config inherit of the backend Postgres connection (option 5) r2Agent — new worker (R2_AGENT_TEMPORAL_WORKER, port 3016) Deployment/Service/PDB. js-executor — add environmentSecrets to cover the per-workload secretKeyRef branch. All ci/*option.yaml validate via helm template + kubeconform against all three base values files on k8s 1.27.16 and 1.31.6 (108 combinations). Co-Authored-By: Claude Opus 4.8 (1M context) * fix: honor jsExecutor.image.pullPolicy in js-executor deployment deployment_js_executor.yaml read the global .Values.image.pullPolicy, so the per-workload jsExecutor.image.pullPolicy knob (present in values.yaml) was dead. This was inconsistent with the js-executor image *tag* (per-workload via the retool.jsExecutor.image.tag helper) and with agent-sandbox (reads $as.image.pullPolicy). Read jsExecutor.image.pullPolicy with a fallback to the global value. The js-executor CI test now sets pullPolicy: Always (differs from the global IfNotPresent) so a regression back to the global value is caught. 
Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) * [feat][r2] add single r2.enabled master switch for R2 components (#313) * [feat][r2] add single r2.enabled master switch for R2 components Turning on the R2 stack previously meant flipping four independent flags (r2Agent, jsExecutor, agentSandbox, mcp). Add a top-level `r2.enabled` master switch that toggles all four collectively, with room for shared R2 config later. Semantics: inherit + override. Each component's `enabled` default changes from false to null; when null it inherits `r2.enabled`, and an explicit true/false on the component overrides the master for that component only. Backward compatible: existing configs that set the per-component flags explicitly behave identically. Add generic helper `retool.r2.componentEnabled`; `retool.r2Agent.enabled` delegates to it. Every read of these flags is routed through the helper -- not just the deployment guards but the cross-component env wiring in backend/workflows/jobs/_workers and the agentSandbox validate/backendEnv/ httproute helpers -- so an inherited (null) flag still drives JS_EXECUTOR and AGENT_SANDBOX env injection instead of reading as false. Add ci/test-r2-enabled-option.yaml covering the master-switch inherit path. Co-Authored-By: Claude Opus 4.8 (1M context) * [r2] update MCP oauth-token fail message for inherited enablement The error still said "when .Values.mcp.enabled is true", which misleads operators who enable MCP via the new master switch (r2.enabled: true) and leave mcp.enabled null. Reword to cover both the explicit flag and inheritance. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) * agent-sandbox: reject ':' and '/' in postgres.database DSN assembly The host-fields DSN path assembles postgres://user@host:port/database via printf, and validateSecrets guards the embedded user/database against characters that break URL parsing. 
The user check rejected [\s:/?#] but the database check only rejected [\s?#], so a database name containing '/' (e.g. 'my/db') silently produced postgres://user@host:5432/my/db -- which pg URL parsers read as database 'my' with a trailing path, connecting to the wrong database. Align the database check with the user check ([\s:/?#]); affected names must instead supply a full DSN via postgres.url / postgres.urlSecretName. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) --- .../ci/test-agent-sandbox-enabled-option.yaml | 77 +++++ ...agent-sandbox-inherit-postgres-option.yaml | 20 ++ ...t-agent-sandbox-inline-secrets-option.yaml | 46 +++ ...-agent-sandbox-postgres-fields-option.yaml | 39 +++ ...nt-sandbox-postgres-url-secret-option.yaml | 29 ++ .../ci/test-js-executor-enabled-option.yaml | 37 +++ .../ci/test-r2-agent-enabled-option.yaml | 25 ++ charts/retool/ci/test-r2-enabled-option.yaml | 32 ++ .../test-rr-git-server-separate-option.yaml | 38 +++ charts/retool/templates/_helpers.tpl | 277 +++++++++++++++- charts/retool/templates/_workers.tpl | 2 +- .../agent_sandbox_device_plugin.yaml | 2 +- .../agent_sandbox_networkpolicy.yaml | 2 +- .../templates/agent_sandbox_prepuller.yaml | 2 +- .../templates/agent_sandbox_seccomp.yaml | 2 +- .../templates/configmap_js_executor.yaml | 2 +- .../templates/deployment_agent_sandbox.yaml | 27 +- .../retool/templates/deployment_backend.yaml | 82 ++--- .../templates/deployment_git_server.yaml | 296 ++++++++++++++++++ charts/retool/templates/deployment_jobs.yaml | 2 +- .../templates/deployment_js_executor.yaml | 13 +- charts/retool/templates/deployment_mcp.yaml | 16 +- .../templates/deployment_workflows.yaml | 2 +- charts/retool/templates/httproute.yaml | 2 +- charts/retool/values.yaml | 158 +++++++--- values.yaml | 158 +++++++--- 26 files changed, 1201 insertions(+), 187 deletions(-) create mode 100644 charts/retool/ci/test-agent-sandbox-enabled-option.yaml create mode 100644 
charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml create mode 100644 charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml create mode 100644 charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml create mode 100644 charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml create mode 100644 charts/retool/ci/test-js-executor-enabled-option.yaml create mode 100644 charts/retool/ci/test-r2-agent-enabled-option.yaml create mode 100644 charts/retool/ci/test-r2-enabled-option.yaml create mode 100644 charts/retool/ci/test-rr-git-server-separate-option.yaml create mode 100644 charts/retool/templates/deployment_git_server.yaml diff --git a/charts/retool/ci/test-agent-sandbox-enabled-option.yaml b/charts/retool/ci/test-agent-sandbox-enabled-option.yaml new file mode 100644 index 0000000..935d1c4 --- /dev/null +++ b/charts/retool/ci/test-agent-sandbox-enabled-option.yaml @@ -0,0 +1,77 @@ +# Agent Sandbox — external secret + dedicated proxy domain WITH ingress. +# +# This is the "max surface" scenario: controller + proxy deployments, the +# job-template ConfigMap, RBAC, headless/proxy services, proxy ingress + TLS, +# the image-prepuller + seccomp DaemonSets, the smarter-device-manager device +# plugin DaemonSet, the NetworkPolicies, and both PDBs. +# +# Secret/Postgres sourcing here uses externalSecret.name (Postgres OPTION 4: +# the secret's postgres-url key). The other secret/Postgres precedence paths and +# the same-origin (no-ingress) proxy mode are covered by sibling files: +# - test-agent-sandbox-inline-secrets-option.yaml (inline secrets, plaintext DSN, same-origin/no ingress, hostPath tun) +# - test-agent-sandbox-postgres-fields-option.yaml (assemble DSN from fields + PGPASSWORD secret) +# - test-agent-sandbox-postgres-url-secret-option.yaml (full DSN from an existing secret) +# - test-agent-sandbox-inherit-postgres-option.yaml (zero-config inherit of the backend Postgres) +# Overlaid on test-install-values.yaml. 
+agentSandbox: + enabled: true + + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent + + # Reference a pre-existing K8s Secret (the production-recommended path) rather + # than inlining JWT/encryption material into the chart. With externalSecret.name + # set, secret-backed env vars — including the ones injected into the sandbox + # job-template — resolve via secretKeyRef instead of plaintext. + externalSecret: + name: agent-sandbox-secrets + + postgres: + schema: agent_executor + poolMax: 10 + + sandboxNetwork: + enabled: true + devicePlugin: true + deployDaemonSet: true + + snapshotStorage: + s3Bucket: retool-agent-sandbox-snapshots + s3Endpoint: https://s3.us-east-1.amazonaws.com + s3Region: us-east-1 + credentialsSecretName: agent-sandbox-s3-credentials + + # replicaCount > 1 renders the controller PodDisruptionBudget. + controller: + replicaCount: 2 + + proxy: + replicaCount: 2 + allowedDomains: api.example.com,example.com + backendDomainSuffixes: .example.com + sandboxProxyTimeoutMs: "3600000" + service: + type: ClusterIP + # Dedicated proxy domain → renders the proxy Ingress. + ingress: + enabled: true + ingressClassName: nginx + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: sandbox.example.com + tls: + - secretName: agent-sandbox-tls + hosts: + - sandbox.example.com + frontendWsProxyDomain: https://sandbox.example.com + + # Restrict sandbox/controller/proxy traffic → renders the NetworkPolicies. + networkPolicy: + enabled: true + +# Exercise the proxy PodDisruptionBudget branch. 
+podDisruptionBudget: + maxUnavailable: 1 diff --git a/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml b/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml new file mode 100644 index 0000000..adfba57 --- /dev/null +++ b/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml @@ -0,0 +1,20 @@ +# Agent Sandbox — Postgres sourcing OPTION 5 (default): inherit the backend's +# Postgres connection. With agentSandbox.postgres left entirely unset, the +# controller/proxy reuse config.postgresql / the postgresql subchart (same +# instance and database, separate schema) — the zero-config path for enabling +# the sandbox on an existing deployment. PGPASSWORD mirrors the backend's +# POSTGRES_PASSWORD secretKeyRef, and the DSN is assembled from the postgresql +# helpers. The base test-install-values.yaml enables the postgresql subchart, +# which is what makes inheritance resolve. +# +# Only the (required) JWT secrets are provided; everything else is left default. +agentSandbox: + enabled: true + + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent + + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' diff --git a/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml b/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml new file mode 100644 index 0000000..163ea8a --- /dev/null +++ b/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml @@ -0,0 +1,46 @@ +# Agent Sandbox — inline secrets + plaintext DSN + same-origin proxy (no ingress). 
+# +# Complements test-agent-sandbox-enabled-option.yaml (external secret + dedicated +# proxy ingress). Here we exercise the *other* halves of those branches: +# - Secrets inline (no externalSecret.name) → the chart renders its own Secret +# (jwt-public-key / jwt-private-key / encryption-key / api-secret). jwtPublicKey +# MUST be single-line: it is injected raw into the sandbox job-template JSON. +# - Postgres sourcing OPTION 1: plaintext DSN via postgres.url. +# - Same-origin proxy: no dedicated proxy domain and no proxy ingress — the +# backend reverse-proxies /sandbox/* (frontendWsProxyDomain left empty). +# - sandboxNetwork.devicePlugin=false → sandbox pods get /dev/net/tun via +# hostPath (the non-device-plugin branch), and no device-manager DaemonSet. +# - networkPolicy disabled. +agentSandbox: + enabled: true + + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent + + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a + apiSecret: test-agent-sandbox-api-secret + + # Option 1: plaintext DSN. + postgres: + url: postgres://retool:retool@agent-sandbox-db.example.internal:5432/agent_sandbox + schema: agent_executor + poolMax: 10 + + sandboxNetwork: + enabled: true + devicePlugin: false + deployDaemonSet: false + + proxy: + # Same-origin: ClusterIP service, no ingress. 
+ service: + type: ClusterIP + ingress: + enabled: false + + networkPolicy: + enabled: false diff --git a/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml b/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml new file mode 100644 index 0000000..714c761 --- /dev/null +++ b/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml @@ -0,0 +1,39 @@ +# Agent Sandbox — Postgres sourcing OPTION 2: assemble the DSN from discrete +# fields, with the password supplied via PGPASSWORD from a pre-existing Secret +# (never embedded in the URL, so any password characters are safe). +# +# Also exercises: +# - An Azure-style "user@servername" username, which validateSecrets allows +# (the parser splits userinfo on the last '@'). +# - sandboxNetwork with devicePlugin=true but deployDaemonSet=false (a +# smarter-device-manager already runs on the nodes, managed elsewhere) → +# sandbox pods request smarter-devices/net_tun but no DS is rendered. +# - networkPolicy enabled. +agentSandbox: + enabled: true + + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent + + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + + # Option 2: host + user + database, password via PGPASSWORD secretKeyRef. 
+ postgres: + host: agentdb-prod.postgres.database.azure.com + port: 5432 + database: agent_sandbox + user: retool@agentdb-prod + passwordSecretName: agent-sandbox-db-password + passwordSecretKey: password + schema: agent_executor + + sandboxNetwork: + enabled: true + devicePlugin: true + deployDaemonSet: false + + networkPolicy: + enabled: true diff --git a/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml b/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml new file mode 100644 index 0000000..35953af --- /dev/null +++ b/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml @@ -0,0 +1,29 @@ +# Agent Sandbox — Postgres sourcing OPTION 3: the full DSN comes from a +# pre-existing Secret (postgres.urlSecretName / urlSecretKey), while the JWT/ +# encryption secrets are provided inline. This is the "BYO DB secret, chart- +# managed app secrets" combination. +# +# Also exercises S3 snapshot storage WITHOUT a dedicated credentialsSecretName, +# so the sandbox AWS creds fall back to the default (chart-rendered) Secret. +agentSandbox: + enabled: true + + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent + + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a + + # Option 3: full DSN from an existing Secret. 
+ postgres: + urlSecretName: agent-sandbox-db-dsn + urlSecretKey: connection-string + schema: agent_executor + + snapshotStorage: + s3Bucket: retool-agent-sandbox-snapshots + s3Endpoint: https://s3.us-west-2.amazonaws.com + s3Region: us-west-2 diff --git a/charts/retool/ci/test-js-executor-enabled-option.yaml b/charts/retool/ci/test-js-executor-enabled-option.yaml new file mode 100644 index 0000000..ee8a4a1 --- /dev/null +++ b/charts/retool/ci/test-js-executor-enabled-option.yaml @@ -0,0 +1,37 @@ +# Exercises the JS executor workload (deployment_js_executor.yaml + +# configmap_js_executor.yaml). Overlaid on top of test-install-values.yaml. +jsExecutor: + enabled: true + replicaCount: 2 + image: + repository: tryretool/js-executor-service + tag: 3.123.4 + # Deliberately differs from the global image.pullPolicy (IfNotPresent in the + # base values) so the rendered deployment proves the per-workload override is + # honored rather than the global value. + pullPolicy: Always + # JS-executor-specific env (not inherited from top-level .Values.env). + env: + LOG_LEVEL: info + # Exercise the per-workload secretKeyRef branch. + environmentSecrets: + - name: JS_EXECUTOR_TOKEN + secretKeyRef: + name: js-executor-secrets + key: token + environmentVariables: + - name: JS_EXECUTOR_TEST_OPTION + value: "true" + # Memory request and limit are kept equal: JSE rejects requests at 80% of + # its limit, so the request must reserve the full amount. + resources: + limits: + cpu: 4000m + memory: 4Gi + requests: + cpu: 4000m + memory: 4Gi + +# Exercise the PDB branch shared by the JS executor deployment. 
+podDisruptionBudget: + maxUnavailable: 1 diff --git a/charts/retool/ci/test-r2-agent-enabled-option.yaml b/charts/retool/ci/test-r2-agent-enabled-option.yaml new file mode 100644 index 0000000..a76cb34 --- /dev/null +++ b/charts/retool/ci/test-r2-agent-enabled-option.yaml @@ -0,0 +1,25 @@ +# Exercises the R2 Agent worker (the server-side agent loop worker rendered via +# _workers.tpl as SERVICE_TYPE=R2_AGENT_TEMPORAL_WORKER on healthcheck port 3016). +# Renders a Deployment + Service, plus a PodDisruptionBudget when one is set. +# Overlaid on test-install-values.yaml. +r2Agent: + enabled: true + config: + nodeOptions: "--max_old_space_size=2048" + worker: + replicaCount: 2 + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + labels: + test-pod-label: "true" + annotations: + test-pod-annotation: "true" + +# Exercise the worker PodDisruptionBudget branch. +podDisruptionBudget: + maxUnavailable: 1 diff --git a/charts/retool/ci/test-r2-enabled-option.yaml b/charts/retool/ci/test-r2-enabled-option.yaml new file mode 100644 index 0000000..0a05b16 --- /dev/null +++ b/charts/retool/ci/test-r2-enabled-option.yaml @@ -0,0 +1,32 @@ +# R2 master switch — single flag turns on the whole R2 stack. +# +# Exercises the `r2.enabled: true` inherit path: r2Agent, jsExecutor, +# agentSandbox, and mcp all leave their own `enabled` unset (null) and inherit +# the master switch. This guards `retool.r2.componentEnabled` and the +# helper-routed cross-component env wiring (backend/workflows/jobs/workers read +# effective-enabled, not the raw per-component flag). +# +# Secrets/config below are only what each component requires to template when +# enabled; none of them set `*.enabled`, so enablement comes solely from r2. +r2: + enabled: true + +agentSandbox: + # jwtPublicKey is injected raw into the sandbox job-template JSON, so it MUST + # be single-line (\n-escaped) or templating breaks. 
+ jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49AwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + postgres: + url: postgres://retool:retool@agent-sandbox-db.example.internal:5432/agent_sandbox + schema: agent_executor + proxy: + service: + type: ClusterIP + ingress: + enabled: false + networkPolicy: + enabled: false + +mcp: + config: + oauthIntrospectionAuthToken: test-oauth-introspection-token diff --git a/charts/retool/ci/test-rr-git-server-separate-option.yaml b/charts/retool/ci/test-rr-git-server-separate-option.yaml new file mode 100644 index 0000000..e07296b --- /dev/null +++ b/charts/retool/ci/test-rr-git-server-separate-option.yaml @@ -0,0 +1,38 @@ +podDisruptionBudget: + minAvailable: 1 + +rrGitServer: + enabled: true + repackThreshold: 200 + separate: + enabled: true + replicaCount: 2 + port: 3010 + resources: + requests: + cpu: 250m + memory: 512Mi + annotations: + test-pod-annotation: "true" + labels: + test-pod-label: "true" + service: + annotations: + test-service-annotation: "true" + labels: + test-service-label: "true" + +blobStorage: + s3: + bucket: test-rr-bucket + region: us-east-1 + accessKeyId: AKIATEST + secretAccessKeySecretName: rr-blob-storage + secretAccessKeySecretKey: secret-access-key + +# Exercise the MCP auto-wiring to the standalone git server service. 
+mcp:
+  enabled: true
+  config:
+    oauthMainDomain: https://oauth.example.com
+    oauthIntrospectionAuthToken: test-oauth-introspection-token
diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl
index 660fe32..f5734c3 100644
--- a/charts/retool/templates/_helpers.tpl
+++ b/charts/retool/templates/_helpers.tpl
@@ -420,15 +420,25 @@ Usage: (include "retool.agents.enabled" .)
 {{- end -}}
 
 {{/*
-Set R2 agent enabled
+Resolve whether an R2 component (r2Agent, jsExecutor, agentSandbox, mcp) is
+enabled. The component's own `enabled` wins when explicitly set to true/false;
+when left unset (null) it inherits the shared master switch .Values.r2.enabled.
+Usage: (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor"))
+Returns "1" when enabled, "" otherwise. A missing component or r2 map counts as unset.
+*/}}
+{{- define "retool.r2.componentEnabled" -}}
+{{- $cfg := index .root.Values .component | default dict -}}
+{{- if kindIs "invalid" $cfg.enabled -}}
+  {{- if eq (toString (.root.Values.r2 | default dict).enabled) "true" -}}1{{- end -}}
+{{- else if eq (toString $cfg.enabled) "true" -}}1{{- end -}}
+{{- end -}}
+
+{{/*
+Set R2 agent worker enabled. Honors the shared R2 master switch.
 Usage: (include "retool.r2Agent.enabled" .)
 */}}
 {{- define "retool.r2Agent.enabled" -}}
-{{- $output := "" -}}
-{{- if (eq (toString .Values.r2Agent.enabled) "true") -}}
-  {{- $output = "1" -}}
-{{- end -}}
-{{- $output -}}
+{{- include "retool.r2.componentEnabled" (dict "root" . "component" "r2Agent") -}}
 {{- end -}}
 
 {{/* Global Temporal configuration */}}
@@ -623,13 +633,146 @@ app.kubernetes.io/component: proxy
 telemetry.retool.com/service-name: agent-sandbox-proxy
 {{- end -}}
 
+{{/*
+Validate that an enabled agent sandbox has its required secrets supplied. The
+controller and proxy fail to boot without a Postgres connection and a JWT
+public key, and the Retool backend needs the JWT private key to sign sandbox
+tokens.
Each may come from a plaintext value, the per-key existing-secret refs,
+or the catch-all externalSecret.name. No-op when agentSandbox is disabled.
+*/}}
+{{- define "retool.agentSandbox.validateSecrets" -}}
+{{- if eq (include "retool.r2.componentEnabled" (dict "root" . "component" "agentSandbox")) "1" -}}
+{{- $as := .Values.agentSandbox -}}
+{{- $ext := $as.externalSecret.name -}}
+{{- $explicitPg := or $as.postgres.url $as.postgres.urlSecretName $as.postgres.host $ext -}}
+{{- if not $explicitPg -}}
+{{- /* No explicit source: inherit the backend's Postgres connection. */ -}}
+{{- if not (include "retool.postgresql.host" . | trimAll "\"") -}}
+{{- fail "agentSandbox.enabled defaults to reusing the backend's Postgres connection, but config.postgresql resolved no host. Set agentSandbox.postgres.url / .host / .urlSecretName / externalSecret.name, or configure config.postgresql." -}}
+{{- end -}}
+{{- if not (or .Values.postgresql.enabled .Values.config.postgresql.passwordSecretName (eq (include "shouldIncludeConfigSecretsEnvVars" . | trim) "1")) -}}
+{{- fail "agentSandbox.postgres is unset so it would inherit the backend's Postgres password, but that password is supplied via external secrets (envFrom) and cannot be referenced from a separate pod. Set agentSandbox.postgres.url / .urlSecretName / .host (+ passwordSecretName), or agentSandbox.externalSecret.name." -}}
+{{- end -}}
+{{- end -}}
+{{- if and $as.postgres.host (not $as.postgres.url) -}}{{- /* url outranks host in postgresUrlEnv, so the host fields are only checked when url is unset */ -}}
+{{- if not (and $as.postgres.user $as.postgres.database) -}}
+{{- fail "agentSandbox.postgres.host is set, so postgres.user and postgres.database are also required to assemble the DSN." -}}
+{{- end -}}
+{{- if not (or $as.postgres.password $as.postgres.passwordSecretName) -}}
+{{- fail "agentSandbox.postgres.host is set, so a password is required: set postgres.password or postgres.passwordSecretName. For a passwordless connection (e.g. IAM/trust auth), supply the full connection string via postgres.url or postgres.urlSecretName instead." -}}
+{{- end -}}
+{{- /*
+  user and database are embedded verbatim in the assembled DSN, so reject the
+  characters that would break URL parsing. '@' is allowed in user (managed
+  services like Azure use user@servername; the parser splits on the last '@'),
+  but whitespace and ':' '/' '?' '#' would be mis-parsed as a password, host,
+  path, query or fragment. For values needing those characters, supply a full
+  DSN via postgres.url or postgres.urlSecretName instead.
+*/}}
+{{- if regexMatch "[\\s:/?#]" ($as.postgres.user | toString) -}}
+{{- fail "agentSandbox.postgres.user contains a character that breaks DSN assembly (whitespace, : / ? #). '@' is fine (e.g. Azure user@server); otherwise supply a full DSN via agentSandbox.postgres.url or postgres.urlSecretName." -}}
+{{- end -}}
+{{- if regexMatch "[\\s:/?#]" ($as.postgres.database | toString) -}}
+{{- fail "agentSandbox.postgres.database contains a character that breaks DSN assembly (whitespace, : / ? #); supply a full DSN via agentSandbox.postgres.url or postgres.urlSecretName." -}}
+{{- end -}}
+{{- end -}}
+{{- if not (or $as.jwtPublicKey $ext) -}}
+{{- fail "agentSandbox.enabled requires a JWT public key. Set agentSandbox.jwtPublicKey or agentSandbox.externalSecret.name." -}}
+{{- end -}}
+{{- if not (or $as.jwtPrivateKey $ext) -}}
+{{- fail "agentSandbox.enabled requires a JWT private key (the backend signs sandbox tokens with it). Set agentSandbox.jwtPrivateKey or agentSandbox.externalSecret.name." -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Render the AGENT_SANDBOX_POSTGRES_URL env entry for the controller/proxy (plus a
+PGPASSWORD entry when assembling from fields). validateSecrets guarantees one of
+these applies, in order: postgres.url -> postgres.host -> postgres.urlSecretName
+-> externalSecret.name -> inherit the backend's config.postgresql connection
+(the default when nothing agent-specific is set).
+
+For the host path the password is passed via PGPASSWORD rather than embedded in
+the URL: node-postgres reads PGPASSWORD when the connection string omits the
+password, so it needs no URL escaping. PGPASSWORD is process-global but safe
+here because the controller/proxy open exactly one Postgres connection. user and
+database are embedded verbatim (percent-encoding doesn't round-trip here -- the
+parser decodes userinfo before splitting on ':', and runs the path through
+decodeURI); validateSecrets instead rejects the characters that would break
+parsing. An Azure-style "user@servername" is fine -- the parser splits on the
+last '@'. Fields are stringified and user-supplied secret refs quoted so YAML-typed values survive.
+Usage: {{- include "retool.agentSandbox.postgresUrlEnv" . | nindent 12 }}
+*/}}
+{{- define "retool.agentSandbox.postgresUrlEnv" -}}
+{{- $pg := .Values.agentSandbox.postgres -}}
+{{- $ext := .Values.agentSandbox.externalSecret.name -}}
+{{- if $pg.url }}
+- name: AGENT_SANDBOX_POSTGRES_URL
+  value: {{ $pg.url | quote }}
+{{- else if $pg.host }}
+{{- $port := $pg.port | default 5432 -}}
+{{- if $pg.passwordSecretName }}
+- name: PGPASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ $pg.passwordSecretName | quote }}
+      key: {{ $pg.passwordSecretKey | default "password" | quote }}
+{{- else if $pg.password }}
+- name: PGPASSWORD
+  value: {{ $pg.password | quote }}
+{{- end }}
+- name: AGENT_SANDBOX_POSTGRES_URL
+  value: {{ printf "postgres://%s@%s:%v/%s" (toString $pg.user) (toString $pg.host) $port (toString $pg.database) | quote }}
+{{- else if $pg.urlSecretName }}
+- name: AGENT_SANDBOX_POSTGRES_URL
+  valueFrom:
+    secretKeyRef:
+      name: {{ $pg.urlSecretName | quote }}
+      key: {{ $pg.urlSecretKey | default "postgres-url" | quote }}
+{{- else if $ext }}
+- name: AGENT_SANDBOX_POSTGRES_URL
+  valueFrom:
+    secretKeyRef:
+      name: {{ $ext | quote }}
+      key: postgres-url
+{{- else }}
+{{- /*
+  Default: inherit the backend's Postgres connection (config.postgresql or the
+  postgresql subchart) -- same instance/database, separate schema. The password
+  is sourced from the same secret the backend uses; this block mirrors the
+  POSTGRES_PASSWORD secretKeyRef in deployment_backend.yaml. validateSecrets
+  rejects the one combination this can't reach (external-secrets mode with no
+  discrete password key).
+*/}}
+- name: PGPASSWORD
+  valueFrom:
+    secretKeyRef:
+      {{- if .Values.postgresql.enabled }}
+      name: {{ template "retool.postgresql.fullname" . }}
+      {{- if eq .Values.postgresql.auth.username "postgres" }}
+      key: postgres-password
+      {{- else }}
+      key: password
+      {{- end }}
+      {{- else if .Values.config.postgresql.passwordSecretName }}
+      name: {{ .Values.config.postgresql.passwordSecretName | quote }}
+      key: {{ .Values.config.postgresql.passwordSecretKey | default "postgresql-password" | quote }}
+      {{- else }}
+      name: {{ template "retool.fullname" . }}
+      key: postgresql-password
+      {{- end }}
+- name: AGENT_SANDBOX_POSTGRES_URL
+  value: {{ printf "postgres://%s@%s:%s/%s" (include "retool.postgresql.user" . | trimAll "\"") (include "retool.postgresql.host" . | trimAll "\"") (include "retool.postgresql.port" . | trimAll "\"" | default "5432") (include "retool.postgresql.database" . | trimAll "\"") | quote }}
+{{- end }}
+{{- end -}}
+
 {{/*
 Agent sandbox env vars for the Retool backend, workflow backend, and workers.
 Outputs env entries that tell the backend how to reach the agent sandbox services.
 Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }}
 */}}
 {{- define "retool.agentSandbox.backendEnvVars" -}}
-{{- if .Values.agentSandbox.enabled }}
+{{- if eq (include "retool.r2.componentEnabled" (dict "root" . "component" "agentSandbox")) "1" }}
 {{- $defaultSecretName := .Values.agentSandbox.externalSecret.name | default (include "retool.agentSandbox.name" .) -}}
 - name: RR_AGENT_PUBSUB_BACKEND
   value: "postgres"
@@ -681,16 +824,130 @@ Set MCP server service name
 {{ template "retool.fullname" . 
}}-mcp
 {{- end -}}
 
+{{/*
+Name shared by the standalone git server Deployment and Service (rrGitServer.separate only).
+*/}}
+{{- define "retool.rrGitServer.name" -}}
+{{ include "retool.fullname" . }}-git-server
+{{- end -}}
+
+{{/*
+Emits "1" when rrGitServer.enabled and rrGitServer.separate.enabled are both
+truthy, i.e. the git server gets its own deployment/service; emits nothing otherwise.
+*/}}
+{{- define "retool.rrGitServer.separateEnabled" -}}
+{{- $separate := .Values.rrGitServer.separate | default dict -}}
+{{- if and .Values.rrGitServer.enabled $separate.enabled -}}
+1{{- end -}}
+{{- end -}}
+
+{{/*
+Listen/service port of the standalone git server (RR_GIT_SERVER_PORT); falls back to 3010.
+*/}}
+{{- define "retool.rrGitServer.port" -}}
+{{- default 3010 (.Values.rrGitServer.separate | default dict).port -}}
+{{- end -}}
+
+{{/*
+In-cluster URL of the standalone git server service: http://<fullname>-git-server:<port>
+(port defaults to 3010). Consumers such as the MCP server use it to reach the split-out git server.
+*/}}
+{{- define "retool.rrGitServer.url" -}}
+{{- printf "http://%s:%s" (include "retool.rrGitServer.name" .) (include "retool.rrGitServer.port" .) -}}
+{{- end -}}
+
+{{/*
+Env entries for blob storage and git repack, included by both the main backend
+(in-process git server) and the standalone git server deployment. git_server
+keeps all objects/packs in blob storage, and snapshots read the same
+RR_DEFAULT_* vars. The first configured provider of s3, gcs, azure is emitted;
+with none configured no provider vars are rendered and the operator is expected
+to supply RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* via environmentVariables / environmentSecrets.
+*/}}
+{{- define "retool.rrGitServer.commonEnv" -}}
+{{- $store := default dict .Values.blobStorage }}
+{{- if $store.s3 }}
+- name: RR_BLOB_STORAGE_PROVIDER
+  value: "s3"
+- name: RR_DEFAULT_S3_BUCKET
+  value: {{ quote $store.s3.bucket }}
+{{- if $store.s3.region }}
+- name: RR_DEFAULT_S3_REGION
+  value: {{ quote $store.s3.region }}
+{{- end }}
+{{- if $store.s3.endpoint }}
+- name: RR_DEFAULT_S3_ENDPOINT
+  value: {{ quote $store.s3.endpoint }}
+{{- end }}
+{{- if $store.s3.accessKeyId }}
+- name: RR_DEFAULT_S3_ACCESS_KEY_ID
+  value: {{ quote $store.s3.accessKeyId }}
+{{- end }}
+{{- if $store.s3.secretAccessKeySecretName }}
+- name: RR_DEFAULT_S3_SECRET_ACCESS_KEY
+  valueFrom:
+    secretKeyRef:
+      name: {{ $store.s3.secretAccessKeySecretName }}
+      key: {{ default "secret-access-key" $store.s3.secretAccessKeySecretKey }}
+{{- else if $store.s3.secretAccessKey }}
+- name: RR_DEFAULT_S3_SECRET_ACCESS_KEY
+  value: {{ quote $store.s3.secretAccessKey }}
+{{- end }}
+{{- else if $store.gcs }}
+- name: RR_BLOB_STORAGE_PROVIDER
+  value: "gcs"
+- name: RR_DEFAULT_GCS_BUCKET
+  value: {{ quote $store.gcs.bucket }}
+{{- if $store.gcs.credentialsSecretName }}
+- name: RR_DEFAULT_GCS_CREDENTIALS
+  valueFrom:
+    secretKeyRef:
+      name: {{ $store.gcs.credentialsSecretName }}
+      key: {{ default "credentials.json" $store.gcs.credentialsSecretKey }}
+{{- else if $store.gcs.credentials }}
+- name: RR_DEFAULT_GCS_CREDENTIALS
+  value: {{ quote $store.gcs.credentials }}
+{{- end }}
+{{- else if $store.azure }}
+- name: RR_BLOB_STORAGE_PROVIDER
+  value: "azure"
+- name: RR_DEFAULT_AZURE_CONTAINER
+  value: {{ quote $store.azure.container }}
+{{- if $store.azure.connectionStringSecretName }}
+- name: RR_DEFAULT_AZURE_CONNECTION_STRING
+  valueFrom:
+    secretKeyRef:
+      name: {{ $store.azure.connectionStringSecretName }}
+      key: {{ default "connection-string" $store.azure.connectionStringSecretKey }}
+{{- else if $store.azure.connectionString }}
+- name: RR_DEFAULT_AZURE_CONNECTION_STRING
+  value: {{ quote $store.azure.connectionString }}
+{{- end }}
+{{- end 
}} +{{- if .Values.rrGitServer.repackThreshold }} +- name: RR_GIT_REPACK_THRESHOLD + value: {{ .Values.rrGitServer.repackThreshold | quote }} +{{- end }} +{{- end -}} + {{/* Validate that exactly one blob-storage provider is configured when rrGitServer is enabled. Skipped when the user has plumbed the RR_BLOB_STORAGE_PROVIDER / -RR_DEFAULT_*_* env vars in directly via environmentVariables/environmentSecrets, +RR_DEFAULT_*_* env vars in directly via env/environmentVariables/environmentSecrets, which is treated as an opt-out from the first-class blobStorage config. +Also skipped entirely when rrGitServer.skipBlobStorageValidation is true, which +is the escape hatch for sources we cannot inspect at template time (e.g. env +vars injected via envFrom from a Secret/ConfigMap). No-op when rrGitServer is disabled. */}} {{- define "retool.rrGitServer.validateBlobStorage" -}} -{{- if .Values.rrGitServer.enabled -}} +{{- if and .Values.rrGitServer.enabled (not .Values.rrGitServer.skipBlobStorageValidation) -}} {{- $hasDirectEnv := false -}} +{{- range $name, $value := .Values.env -}} +{{- if or (hasPrefix "RR_DEFAULT_" $name) (eq $name "RR_BLOB_STORAGE_PROVIDER") -}} +{{- $hasDirectEnv = true -}} +{{- end -}} +{{- end -}} {{- range .Values.environmentVariables -}} {{- if or (hasPrefix "RR_DEFAULT_" .name) (eq .name "RR_BLOB_STORAGE_PROVIDER") -}} {{- $hasDirectEnv = true -}} @@ -708,7 +965,7 @@ No-op when rrGitServer is disabled. 
{{- if $bs.gcs }}{{ $providers = append $providers "gcs" }}{{ end -}} {{- if $bs.azure }}{{ $providers = append $providers "azure" }}{{ end -}} {{- if ne (len $providers) 1 -}} -{{- fail "rrGitServer.enabled requires exactly one of blobStorage.s3, blobStorage.gcs, blobStorage.azure to be configured, or set RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* directly via environmentVariables / environmentSecrets" -}} +{{- fail "rrGitServer.enabled requires exactly one of blobStorage.s3, blobStorage.gcs, blobStorage.azure to be configured, or set RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* directly via env / environmentVariables / environmentSecrets. If those vars are supplied another way (e.g. envFrom), set rrGitServer.skipBlobStorageValidation=true to bypass this check." -}} {{- end -}} {{- end -}} {{- end -}} diff --git a/charts/retool/templates/_workers.tpl b/charts/retool/templates/_workers.tpl index 64aa977..b2ea566 100644 --- a/charts/retool/templates/_workers.tpl +++ b/charts/retool/templates/_workers.tpl @@ -213,7 +213,7 @@ spec: value: {{ template "retool.postgresql.ssl_enabled" $ }} - name: CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" $ }} - {{- if $.Values.jsExecutor.enabled }} + {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" $ }} {{- end }} diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml index 44c028f..40e87d8 100644 --- a/charts/retool/templates/agent_sandbox_device_plugin.yaml +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.sandboxNetwork.deployDaemonSet }} +{{- if and (eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.agentSandbox.sandboxNetwork.deployDaemonSet }} {{- $as := 
.Values.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} diff --git a/charts/retool/templates/agent_sandbox_networkpolicy.yaml b/charts/retool/templates/agent_sandbox_networkpolicy.yaml index cc6d07e..ec7bcc1 100644 --- a/charts/retool/templates/agent_sandbox_networkpolicy.yaml +++ b/charts/retool/templates/agent_sandbox_networkpolicy.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.networkPolicy.enabled }} +{{- if and (eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.agentSandbox.networkPolicy.enabled }} {{- $as := .Values.agentSandbox -}} {{- /* ======================================================================= diff --git a/charts/retool/templates/agent_sandbox_prepuller.yaml b/charts/retool/templates/agent_sandbox_prepuller.yaml index 854d1eb..33e2c19 100644 --- a/charts/retool/templates/agent_sandbox_prepuller.yaml +++ b/charts/retool/templates/agent_sandbox_prepuller.yaml @@ -1,4 +1,4 @@ -{{- if .Values.agentSandbox.enabled }} +{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} {{- $as := .Values.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} diff --git a/charts/retool/templates/agent_sandbox_seccomp.yaml b/charts/retool/templates/agent_sandbox_seccomp.yaml index c6149e6..7579b5c 100644 --- a/charts/retool/templates/agent_sandbox_seccomp.yaml +++ b/charts/retool/templates/agent_sandbox_seccomp.yaml @@ -1,4 +1,4 @@ -{{- if .Values.agentSandbox.enabled }} +{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} {{- $as := .Values.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default 
.Values.tolerations -}} diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml index 0dedb4e..2212a41 100644 --- a/charts/retool/templates/configmap_js_executor.yaml +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.jsExecutor.enabled }} +{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} apiVersion: v1 kind: ConfigMap metadata: diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 9f7e54b..fde125c 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -1,4 +1,5 @@ -{{- if .Values.agentSandbox.enabled }} +{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} +{{- include "retool.agentSandbox.validateSecrets" . }} {{- $as := .Values.agentSandbox -}} {{- $defaultSecretName := $as.externalSecret.name | default (include "retool.agentSandbox.name" .) -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} @@ -22,7 +23,9 @@ data: jwt-private-key: {{ $as.jwtPrivateKey | default "" | b64enc | quote }} encryption-key: {{ $as.encryptionKey | default "" | b64enc | quote }} api-secret: {{ $as.apiSecret | default "" | b64enc | quote }} - postgres-url: {{ $as.postgres.url | default "" | b64enc | quote }} + {{- if $as.postgres.url }} + postgres-url: {{ $as.postgres.url | b64enc | quote }} + {{- end }} --- {{- end }} {{- /* @@ -300,15 +303,7 @@ spec: value: {{ $as.controller.port | quote }} - name: STATE_BACKEND value: "postgres" - - name: AGENT_SANDBOX_POSTGRES_URL - {{- if $as.postgres.url }} - value: {{ $as.postgres.url | quote }} - {{- else }} - valueFrom: - secretKeyRef: - name: {{ $defaultSecretName }} - key: postgres-url - {{- end }} + {{- include "retool.agentSandbox.postgresUrlEnv" . 
| nindent 12 }} - name: AGENT_SANDBOX_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_SANDBOX_POSTGRES_POOL_MAX @@ -504,15 +499,7 @@ spec: value: {{ $as.proxy.port | quote }} - name: STATE_BACKEND value: "postgres" - - name: AGENT_SANDBOX_POSTGRES_URL - {{- if $as.postgres.url }} - value: {{ $as.postgres.url | quote }} - {{- else }} - valueFrom: - secretKeyRef: - name: {{ $defaultSecretName }} - key: postgres-url - {{- end }} + {{- include "retool.agentSandbox.postgresUrlEnv" . | nindent 12 }} - name: AGENT_SANDBOX_POSTGRES_SCHEMA value: {{ $as.postgres.schema | quote }} - name: AGENT_SANDBOX_POSTGRES_POOL_MAX diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index 123cab2..b741681 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -101,7 +101,11 @@ spec: {{- if not ( include "retool.jobRunner.enabled" . ) }} {{- $serviceType = append $serviceType "JOBS_RUNNER" }} {{- end }} - {{- if .Values.rrGitServer.enabled }} + {{- /* + Run the git server in-process on the main backend unless it has been + split out into its own deployment (rrGitServer.separate.enabled). + */}} + {{- if and .Values.rrGitServer.enabled (not (include "retool.rrGitServer.separateEnabled" .)) }} {{- $serviceType = append $serviceType "RR_GIT_SERVER" }} {{- end }} - name: SERVICE_TYPE @@ -184,7 +188,7 @@ spec: {{- end }} {{- end }} {{- end }} - {{- if .Values.jsExecutor.enabled }} + {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" . 
}} {{- end }} @@ -257,68 +261,18 @@ spec: {{- end }} {{- end }} {{- if .Values.rrGitServer.enabled }} - {{- $bs := .Values.blobStorage }} - {{- if $bs.s3 }} - - name: RR_BLOB_STORAGE_PROVIDER - value: "s3" - - name: RR_DEFAULT_S3_BUCKET - value: {{ $bs.s3.bucket | quote }} - {{- if $bs.s3.region }} - - name: RR_DEFAULT_S3_REGION - value: {{ $bs.s3.region | quote }} - {{- end }} - {{- if $bs.s3.endpoint }} - - name: RR_DEFAULT_S3_ENDPOINT - value: {{ $bs.s3.endpoint | quote }} - {{- end }} - {{- if $bs.s3.accessKeyId }} - - name: RR_DEFAULT_S3_ACCESS_KEY_ID - value: {{ $bs.s3.accessKeyId | quote }} - {{- end }} - {{- if $bs.s3.secretAccessKeySecretName }} - - name: RR_DEFAULT_S3_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: {{ $bs.s3.secretAccessKeySecretName }} - key: {{ $bs.s3.secretAccessKeySecretKey | default "secret-access-key" }} - {{- else if $bs.s3.secretAccessKey }} - - name: RR_DEFAULT_S3_SECRET_ACCESS_KEY - value: {{ $bs.s3.secretAccessKey | quote }} - {{- end }} - {{- else if $bs.gcs }} - - name: RR_BLOB_STORAGE_PROVIDER - value: "gcs" - - name: RR_DEFAULT_GCS_BUCKET - value: {{ $bs.gcs.bucket | quote }} - {{- if $bs.gcs.credentialsSecretName }} - - name: RR_DEFAULT_GCS_CREDENTIALS - valueFrom: - secretKeyRef: - name: {{ $bs.gcs.credentialsSecretName }} - key: {{ $bs.gcs.credentialsSecretKey | default "credentials.json" }} - {{- else if $bs.gcs.credentials }} - - name: RR_DEFAULT_GCS_CREDENTIALS - value: {{ $bs.gcs.credentials | quote }} - {{- end }} - {{- else if $bs.azure }} - - name: RR_BLOB_STORAGE_PROVIDER - value: "azure" - - name: RR_DEFAULT_AZURE_CONTAINER - value: {{ $bs.azure.container | quote }} - {{- if $bs.azure.connectionStringSecretName }} - - name: RR_DEFAULT_AZURE_CONNECTION_STRING - valueFrom: - secretKeyRef: - name: {{ $bs.azure.connectionStringSecretName }} - key: {{ $bs.azure.connectionStringSecretKey | default "connection-string" }} - {{- else if $bs.azure.connectionString }} - - name: RR_DEFAULT_AZURE_CONNECTION_STRING - 
value: {{ $bs.azure.connectionString | quote }} - {{- end }} - {{- end }} - {{- if .Values.rrGitServer.repackThreshold }} - - name: RR_GIT_REPACK_THRESHOLD - value: {{ .Values.rrGitServer.repackThreshold | quote }} + {{- if include "retool.rrGitServer.separateEnabled" . }} + {{- /* + git server runs in its own deployment; point the main backend's + proxy (/api/ai/rr/git/v2/*) at the git-server service instead of + localhost. + */}} + - name: RR_GIT_SERVER_HOST + value: {{ template "retool.rrGitServer.name" . }} + - name: RR_GIT_SERVER_PORT + value: {{ include "retool.rrGitServer.port" . | quote }} + {{- else }} + {{- include "retool.rrGitServer.commonEnv" . | nindent 10 }} {{- end }} {{- end }} {{- include "retool.env" .Values.env | nindent 10 }} diff --git a/charts/retool/templates/deployment_git_server.yaml b/charts/retool/templates/deployment_git_server.yaml new file mode 100644 index 0000000..4fbf3f8 --- /dev/null +++ b/charts/retool/templates/deployment_git_server.yaml @@ -0,0 +1,296 @@ +{{- if include "retool.rrGitServer.separateEnabled" . }} +{{- include "retool.rrGitServer.validateBlobStorage" . }} +{{- $gitServerPort := include "retool.rrGitServer.port" . }} +{{- $gitServerValues := .Values.rrGitServer.separate }} +{{- $gitServerService := $gitServerValues.service | default dict }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "retool.rrGitServer.name" . }} + labels: + {{- include "retool.labels" . | nindent 4 }} + {{- with $gitServerService.labels }} + {{- range $key, $value := . }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + {{- with $gitServerService.annotations }} + annotations: + {{- range $key, $value := . }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + selector: + retoolService: {{ template "retool.rrGitServer.name" . 
}} + ports: + - name: http-server + protocol: TCP + port: {{ $gitServerPort }} + targetPort: {{ $gitServerPort }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "retool.rrGitServer.name" . }} + labels: +{{- include "retool.labels" . | nindent 4 }} +{{- if .Values.deployment.annotations }} + annotations: +{{ toYaml .Values.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ $gitServerValues.replicaCount | default 1 }} + selector: + matchLabels: + retoolService: {{ template "retool.rrGitServer.name" . }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} +{{- with $gitServerValues.annotations }} +{{ toYaml . | indent 8 }} +{{- end }} + labels: + {{- include "retool.labels" . | nindent 8 }} + retoolService: {{ template "retool.rrGitServer.name" . }} + telemetry.retool.com/service-name: rr-git-server +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- with $gitServerValues.labels }} +{{ toYaml . | indent 8 }} +{{- end }} + spec: + serviceAccountName: {{ template "retool.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} +{{- if .Values.initContainers }} + initContainers: +{{- range $key, $value := .Values.initContainers }} + - name: "{{ $key }}" +{{ toYaml $value | indent 8 }} +{{- end }} +{{- end }} + containers: + - name: rr-git-server + image: "{{ .Values.image.repository }}:{{ required "Please set a value for .Values.image.tag" .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - bash + - -c + - chmod -R +x ./docker_scripts; sync; ./docker_scripts/wait-for-it.sh -t 0 {{ template "retool.postgresql.host" . }}:{{ template "retool.postgresql.port" . 
}}; ./docker_scripts/start_api.sh + {{- if .Values.commandline.args }} +{{ toYaml .Values.commandline.args | indent 10 }} + {{- end }} + env: + - name: DEPLOYMENT_TEMPLATE_TYPE + value: {{ template "retool.deploymentTemplateType" . }} + - name: DEPLOYMENT_TEMPLATE_VERSION + value: {{ template "retool.deploymentTemplateVersion" . }} + - name: NODE_ENV + value: production + - name: SERVICE_TYPE + value: RR_GIT_SERVER + - name: RR_GIT_SERVER_PORT + value: {{ $gitServerPort | quote }} + # The standalone git server does not run migrations; the main backend owns them. + - name: DISABLE_DATABASE_MIGRATIONS + value: "true" + - name: COOKIE_INSECURE + value: {{ .Values.config.useInsecureCookies | quote }} + - name: POSTGRES_HOST + value: {{ template "retool.postgresql.host" . }} + - name: POSTGRES_PORT + value: {{ template "retool.postgresql.port" . }} + - name: POSTGRES_DB + value: {{ template "retool.postgresql.database" . }} + - name: POSTGRES_USER + value: {{ template "retool.postgresql.user" . }} + - name: POSTGRES_SSL_ENABLED + value: {{ template "retool.postgresql.ssl_enabled" . }} + + {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} + + {{- if include "shouldIncludeConfigSecretsEnvVars" . }} + - name: LICENSE_KEY + valueFrom: + secretKeyRef: + {{- if .Values.config.licenseKeySecretName }} + name: {{ .Values.config.licenseKeySecretName }} + key: {{ .Values.config.licenseKeySecretKey | default "license-key" }} + {{- else }} + name: {{ template "retool.fullname" . }} + key: license-key + {{- end }} + - name: JWT_SECRET + valueFrom: + secretKeyRef: + {{- if .Values.config.jwtSecretSecretName }} + name: {{ .Values.config.jwtSecretSecretName }} + key: {{ .Values.config.jwtSecretSecretKey | default "jwt-secret" }} + {{- else }} + name: {{ template "retool.fullname" . 
}} + key: jwt-secret + {{- end }} + - name: ENCRYPTION_KEY + valueFrom: + secretKeyRef: + {{- if .Values.config.encryptionKeySecretName }} + name: {{ .Values.config.encryptionKeySecretName }} + key: {{ .Values.config.encryptionKeySecretKey | default "encryption-key" }} + {{- else }} + name: {{ template "retool.fullname" . }} + key: encryption-key + {{- end }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + {{- if .Values.postgresql.enabled }} + name: {{ template "retool.postgresql.fullname" . }} + # `postgres` is the default admin username for postgres in the subchart we use, so it needs the admin password + # if a different username is picked, then it needs the custom password instead. + {{- if eq .Values.postgresql.auth.username "postgres" }} + key: postgres-password + {{- else }} + key: password + {{- end }} + {{- else }} + {{- if .Values.config.postgresql.passwordSecretName }} + name: {{ .Values.config.postgresql.passwordSecretName }} + key: {{ .Values.config.postgresql.passwordSecretKey | default "postgresql-password" }} + {{- else }} + name: {{ template "retool.fullname" . }} + key: postgresql-password + {{- end }} + {{- end }} + {{- end }} + {{- include "retool.rrGitServer.commonEnv" . | nindent 10 }} + {{- include "retool.env" .Values.env | nindent 10 }} + {{- range .Values.environmentSecrets }} + - name: {{ .name }} + valueFrom: + secretKeyRef: + name: {{ .secretKeyRef.name }} + key: {{ .secretKeyRef.key }} + {{- end }} + {{- with .Values.environmentVariables }} +{{ toYaml . 
| indent 10 }} + {{- end }} + {{- if .Values.externalSecrets.enabled }} + envFrom: + - secretRef: + name: {{ .Values.externalSecrets.name }} + {{- range .Values.externalSecrets.secrets }} + - secretRef: + name: {{ .name }} + {{- end }} + {{- end }} + {{- if .Values.externalSecrets.externalSecretsOperator.enabled }} + envFrom: + {{- range .Values.externalSecrets.externalSecretsOperator.secretRef }} + - secretRef: + name: {{ .name }} + optional: {{ .optional | default false }} + {{- end }} + {{- end }} + ports: + - containerPort: {{ $gitServerPort }} + name: http-server + protocol: TCP + readinessProbe: + tcpSocket: + port: {{ $gitServerPort }} + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: {{ $gitServerPort }} + initialDelaySeconds: 30 + failureThreshold: 10 + timeoutSeconds: 10 + periodSeconds: 20 + resources: +{{ toYaml ($gitServerValues.resources | default .Values.resources) | indent 10 }} + volumeMounts: + {{- range $configFile := (keys .Values.files) }} + - name: {{ template "retool.name" $ }} + mountPath: "/usr/share/retool/config/{{ $configFile }}" + subPath: {{ $configFile }} + {{- end }} + {{if and .Values.persistentVolumeClaim.enabled .Values.persistentVolumeClaim.mountPath }} + - name: retool-pv + mountPath: {{ .Values.persistentVolumeClaim.mountPath }} + {{- end }} +{{- if .Values.extraVolumeMounts }} +{{ toYaml .Values.extraVolumeMounts | indent 8 }} +{{- end }} +{{- if .Values.securityContext.extraContainerSecurityContext }} + securityContext: +{{ toYaml .Values.securityContext.extraContainerSecurityContext | indent 10 }} +{{- end }} +{{- with .Values.extraContainers }} +{{ tpl . 
$ | indent 6 }} +{{- end }} +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} +{{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 8 }} + {{- end }} + {{- $affinity := $gitServerValues.affinity | default .Values.affinity }} + {{- if $affinity }} + affinity: +{{ toYaml $affinity | indent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} +{{- if .Values.securityContext.enabled }} + securityContext: + runAsUser: {{ .Values.securityContext.runAsUser }} + fsGroup: {{ .Values.securityContext.fsGroup }} +{{- if .Values.securityContext.extraSecurityContext }} +{{ toYaml .Values.securityContext.extraSecurityContext | indent 8 }} +{{- end }} +{{- end }} + volumes: +{{- range .Values.extraConfigMapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} +{{- end }} + {{- if .Values.persistentVolumeClaim.enabled }} + - name: retool-pv + persistentVolumeClaim: + claimName: {{ default (include "retool.fullname" .) .Values.persistentVolumeClaim.existingClaim }} + {{- end }} +{{- if .Values.extraVolumes }} +{{ toYaml .Values.extraVolumes | indent 8 }} +{{- end }} +{{- if .Values.podDisruptionBudget }} +--- +{{- if semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version }} +apiVersion: policy/v1 +{{- else }} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ template "retool.rrGitServer.name" . }} +spec: + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} + selector: + matchLabels: + retoolService: {{ template "retool.rrGitServer.name" . 
}} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/deployment_jobs.yaml b/charts/retool/templates/deployment_jobs.yaml index 009ade1..92639b7 100644 --- a/charts/retool/templates/deployment_jobs.yaml +++ b/charts/retool/templates/deployment_jobs.yaml @@ -93,7 +93,7 @@ spec: {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} - {{- if .Values.agentSandbox.enabled }} + {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} - name: RR_AGENT_PUBSUB_BACKEND value: "postgres" {{- end }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index 787daae..fe0f66f 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.jsExecutor.enabled }} +{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} apiVersion: apps/v1 kind: Deployment metadata: @@ -82,7 +82,7 @@ spec: containers: - name: js-executor image: "{{ .Values.jsExecutor.image.repository }}:{{ include "retool.jsExecutor.image.tag" . }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.jsExecutor.image.pullPolicy | default .Values.image.pullPolicy }} securityContext: capabilities: add: ["NET_ADMIN"] @@ -97,18 +97,15 @@ spec: - name: NODE_ENV value: production {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} - {{- range $key, $value := .Values.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- range .Values.environmentSecrets }} + {{- include "retool.env" .Values.jsExecutor.env | nindent 10 }} + {{- range .Values.jsExecutor.environmentSecrets }} - name: {{ .name }} valueFrom: secretKeyRef: name: {{ .secretKeyRef.name }} key: {{ .secretKeyRef.key }} {{- end }} - {{- with .Values.environmentVariables }} + {{- with .Values.jsExecutor.environmentVariables }} {{ toYaml . 
| indent 10 }} {{- end }} ports: diff --git a/charts/retool/templates/deployment_mcp.yaml b/charts/retool/templates/deployment_mcp.yaml index d53dd9d..788e7c8 100644 --- a/charts/retool/templates/deployment_mcp.yaml +++ b/charts/retool/templates/deployment_mcp.yaml @@ -1,4 +1,4 @@ -{{- if .Values.mcp.enabled }} +{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "mcp")) "1" }} {{- $mcpConfig := .Values.mcp.config | default dict }} {{- $hasOAuthIntrospectionAuthTokenEnv := false }} {{- range .Values.mcp.environmentVariables }} @@ -7,7 +7,7 @@ {{- end }} {{- end }} {{- if not (or $mcpConfig.oauthIntrospectionAuthTokenSecretName $mcpConfig.oauthIntrospectionAuthToken $hasOAuthIntrospectionAuthTokenEnv) }} -{{- fail "Please set .Values.mcp.config.oauthIntrospectionAuthTokenSecretName, .Values.mcp.config.oauthIntrospectionAuthToken, or an OAUTH_INTROSPECTION_AUTH_TOKEN entry in .Values.mcp.environmentVariables when .Values.mcp.enabled is true" }} +{{- fail "Please set .Values.mcp.config.oauthIntrospectionAuthTokenSecretName, .Values.mcp.config.oauthIntrospectionAuthToken, or an OAUTH_INTROSPECTION_AUTH_TOKEN entry in .Values.mcp.environmentVariables when the MCP server is enabled (.Values.mcp.enabled, or inherited from .Values.r2.enabled)" }} {{- end }} {{- $mcpInternalPort := .Values.mcp.service.internalPort | default 4010 }} apiVersion: v1 @@ -115,9 +115,17 @@ spec: value: {{ $mcpInternalPort | quote }} - name: RETOOL_BACKEND_URL value: {{ $mcpConfig.retoolBackendUrl | default (printf "http://%s:%v" (include "retool.fullname" .) .Values.service.externalPort) | quote }} - {{- if $mcpConfig.retoolGitServerUrl }} + {{- /* + Prefer an explicit mcp.config.retoolGitServerUrl; otherwise, when the + git server is split into its own deployment, auto-point MCP at it. + */}} + {{- $retoolGitServerUrl := $mcpConfig.retoolGitServerUrl }} + {{- if and (not $retoolGitServerUrl) (include "retool.rrGitServer.separateEnabled" .) 
}} + {{- $retoolGitServerUrl = include "retool.rrGitServer.url" . }} + {{- end }} + {{- if $retoolGitServerUrl }} - name: RETOOL_GIT_SERVER_URL - value: {{ $mcpConfig.retoolGitServerUrl | quote }} + value: {{ $retoolGitServerUrl | quote }} {{- end }} {{- if $mcpConfig.retoolUrl }} - name: RETOOL_URL diff --git a/charts/retool/templates/deployment_workflows.yaml b/charts/retool/templates/deployment_workflows.yaml index a589907..a453f5a 100644 --- a/charts/retool/templates/deployment_workflows.yaml +++ b/charts/retool/templates/deployment_workflows.yaml @@ -153,7 +153,7 @@ spec: {{- end }} {{- end }} {{- end }} - {{- if .Values.jsExecutor.enabled }} + {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" . }} {{- end }} diff --git a/charts/retool/templates/httproute.yaml b/charts/retool/templates/httproute.yaml index a182088..d084c70 100644 --- a/charts/retool/templates/httproute.yaml +++ b/charts/retool/templates/httproute.yaml @@ -56,7 +56,7 @@ spec: - name: {{ $fullName }} port: {{ $svcPort }} {{- end }} -{{- if and .Values.agentSandbox.enabled .Values.agentSandbox.frontendWsProxyDomain }} +{{- if and (eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.agentSandbox.frontendWsProxyDomain }} --- apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 58c1411..3afee13 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -564,8 +564,9 @@ multiplayer: labels: {} mcp: - # Enable this to run Retool's MCP server as a separate deployment. - enabled: false + # Run Retool's MCP server as a separate deployment. Inherits .Values.r2.enabled + # when left unset (null); set true/false to override. + enabled: null replicaCount: 1 @@ -728,6 +729,41 @@ rrGitServer: # Backend default is 100; unset to inherit it. 
repackThreshold: ~ + # Escape hatch for the blob-storage validation below. The chart can only + # inspect blobStorage, env, environmentVariables, and environmentSecrets at + # template time; it cannot see env vars injected via envFrom (Secret/ConfigMap + # splat). Set this to true to bypass the check when RR_BLOB_STORAGE_PROVIDER / + # RR_DEFAULT_* are provided that way. + skipBlobStorageValidation: false + + # Optionally split the git server out of the main backend into its own + # deployment + service (mirrors how the workload is split in Retool Cloud). + # Requires rrGitServer.enabled: true. When enabled: + # - a dedicated -git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER + # - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git + # traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT + # - the MCP server (if enabled) is auto-pointed at the same service unless + # mcp.config.retoolGitServerUrl is set explicitly + # The blobStorage config below is rendered onto the git-server pod instead of + # the main backend in this mode. + separate: + enabled: false + replicaCount: 1 + # Port the git server listens on (RR_GIT_SERVER_PORT) and that its service exposes. + port: 3010 + # Pod resource requests/limits. Falls back to top-level `resources` if unset. + resources: {} + # Falls back to top-level `affinity` if unset. + affinity: {} + # Annotations/labels applied to the git-server pod template. + annotations: {} + labels: {} + # Annotations/labels applied to the git-server Service (kept separate from + # the pod ones above). + service: + annotations: {} + labels: {} + # Shared blob-storage config used by git_server (and other features that # need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. 
# Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT__* env vars on @@ -804,9 +840,20 @@ codeExecutor: securityContext: privileged: true +# === R2 (Retool agent runtime) ============================================= +# Master switch for the whole R2 stack: the r2Agent worker, jsExecutor, +# agentSandbox, and mcp server. Set `r2.enabled: true` to turn them all on with +# one line. Each component's own `enabled` (left null by default) inherits this +# switch; set a component's `enabled` to true/false to override the master for +# that component only. Shared R2 configuration can be added under this block +# later. +r2: + enabled: false + # JS Executor jsExecutor: - enabled: false + # Inherits .Values.r2.enabled when left unset (null); set true/false to override. + enabled: null image: repository: tryretool/js-executor-service @@ -818,6 +865,12 @@ jsExecutor: seccompLocalhostProfile: profiles/nsjail-seccomp.json + # JS-executor-specific environment; not inherited from the top-level + # .Values.env / .Values.environmentSecrets / .Values.environmentVariables. + env: {} + environmentSecrets: [] + environmentVariables: [] + # Annotations for JS executor pods annotations: {} @@ -830,14 +883,16 @@ jsExecutor: # Config affinity and anti-affinity rules for the JS executor pods affinity: {} - # Resources for the JS executor + # Resources for the JS executor. Memory request and limit are kept equal: + # JSE reads its memory limit and rejects requests at 80% of it, so the + # request must reserve the full amount to avoid premature rejections. resources: limits: - cpu: 2000m - memory: 8192Mi + cpu: 6000m + memory: 6Gi requests: - cpu: 1000m - memory: 4096Mi + cpu: 6000m + memory: 6Gi agents: # Enable AI Agents @@ -878,7 +933,8 @@ agents: # R2 Agent: server-side agent loop worker (independent from agents above). r2Agent: - enabled: false + # Inherits .Values.r2.enabled when left unset (null); set true/false to override. 
+ enabled: null # Labels for R2 agent worker pods labels: {} @@ -905,7 +961,8 @@ r2Agent: # Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), # and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. agentSandbox: - enabled: false + # Inherits .Values.r2.enabled when left unset (null); set true/false to override. + enabled: null image: repository: tryretool/agent-sandbox-service @@ -928,33 +985,57 @@ agentSandbox: # Labels for agent sandbox pods labels: {} - # Pre-existing K8s Secret containing keys: jwt-public-key, jwt-private-key, - # encryption-key, api-secret, postgres-url. When set, the chart references - # this secret by default for all secret-backed env vars. - # - # Individual keys can still be overridden by setting the corresponding - # plaintext values below (e.g. jwtPublicKey, postgres.url). When a plaintext - # value is provided alongside externalSecret.name, the plaintext value takes - # precedence for that key and the external secret is used for the rest. + # === Secrets ============================================================ + # Provide each secret as a plaintext value below, OR set externalSecret.name + # to a pre-existing Secret with keys jwt-public-key, jwt-private-key, + # encryption-key, api-secret, postgres-url. A plaintext value always wins over + # the external secret for that key. externalSecret: - name: '' - - # Secrets — used directly when externalSecret.name is not set, or as - # per-key overrides when externalSecret.name IS set. - # JWT key pair (ES256) for sandbox token authentication. - jwtPublicKey: '' - jwtPrivateKey: '' - # Hex-encoded 256-bit key for encrypting credentials stored in state backend. - # Must match the backend's AGENT_SANDBOX_ENCRYPTION_KEY. - encryptionKey: '' - # API secret for admin/test endpoints. - apiSecret: '' - - # Postgres state backend (shared by controller and proxy for state coordination). 
- # Connection string for the agent sandbox's state database. When set, takes - # precedence over the postgres-url key in externalSecret. + name: '' # optional: existing Secret holding all keys below + + jwtPublicKey: '' # REQUIRED (ES256) unless provided via externalSecret + jwtPrivateKey: '' # REQUIRED (ES256) unless provided via externalSecret + encryptionKey: '' # optional: hex 256-bit; must match backend AGENT_SANDBOX_ENCRYPTION_KEY + apiSecret: '' # optional: admin/test endpoints + + # === Postgres state backend ============================================= + # By DEFAULT (all options below left blank) the agent sandbox reuses the + # backend's Postgres connection from config.postgresql / the postgresql + # subchart -- same instance and database, separate schema (see schema below). + # So enabling it on an existing deployment needs nothing here. (Exception: + # if the backend's DB password is supplied via external secrets / envFrom, it + # can't be inherited by a separate pod -- set an option below in that case.) + # To point the sandbox at a different database, set exactly ONE option: postgres: + # -- Option 1: plaintext DSN -- url: '' + + # -- Option 2: assemble from fields -- + # The password is passed via PGPASSWORD (never embedded in the URL), so any + # characters are safe and a password-only secret can be reused as-is. + # Set either password or passwordSecretName. + # user/database are embedded in the DSN verbatim (user may contain '@', e.g. + # Azure user@servername); for values with : / ? # use Option 1 or 3. + host: '' + port: 5432 + database: '' + user: '' + password: '' + passwordSecretName: '' + passwordSecretKey: 'password' + + # -- Option 3: existing Secret holding the full DSN -- + urlSecretName: '' + urlSecretKey: 'postgres-url' + + # -- Option 4: reuse externalSecret.name (its postgres-url key) -- + # Selected by setting agentSandbox.externalSecret.name (in the Secrets + # section above), not by anything here. 
Used when options 1-3 are blank. + # + # If options 1-4 are ALL unset, the default (inherit config.postgresql) + # applies -- see the note at the top of this block. + + # -- Optional tuning (defaults shown) -- schema: 'agent_executor' poolMax: 10 sweeperIntervalMs: 60000 @@ -1096,8 +1177,13 @@ agentSandbox: # controllerUrl and proxyUrl default to internal service URLs when empty. controllerUrl: '' proxyUrl: '' - # Required: public URL for frontend browsers to reach the proxy via WebSocket. - # e.g. https://sandbox.yourdomain.com + # Public URL for frontend browsers to reach the proxy via WebSocket. + # Leave EMPTY for self-hosted: the backend then serves the sandbox same-origin + # as the editor (your Retool base URL) and the front server reverse-proxies the + # /sandbox/* WS+Vite paths to the in-cluster proxy Service — so no dedicated + # proxy domain or ingress is required, and your catch-all ingress is untouched. + # Only set this (e.g. https://sandbox.yourdomain.com) if you deliberately want + # the proxy on a separate domain, in which case also enable proxy.ingress above. frontendWsProxyDomain: '' # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. proxyDomain: '' diff --git a/values.yaml b/values.yaml index 58c1411..3afee13 100644 --- a/values.yaml +++ b/values.yaml @@ -564,8 +564,9 @@ multiplayer: labels: {} mcp: - # Enable this to run Retool's MCP server as a separate deployment. - enabled: false + # Run Retool's MCP server as a separate deployment. Inherits .Values.r2.enabled + # when left unset (null); set true/false to override. + enabled: null replicaCount: 1 @@ -728,6 +729,41 @@ rrGitServer: # Backend default is 100; unset to inherit it. repackThreshold: ~ + # Escape hatch for the blob-storage validation below. The chart can only + # inspect blobStorage, env, environmentVariables, and environmentSecrets at + # template time; it cannot see env vars injected via envFrom (Secret/ConfigMap + # splat). 
Set this to true to bypass the check when RR_BLOB_STORAGE_PROVIDER / + # RR_DEFAULT_* are provided that way. + skipBlobStorageValidation: false + + # Optionally split the git server out of the main backend into its own + # deployment + service (mirrors how the workload is split in Retool Cloud). + # Requires rrGitServer.enabled: true. When enabled: + # - a dedicated -git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER + # - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git + # traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT + # - the MCP server (if enabled) is auto-pointed at the same service unless + # mcp.config.retoolGitServerUrl is set explicitly + # The blobStorage config below is rendered onto the git-server pod instead of + # the main backend in this mode. + separate: + enabled: false + replicaCount: 1 + # Port the git server listens on (RR_GIT_SERVER_PORT) and that its service exposes. + port: 3010 + # Pod resource requests/limits. Falls back to top-level `resources` if unset. + resources: {} + # Falls back to top-level `affinity` if unset. + affinity: {} + # Annotations/labels applied to the git-server pod template. + annotations: {} + labels: {} + # Annotations/labels applied to the git-server Service (kept separate from + # the pod ones above). + service: + annotations: {} + labels: {} + # Shared blob-storage config used by git_server (and other features that # need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. # Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT__* env vars on @@ -804,9 +840,20 @@ codeExecutor: securityContext: privileged: true +# === R2 (Retool agent runtime) ============================================= +# Master switch for the whole R2 stack: the r2Agent worker, jsExecutor, +# agentSandbox, and mcp server. Set `r2.enabled: true` to turn them all on with +# one line. 
Each component's own `enabled` (left null by default) inherits this +# switch; set a component's `enabled` to true/false to override the master for +# that component only. Shared R2 configuration can be added under this block +# later. +r2: + enabled: false + # JS Executor jsExecutor: - enabled: false + # Inherits .Values.r2.enabled when left unset (null); set true/false to override. + enabled: null image: repository: tryretool/js-executor-service @@ -818,6 +865,12 @@ jsExecutor: seccompLocalhostProfile: profiles/nsjail-seccomp.json + # JS-executor-specific environment; not inherited from the top-level + # .Values.env / .Values.environmentSecrets / .Values.environmentVariables. + env: {} + environmentSecrets: [] + environmentVariables: [] + # Annotations for JS executor pods annotations: {} @@ -830,14 +883,16 @@ jsExecutor: # Config affinity and anti-affinity rules for the JS executor pods affinity: {} - # Resources for the JS executor + # Resources for the JS executor. Memory request and limit are kept equal: + # JSE reads its memory limit and rejects requests at 80% of it, so the + # request must reserve the full amount to avoid premature rejections. resources: limits: - cpu: 2000m - memory: 8192Mi + cpu: 6000m + memory: 6Gi requests: - cpu: 1000m - memory: 4096Mi + cpu: 6000m + memory: 6Gi agents: # Enable AI Agents @@ -878,7 +933,8 @@ agents: # R2 Agent: server-side agent loop worker (independent from agents above). r2Agent: - enabled: false + # Inherits .Values.r2.enabled when left unset (null); set true/false to override. + enabled: null # Labels for R2 agent worker pods labels: {} @@ -905,7 +961,8 @@ r2Agent: # Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), # and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. agentSandbox: - enabled: false + # Inherits .Values.r2.enabled when left unset (null); set true/false to override. 
+ enabled: null image: repository: tryretool/agent-sandbox-service @@ -928,33 +985,57 @@ agentSandbox: # Labels for agent sandbox pods labels: {} - # Pre-existing K8s Secret containing keys: jwt-public-key, jwt-private-key, - # encryption-key, api-secret, postgres-url. When set, the chart references - # this secret by default for all secret-backed env vars. - # - # Individual keys can still be overridden by setting the corresponding - # plaintext values below (e.g. jwtPublicKey, postgres.url). When a plaintext - # value is provided alongside externalSecret.name, the plaintext value takes - # precedence for that key and the external secret is used for the rest. + # === Secrets ============================================================ + # Provide each secret as a plaintext value below, OR set externalSecret.name + # to a pre-existing Secret with keys jwt-public-key, jwt-private-key, + # encryption-key, api-secret, postgres-url. A plaintext value always wins over + # the external secret for that key. externalSecret: - name: '' - - # Secrets — used directly when externalSecret.name is not set, or as - # per-key overrides when externalSecret.name IS set. - # JWT key pair (ES256) for sandbox token authentication. - jwtPublicKey: '' - jwtPrivateKey: '' - # Hex-encoded 256-bit key for encrypting credentials stored in state backend. - # Must match the backend's AGENT_SANDBOX_ENCRYPTION_KEY. - encryptionKey: '' - # API secret for admin/test endpoints. - apiSecret: '' - - # Postgres state backend (shared by controller and proxy for state coordination). - # Connection string for the agent sandbox's state database. When set, takes - # precedence over the postgres-url key in externalSecret. 
+ name: '' # optional: existing Secret holding all keys below + + jwtPublicKey: '' # REQUIRED (ES256) unless provided via externalSecret + jwtPrivateKey: '' # REQUIRED (ES256) unless provided via externalSecret + encryptionKey: '' # optional: hex 256-bit; must match backend AGENT_SANDBOX_ENCRYPTION_KEY + apiSecret: '' # optional: admin/test endpoints + + # === Postgres state backend ============================================= + # By DEFAULT (all options below left blank) the agent sandbox reuses the + # backend's Postgres connection from config.postgresql / the postgresql + # subchart -- same instance and database, separate schema (see schema below). + # So enabling it on an existing deployment needs nothing here. (Exception: + # if the backend's DB password is supplied via external secrets / envFrom, it + # can't be inherited by a separate pod -- set an option below in that case.) + # To point the sandbox at a different database, set exactly ONE option: postgres: + # -- Option 1: plaintext DSN -- url: '' + + # -- Option 2: assemble from fields -- + # The password is passed via PGPASSWORD (never embedded in the URL), so any + # characters are safe and a password-only secret can be reused as-is. + # Set either password or passwordSecretName. + # user/database are embedded in the DSN verbatim (user may contain '@', e.g. + # Azure user@servername); for values with : / ? # use Option 1 or 3. + host: '' + port: 5432 + database: '' + user: '' + password: '' + passwordSecretName: '' + passwordSecretKey: 'password' + + # -- Option 3: existing Secret holding the full DSN -- + urlSecretName: '' + urlSecretKey: 'postgres-url' + + # -- Option 4: reuse externalSecret.name (its postgres-url key) -- + # Selected by setting agentSandbox.externalSecret.name (in the Secrets + # section above), not by anything here. Used when options 1-3 are blank. + # + # If options 1-4 are ALL unset, the default (inherit config.postgresql) + # applies -- see the note at the top of this block. 
+ + # -- Optional tuning (defaults shown) -- schema: 'agent_executor' poolMax: 10 sweeperIntervalMs: 60000 @@ -1096,8 +1177,13 @@ agentSandbox: # controllerUrl and proxyUrl default to internal service URLs when empty. controllerUrl: '' proxyUrl: '' - # Required: public URL for frontend browsers to reach the proxy via WebSocket. - # e.g. https://sandbox.yourdomain.com + # Public URL for frontend browsers to reach the proxy via WebSocket. + # Leave EMPTY for self-hosted: the backend then serves the sandbox same-origin + # as the editor (your Retool base URL) and the front server reverse-proxies the + # /sandbox/* WS+Vite paths to the in-cluster proxy Service — so no dedicated + # proxy domain or ingress is required, and your catch-all ingress is untouched. + # Only set this (e.g. https://sandbox.yourdomain.com) if you deliberately want + # the proxy on a separate domain, in which case also enable proxy.ingress above. frontendWsProxyDomain: '' # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. proxyDomain: '' From 323d30194691dbaa94c197ca0a6f51e3289eac96 Mon Sep 17 00:00:00 2001 From: jatin Date: Wed, 10 Jun 2026 15:04:29 -0400 Subject: [PATCH 26/37] [fix][r2] drop mcp from the r2.enabled master switch (#316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mcp requires an OAuth introspection token to template (oauthIntrospectionAuthToken / secret / env), unlike the other R2 components. Having mcp inherit the r2.enabled master switch meant `r2.enabled: true` hard-failed out of the box ("Please set ...oauthIntrospectionAuthToken... when the MCP server is enabled") unless the user also configured mcp — defeating the one-line enable. Make mcp independent: mcp.enabled defaults to false and is read directly (deployment_mcp.yaml gates on .Values.mcp.enabled), so the master switch governs only r2Agent/jsExecutor/agentSandbox. mcp stays opt-in via mcp.enabled: true. 
Update the componentEnabled doc, the OAuth fail message, and the test-r2-enabled-option fixture (mcp must no longer render from r2.enabled alone). Co-authored-by: Claude Opus 4.8 (1M context) --- charts/retool/ci/test-r2-enabled-option.yaml | 16 +++++++--------- charts/retool/templates/_helpers.tpl | 2 +- charts/retool/templates/deployment_mcp.yaml | 4 ++-- charts/retool/values.yaml | 7 ++++--- values.yaml | 7 ++++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/charts/retool/ci/test-r2-enabled-option.yaml b/charts/retool/ci/test-r2-enabled-option.yaml index 0a05b16..ce8718b 100644 --- a/charts/retool/ci/test-r2-enabled-option.yaml +++ b/charts/retool/ci/test-r2-enabled-option.yaml @@ -1,10 +1,12 @@ # R2 master switch — single flag turns on the whole R2 stack. # -# Exercises the `r2.enabled: true` inherit path: r2Agent, jsExecutor, -# agentSandbox, and mcp all leave their own `enabled` unset (null) and inherit -# the master switch. This guards `retool.r2.componentEnabled` and the -# helper-routed cross-component env wiring (backend/workflows/jobs/workers read -# effective-enabled, not the raw per-component flag). +# Exercises the `r2.enabled: true` inherit path: r2Agent, jsExecutor, and +# agentSandbox all leave their own `enabled` unset (null) and inherit the master +# switch. This guards `retool.r2.componentEnabled` and the helper-routed +# cross-component env wiring (backend/workflows/jobs/workers read +# effective-enabled, not the raw per-component flag). mcp is intentionally NOT +# part of the master switch (it is opt-in via mcp.enabled, since it needs its +# own OAuth config), so it must not render from r2.enabled alone. # # Secrets/config below are only what each component requires to template when # enabled; none of them set `*.enabled`, so enablement comes solely from r2. 
@@ -26,7 +28,3 @@ agentSandbox: enabled: false networkPolicy: enabled: false - -mcp: - config: - oauthIntrospectionAuthToken: test-oauth-introspection-token diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index f5734c3..fb0460a 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -420,7 +420,7 @@ Usage: (include "retool.agents.enabled" .) {{- end -}} {{/* -Resolve whether an R2 component (r2Agent, jsExecutor, agentSandbox, mcp) is +Resolve whether an R2 component (r2Agent, jsExecutor, agentSandbox) is enabled. The component's own `enabled` wins when explicitly set to true/false; when left unset (null) it inherits the shared master switch .Values.r2.enabled. Usage: (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) diff --git a/charts/retool/templates/deployment_mcp.yaml b/charts/retool/templates/deployment_mcp.yaml index 788e7c8..6577949 100644 --- a/charts/retool/templates/deployment_mcp.yaml +++ b/charts/retool/templates/deployment_mcp.yaml @@ -1,4 +1,4 @@ -{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "mcp")) "1" }} +{{- if .Values.mcp.enabled }} {{- $mcpConfig := .Values.mcp.config | default dict }} {{- $hasOAuthIntrospectionAuthTokenEnv := false }} {{- range .Values.mcp.environmentVariables }} @@ -7,7 +7,7 @@ {{- end }} {{- end }} {{- if not (or $mcpConfig.oauthIntrospectionAuthTokenSecretName $mcpConfig.oauthIntrospectionAuthToken $hasOAuthIntrospectionAuthTokenEnv) }} -{{- fail "Please set .Values.mcp.config.oauthIntrospectionAuthTokenSecretName, .Values.mcp.config.oauthIntrospectionAuthToken, or an OAUTH_INTROSPECTION_AUTH_TOKEN entry in .Values.mcp.environmentVariables when the MCP server is enabled (.Values.mcp.enabled, or inherited from .Values.r2.enabled)" }} +{{- fail "Please set .Values.mcp.config.oauthIntrospectionAuthTokenSecretName, .Values.mcp.config.oauthIntrospectionAuthToken, or an 
OAUTH_INTROSPECTION_AUTH_TOKEN entry in .Values.mcp.environmentVariables when the MCP server is enabled (.Values.mcp.enabled)" }} {{- end }} {{- $mcpInternalPort := .Values.mcp.service.internalPort | default 4010 }} apiVersion: v1 diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 3afee13..bb7edae 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -564,9 +564,10 @@ multiplayer: labels: {} mcp: - # Run Retool's MCP server as a separate deployment. Inherits .Values.r2.enabled - # when left unset (null); set true/false to override. - enabled: null + # Run Retool's MCP server as a separate deployment. Independent of the + # .Values.r2.enabled master switch (the MCP server needs its own OAuth + # introspection config, so it is opt-in): set true to enable. + enabled: false replicaCount: 1 diff --git a/values.yaml b/values.yaml index 3afee13..bb7edae 100644 --- a/values.yaml +++ b/values.yaml @@ -564,9 +564,10 @@ multiplayer: labels: {} mcp: - # Run Retool's MCP server as a separate deployment. Inherits .Values.r2.enabled - # when left unset (null); set true/false to override. - enabled: null + # Run Retool's MCP server as a separate deployment. Independent of the + # .Values.r2.enabled master switch (the MCP server needs its own OAuth + # introspection config, so it is opt-in): set true to enable. + enabled: false replicaCount: 1 From 44e775416e55631cea787d80d2c47b4b282884fa Mon Sep 17 00:00:00 2001 From: Matt Westrik Date: Wed, 10 Jun 2026 12:59:04 -0700 Subject: [PATCH 27/37] Add controller.scaling.perUserSandboxLimit and sandbox.sandboxGlobalLifetimeMs (#317) Wire up controller.scaling.perUserSandboxLimit config option (default 5) and sandbox.sandboxGlobalLifetimeMs (default 2.5 hrs). Remove environment variables that are no longer used: SLOTS_PER_POD, EXECUTOR_{MIN,MAX}_REPLICAS, SCALE_{UP,DOWN}_THRESHOLD, SCALE_DOWN_GRACE_PERIOD_MS. 
--- .../templates/deployment_agent_sandbox.yaml | 15 +++------------ charts/retool/values.yaml | 11 +++++------ values.yaml | 11 +++++------ 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index fde125c..5d5d2ed 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -175,6 +175,7 @@ data: ,{"name": "POD_IP", "valueFrom": {"fieldRef": {"fieldPath": "status.podIP"}}} ,{"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"} ,{"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"} + ,{"name": "SANDBOX_GLOBAL_LIFETIME_MS", "value": "{{ $as.sandbox.sandboxGlobalLifetimeMs }}"} {{- if $as.jwtPublicKey }} ,{"name": "AGENT_SANDBOX_JWT_PUBLIC_KEY", "value": "{{ $as.jwtPublicKey }}"} {{- else if $as.externalSecret.name }} @@ -326,18 +327,6 @@ spec: value: {{ include "retool.agentSandbox.name" . }} - name: EXECUTOR_SERVICE_NAME value: {{ include "retool.agentSandbox.name" . 
}}-pods - - name: SLOTS_PER_POD - value: {{ $as.controller.scaling.slotsPerPod | quote }} - - name: EXECUTOR_MIN_REPLICAS - value: {{ $as.controller.scaling.minReplicas | quote }} - - name: EXECUTOR_MAX_REPLICAS - value: {{ $as.controller.scaling.maxReplicas | quote }} - - name: SCALE_UP_THRESHOLD - value: {{ $as.controller.scaling.scaleUpThreshold | quote }} - - name: SCALE_DOWN_THRESHOLD - value: {{ $as.controller.scaling.scaleDownThreshold | quote }} - - name: SCALE_DOWN_GRACE_PERIOD_MS - value: {{ $as.controller.scaling.scaleDownGracePeriodMs | quote }} - name: PREWARM_POOL_SIZE value: {{ $as.controller.scaling.prewarmPoolSize | quote }} - name: MAX_TOTAL_JOBS @@ -354,6 +343,8 @@ spec: value: {{ $as.controller.scaling.leaderTtlMs | quote }} - name: LEADER_RENEW_MS value: {{ $as.controller.scaling.leaderRenewMs | quote }} + - name: PER_USER_CONCURRENCY_PAID + value: {{ $as.controller.scaling.perUserSandboxLimit | quote }} - name: DEPLOYED_IMAGE_TAG value: {{ $as.image.tag | default .Values.image.tag | quote }} - name: JOB_TEMPLATE_CONFIGMAP diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index bb7edae..9fc552f 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -1101,6 +1101,10 @@ agentSandbox: memory: 4Gi # Idle timeout (ms) before an unassigned sandbox self-terminates. sandboxIdleTimeoutMs: 600000 + # Hard ceiling (ms) on total sandbox lifetime, regardless of activity. When + # reached, the sandbox is destroyed (deferred until the current agent loop + # ends). Defaults to 2.5 hours. 
+ sandboxGlobalLifetimeMs: 9000000 tmpDirSizeLimit: 20Gi # Separate limit for the rootfs-appjob volume — the sandbox root filesystem # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi @@ -1121,12 +1125,6 @@ agentSandbox: cpu: 500m memory: 512Mi scaling: - slotsPerPod: 4 - minReplicas: 1 - maxReplicas: 10 - scaleUpThreshold: 2 - scaleDownThreshold: 8 - scaleDownGracePeriodMs: 300000 prewarmPoolSize: 5 maxTotalJobs: 50 maxConcurrentCreates: 3 @@ -1135,6 +1133,7 @@ agentSandbox: reconcileIntervalMs: 5000 leaderTtlMs: 10000 leaderRenewMs: 3000 + perUserSandboxLimit: 5 # Proxy: HTTP proxy for sandbox egress with credential injection. # The proxy must be reachable by frontend browsers for WebSocket connections. diff --git a/values.yaml b/values.yaml index bb7edae..9fc552f 100644 --- a/values.yaml +++ b/values.yaml @@ -1101,6 +1101,10 @@ agentSandbox: memory: 4Gi # Idle timeout (ms) before an unassigned sandbox self-terminates. sandboxIdleTimeoutMs: 600000 + # Hard ceiling (ms) on total sandbox lifetime, regardless of activity. When + # reached, the sandbox is destroyed (deferred until the current agent loop + # ends). Defaults to 2.5 hours. + sandboxGlobalLifetimeMs: 9000000 tmpDirSizeLimit: 20Gi # Separate limit for the rootfs-appjob volume — the sandbox root filesystem # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi @@ -1121,12 +1125,6 @@ agentSandbox: cpu: 500m memory: 512Mi scaling: - slotsPerPod: 4 - minReplicas: 1 - maxReplicas: 10 - scaleUpThreshold: 2 - scaleDownThreshold: 8 - scaleDownGracePeriodMs: 300000 prewarmPoolSize: 5 maxTotalJobs: 50 maxConcurrentCreates: 3 @@ -1135,6 +1133,7 @@ agentSandbox: reconcileIntervalMs: 5000 leaderTtlMs: 10000 leaderRenewMs: 3000 + perUserSandboxLimit: 5 # Proxy: HTTP proxy for sandbox egress with credential injection. # The proxy must be reachable by frontend browsers for WebSocket connections. 
From 9398fb7d86f4c2d27989f313751e3efb509ecdfc Mon Sep 17 00:00:00 2001 From: Mert Bozfakioglu Date: Wed, 10 Jun 2026 13:58:16 -0700 Subject: [PATCH 28/37] Sync code-executor nsjail seccomp profile with retool-k8s (#318) retool-k8s (helm/retool-workflow-jail/files/nsjail-seccomp.json) is the source of truth for the nsjail seccomp profile. The public chart copy had drifted in its `socket` syscall family rules; this re-syncs it verbatim so the public jsExecutor/codeExecutor sandbox matches what we run internally. Co-authored-by: Cursor --- charts/retool/files/nsjail-seccomp.json | 48 +++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/charts/retool/files/nsjail-seccomp.json b/charts/retool/files/nsjail-seccomp.json index 42c9c5c..a9c55e9 100644 --- a/charts/retool/files/nsjail-seccomp.json +++ b/charts/retool/files/nsjail-seccomp.json @@ -415,8 +415,52 @@ "args": [ { "index": 0, - "value": 40, - "op": "SCMP_CMP_NE" + "value": 1, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["socket"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 2, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["socket"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 10, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["socket"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 16, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": ["socket"], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 17, + "op": "SCMP_CMP_EQ" } ] }, From 6d636b1f788d3a57c633089df8c00eb6b83d3fd4 Mon Sep 17 00:00:00 2001 From: Mert Bozfakioglu Date: Wed, 10 Jun 2026 18:19:58 -0700 Subject: [PATCH 29/37] [fix] [plat] Set appArmorProfile Unconfined for js-executor (#320) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Set appArmorProfile Unconfined for js-executor nsjail (used by js-executor to sandbox user code) remounts the rootfs and sets up its 
mount namespace at startup. On nodes where the container runtime attaches an AppArmor profile to non-privileged containers — e.g. GKE Container-Optimized OS, where containerd applies cri-containerd.apparmor.d with `deny mount` — that mount is rejected with EPERM and the sandbox fails to launch. EKS (Amazon Linux 2023) uses SELinux and attaches no AppArmor profile, so this never surfaced there. Run js-executor with appArmorProfile Unconfined so nsjail can set up its sandbox, mirroring the existing agent-sandbox container. The Localhost seccomp profile continues to provide syscall-level isolation. Co-authored-by: Cursor * Remove explanatory comment from js-executor appArmorProfile Co-authored-by: Cursor * Use AppArmor annotation instead of securityContext field for js-executor The appArmorProfile securityContext field only exists in the Kubernetes API from v1.30+, so strict kubeconform validation against v1.27-v1.29 rejected it with "additionalProperties 'appArmorProfile' not allowed". Switch to the container.apparmor.security.beta.kubernetes.io/ pod annotation, which is honored across all supported Kubernetes versions and is not subject to schema validation. 
Co-authored-by: Cursor --------- Co-authored-by: Cursor --- charts/retool/templates/deployment_js_executor.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index fe0f66f..27c82cc 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -24,6 +24,7 @@ spec: metadata: annotations: checksum/seccomp: {{ .Files.Get "files/nsjail-seccomp.json" | sha256sum }} + container.apparmor.security.beta.kubernetes.io/js-executor: unconfined {{- if .Values.podAnnotations }} {{ toYaml .Values.podAnnotations | indent 8 }} {{- end }} From 4e7ac78b45a2afec520b3e949c8ef751957d791d Mon Sep 17 00:00:00 2001 From: Mert Bozfakioglu Date: Wed, 10 Jun 2026 18:48:59 -0700 Subject: [PATCH 30/37] [feat][plat-1012] Add opt-in seccomp sandboxing for code-executor (codeExecutor.useSeccompProfile) (#311) * Run code-executor unprivileged with seccomp on k8s >= 1.33 On Kubernetes 1.33+ (where the ProcMountType and UserNamespacesSupport feature gates are on by default), the code-executor now runs unprivileged using a localhost seccomp profile, NET_ADMIN, an unmasked /proc, and user namespaces - mirroring how the JS executor sandboxes itself. The nsjail seccomp profile is installed onto the node by an install-seccomp init container. On older clusters it falls back to the existing privileged mode, so the chart still installs without requiring 1.33+. Setting codeExecutor.securityContext explicitly continues to override this behavior for either mode. 
Co-authored-by: Cursor * Keep root values.yaml in sync with charts/retool/values.yaml Co-authored-by: Cursor * Drop codeExecutor securityContext comments Co-authored-by: Cursor * Document why code-executor uses seccomp on k8s 1.33+ Co-authored-by: Cursor * Note 1.33+ upgrade for fine-grained privileges Co-authored-by: Cursor * Gate code-executor seccomp behind codeExecutor.useSeccompProfile flag Replace the automatic k8s >= 1.33 version detection with an explicit opt-in flag (codeExecutor.useSeccompProfile, default false). The chart defaults to the existing privileged mode and only renders the unprivileged seccomp path (seccomp profile + NET_ADMIN + unmasked /proc + user namespaces + install-seccomp init container) when the operator sets the flag. An explicitly pinned codeExecutor.securityContext still wins. Enabling the flag requires Kubernetes 1.33+ (ProcMountType and UserNamespacesSupport feature gates); this is now the operator's responsibility rather than auto-detected. Co-authored-by: Cursor * Consolidate seccomp docs into values.yaml comment Move the detailed rationale for codeExecutor.useSeccompProfile into the values.yaml comment (operator-facing) and reduce the template comment to a short pointer explaining the $useSecComp local. Co-authored-by: Cursor * Set AppArmor unconfined for code-executor seccomp path When codeExecutor.useSeccompProfile drops the privileged securityContext, the container is run under the container runtime's default AppArmor profile on AppArmor-enabled nodes (e.g. GKE Container-Optimized OS, where containerd attaches cri-containerd.apparmor.d with `deny mount`). nsjail remounts the rootfs and /proc to build its sandbox, so that profile rejects the mounts with EPERM and code-executor crash-loops. Privileged containers were unaffected because AppArmor confinement is not applied to them. 
Add the container.apparmor.security.beta.kubernetes.io/code-executor: unconfined pod annotation, gated to the same $useSecComp path as the seccomp profile, hostUsers and procMount changes. The annotation is honored across all supported Kubernetes versions (unlike the appArmorProfile field, which is v1.30+) and is not subject to strict schema validation. The Localhost seccomp profile continues to provide syscall isolation. Co-authored-by: Cursor * Reword useSeccompProfile gate comment to drop operator phrasing Co-authored-by: Cursor --------- Co-authored-by: Cursor --- charts/retool/Chart.yaml | 2 +- .../templates/configmap_code_executor.yaml | 13 ++++ .../templates/deployment_code_executor.yaml | 66 +++++++++++++++++-- charts/retool/values.yaml | 17 +++-- values.yaml | 17 +++-- 5 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 charts/retool/templates/configmap_code_executor.yaml diff --git a/charts/retool/Chart.yaml b/charts/retool/Chart.yaml index dbedb80..7d8ed5e 100644 --- a/charts/retool/Chart.yaml +++ b/charts/retool/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: retool description: A Helm chart for Kubernetes type: application -version: 6.11.0 +version: 6.12.0 maintainers: - name: Retool Engineering email: engineering+helm@retool.com diff --git a/charts/retool/templates/configmap_code_executor.yaml b/charts/retool/templates/configmap_code_executor.yaml new file mode 100644 index 0000000..026ba90 --- /dev/null +++ b/charts/retool/templates/configmap_code_executor.yaml @@ -0,0 +1,13 @@ +{{- if include "retool.workflows.enabled" . }} +{{- if and (not .Values.codeExecutor.securityContext) .Values.codeExecutor.useSeccompProfile }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "retool.fullname" . }}-code-executor-seccomp + labels: + {{- include "retool.labels" . 
| nindent 4 }} +data: + nsjail-seccomp.json: | + {{- .Files.Get "files/nsjail-seccomp.json" | nindent 4 }} +{{- end }} +{{- end }} diff --git a/charts/retool/templates/deployment_code_executor.yaml b/charts/retool/templates/deployment_code_executor.yaml index 6750e61..6c75b3e 100644 --- a/charts/retool/templates/deployment_code_executor.yaml +++ b/charts/retool/templates/deployment_code_executor.yaml @@ -1,4 +1,6 @@ {{- if include "retool.workflows.enabled" . }} +{{- /* Use the less-privileged seccomp sandbox (see codeExecutor.useSeccompProfile in values.yaml) only when it is enabled and no explicit codeExecutor.securityContext is set. */ -}} +{{- $useSecComp := and (not .Values.codeExecutor.securityContext) .Values.codeExecutor.useSeccompProfile -}} apiVersion: apps/v1 kind: Deployment metadata: @@ -23,6 +25,10 @@ spec: template: metadata: annotations: +{{- if $useSecComp }} + checksum/seccomp: {{ .Files.Get "files/nsjail-seccomp.json" | sha256sum }} + container.apparmor.security.beta.kubernetes.io/code-executor: unconfined +{{- end }} {{- if .Values.podAnnotations }} {{ toYaml .Values.podAnnotations | indent 8 }} {{- end }} @@ -44,11 +50,43 @@ spec: {{- if .Values.priorityClassName }} priorityClassName: "{{ .Values.priorityClassName }}" {{- end }} -{{- if .Values.initContainers }} +{{- if $useSecComp }} + hostUsers: false +{{- end }} +{{- if or $useSecComp .Values.initContainers }} initContainers: +{{- if $useSecComp }} + - name: install-seccomp + image: busybox:1.37.0@sha256:b3255e7dfbcd10cb367af0d409747d511aeb66dfac98cf30e97e87e4207dd76f + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + resources: + requests: + cpu: 1m + memory: 4Mi + limits: + cpu: 10m + memory: 16Mi + command: + - /bin/sh + - -c + - | + DEST="/host-seccomp/{{ .Values.codeExecutor.seccompLocalhostProfile }}" + mkdir -p "$(dirname "$DEST")" + cp /seccomp-profile/nsjail-seccomp.json "$DEST" + echo "seccomp profile installed at 
$DEST" + volumeMounts: + - name: seccomp-profile + mountPath: /seccomp-profile + - name: host-seccomp + mountPath: /host-seccomp +{{- end }} {{- range $key, $value := .Values.initContainers }} - - name: "{{ $key }}" -{{ toYaml $value | indent 8 }} + - name: "{{ $key }}" +{{ toYaml $value | indent 10 }} {{- end }} {{- end }} containers: @@ -56,11 +94,18 @@ spec: image: "{{ .Values.codeExecutor.image.repository }}:{{ include "retool.codeExecutor.image.tag" . }}" imagePullPolicy: {{ .Values.image.pullPolicy }} securityContext: - {{ if .Values.codeExecutor.securityContext }} + {{- if .Values.codeExecutor.securityContext }} {{ toYaml .Values.codeExecutor.securityContext | indent 10 }} - {{ else }} + {{- else if $useSecComp }} + capabilities: + add: ["NET_ADMIN"] + procMount: Unmasked + seccompProfile: + type: Localhost + localhostProfile: {{ .Values.codeExecutor.seccompLocalhostProfile }} + {{- else }} privileged: true - {{ end }} + {{- end }} {{- if .Values.securityContext.extraContainerSecurityContext }} {{ toYaml .Values.securityContext.extraContainerSecurityContext | indent 10 }} {{- end }} @@ -128,6 +173,15 @@ spec: {{ tpl . $ | indent 6 }} {{- end }} volumes: +{{- if $useSecComp }} + - name: seccomp-profile + configMap: + name: {{ template "retool.fullname" . }}-code-executor-seccomp + - name: host-seccomp + hostPath: + path: /var/lib/kubelet/seccomp + type: DirectoryOrCreate +{{- end }} {{- if .Values.codeExecutor.volumes }} {{ toYaml .Values.codeExecutor.volumes | indent 8 }} {{- end }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 9fc552f..533441d 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -835,11 +835,18 @@ codeExecutor: cpu: 1000m memory: 1024Mi - # code executor uses nsjail to sandbox code execution. nsjail requires privileged container access. - # If your deployment does not support privileged access, you can set `privileged` to false to not - # use nsjail. 
Without nsjail, all code is run without sandboxing within your deployment. - securityContext: - privileged: true + # The code executor runs workflow code inside nsjail sandboxes, which require + # elevated privileges to create. By default these are granted by running the + # container as privileged (securityContext.privileged: true). Set + # useSeccompProfile: true to instead grant only what nsjail needs, far more + # granularly than the privileged flag: a slightly relaxed version of Docker's + # default seccomp profile, the NET_ADMIN capability for network isolation, and an + # unmasked /proc for process resource monitoring. This requires Kubernetes 1.33 or + # higher (for the ProcMountType and UserNamespacesSupport feature gates) -- do not + # enable it on older clusters. Pinning codeExecutor.securityContext overrides both + # paths and is used verbatim. + useSeccompProfile: false + seccompLocalhostProfile: profiles/nsjail-seccomp.json # === R2 (Retool agent runtime) ============================================= # Master switch for the whole R2 stack: the r2Agent worker, jsExecutor, diff --git a/values.yaml b/values.yaml index 9fc552f..533441d 100644 --- a/values.yaml +++ b/values.yaml @@ -835,11 +835,18 @@ codeExecutor: cpu: 1000m memory: 1024Mi - # code executor uses nsjail to sandbox code execution. nsjail requires privileged container access. - # If your deployment does not support privileged access, you can set `privileged` to false to not - # use nsjail. Without nsjail, all code is run without sandboxing within your deployment. - securityContext: - privileged: true + # The code executor runs workflow code inside nsjail sandboxes, which require + # elevated privileges to create. By default these are granted by running the + # container as privileged (securityContext.privileged: true). 
Set + # useSeccompProfile: true to instead grant only what nsjail needs, far more + # granularly than the privileged flag: a slightly relaxed version of Docker's + # default seccomp profile, the NET_ADMIN capability for network isolation, and an + # unmasked /proc for process resource monitoring. This requires Kubernetes 1.33 or + # higher (for the ProcMountType and UserNamespacesSupport feature gates) -- do not + # enable it on older clusters. Pinning codeExecutor.securityContext overrides both + # paths and is used verbatim. + useSeccompProfile: false + seccompLocalhostProfile: profiles/nsjail-seccomp.json # === R2 (Retool agent runtime) ============================================= # Master switch for the whole R2 stack: the r2Agent worker, jsExecutor, From e983e19c641f538282512ccee1b40cb3d2fdb2e5 Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 11 Jun 2026 09:37:46 -0400 Subject: [PATCH 31/37] [rr] Restructure into a top-level rr.enabled master switch (#321) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the RR (formerly "r2") stack into a single top-level `rr:` block whose `rr.enabled` is the master switch, with every component RR needs nested directly under it: rr: enabled: false jsExecutor: {...} # inherits rr.enabled agent: {...} # RR server-side agent worker — inherits rr.enabled agentSandbox: {...} # inherits rr.enabled gitServer: {...} # required for rr blobStorage: {...} # required for rr The vocabulary is renamed r2 -> rr to match the RR_ env vars, and the nested keys carry no redundant prefix (the `rr:` namespace scopes them) — the full path composes to the env var (rr.gitServer -> RR_GIT_SERVER, rr.blobStorage -> RR_BLOB_STORAGE), and rr.gitServer matches the rendered `-git-server` resource name. mcp and the separate AI-`agents` feature stay top-level (mcp is intentionally independent of the master switch). 
Helpers are retool.rr.* (componentEnabled, validateLegacyValues) and retool.gitServer.* / retool.agent.enabled. Intentionally NOT renamed, so this is a no-op for running pods (no resource recreation / no backend contract break): - SERVICE_TYPE=R2_AGENT_TEMPORAL_WORKER, temporal taskqueue r2-agent, and the r2-agent-worker resource + telemetry name. - the agent's internal worker identity: worker `type: rrAgent` and the retool.rrAgentWorker.* helpers, kept distinct from the AI-`agents` worker's retool.agentWorker.* to avoid a collision. Only the user-facing value key (rr.agent) and its enable helper are de-prefixed. - the unrelated "Cloudflare R2" mention in the blob-storage example. Robustness: - retool.rr.componentEnabled is kind-aware: an absent/null component block is disabled (no config to render); a map uses its `enabled` (inheriting the master switch when unset); a non-mapping value (e.g. a bare bool) fails loudly with guidance. Fixes the nil-dereference on an explicitly-nulled component and avoids relocating the crash into the deployment templates. - retool.rr.validateLegacyValues catches BOTH old top-level keys (the `r2:` master switch and the un-nested components) AND old leaf names left under the new `rr:` block (rr.r2Agent/rrAgent/rrGitServer/rrBlobStorage), mapping each to its new path. helm template/upgrade fails loudly rather than silently disabling RR. - the nested worker's values owner is resolved from a declarative `nested: rr` field on the worker descriptor instead of a hardcoded parent-name match. Verified: rendered manifests are byte-identical to the original r2 branch across all six scenarios (only the random postgres-password differs); helm lint clean; all 10 RR CI overlays render; both values.yaml copies kept byte-identical. Renamed test overlays test-r2-*-option.yaml -> test-rr-*-option.yaml. 
Co-authored-by: Claude Opus 4.8 (1M context) --- .../ci/test-agent-sandbox-enabled-option.yaml | 132 +-- ...agent-sandbox-inherit-postgres-option.yaml | 38 +- ...t-agent-sandbox-inline-secrets-option.yaml | 82 +- ...-agent-sandbox-postgres-fields-option.yaml | 70 +- ...nt-sandbox-postgres-url-secret-option.yaml | 52 +- .../ci/test-js-executor-enabled-option.yaml | 68 +- .../ci/test-r2-agent-enabled-option.yaml | 25 - charts/retool/ci/test-r2-enabled-option.yaml | 30 - .../ci/test-rr-agent-enabled-option.yaml | 27 + charts/retool/ci/test-rr-enabled-option.yaml | 30 + .../test-rr-git-server-separate-option.yaml | 51 +- charts/retool/templates/_helpers.tpl | 184 ++-- charts/retool/templates/_workers.tpl | 26 +- .../agent_sandbox_device_plugin.yaml | 4 +- .../agent_sandbox_networkpolicy.yaml | 4 +- .../templates/agent_sandbox_prepuller.yaml | 4 +- .../templates/agent_sandbox_seccomp.yaml | 4 +- .../templates/configmap_js_executor.yaml | 2 +- .../templates/deployment_agent_sandbox.yaml | 4 +- .../retool/templates/deployment_backend.yaml | 19 +- .../templates/deployment_git_server.yaml | 24 +- charts/retool/templates/deployment_jobs.yaml | 2 +- .../templates/deployment_js_executor.yaml | 40 +- charts/retool/templates/deployment_mcp.yaml | 4 +- .../templates/deployment_workflows.yaml | 2 +- charts/retool/templates/httproute.yaml | 6 +- charts/retool/values.yaml | 844 +++++++++--------- values.yaml | 844 +++++++++--------- 28 files changed, 1357 insertions(+), 1265 deletions(-) delete mode 100644 charts/retool/ci/test-r2-agent-enabled-option.yaml delete mode 100644 charts/retool/ci/test-r2-enabled-option.yaml create mode 100644 charts/retool/ci/test-rr-agent-enabled-option.yaml create mode 100644 charts/retool/ci/test-rr-enabled-option.yaml diff --git a/charts/retool/ci/test-agent-sandbox-enabled-option.yaml b/charts/retool/ci/test-agent-sandbox-enabled-option.yaml index 935d1c4..32ba7f6 100644 --- a/charts/retool/ci/test-agent-sandbox-enabled-option.yaml +++ 
b/charts/retool/ci/test-agent-sandbox-enabled-option.yaml @@ -1,76 +1,78 @@ -# Agent Sandbox — external secret + dedicated proxy domain WITH ingress. -# -# This is the "max surface" scenario: controller + proxy deployments, the -# job-template ConfigMap, RBAC, headless/proxy services, proxy ingress + TLS, -# the image-prepuller + seccomp DaemonSets, the smarter-device-manager device -# plugin DaemonSet, the NetworkPolicies, and both PDBs. -# -# Secret/Postgres sourcing here uses externalSecret.name (Postgres OPTION 4: -# the secret's postgres-url key). The other secret/Postgres precedence paths and -# the same-origin (no-ingress) proxy mode are covered by sibling files: -# - test-agent-sandbox-inline-secrets-option.yaml (inline secrets, plaintext DSN, same-origin/no ingress, hostPath tun) -# - test-agent-sandbox-postgres-fields-option.yaml (assemble DSN from fields + PGPASSWORD secret) -# - test-agent-sandbox-postgres-url-secret-option.yaml (full DSN from an existing secret) -# - test-agent-sandbox-inherit-postgres-option.yaml (zero-config inherit of the backend Postgres) -# Overlaid on test-install-values.yaml. -agentSandbox: - enabled: true +rr: - image: - repository: tryretool/agent-sandbox-service - tag: 3.123.4 - pullPolicy: IfNotPresent + # Agent Sandbox — external secret + dedicated proxy domain WITH ingress. + # + # This is the "max surface" scenario: controller + proxy deployments, the + # job-template ConfigMap, RBAC, headless/proxy services, proxy ingress + TLS, + # the image-prepuller + seccomp DaemonSets, the smarter-device-manager device + # plugin DaemonSet, the NetworkPolicies, and both PDBs. + # + # Secret/Postgres sourcing here uses externalSecret.name (Postgres OPTION 4: + # the secret's postgres-url key). 
The other secret/Postgres precedence paths and + # the same-origin (no-ingress) proxy mode are covered by sibling files: + # - test-agent-sandbox-inline-secrets-option.yaml (inline secrets, plaintext DSN, same-origin/no ingress, hostPath tun) + # - test-agent-sandbox-postgres-fields-option.yaml (assemble DSN from fields + PGPASSWORD secret) + # - test-agent-sandbox-postgres-url-secret-option.yaml (full DSN from an existing secret) + # - test-agent-sandbox-inherit-postgres-option.yaml (zero-config inherit of the backend Postgres) + # Overlaid on test-install-values.yaml. + agentSandbox: + enabled: true - # Reference a pre-existing K8s Secret (the production-recommended path) rather - # than inlining JWT/encryption material into the chart. With externalSecret.name - # set, secret-backed env vars — including the ones injected into the sandbox - # job-template — resolve via secretKeyRef instead of plaintext. - externalSecret: - name: agent-sandbox-secrets + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent - postgres: - schema: agent_executor - poolMax: 10 + # Reference a pre-existing K8s Secret (the production-recommended path) rather + # than inlining JWT/encryption material into the chart. With externalSecret.name + # set, secret-backed env vars — including the ones injected into the sandbox + # job-template — resolve via secretKeyRef instead of plaintext. + externalSecret: + name: agent-sandbox-secrets - sandboxNetwork: - enabled: true - devicePlugin: true - deployDaemonSet: true + postgres: + schema: agent_executor + poolMax: 10 - snapshotStorage: - s3Bucket: retool-agent-sandbox-snapshots - s3Endpoint: https://s3.us-east-1.amazonaws.com - s3Region: us-east-1 - credentialsSecretName: agent-sandbox-s3-credentials + sandboxNetwork: + enabled: true + devicePlugin: true + deployDaemonSet: true - # replicaCount > 1 renders the controller PodDisruptionBudget. 
- controller: - replicaCount: 2 + snapshotStorage: + s3Bucket: retool-agent-sandbox-snapshots + s3Endpoint: https://s3.us-east-1.amazonaws.com + s3Region: us-east-1 + credentialsSecretName: agent-sandbox-s3-credentials - proxy: - replicaCount: 2 - allowedDomains: api.example.com,example.com - backendDomainSuffixes: .example.com - sandboxProxyTimeoutMs: "3600000" - service: - type: ClusterIP - # Dedicated proxy domain → renders the proxy Ingress. - ingress: - enabled: true - ingressClassName: nginx - annotations: - nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - host: sandbox.example.com - tls: - - secretName: agent-sandbox-tls - hosts: - - sandbox.example.com - frontendWsProxyDomain: https://sandbox.example.com + # replicaCount > 1 renders the controller PodDisruptionBudget. + controller: + replicaCount: 2 - # Restrict sandbox/controller/proxy traffic → renders the NetworkPolicies. - networkPolicy: - enabled: true + proxy: + replicaCount: 2 + allowedDomains: api.example.com,example.com + backendDomainSuffixes: .example.com + sandboxProxyTimeoutMs: "3600000" + service: + type: ClusterIP + # Dedicated proxy domain → renders the proxy Ingress. + ingress: + enabled: true + ingressClassName: nginx + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: sandbox.example.com + tls: + - secretName: agent-sandbox-tls + hosts: + - sandbox.example.com + frontendWsProxyDomain: https://sandbox.example.com + + # Restrict sandbox/controller/proxy traffic → renders the NetworkPolicies. + networkPolicy: + enabled: true # Exercise the proxy PodDisruptionBudget branch. 
podDisruptionBudget: diff --git a/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml b/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml index adfba57..c5d09a4 100644 --- a/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml +++ b/charts/retool/ci/test-agent-sandbox-inherit-postgres-option.yaml @@ -1,20 +1,22 @@ -# Agent Sandbox — Postgres sourcing OPTION 5 (default): inherit the backend's -# Postgres connection. With agentSandbox.postgres left entirely unset, the -# controller/proxy reuse config.postgresql / the postgresql subchart (same -# instance and database, separate schema) — the zero-config path for enabling -# the sandbox on an existing deployment. PGPASSWORD mirrors the backend's -# POSTGRES_PASSWORD secretKeyRef, and the DSN is assembled from the postgresql -# helpers. The base test-install-values.yaml enables the postgresql subchart, -# which is what makes inheritance resolve. -# -# Only the (required) JWT secrets are provided; everything else is left default. -agentSandbox: - enabled: true +rr: - image: - repository: tryretool/agent-sandbox-service - tag: 3.123.4 - pullPolicy: IfNotPresent + # Agent Sandbox — Postgres sourcing OPTION 5 (default): inherit the backend's + # Postgres connection. With agentSandbox.postgres left entirely unset, the + # controller/proxy reuse config.postgresql / the postgresql subchart (same + # instance and database, separate schema) — the zero-config path for enabling + # the sandbox on an existing deployment. PGPASSWORD mirrors the backend's + # POSTGRES_PASSWORD secretKeyRef, and the DSN is assembled from the postgresql + # helpers. The base test-install-values.yaml enables the postgresql subchart, + # which is what makes inheritance resolve. + # + # Only the (required) JWT secrets are provided; everything else is left default. 
+ agentSandbox: + enabled: true - jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' - jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent + + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' diff --git a/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml b/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml index 163ea8a..2ac11c6 100644 --- a/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml +++ b/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml @@ -1,46 +1,48 @@ -# Agent Sandbox — inline secrets + plaintext DSN + same-origin proxy (no ingress). -# -# Complements test-agent-sandbox-enabled-option.yaml (external secret + dedicated -# proxy ingress). Here we exercise the *other* halves of those branches: -# - Secrets inline (no externalSecret.name) → the chart renders its own Secret -# (jwt-public-key / jwt-private-key / encryption-key / api-secret). jwtPublicKey -# MUST be single-line: it is injected raw into the sandbox job-template JSON. -# - Postgres sourcing OPTION 1: plaintext DSN via postgres.url. 
-# - Same-origin proxy: no dedicated proxy domain and no proxy ingress — the -# backend reverse-proxies /sandbox/* (frontendWsProxyDomain left empty). -# - sandboxNetwork.devicePlugin=false → sandbox pods get /dev/net/tun via -# hostPath (the non-device-plugin branch), and no device-manager DaemonSet. -# - networkPolicy disabled. -agentSandbox: - enabled: true +rr: - image: - repository: tryretool/agent-sandbox-service - tag: 3.123.4 - pullPolicy: IfNotPresent + # Agent Sandbox — inline secrets + plaintext DSN + same-origin proxy (no ingress). + # + # Complements test-agent-sandbox-enabled-option.yaml (external secret + dedicated + # proxy ingress). Here we exercise the *other* halves of those branches: + # - Secrets inline (no externalSecret.name) → the chart renders its own Secret + # (jwt-public-key / jwt-private-key / encryption-key / api-secret). jwtPublicKey + # MUST be single-line: it is injected raw into the sandbox job-template JSON. + # - Postgres sourcing OPTION 1: plaintext DSN via postgres.url. + # - Same-origin proxy: no dedicated proxy domain and no proxy ingress — the + # backend reverse-proxies /sandbox/* (frontendWsProxyDomain left empty). + # - sandboxNetwork.devicePlugin=false → sandbox pods get /dev/net/tun via + # hostPath (the non-device-plugin branch), and no device-manager DaemonSet. + # - networkPolicy disabled. 
+ agentSandbox: + enabled: true - jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' - jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' - encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a - apiSecret: test-agent-sandbox-api-secret + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent - # Option 1: plaintext DSN. - postgres: - url: postgres://retool:retool@agent-sandbox-db.example.internal:5432/agent_sandbox - schema: agent_executor - poolMax: 10 + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a + apiSecret: test-agent-sandbox-api-secret - sandboxNetwork: - enabled: true - devicePlugin: false - deployDaemonSet: false + # Option 1: plaintext DSN. + postgres: + url: postgres://retool:retool@agent-sandbox-db.example.internal:5432/agent_sandbox + schema: agent_executor + poolMax: 10 - proxy: - # Same-origin: ClusterIP service, no ingress. - service: - type: ClusterIP - ingress: - enabled: false + sandboxNetwork: + enabled: true + devicePlugin: false + deployDaemonSet: false - networkPolicy: - enabled: false + proxy: + # Same-origin: ClusterIP service, no ingress. 
+ service: + type: ClusterIP + ingress: + enabled: false + + networkPolicy: + enabled: false diff --git a/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml b/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml index 714c761..2b4e25b 100644 --- a/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml +++ b/charts/retool/ci/test-agent-sandbox-postgres-fields-option.yaml @@ -1,39 +1,41 @@ -# Agent Sandbox — Postgres sourcing OPTION 2: assemble the DSN from discrete -# fields, with the password supplied via PGPASSWORD from a pre-existing Secret -# (never embedded in the URL, so any password characters are safe). -# -# Also exercises: -# - An Azure-style "user@servername" username, which validateSecrets allows -# (the parser splits userinfo on the last '@'). -# - sandboxNetwork with devicePlugin=true but deployDaemonSet=false (a -# smarter-device-manager already runs on the nodes, managed elsewhere) → -# sandbox pods request smarter-devices/net_tun but no DS is rendered. -# - networkPolicy enabled. -agentSandbox: - enabled: true +rr: - image: - repository: tryretool/agent-sandbox-service - tag: 3.123.4 - pullPolicy: IfNotPresent + # Agent Sandbox — Postgres sourcing OPTION 2: assemble the DSN from discrete + # fields, with the password supplied via PGPASSWORD from a pre-existing Secret + # (never embedded in the URL, so any password characters are safe). + # + # Also exercises: + # - An Azure-style "user@servername" username, which validateSecrets allows + # (the parser splits userinfo on the last '@'). + # - sandboxNetwork with devicePlugin=true but deployDaemonSet=false (a + # smarter-device-manager already runs on the nodes, managed elsewhere) → + # sandbox pods request smarter-devices/net_tun but no DS is rendered. + # - networkPolicy enabled. 
+ agentSandbox: + enabled: true - jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' - jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent - # Option 2: host + user + database, password via PGPASSWORD secretKeyRef. - postgres: - host: agentdb-prod.postgres.database.azure.com - port: 5432 - database: agent_sandbox - user: retool@agentdb-prod - passwordSecretName: agent-sandbox-db-password - passwordSecretKey: password - schema: agent_executor + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' - sandboxNetwork: - enabled: true - devicePlugin: true - deployDaemonSet: false + # Option 2: host + user + database, password via PGPASSWORD secretKeyRef. 
+ postgres: + host: agentdb-prod.postgres.database.azure.com + port: 5432 + database: agent_sandbox + user: retool@agentdb-prod + passwordSecretName: agent-sandbox-db-password + passwordSecretKey: password + schema: agent_executor - networkPolicy: - enabled: true + sandboxNetwork: + enabled: true + devicePlugin: true + deployDaemonSet: false + + networkPolicy: + enabled: true diff --git a/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml b/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml index 35953af..7e0db28 100644 --- a/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml +++ b/charts/retool/ci/test-agent-sandbox-postgres-url-secret-option.yaml @@ -1,29 +1,31 @@ -# Agent Sandbox — Postgres sourcing OPTION 3: the full DSN comes from a -# pre-existing Secret (postgres.urlSecretName / urlSecretKey), while the JWT/ -# encryption secrets are provided inline. This is the "BYO DB secret, chart- -# managed app secrets" combination. -# -# Also exercises S3 snapshot storage WITHOUT a dedicated credentialsSecretName, -# so the sandbox AWS creds fall back to the default (chart-rendered) Secret. -agentSandbox: - enabled: true +rr: - image: - repository: tryretool/agent-sandbox-service - tag: 3.123.4 - pullPolicy: IfNotPresent + # Agent Sandbox — Postgres sourcing OPTION 3: the full DSN comes from a + # pre-existing Secret (postgres.urlSecretName / urlSecretKey), while the JWT/ + # encryption secrets are provided inline. This is the "BYO DB secret, chart- + # managed app secrets" combination. + # + # Also exercises S3 snapshot storage WITHOUT a dedicated credentialsSecretName, + # so the sandbox AWS creds fall back to the default (chart-rendered) Secret. 
+ agentSandbox: + enabled: true - jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' - jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' - encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a + image: + repository: tryretool/agent-sandbox-service + tag: 3.123.4 + pullPolicy: IfNotPresent - # Option 3: full DSN from an existing Secret. - postgres: - urlSecretName: agent-sandbox-db-dsn - urlSecretKey: connection-string - schema: agent_executor + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a - snapshotStorage: - s3Bucket: retool-agent-sandbox-snapshots - s3Endpoint: https://s3.us-west-2.amazonaws.com - s3Region: us-west-2 + # Option 3: full DSN from an existing Secret. 
+ postgres: + urlSecretName: agent-sandbox-db-dsn + urlSecretKey: connection-string + schema: agent_executor + + snapshotStorage: + s3Bucket: retool-agent-sandbox-snapshots + s3Endpoint: https://s3.us-west-2.amazonaws.com + s3Region: us-west-2 diff --git a/charts/retool/ci/test-js-executor-enabled-option.yaml b/charts/retool/ci/test-js-executor-enabled-option.yaml index ee8a4a1..4de597c 100644 --- a/charts/retool/ci/test-js-executor-enabled-option.yaml +++ b/charts/retool/ci/test-js-executor-enabled-option.yaml @@ -1,36 +1,38 @@ -# Exercises the JS executor workload (deployment_js_executor.yaml + -# configmap_js_executor.yaml). Overlaid on top of test-install-values.yaml. -jsExecutor: - enabled: true - replicaCount: 2 - image: - repository: tryretool/js-executor-service - tag: 3.123.4 - # Deliberately differs from the global image.pullPolicy (IfNotPresent in the - # base values) so the rendered deployment proves the per-workload override is - # honored rather than the global value. - pullPolicy: Always - # JS-executor-specific env (not inherited from top-level .Values.env). - env: - LOG_LEVEL: info - # Exercise the per-workload secretKeyRef branch. - environmentSecrets: - - name: JS_EXECUTOR_TOKEN - secretKeyRef: - name: js-executor-secrets - key: token - environmentVariables: - - name: JS_EXECUTOR_TEST_OPTION - value: "true" - # Memory request and limit are kept equal: JSE rejects requests at 80% of - # its limit, so the request must reserve the full amount. - resources: - limits: - cpu: 4000m - memory: 4Gi - requests: - cpu: 4000m - memory: 4Gi +rr: + + # Exercises the JS executor workload (deployment_js_executor.yaml + + # configmap_js_executor.yaml). Overlaid on top of test-install-values.yaml. 
+ jsExecutor: + enabled: true + replicaCount: 2 + image: + repository: tryretool/js-executor-service + tag: 3.123.4 + # Deliberately differs from the global image.pullPolicy (IfNotPresent in the + # base values) so the rendered deployment proves the per-workload override is + # honored rather than the global value. + pullPolicy: Always + # JS-executor-specific env (not inherited from top-level .Values.env). + env: + LOG_LEVEL: info + # Exercise the per-workload secretKeyRef branch. + environmentSecrets: + - name: JS_EXECUTOR_TOKEN + secretKeyRef: + name: js-executor-secrets + key: token + environmentVariables: + - name: JS_EXECUTOR_TEST_OPTION + value: "true" + # Memory request and limit are kept equal: JSE rejects requests at 80% of + # its limit, so the request must reserve the full amount. + resources: + limits: + cpu: 4000m + memory: 4Gi + requests: + cpu: 4000m + memory: 4Gi # Exercise the PDB branch shared by the JS executor deployment. podDisruptionBudget: diff --git a/charts/retool/ci/test-r2-agent-enabled-option.yaml b/charts/retool/ci/test-r2-agent-enabled-option.yaml deleted file mode 100644 index a76cb34..0000000 --- a/charts/retool/ci/test-r2-agent-enabled-option.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Exercises the R2 Agent worker (the server-side agent loop worker rendered via -# _workers.tpl as SERVICE_TYPE=R2_AGENT_TEMPORAL_WORKER on healthcheck port 3016). -# Renders a Deployment + Service, plus a PodDisruptionBudget when one is set. -# Overlaid on test-install-values.yaml. -r2Agent: - enabled: true - config: - nodeOptions: "--max_old_space_size=2048" - worker: - replicaCount: 2 - resources: - limits: - cpu: 2000m - memory: 4096Mi - requests: - cpu: 1000m - memory: 2048Mi - labels: - test-pod-label: "true" - annotations: - test-pod-annotation: "true" - -# Exercise the worker PodDisruptionBudget branch. 
-podDisruptionBudget: - maxUnavailable: 1 diff --git a/charts/retool/ci/test-r2-enabled-option.yaml b/charts/retool/ci/test-r2-enabled-option.yaml deleted file mode 100644 index ce8718b..0000000 --- a/charts/retool/ci/test-r2-enabled-option.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# R2 master switch — single flag turns on the whole R2 stack. -# -# Exercises the `r2.enabled: true` inherit path: r2Agent, jsExecutor, and -# agentSandbox all leave their own `enabled` unset (null) and inherit the master -# switch. This guards `retool.r2.componentEnabled` and the helper-routed -# cross-component env wiring (backend/workflows/jobs/workers read -# effective-enabled, not the raw per-component flag). mcp is intentionally NOT -# part of the master switch (it is opt-in via mcp.enabled, since it needs its -# own OAuth config), so it must not render from r2.enabled alone. -# -# Secrets/config below are only what each component requires to template when -# enabled; none of them set `*.enabled`, so enablement comes solely from r2. -r2: - enabled: true - -agentSandbox: - # jwtPublicKey is injected raw into the sandbox job-template JSON, so it MUST - # be single-line (\n-escaped) or templating breaks. 
- jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' - jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49AwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' - postgres: - url: postgres://retool:retool@agent-sandbox-db.example.internal:5432/agent_sandbox - schema: agent_executor - proxy: - service: - type: ClusterIP - ingress: - enabled: false - networkPolicy: - enabled: false diff --git a/charts/retool/ci/test-rr-agent-enabled-option.yaml b/charts/retool/ci/test-rr-agent-enabled-option.yaml new file mode 100644 index 0000000..678c467 --- /dev/null +++ b/charts/retool/ci/test-rr-agent-enabled-option.yaml @@ -0,0 +1,27 @@ +rr: + + # Exercises the RR Agent worker (the server-side agent loop worker rendered via + # _workers.tpl as SERVICE_TYPE=R2_AGENT_TEMPORAL_WORKER on healthcheck port 3016). + # Renders a Deployment + Service, plus a PodDisruptionBudget when one is set. + # Overlaid on test-install-values.yaml. + agent: + enabled: true + config: + nodeOptions: "--max_old_space_size=2048" + worker: + replicaCount: 2 + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + labels: + test-pod-label: "true" + annotations: + test-pod-annotation: "true" + +# Exercise the worker PodDisruptionBudget branch. +podDisruptionBudget: + maxUnavailable: 1 diff --git a/charts/retool/ci/test-rr-enabled-option.yaml b/charts/retool/ci/test-rr-enabled-option.yaml new file mode 100644 index 0000000..aae0c9d --- /dev/null +++ b/charts/retool/ci/test-rr-enabled-option.yaml @@ -0,0 +1,30 @@ +# RR master switch — single flag turns on the whole RR stack. 
+# +# Exercises the `rr.enabled: true` inherit path: agent, jsExecutor, and +# agentSandbox all leave their own `enabled` unset (null) and inherit the master +# switch. This guards `retool.rr.componentEnabled` and the helper-routed +# cross-component env wiring (backend/workflows/jobs/workers read +# effective-enabled, not the raw per-component flag). mcp is intentionally NOT +# part of the master switch (it is opt-in via mcp.enabled, since it needs its +# own OAuth config), so it must not render from rr.enabled alone. +# +# Secrets/config below are only what each component requires to template when +# enabled; none of them set `*.enabled`, so enablement comes solely from rr. +rr: + enabled: true + + agentSandbox: + # jwtPublicKey is injected raw into the sandbox job-template JSON, so it MUST + # be single-line (\n-escaped) or templating breaks. + jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' + jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49AwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + postgres: + url: postgres://retool:retool@agent-sandbox-db.example.internal:5432/agent_sandbox + schema: agent_executor + proxy: + service: + type: ClusterIP + ingress: + enabled: false + networkPolicy: + enabled: false diff --git a/charts/retool/ci/test-rr-git-server-separate-option.yaml b/charts/retool/ci/test-rr-git-server-separate-option.yaml index e07296b..1c2e23a 100644 --- a/charts/retool/ci/test-rr-git-server-separate-option.yaml +++ b/charts/retool/ci/test-rr-git-server-separate-option.yaml @@ -1,34 +1,37 @@ podDisruptionBudget: minAvailable: 1 -rrGitServer: - enabled: true - repackThreshold: 200 - separate: +rr: + + gitServer: enabled: true - replicaCount: 2 - port: 3010 - 
resources: - requests: - cpu: 250m - memory: 512Mi - annotations: - test-pod-annotation: "true" - labels: - test-pod-label: "true" - service: + repackThreshold: 200 + separate: + enabled: true + replicaCount: 2 + port: 3010 + resources: + requests: + cpu: 250m + memory: 512Mi annotations: - test-service-annotation: "true" + test-pod-annotation: "true" labels: - test-service-label: "true" + test-pod-label: "true" + service: + annotations: + test-service-annotation: "true" + labels: + test-service-label: "true" + -blobStorage: - s3: - bucket: test-rr-bucket - region: us-east-1 - accessKeyId: AKIATEST - secretAccessKeySecretName: rr-blob-storage - secretAccessKeySecretKey: secret-access-key + blobStorage: + s3: + bucket: test-rr-bucket + region: us-east-1 + accessKeyId: AKIATEST + secretAccessKeySecretName: rr-blob-storage + secretAccessKeySecretKey: secret-access-key # Exercise the MCP auto-wiring to the standalone git server service. mcp: diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index fb0460a..261f6fd 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -420,25 +420,37 @@ Usage: (include "retool.agents.enabled" .) {{- end -}} {{/* -Resolve whether an R2 component (r2Agent, jsExecutor, agentSandbox) is -enabled. The component's own `enabled` wins when explicitly set to true/false; -when left unset (null) it inherits the shared master switch .Values.r2.enabled. -Usage: (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) +Resolve whether an RR component (agent, jsExecutor, agentSandbox) is +enabled. Components are nested under .Values.rr (each ships as a default block +with `enabled: null`). The component's own `enabled` wins when explicitly set to +true/false; when left unset (null) it inherits the shared master switch +.Values.rr.enabled. 
If the component block itself is absent or explicitly nulled +it is treated as disabled (there is no config to render). A non-mapping value +(e.g. a bare bool) is a misconfiguration and fails loudly. +Usage: (include "retool.rr.componentEnabled" (dict "root" $ "component" "jsExecutor")) Returns "1" when enabled, "" otherwise. */}} -{{- define "retool.r2.componentEnabled" -}} -{{- $cfg := index .root.Values .component -}} -{{- if kindIs "invalid" $cfg.enabled -}} - {{- if eq (toString .root.Values.r2.enabled) "true" -}}1{{- end -}} -{{- else if eq (toString $cfg.enabled) "true" -}}1{{- end -}} +{{- define "retool.rr.componentEnabled" -}} +{{- $rr := .root.Values.rr | default dict -}} +{{- $cfg := index $rr .component -}} +{{- if kindIs "invalid" $cfg -}} + {{/* component block absent or explicitly nulled -> disabled (there is no + config to render, so it cannot inherit the master switch on) */}} +{{- else if kindIs "map" $cfg -}} + {{- if kindIs "invalid" $cfg.enabled -}} + {{- if eq (toString $rr.enabled) "true" -}}1{{- end -}} + {{- else if eq (toString $cfg.enabled) "true" -}}1{{- end -}} +{{- else -}} + {{- fail (printf "rr.%s must be a mapping (got %s). To toggle this component set rr.%s.enabled: true|false; to inherit the rr.enabled master switch, leave rr.%s unset." .component (kindOf $cfg) .component .component) -}} +{{- end -}} {{- end -}} {{/* -Set R2 agent worker enabled. Honors the shared R2 master switch. -Usage: (include "retool.r2Agent.enabled" .) +Set RR agent worker enabled. Honors the shared RR master switch. +Usage: (include "retool.agent.enabled" .) */}} -{{- define "retool.r2Agent.enabled" -}} -{{- include "retool.r2.componentEnabled" (dict "root" . "component" "r2Agent") -}} +{{- define "retool.agent.enabled" -}} +{{- include "retool.rr.componentEnabled" (dict "root" . 
"component" "agent") -}} {{- end -}} {{/* Global Temporal configuration */}} @@ -539,25 +551,25 @@ Set agent eval worker service name {{- end -}} {{/* -Set R2 agent worker service name +Set RR agent worker service name */}} -{{- define "retool.r2AgentWorker.name" -}} +{{- define "retool.rrAgentWorker.name" -}} {{ template "retool.fullname" . }}-r2-agent-worker {{- end -}} {{/* -Selector labels for R2 agent worker. Note changes here will require manual +Selector labels for RR agent worker. Note changes here will require manual deployment recreation and incur downtime, so should be avoided. */}} -{{- define "retool.r2AgentWorker.selectorLabels" -}} -retoolService: {{ include "retool.r2AgentWorker.name" . }} +{{- define "retool.rrAgentWorker.selectorLabels" -}} +retoolService: {{ include "retool.rrAgentWorker.name" . }} {{- end }} {{/* -Extra (non-selector) labels for R2 agent worker. +Extra (non-selector) labels for RR agent worker. */}} -{{- define "retool.r2AgentWorker.labels" -}} -app.kubernetes.io/name: {{ include "retool.r2AgentWorker.name" . }} +{{- define "retool.rrAgentWorker.labels" -}} +app.kubernetes.io/name: {{ include "retool.rrAgentWorker.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} telemetry.retool.com/service-name: r2-agent-worker {{- end }} @@ -641,8 +653,8 @@ tokens. Each may come from a plaintext value, the per-key existing-secret refs, or the catch-all externalSecret.name. No-op when agentSandbox is disabled. */}} {{- define "retool.agentSandbox.validateSecrets" -}} -{{- if eq (include "retool.r2.componentEnabled" (dict "root" . "component" "agentSandbox")) "1" -}} -{{- $as := .Values.agentSandbox -}} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" . "component" "agentSandbox")) "1" -}} +{{- $as := .Values.rr.agentSandbox -}} {{- $ext := $as.externalSecret.name -}} {{- $explicitPg := or $as.postgres.url $as.postgres.urlSecretName $as.postgres.host $ext -}} {{- if not $explicitPg -}} @@ -704,8 +716,8 @@ last '@'. 
Usage: {{- include "retool.agentSandbox.postgresUrlEnv" . | nindent 12 }} */}} {{- define "retool.agentSandbox.postgresUrlEnv" -}} -{{- $pg := .Values.agentSandbox.postgres -}} -{{- $ext := .Values.agentSandbox.externalSecret.name -}} +{{- $pg := .Values.rr.agentSandbox.postgres -}} +{{- $ext := .Values.rr.agentSandbox.externalSecret.name -}} {{- if $pg.url }} - name: AGENT_SANDBOX_POSTGRES_URL value: {{ $pg.url | quote }} @@ -772,42 +784,42 @@ Outputs env entries that tell the backend how to reach the agent sandbox service Usage: {{- include "retool.agentSandbox.backendEnvVars" . | nindent 10 }} */}} {{- define "retool.agentSandbox.backendEnvVars" -}} -{{- if eq (include "retool.r2.componentEnabled" (dict "root" . "component" "agentSandbox")) "1" }} -{{- $defaultSecretName := .Values.agentSandbox.externalSecret.name | default (include "retool.agentSandbox.name" .) -}} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" . "component" "agentSandbox")) "1" }} +{{- $defaultSecretName := .Values.rr.agentSandbox.externalSecret.name | default (include "retool.agentSandbox.name" .) -}} - name: RR_AGENT_PUBSUB_BACKEND value: "postgres" - name: AGENT_SANDBOX_CONTROLLER_INGRESS_DOMAIN - value: {{ .Values.agentSandbox.controllerUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.controller.name" .) (toString .Values.agentSandbox.controller.port)) | quote }} + value: {{ .Values.rr.agentSandbox.controllerUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.controller.name" .) (toString .Values.rr.agentSandbox.controller.port)) | quote }} - name: AGENT_SANDBOX_PROXY_INGRESS_DOMAIN - value: {{ .Values.agentSandbox.proxyUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.proxy.name" .) (toString .Values.agentSandbox.proxy.port)) | quote }} -{{- if .Values.agentSandbox.frontendWsProxyDomain }} + value: {{ .Values.rr.agentSandbox.proxyUrl | default (printf "http://%s:%s" (include "retool.agentSandbox.proxy.name" .) 
(toString .Values.rr.agentSandbox.proxy.port)) | quote }} +{{- if .Values.rr.agentSandbox.frontendWsProxyDomain }} - name: AGENT_SANDBOX_FRONTEND_WS_PROXY_DOMAIN - value: {{ .Values.agentSandbox.frontendWsProxyDomain | quote }} + value: {{ .Values.rr.agentSandbox.frontendWsProxyDomain | quote }} {{- end }} -{{- if .Values.agentSandbox.jwtPrivateKey }} +{{- if .Values.rr.agentSandbox.jwtPrivateKey }} - name: AGENT_SANDBOX_JWT_PRIVATE_KEY - value: {{ .Values.agentSandbox.jwtPrivateKey | quote }} -{{- else if .Values.agentSandbox.externalSecret.name }} + value: {{ .Values.rr.agentSandbox.jwtPrivateKey | quote }} +{{- else if .Values.rr.agentSandbox.externalSecret.name }} - name: AGENT_SANDBOX_JWT_PRIVATE_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} key: jwt-private-key {{- end }} -{{- if .Values.agentSandbox.jwtPublicKey }} +{{- if .Values.rr.agentSandbox.jwtPublicKey }} - name: AGENT_SANDBOX_JWT_PUBLIC_KEY - value: {{ .Values.agentSandbox.jwtPublicKey | quote }} -{{- else if .Values.agentSandbox.externalSecret.name }} + value: {{ .Values.rr.agentSandbox.jwtPublicKey | quote }} +{{- else if .Values.rr.agentSandbox.externalSecret.name }} - name: AGENT_SANDBOX_JWT_PUBLIC_KEY valueFrom: secretKeyRef: name: {{ $defaultSecretName }} key: jwt-public-key {{- end }} -{{- if .Values.agentSandbox.encryptionKey }} +{{- if .Values.rr.agentSandbox.encryptionKey }} - name: AGENT_SANDBOX_ENCRYPTION_KEY - value: {{ .Values.agentSandbox.encryptionKey | quote }} -{{- else if .Values.agentSandbox.externalSecret.name }} + value: {{ .Values.rr.agentSandbox.encryptionKey | quote }} +{{- else if .Values.rr.agentSandbox.externalSecret.name }} - name: AGENT_SANDBOX_ENCRYPTION_KEY valueFrom: secretKeyRef: @@ -825,18 +837,18 @@ Set MCP server service name {{- end -}} {{/* -Set git server deployment/service name (only used when rrGitServer.separate is enabled) +Set git server deployment/service name (only used when rr.gitServer.separate is enabled) */}} -{{- define
"retool.rrGitServer.name" -}} +{{- define "retool.gitServer.name" -}} {{ template "retool.fullname" . }}-git-server {{- end -}} {{/* Returns "1" when the git server should run as its own deployment/service -(rrGitServer.enabled AND rrGitServer.separate.enabled), empty otherwise. +(rr.gitServer.enabled AND rr.gitServer.separate.enabled), empty otherwise. */}} -{{- define "retool.rrGitServer.separateEnabled" -}} -{{- if and .Values.rrGitServer.enabled (.Values.rrGitServer.separate | default dict).enabled -}} +{{- define "retool.gitServer.separateEnabled" -}} +{{- if and .Values.rr.gitServer.enabled (.Values.rr.gitServer.separate | default dict).enabled -}} 1 {{- end -}} {{- end -}} @@ -844,16 +856,16 @@ Returns "1" when the git server should run as its own deployment/service {{/* Port the standalone git server listens on (RR_GIT_SERVER_PORT) and exposes via its service. */}} -{{- define "retool.rrGitServer.port" -}} -{{- (.Values.rrGitServer.separate | default dict).port | default 3010 -}} +{{- define "retool.gitServer.port" -}} +{{- (.Values.rr.gitServer.separate | default dict).port | default 3010 -}} {{- end -}} {{/* In-cluster URL of the standalone git server service, e.g. http://<fullname>-git-server:3010. Used to point the MCP server (and any other consumer) at the split-out git server. */}} -{{- define "retool.rrGitServer.url" -}} -http://{{ template "retool.rrGitServer.name" . }}:{{ include "retool.rrGitServer.port" . }} +{{- define "retool.gitServer.url" -}} +http://{{ template "retool.gitServer.name" . }}:{{ include "retool.gitServer.port" . }} {{- end -}} {{/* @@ -864,8 +876,8 @@ snapshots. Emits nothing when no blobStorage provider is configured (in which case the user is expected to plumb RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* directly via environmentVariables / environmentSecrets).
*/}} -{{- define "retool.rrGitServer.commonEnv" -}} -{{- $bs := .Values.blobStorage | default dict }} +{{- define "retool.gitServer.commonEnv" -}} +{{- $bs := .Values.rr.blobStorage | default dict }} {{- if $bs.s3 }} - name: RR_BLOB_STORAGE_PROVIDER value: "s3" @@ -924,24 +936,24 @@ directly via environmentVariables / environmentSecrets). value: {{ $bs.azure.connectionString | quote }} {{- end }} {{- end }} -{{- if .Values.rrGitServer.repackThreshold }} +{{- if .Values.rr.gitServer.repackThreshold }} - name: RR_GIT_REPACK_THRESHOLD - value: {{ .Values.rrGitServer.repackThreshold | quote }} + value: {{ .Values.rr.gitServer.repackThreshold | quote }} {{- end }} {{- end -}} {{/* -Validate that exactly one blob-storage provider is configured when rrGitServer +Validate that exactly one blob-storage provider is configured when rr.gitServer is enabled. Skipped when the user has plumbed the RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_*_* env vars in directly via env/environmentVariables/environmentSecrets, which is treated as an opt-out from the first-class blobStorage config. -Also skipped entirely when rrGitServer.skipBlobStorageValidation is true, which +Also skipped entirely when rr.gitServer.skipBlobStorageValidation is true, which is the escape hatch for sources we cannot inspect at template time (e.g. env vars injected via envFrom from a Secret/ConfigMap). -No-op when rrGitServer is disabled. +No-op when rr.gitServer is disabled. */}} -{{- define "retool.rrGitServer.validateBlobStorage" -}} -{{- if and .Values.rrGitServer.enabled (not .Values.rrGitServer.skipBlobStorageValidation) -}} +{{- define "retool.gitServer.validateBlobStorage" -}} +{{- if and .Values.rr.gitServer.enabled (not .Values.rr.gitServer.skipBlobStorageValidation) -}} {{- $hasDirectEnv := false -}} {{- range $name, $value := .Values.env -}} {{- if or (hasPrefix "RR_DEFAULT_" $name) (eq $name "RR_BLOB_STORAGE_PROVIDER") -}} @@ -959,15 +971,61 @@ No-op when rrGitServer is disabled. 
{{- end -}} {{- end -}} {{- if not $hasDirectEnv -}} -{{- $bs := .Values.blobStorage | default dict -}} +{{- $bs := .Values.rr.blobStorage | default dict -}} {{- $providers := list -}} {{- if $bs.s3 }}{{ $providers = append $providers "s3" }}{{ end -}} {{- if $bs.gcs }}{{ $providers = append $providers "gcs" }}{{ end -}} {{- if $bs.azure }}{{ $providers = append $providers "azure" }}{{ end -}} {{- if ne (len $providers) 1 -}} -{{- fail "rrGitServer.enabled requires exactly one of blobStorage.s3, blobStorage.gcs, blobStorage.azure to be configured, or set RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* directly via env / environmentVariables / environmentSecrets. If those vars are supplied another way (e.g. envFrom), set rrGitServer.skipBlobStorageValidation=true to bypass this check." -}} +{{- fail "rr.gitServer.enabled requires exactly one of rr.blobStorage.s3, rr.blobStorage.gcs, rr.blobStorage.azure to be configured, or set RR_BLOB_STORAGE_PROVIDER / RR_DEFAULT_* directly via env / environmentVariables / environmentSecrets. If those vars are supplied another way (e.g. envFrom), set rr.gitServer.skipBlobStorageValidation=true to bypass this check." -}} +{{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Guard against the pre-rename RR values layout. The whole stack used to be named +"r2" (top-level `r2:` master switch), and its components used to be top-level +keys (jsExecutor, r2Agent, agentSandbox, rrGitServer, blobStorage); everything is +now named "rr" and nested under .Values.rr. A chart upgrade would otherwise +SILENTLY ignore any config still set under the old names — quietly disabling RR — +so fail loudly with the exact key moves instead. + +Two classes of stale config are caught: + 1. Old TOP-LEVEL keys (the master switch and the un-nested components). + 2. Old component LEAF names nested under the new `rr:` block (e.g. 
someone + who moved config under `rr:` but kept `r2Agent`/`rrAgent`/`rrGitServer`/ + `rrBlobStorage` instead of the renamed `agent`/`gitServer`/`blobStorage`). +*/}} +{{- define "retool.rr.validateLegacyValues" -}} +{{- $found := list -}} +{{/* 1. old top-level keys */}} +{{- $topMoves := list + (list "r2" "rr") + (list "jsExecutor" "rr.jsExecutor") + (list "r2Agent" "rr.agent") + (list "agentSandbox" "rr.agentSandbox") + (list "rrGitServer" "rr.gitServer") + (list "blobStorage" "rr.blobStorage") -}} +{{- range $move := $topMoves -}} +{{- if hasKey $.Values (index $move 0) -}} +{{- $found = append $found (printf " %s: -> %s:" (index $move 0) (index $move 1)) -}} +{{- end -}} +{{- end -}} +{{/* 2. old leaf names nested under rr: */}} +{{- $rr := $.Values.rr | default dict -}} +{{- $childMoves := list + (list "r2Agent" "rr.agent") + (list "rrAgent" "rr.agent") + (list "rrGitServer" "rr.gitServer") + (list "rrBlobStorage" "rr.blobStorage") -}} +{{- range $move := $childMoves -}} +{{- if hasKey $rr (index $move 0) -}} +{{- $found = append $found (printf " rr.%s: -> %s:" (index $move 0) (index $move 1)) -}} {{- end -}} {{- end -}} +{{- if $found -}} +{{- fail (printf "\n\nThe RR (formerly \"r2\") values layout changed: the master switch and every component it needs now live under the top-level `rr:` block. These keys in your values are NO LONGER READ and would silently disable RR. Rename / move them as shown:\n\n%s\n\nThe master switch is now `rr.enabled`. See values.yaml for the new layout." (join "\n" $found)) -}} {{- end -}} {{- end -}} @@ -995,8 +1053,8 @@ Set JS executor image tag Usage: (template "retool.jsExecutor.image.tag" .) 
*/}} {{- define "retool.jsExecutor.image.tag" -}} -{{- if .Values.jsExecutor.image.tag -}} - {{- .Values.jsExecutor.image.tag -}} +{{- if .Values.rr.jsExecutor.image.tag -}} + {{- .Values.rr.jsExecutor.image.tag -}} {{- else if .Values.image.tag -}} {{- $valid_retool_version_regexp := "([0-9]+\\.[0-9]+(\\.[0-9]+)?(-[a-zA-Z0-9]+)?)" }} {{- $semver_version_regexp := "[0-9]+\\.[0-9]+(\\.[0-9]+)?" }} @@ -1007,7 +1065,7 @@ Usage: (template "retool.jsExecutor.image.tag" .) {{- "1.1.0" -}} {{- end -}} {{- else -}} - {{- fail "Please set a value for .Values.image.tag or .Values.jsExecutor.image.tag" }} + {{- fail "Please set a value for .Values.image.tag or .Values.rr.jsExecutor.image.tag" }} {{- end -}} {{- end -}} diff --git a/charts/retool/templates/_workers.tpl b/charts/retool/templates/_workers.tpl index b2ea566..36c1e7e 100644 --- a/charts/retool/templates/_workers.tpl +++ b/charts/retool/templates/_workers.tpl @@ -1,10 +1,18 @@ +{{/* +Worker descriptors. `parent` is the values key holding the worker's config and +also names its enable helper (retool.<parent>.enabled). `type` selects the +per-worker rendering (resource name, SERVICE_TYPE, taskqueue). `nested`, when +set, is the parent values block the config lives under (e.g. the rr stack +keeps its workers under .Values.rr); omitted means the key is top-level.
+*/}} {{- define "retool.workers" -}} - parent: agents type: agent - parent: agents type: agentEval -- parent: r2Agent - type: r2Agent +- parent: agent + type: rrAgent + nested: rr - parent: workflows type: workflow {{- end -}} @@ -15,7 +23,7 @@ {{- range $worker := $workers -}} {{- if eq (include (printf "retool.%s.enabled" $worker.parent) $root) "1" -}} -{{ include "retool.worker.deployment" (dict "root" $root "parent" $worker.parent "workerType" $worker.type) }} +{{ include "retool.worker.deployment" (dict "root" $root "parent" $worker.parent "workerType" $worker.type "nested" $worker.nested) }} {{- end -}} {{- end -}} {{- end -}} @@ -24,7 +32,11 @@ {{- $ := .root -}} {{- $parent := .parent -}} {{- $workerType := .workerType -}} -{{- $parentValues := index $.Values $parent -}} +{{- $owner := $.Values -}} +{{- if .nested -}} +{{- $owner = index $.Values .nested -}} +{{- end -}} +{{- $parentValues := index $owner $parent -}} {{- $workerValues := $parentValues.worker -}} {{- if eq $workerType "agentEval" -}} @@ -45,7 +57,7 @@ {{- $healthcheckPort = 3012 -}} {{- $serviceType = "AGENT_EVAL_TEMPORAL_WORKER" -}} {{- $taskqueue = "agent-eval" -}} -{{- else if eq $workerType "r2Agent" -}} +{{- else if eq $workerType "rrAgent" -}} {{- $healthcheckPort = 3016 -}} {{- $serviceType = "R2_AGENT_TEMPORAL_WORKER" -}} {{- $taskqueue = "r2-agent" -}} @@ -113,7 +125,7 @@ spec: {{- end }} {{- end }} containers: - - name: {{ if eq $workerType "agentEval" }}agent-eval-worker{{ else if eq $workerType "r2Agent" }}r2-agent-worker{{ else }}{{ $workerType }}-worker{{ end }} + - name: {{ if eq $workerType "agentEval" }}agent-eval-worker{{ else if eq $workerType "rrAgent" }}r2-agent-worker{{ else }}{{ $workerType }}-worker{{ end }} image: "{{ $.Values.image.repository }}:{{ required "Please set a value for .Values.image.tag" $.Values.image.tag }}" imagePullPolicy: {{ $.Values.image.pullPolicy }} args: @@ -213,7 +225,7 @@ spec: value: {{ template "retool.postgresql.ssl_enabled" $ }} - name: 
CODE_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.codeExecutor.name" $ }} - {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} + {{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" $ }} {{- end }} diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml index 40e87d8..f1f8664 100644 --- a/charts/retool/templates/agent_sandbox_device_plugin.yaml +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -1,5 +1,5 @@ -{{- if and (eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.agentSandbox.sandboxNetwork.deployDaemonSet }} -{{- $as := .Values.agentSandbox -}} +{{- if and (eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.rr.agentSandbox.sandboxNetwork.deployDaemonSet }} +{{- $as := .Values.rr.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} apiVersion: v1 diff --git a/charts/retool/templates/agent_sandbox_networkpolicy.yaml b/charts/retool/templates/agent_sandbox_networkpolicy.yaml index ec7bcc1..4ba72d4 100644 --- a/charts/retool/templates/agent_sandbox_networkpolicy.yaml +++ b/charts/retool/templates/agent_sandbox_networkpolicy.yaml @@ -1,5 +1,5 @@ -{{- if and (eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.agentSandbox.networkPolicy.enabled }} -{{- $as := .Values.agentSandbox -}} +{{- if and (eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.rr.agentSandbox.networkPolicy.enabled }} +{{- $as := .Values.rr.agentSandbox -}} {{- /* ======================================================================= 
Sandbox Pod NetworkPolicy — restrict ingress/egress for executor Jobs diff --git a/charts/retool/templates/agent_sandbox_prepuller.yaml b/charts/retool/templates/agent_sandbox_prepuller.yaml index 33e2c19..817c0ce 100644 --- a/charts/retool/templates/agent_sandbox_prepuller.yaml +++ b/charts/retool/templates/agent_sandbox_prepuller.yaml @@ -1,5 +1,5 @@ -{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} -{{- $as := .Values.agentSandbox -}} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} +{{- $as := .Values.rr.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} apiVersion: apps/v1 diff --git a/charts/retool/templates/agent_sandbox_seccomp.yaml b/charts/retool/templates/agent_sandbox_seccomp.yaml index 7579b5c..4550dd9 100644 --- a/charts/retool/templates/agent_sandbox_seccomp.yaml +++ b/charts/retool/templates/agent_sandbox_seccomp.yaml @@ -1,5 +1,5 @@ -{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} -{{- $as := .Values.agentSandbox -}} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} +{{- $as := .Values.rr.agentSandbox -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} apiVersion: v1 diff --git a/charts/retool/templates/configmap_js_executor.yaml b/charts/retool/templates/configmap_js_executor.yaml index 2212a41..413e675 100644 --- a/charts/retool/templates/configmap_js_executor.yaml +++ b/charts/retool/templates/configmap_js_executor.yaml @@ -1,4 +1,4 @@ -{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} apiVersion: v1 
kind: ConfigMap metadata: diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 5d5d2ed..138fdba 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -1,6 +1,6 @@ -{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} {{- include "retool.agentSandbox.validateSecrets" . }} -{{- $as := .Values.agentSandbox -}} +{{- $as := .Values.rr.agentSandbox -}} {{- $defaultSecretName := $as.externalSecret.name | default (include "retool.agentSandbox.name" .) -}} {{- $nodeSelector := $as.nodeSelector | default .Values.nodeSelector -}} {{- $tolerations := $as.tolerations | default .Values.tolerations -}} diff --git a/charts/retool/templates/deployment_backend.yaml b/charts/retool/templates/deployment_backend.yaml index b741681..ff1a9e5 100644 --- a/charts/retool/templates/deployment_backend.yaml +++ b/charts/retool/templates/deployment_backend.yaml @@ -1,4 +1,5 @@ -{{- include "retool.rrGitServer.validateBlobStorage" . }} +{{- include "retool.rr.validateLegacyValues" . }} +{{- include "retool.gitServer.validateBlobStorage" . }} apiVersion: apps/v1 kind: Deployment metadata: @@ -103,9 +104,9 @@ spec: {{- end }} {{- /* Run the git server in-process on the main backend unless it has been - split out into its own deployment (rrGitServer.separate.enabled). + split out into its own deployment (rr.gitServer.separate.enabled). 
*/}} - {{- if and .Values.rrGitServer.enabled (not (include "retool.rrGitServer.separateEnabled" .)) }} + {{- if and .Values.rr.gitServer.enabled (not (include "retool.gitServer.separateEnabled" .)) }} {{- $serviceType = append $serviceType "RR_GIT_SERVER" }} {{- end }} - name: SERVICE_TYPE @@ -188,7 +189,7 @@ spec: {{- end }} {{- end }} {{- end }} - {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} + {{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" . }} {{- end }} @@ -260,19 +261,19 @@ spec: {{- end }} {{- end }} {{- end }} - {{- if .Values.rrGitServer.enabled }} - {{- if include "retool.rrGitServer.separateEnabled" . }} + {{- if .Values.rr.gitServer.enabled }} + {{- if include "retool.gitServer.separateEnabled" . }} {{- /* git server runs in its own deployment; point the main backend's proxy (/api/ai/rr/git/v2/*) at the git-server service instead of localhost. */}} - name: RR_GIT_SERVER_HOST - value: {{ template "retool.rrGitServer.name" . }} + value: {{ template "retool.gitServer.name" . }} - name: RR_GIT_SERVER_PORT - value: {{ include "retool.rrGitServer.port" . | quote }} + value: {{ include "retool.gitServer.port" . | quote }} {{- else }} - {{- include "retool.rrGitServer.commonEnv" . | nindent 10 }} + {{- include "retool.gitServer.commonEnv" . | nindent 10 }} {{- end }} {{- end }} {{- include "retool.env" .Values.env | nindent 10 }} diff --git a/charts/retool/templates/deployment_git_server.yaml b/charts/retool/templates/deployment_git_server.yaml index 4fbf3f8..af99a6f 100644 --- a/charts/retool/templates/deployment_git_server.yaml +++ b/charts/retool/templates/deployment_git_server.yaml @@ -1,12 +1,12 @@ -{{- if include "retool.rrGitServer.separateEnabled" . }} -{{- include "retool.rrGitServer.validateBlobStorage" . }} -{{- $gitServerPort := include "retool.rrGitServer.port" . 
}} -{{- $gitServerValues := .Values.rrGitServer.separate }} +{{- if include "retool.gitServer.separateEnabled" . }} +{{- include "retool.gitServer.validateBlobStorage" . }} +{{- $gitServerPort := include "retool.gitServer.port" . }} +{{- $gitServerValues := .Values.rr.gitServer.separate }} {{- $gitServerService := $gitServerValues.service | default dict }} apiVersion: v1 kind: Service metadata: - name: {{ template "retool.rrGitServer.name" . }} + name: {{ template "retool.gitServer.name" . }} labels: {{- include "retool.labels" . | nindent 4 }} {{- with $gitServerService.labels }} @@ -22,7 +22,7 @@ metadata: {{- end }} spec: selector: - retoolService: {{ template "retool.rrGitServer.name" . }} + retoolService: {{ template "retool.gitServer.name" . }} ports: - name: http-server protocol: TCP @@ -32,7 +32,7 @@ spec: apiVersion: apps/v1 kind: Deployment metadata: - name: {{ template "retool.rrGitServer.name" . }} + name: {{ template "retool.gitServer.name" . }} labels: {{- include "retool.labels" . | nindent 4 }} {{- if .Values.deployment.annotations }} @@ -43,7 +43,7 @@ spec: replicas: {{ $gitServerValues.replicaCount | default 1 }} selector: matchLabels: - retoolService: {{ template "retool.rrGitServer.name" . }} + retoolService: {{ template "retool.gitServer.name" . }} revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} template: metadata: @@ -56,7 +56,7 @@ spec: {{- end }} labels: {{- include "retool.labels" . | nindent 8 }} - retoolService: {{ template "retool.rrGitServer.name" . }} + retoolService: {{ template "retool.gitServer.name" . }} telemetry.retool.com/service-name: rr-git-server {{- if .Values.podLabels }} {{ toYaml .Values.podLabels | indent 8 }} @@ -169,7 +169,7 @@ spec: {{- end }} {{- end }} {{- end }} - {{- include "retool.rrGitServer.commonEnv" . | nindent 10 }} + {{- include "retool.gitServer.commonEnv" . 
| nindent 10 }} {{- include "retool.env" .Values.env | nindent 10 }} {{- range .Values.environmentSecrets }} - name: {{ .name }} @@ -286,11 +286,11 @@ apiVersion: policy/v1beta1 {{- end }} kind: PodDisruptionBudget metadata: - name: {{ template "retool.rrGitServer.name" . }} + name: {{ template "retool.gitServer.name" . }} spec: {{- toYaml .Values.podDisruptionBudget | nindent 2 }} selector: matchLabels: - retoolService: {{ template "retool.rrGitServer.name" . }} + retoolService: {{ template "retool.gitServer.name" . }} {{- end }} {{- end }} diff --git a/charts/retool/templates/deployment_jobs.yaml b/charts/retool/templates/deployment_jobs.yaml index 92639b7..5306800 100644 --- a/charts/retool/templates/deployment_jobs.yaml +++ b/charts/retool/templates/deployment_jobs.yaml @@ -93,7 +93,7 @@ spec: {{- include "retool.telemetry.includeEnvVars" . | nindent 10 }} - {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} + {{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1" }} - name: RR_AGENT_PUBSUB_BACKEND value: "postgres" {{- end }} diff --git a/charts/retool/templates/deployment_js_executor.yaml b/charts/retool/templates/deployment_js_executor.yaml index 27c82cc..09a371e 100644 --- a/charts/retool/templates/deployment_js_executor.yaml +++ b/charts/retool/templates/deployment_js_executor.yaml @@ -1,4 +1,4 @@ -{{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} +{{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} apiVersion: apps/v1 kind: Deployment metadata: @@ -15,7 +15,7 @@ metadata: {{ toYaml .Values.deployment.annotations | indent 4 }} {{- end }} spec: - replicas: {{ .Values.jsExecutor.replicaCount }} + replicas: {{ .Values.rr.jsExecutor.replicaCount }} selector: matchLabels: {{- include "retool.jsExecutor.selectorLabels" . 
| nindent 6 }} @@ -28,8 +28,8 @@ spec: {{- if .Values.podAnnotations }} {{ toYaml .Values.podAnnotations | indent 8 }} {{- end }} -{{- if .Values.jsExecutor.annotations }} -{{ toYaml .Values.jsExecutor.annotations | indent 8 }} +{{- if .Values.rr.jsExecutor.annotations }} +{{ toYaml .Values.rr.jsExecutor.annotations | indent 8 }} {{- end }} labels: {{- include "retool.jsExecutor.selectorLabels" . | nindent 8 }} @@ -38,8 +38,8 @@ spec: {{- if .Values.podLabels }} {{ toYaml .Values.podLabels | indent 8 }} {{- end }} -{{- if .Values.jsExecutor.labels }} -{{ toYaml .Values.jsExecutor.labels | indent 8 }} +{{- if .Values.rr.jsExecutor.labels }} +{{ toYaml .Values.rr.jsExecutor.labels | indent 8 }} {{- end }} spec: serviceAccountName: {{ template "retool.serviceAccountName" . }} @@ -65,7 +65,7 @@ spec: - /bin/sh - -c - | - DEST="/host-seccomp/{{ .Values.jsExecutor.seccompLocalhostProfile }}" + DEST="/host-seccomp/{{ .Values.rr.jsExecutor.seccompLocalhostProfile }}" mkdir -p "$(dirname "$DEST")" cp /seccomp-profile/nsjail-seccomp.json "$DEST" echo "seccomp profile installed at $DEST" @@ -82,14 +82,14 @@ spec: {{- end }} containers: - name: js-executor - image: "{{ .Values.jsExecutor.image.repository }}:{{ include "retool.jsExecutor.image.tag" . }}" - imagePullPolicy: {{ .Values.jsExecutor.image.pullPolicy | default .Values.image.pullPolicy }} + image: "{{ .Values.rr.jsExecutor.image.repository }}:{{ include "retool.jsExecutor.image.tag" . }}" + imagePullPolicy: {{ .Values.rr.jsExecutor.image.pullPolicy | default .Values.image.pullPolicy }} securityContext: capabilities: add: ["NET_ADMIN"] seccompProfile: type: Localhost - localhostProfile: {{ .Values.jsExecutor.seccompLocalhostProfile }} + localhostProfile: {{ .Values.rr.jsExecutor.seccompLocalhostProfile }} env: - name: DEPLOYMENT_TEMPLATE_TYPE value: {{ template "retool.deploymentTemplateType" . }} @@ -98,15 +98,15 @@ spec: - name: NODE_ENV value: production {{- include "retool.telemetry.includeEnvVars" . 
| nindent 10 }} - {{- include "retool.env" .Values.jsExecutor.env | nindent 10 }} - {{- range .Values.jsExecutor.environmentSecrets }} + {{- include "retool.env" .Values.rr.jsExecutor.env | nindent 10 }} + {{- range .Values.rr.jsExecutor.environmentSecrets }} - name: {{ .name }} valueFrom: secretKeyRef: name: {{ .secretKeyRef.name }} key: {{ .secretKeyRef.key }} {{- end }} - {{- with .Values.jsExecutor.environmentVariables }} + {{- with .Values.rr.jsExecutor.environmentVariables }} {{ toYaml . | indent 10 }} {{- end }} ports: @@ -129,10 +129,10 @@ spec: successThreshold: {{ .Values.readinessProbe.successThreshold }} periodSeconds: {{ .Values.readinessProbe.periodSeconds }} resources: -{{ toYaml .Values.jsExecutor.resources | indent 10 }} +{{ toYaml .Values.rr.jsExecutor.resources | indent 10 }} volumeMounts: -{{- if .Values.jsExecutor.volumeMounts }} -{{ toYaml .Values.jsExecutor.volumeMounts | indent 10 }} +{{- if .Values.rr.jsExecutor.volumeMounts }} +{{ toYaml .Values.rr.jsExecutor.volumeMounts | indent 10 }} {{- end }} {{- if .Values.extraVolumeMounts }} {{ toYaml .Values.extraVolumeMounts | indent 10 }} @@ -153,8 +153,8 @@ spec: hostPath: path: /var/lib/kubelet/seccomp type: DirectoryOrCreate -{{- if .Values.jsExecutor.volumes }} -{{ toYaml .Values.jsExecutor.volumes | indent 8 }} +{{- if .Values.rr.jsExecutor.volumes }} +{{ toYaml .Values.rr.jsExecutor.volumes | indent 8 }} {{- end }} {{- range .Values.extraConfigMapMounts }} - name: {{ .name }} @@ -168,9 +168,9 @@ spec: imagePullSecrets: {{ toYaml .Values.image.pullSecrets | indent 8 }} {{- end }} -{{- if .Values.jsExecutor.affinity }} +{{- if .Values.rr.jsExecutor.affinity }} affinity: -{{ toYaml .Values.jsExecutor.affinity | indent 8 }} +{{ toYaml .Values.rr.jsExecutor.affinity | indent 8 }} {{- end }} {{- if .Values.nodeSelector }} nodeSelector: diff --git a/charts/retool/templates/deployment_mcp.yaml b/charts/retool/templates/deployment_mcp.yaml index 6577949..1c37a6e 100644 --- 
a/charts/retool/templates/deployment_mcp.yaml +++ b/charts/retool/templates/deployment_mcp.yaml @@ -120,8 +120,8 @@ spec: git server is split into its own deployment, auto-point MCP at it. */}} {{- $retoolGitServerUrl := $mcpConfig.retoolGitServerUrl }} - {{- if and (not $retoolGitServerUrl) (include "retool.rrGitServer.separateEnabled" .) }} - {{- $retoolGitServerUrl = include "retool.rrGitServer.url" . }} + {{- if and (not $retoolGitServerUrl) (include "retool.gitServer.separateEnabled" .) }} + {{- $retoolGitServerUrl = include "retool.gitServer.url" . }} {{- end }} {{- if $retoolGitServerUrl }} - name: RETOOL_GIT_SERVER_URL diff --git a/charts/retool/templates/deployment_workflows.yaml b/charts/retool/templates/deployment_workflows.yaml index a453f5a..b66f7c3 100644 --- a/charts/retool/templates/deployment_workflows.yaml +++ b/charts/retool/templates/deployment_workflows.yaml @@ -153,7 +153,7 @@ spec: {{- end }} {{- end }} {{- end }} - {{- if eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} + {{- if eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "jsExecutor")) "1" }} - name: JS_EXECUTOR_INGRESS_DOMAIN value: http://{{ template "retool.jsExecutor.name" . }} {{- end }} diff --git a/charts/retool/templates/httproute.yaml b/charts/retool/templates/httproute.yaml index d084c70..dc70064 100644 --- a/charts/retool/templates/httproute.yaml +++ b/charts/retool/templates/httproute.yaml @@ -56,7 +56,7 @@ spec: - name: {{ $fullName }} port: {{ $svcPort }} {{- end }} -{{- if and (eq (include "retool.r2.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.agentSandbox.frontendWsProxyDomain }} +{{- if and (eq (include "retool.rr.componentEnabled" (dict "root" $ "component" "agentSandbox")) "1") .Values.rr.agentSandbox.frontendWsProxyDomain }} --- apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute @@ -77,7 +77,7 @@ spec: {{- toYaml . 
| nindent 4 }} {{- end }} hostnames: - - {{ .Values.agentSandbox.frontendWsProxyDomain | trimPrefix "http://" | trimPrefix "https://" | quote }} + - {{ .Values.rr.agentSandbox.frontendWsProxyDomain | trimPrefix "http://" | trimPrefix "https://" | quote }} rules: - matches: - path: @@ -85,6 +85,6 @@ spec: value: / backendRefs: - name: {{ include "retool.agentSandbox.proxy.name" . }} - port: {{ .Values.agentSandbox.proxy.port }} + port: {{ .Values.rr.agentSandbox.proxy.port }} {{- end }} {{- end }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 533441d..9f803b4 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -565,7 +565,7 @@ multiplayer: mcp: # Run Retool's MCP server as a separate deployment. Independent of the - # .Values.r2.enabled master switch (the MCP server needs its own OAuth + # .Values.rr.enabled master switch (the MCP server needs its own OAuth # introspection config, so it is opt-in): set true to enable. enabled: false @@ -715,89 +715,6 @@ mcp: annotations: {} labels: {} -rrGitServer: - # Runs the React Retool Git Server in-process on the main backend pod - # (SERVICE_TYPE=...,RR_GIT_SERVER). The main backend internally proxies - # /api/ai/rr/git/v2/* to localhost:RR_GIT_SERVER_PORT, so no extra ingress - # routing is required. Required for the r2 / React Retool app pipeline. - # - # When enabled, exactly one of blobStorage.s3, blobStorage.gcs, or - # blobStorage.azure must be configured below — git_server stores all - # objects/packs in blob storage. - enabled: false - - # Optional: number of loose objects before git_server triggers a repack. - # Backend default is 100; unset to inherit it. - repackThreshold: ~ - - # Escape hatch for the blob-storage validation below. The chart can only - # inspect blobStorage, env, environmentVariables, and environmentSecrets at - # template time; it cannot see env vars injected via envFrom (Secret/ConfigMap - # splat). 
Set this to true to bypass the check when RR_BLOB_STORAGE_PROVIDER / - # RR_DEFAULT_* are provided that way. - skipBlobStorageValidation: false - - # Optionally split the git server out of the main backend into its own - # deployment + service (mirrors how the workload is split in Retool Cloud). - # Requires rrGitServer.enabled: true. When enabled: - # - a dedicated <fullname>-git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER - # - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git - # traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT - # - the MCP server (if enabled) is auto-pointed at the same service unless - # mcp.config.retoolGitServerUrl is set explicitly - # The blobStorage config below is rendered onto the git-server pod instead of - # the main backend in this mode. - separate: - enabled: false - replicaCount: 1 - # Port the git server listens on (RR_GIT_SERVER_PORT) and that its service exposes. - port: 3010 - # Pod resource requests/limits. Falls back to top-level `resources` if unset. - resources: {} - # Falls back to top-level `affinity` if unset. - affinity: {} - # Annotations/labels applied to the git-server pod template. - annotations: {} - labels: {} - # Annotations/labels applied to the git-server Service (kept separate from - # the pod ones above). - service: - annotations: {} - labels: {} - -# Shared blob-storage config used by git_server (and other features that -# need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. -# Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT_<PROVIDER>_* env vars on -# the backend deployment. -# -# This block can be omitted entirely if RR_BLOB_STORAGE_PROVIDER and the -# RR_DEFAULT_*_* env vars are provided directly via environmentVariables / -# environmentSecrets above — the chart detects that and skips this guard. -blobStorage: {} - # s3: - # bucket: my-rr-bucket - # region: us-east-1 - # endpoint: "" # optional, for S3-compatible (MinIO, R2, etc.) - # accessKeyId: AKIA...
- # # Provide secretAccessKey OR the secretName/secretKey pair below. - # secretAccessKey: "" - # secretAccessKeySecretName: "" - # secretAccessKeySecretKey: secret-access-key - # - # gcs: - # bucket: my-rr-bucket - # # Provide credentials (JSON string) OR the secretName/secretKey pair below. - # credentials: "" - # credentialsSecretName: "" - # credentialsSecretKey: credentials.json - # - # azure: - # container: my-rr-container - # # Provide connectionString OR the secretName/secretKey pair below. - # connectionString: "" - # connectionStringSecretName: "" - # connectionStringSecretKey: connection-string - codeExecutor: # as of Chart version 6.7.0, code-executor image version must align with the top-level `image` parameters # explicitly set other fields as needed @@ -848,59 +765,439 @@ codeExecutor: useSeccompProfile: false seccompLocalhostProfile: profiles/nsjail-seccomp.json -# === R2 (Retool agent runtime) ============================================= -# Master switch for the whole R2 stack: the r2Agent worker, jsExecutor, -# agentSandbox, and mcp server. Set `r2.enabled: true` to turn them all on with -# one line. Each component's own `enabled` (left null by default) inherits this -# switch; set a component's `enabled` to true/false to override the master for -# that component only. Shared R2 configuration can be added under this block -# later. -r2: +# === RR (Retool agent runtime) ============================================= +# Master switch for the whole RR stack. Set `rr.enabled: true` to turn on the +# components nested below — jsExecutor, agent, and agentSandbox — with a +# single line. Each component's own `enabled` (left null by default) inherits +# this switch; set a component's `enabled` to true/false to override the master +# for that component only. The gitServer and blobStorage blocks below +# provide the React Retool git server and the object storage the stack needs. 
+# (The MCP server is configured separately at the top level: it needs its own +# OAuth opt-in and is intentionally independent of this master switch.) +rr: enabled: false -# JS Executor -jsExecutor: - # Inherits .Values.r2.enabled when left unset (null); set true/false to override. - enabled: null + # JS Executor + jsExecutor: + # Inherits .Values.rr.enabled when left unset (null); set true/false to override. + enabled: null - image: - repository: tryretool/js-executor-service - # defaults to top level image.tag - tag: null - pullPolicy: IfNotPresent + image: + repository: tryretool/js-executor-service + # defaults to top level image.tag + tag: null + pullPolicy: IfNotPresent - replicaCount: 1 + replicaCount: 1 - seccompLocalhostProfile: profiles/nsjail-seccomp.json + seccompLocalhostProfile: profiles/nsjail-seccomp.json - # JS-executor-specific environment; not inherited from the top-level - # .Values.env / .Values.environmentSecrets / .Values.environmentVariables. - env: {} - environmentSecrets: [] - environmentVariables: [] + # JS-executor-specific environment; not inherited from the top-level + # .Values.env / .Values.environmentSecrets / .Values.environmentVariables. + env: {} + environmentSecrets: [] + environmentVariables: [] - # Annotations for JS executor pods - annotations: {} + # Annotations for JS executor pods + annotations: {} - # Labels for JS executor pods - labels: {} + # Labels for JS executor pods + labels: {} - volumes: {} - volumeMounts: {} + volumes: {} + volumeMounts: {} - # Config affinity and anti-affinity rules for the JS executor pods - affinity: {} + # Config affinity and anti-affinity rules for the JS executor pods + affinity: {} - # Resources for the JS executor. Memory request and limit are kept equal: - # JSE reads its memory limit and rejects requests at 80% of it, so the - # request must reserve the full amount to avoid premature rejections. 
- resources: - limits: - cpu: 6000m - memory: 6Gi - requests: - cpu: 6000m - memory: 6Gi + # Resources for the JS executor. Memory request and limit are kept equal: + # JSE reads its memory limit and rejects requests at 80% of it, so the + # request must reserve the full amount to avoid premature rejections. + resources: + limits: + cpu: 6000m + memory: 6Gi + requests: + cpu: 6000m + memory: 6Gi + + # RR Agent: server-side agent loop worker (independent from the top-level `agents` block). + agent: + # Inherits .Values.rr.enabled when left unset (null); set true/false to override. + enabled: null + + # Labels for RR agent worker pods + labels: {} + + # RR agent configuration + config: {} + + # Annotations for RR agent worker pods + annotations: {} + + # RR agent worker configuration + worker: + replicaCount: 1 + + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + + # Agent Sandbox Service: sandboxed code execution for AI agents. + # Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), + # and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. + agentSandbox: + # Inherits .Values.rr.enabled when left unset (null); set true/false to override. + enabled: null + + image: + repository: tryretool/agent-sandbox-service + # defaults to top level image.tag + tag: null + pullPolicy: IfNotPresent + + # Lightweight init image used by the prepuller and seccomp DaemonSets. + # Pinning by digest is recommended for production. + initImage: + repository: busybox + tag: '1.37.0' + # Manifest list digest — set to '' in test environments where images are + # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images).
+ digest: '' + + # Annotations for agent sandbox pods + annotations: {} + + # Labels for agent sandbox pods + labels: {} + + # === Secrets ============================================================ + # Provide each secret as a plaintext value below, OR set externalSecret.name + # to a pre-existing Secret with keys jwt-public-key, jwt-private-key, + # encryption-key, api-secret, postgres-url. A plaintext value always wins over + # the external secret for that key. + externalSecret: + name: '' # optional: existing Secret holding all keys below + + jwtPublicKey: '' # REQUIRED (ES256) unless provided via externalSecret + jwtPrivateKey: '' # REQUIRED (ES256) unless provided via externalSecret + encryptionKey: '' # optional: hex 256-bit; must match backend AGENT_SANDBOX_ENCRYPTION_KEY + apiSecret: '' # optional: admin/test endpoints + + # === Postgres state backend ============================================= + # By DEFAULT (all options below left blank) the agent sandbox reuses the + # backend's Postgres connection from config.postgresql / the postgresql + # subchart -- same instance and database, separate schema (see schema below). + # So enabling it on an existing deployment needs nothing here. (Exception: + # if the backend's DB password is supplied via external secrets / envFrom, it + # can't be inherited by a separate pod -- set an option below in that case.) + # To point the sandbox at a different database, set exactly ONE option: + postgres: + # -- Option 1: plaintext DSN -- + url: '' + + # -- Option 2: assemble from fields -- + # The password is passed via PGPASSWORD (never embedded in the URL), so any + # characters are safe and a password-only secret can be reused as-is. + # Set either password or passwordSecretName. + # user/database are embedded in the DSN verbatim (user may contain '@', e.g. + # Azure user@servername); for values with : / ? # use Option 1 or 3. 
+ host: '' + port: 5432 + database: '' + user: '' + password: '' + passwordSecretName: '' + passwordSecretKey: 'password' + + # -- Option 3: existing Secret holding the full DSN -- + urlSecretName: '' + urlSecretKey: 'postgres-url' + + # -- Option 4: reuse externalSecret.name (its postgres-url key) -- + # Selected by setting rr.agentSandbox.externalSecret.name (in the Secrets + # section above), not by anything here. Used when options 1-3 are blank. + # + # If options 1-4 are ALL unset, the default (inherit config.postgresql) + # applies -- see the note at the top of this block. + + # -- Optional tuning (defaults shown) -- + schema: 'agent_executor' + poolMax: 10 + sweeperIntervalMs: 60000 + + # Sandbox network access via pasta userspace networking. + # When enabled, sandboxes get isolated outbound access with L7 filtering. + sandboxNetwork: + enabled: true + # Request smarter-devices/net_tun via resources.limits on sandbox pods. + # When true, the kubelet grants /dev/net/tun device cgroup access without + # privileged mode. Requires smarter-device-manager to be running on each + # node (see deployDaemonSet below). + devicePlugin: true + # Deploy the smarter-device-manager DaemonSet from this Helm release. + # Set to false when another release (or external process) already manages + # the DaemonSet — only one instance should run per node. + deployDaemonSet: true + # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster + # agent-sandbox-proxy service URL when empty. + httpProxy: '' + + # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox + # pods can request it via resources.limits. + devicePlugin: + image: + repository: ghcr.io/smarter-project/smarter-device-manager + tag: v1.20.12 + # Number of /dev/net/tun device slots to register. + # Set high enough to accommodate maxTotalJobs + prewarm pool. + maxDevices: 130 + + # When possible, we want the devicePlugin daemonset to preempt normal pods. 
+ # Note: in some cases this is inconvenient or unsupported, i.e. in GKE which + # requires a custom ResourceQuota to use the `system-node-critical` + # PriorityClass in user namespaces. In those cases, set this to `null`. + priorityClassName: system-node-critical + + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. + # The seccomp node-installer DaemonSet copies the profile to this path + # on every node automatically. + seccompProfile: retool/gvisor-seccomp.json + + # S3-compatible snapshot storage. + # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. + snapshotStorage: + s3Bucket: '' + s3Endpoint: '' + s3Region: 'us-east-1' + # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. + # If empty, falls back to the main agent sandbox secret. + credentialsSecretName: '' + + # Sandbox (Job) configuration + sandbox: + port: 3017 + resources: + requests: + cpu: 1 + memory: 2Gi + limits: + cpu: '2' + memory: 4Gi + # Idle timeout (ms) before an unassigned sandbox self-terminates. + sandboxIdleTimeoutMs: 600000 + # Hard ceiling (ms) on total sandbox lifetime, regardless of activity. When + # reached, the sandbox is destroyed (deferred until the current agent loop + # ends). Defaults to 2.5 hours. + sandboxGlobalLifetimeMs: 9000000 + tmpDirSizeLimit: 20Gi + # Separate limit for the rootfs-appjob volume — the sandbox root filesystem + # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi + # allocated for /tmp. + rootfsSizeLimit: 2Gi + # Additional environment variables for sandbox containers. 
+ extraEnv: [] + + # Controller: tracks capacity, assigns sandbox pods, manages scaling + controller: + replicaCount: 1 + port: 3018 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + scaling: + prewarmPoolSize: 5 + maxTotalJobs: 50 + maxConcurrentCreates: 3 + jobRetentionSeconds: 300 + assignedSandboxTtlSeconds: 3600 + reconcileIntervalMs: 5000 + leaderTtlMs: 10000 + leaderRenewMs: 3000 + perUserSandboxLimit: 5 + + # Proxy: HTTP proxy for sandbox egress with credential injection. + # The proxy must be reachable by frontend browsers for WebSocket connections. + proxy: + replicaCount: 1 + port: 3019 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + allowedDomains: '' + # URL the proxy uses to reach the Retool backend for token exchange. + # Defaults to http://:3000 (same-cluster backend service). + backendUrl: '' + backendDomainSuffixes: '' + sandboxProxyTimeoutMs: '180000' # 3 minutes + service: + # Set to LoadBalancer or NodePort to expose the proxy externally. + type: ClusterIP + annotations: {} + # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. + # This is separate from the main Retool ingress since the proxy typically runs on its own domain. + ingress: + enabled: false + # ingressClassName: + annotations: {} + # kubernetes.io/ingress.class: nginx + # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: '' + # e.g. sandbox.yourdomain.com + tls: [] + # - secretName: sandbox-tls + # hosts: + # - sandbox.yourdomain.com + + # Backend integration: these tell the Retool backend how to reach agent executor. + # controllerUrl and proxyUrl default to internal service URLs when empty. + controllerUrl: '' + proxyUrl: '' + # Public URL for frontend browsers to reach the proxy via WebSocket. 
+ # Leave EMPTY for self-hosted: the backend then serves the sandbox same-origin + # as the editor (your Retool base URL) and the front server reverse-proxies the + # /sandbox/* WS+Vite paths to the in-cluster proxy Service — so no dedicated + # proxy domain or ingress is required, and your catch-all ingress is untouched. + # Only set this (e.g. https://sandbox.yourdomain.com) if you deliberately want + # the proxy on a separate domain, in which case also enable proxy.ingress above. + frontendWsProxyDomain: '' + # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. + proxyDomain: '' + + # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. + # Strongly recommended for production to isolate sandbox egress. + networkPolicy: + enabled: false + # CIDR ranges to block in proxy egress rules. Must stay in sync with + # DEFAULT_BLOCKED_CIDRS in the agent-executor source. + blockedRanges: + - 169.254.0.0/16 # link-local / cloud metadata + - 10.0.0.0/8 # private (RFC 1918) + - 172.16.0.0/12 # private (RFC 1918) + - 192.168.0.0/16 # private (RFC 1918) + - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) + - 127.0.0.0/8 # loopback + - 0.0.0.0/8 # "this network" (RFC 791) + blockedRanges6: + - fc00::/7 # IPv6 unique local addresses + - fe80::/10 # IPv6 link-local + - '::1/128' # IPv6 loopback + # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). + # Set to empty to allow DNS to any destination (not recommended). + dnsSelector: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + extraEgress: [] + # CIDRs allowed in proxy egress even if they fall within blockedRanges + # (e.g. private backend endpoint). + backendAllowlist: [] + # Override sandbox ingress source selector (defaults to retool backend pods). 
+ ingressFrom: [] + + # Node placement overrides (falls back to global nodeSelector/tolerations if empty) + nodeSelector: {} + tolerations: [] + affinity: {} + + gitServer: + # Runs the React Retool Git Server in-process on the main backend pod + # (SERVICE_TYPE=...,RR_GIT_SERVER). The main backend internally proxies + # /api/ai/rr/git/v2/* to localhost:RR_GIT_SERVER_PORT, so no extra ingress + # routing is required. Required for the rr / React Retool app pipeline. + # + # When enabled, exactly one of blobStorage.s3, blobStorage.gcs, or + # blobStorage.azure must be configured below — git_server stores all + # objects/packs in blob storage. + enabled: false + + # Optional: number of loose objects before git_server triggers a repack. + # Backend default is 100; unset to inherit it. + repackThreshold: ~ + + # Escape hatch for the blob-storage validation below. The chart can only + # inspect blobStorage, env, environmentVariables, and environmentSecrets at + # template time; it cannot see env vars injected via envFrom (Secret/ConfigMap + # splat). Set this to true to bypass the check when RR_BLOB_STORAGE_PROVIDER / + # RR_DEFAULT_* are provided that way. + skipBlobStorageValidation: false + + # Optionally split the git server out of the main backend into its own + # deployment + service (mirrors how the workload is split in Retool Cloud). + # Requires rr.gitServer.enabled: true. When enabled: + # - a dedicated -git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER + # - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git + # traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT + # - the MCP server (if enabled) is auto-pointed at the same service unless + # mcp.config.retoolGitServerUrl is set explicitly + # The blobStorage config below is rendered onto the git-server pod instead of + # the main backend in this mode. 
+ separate: + enabled: false + replicaCount: 1 + # Port the git server listens on (RR_GIT_SERVER_PORT) and that its service exposes. + port: 3010 + # Pod resource requests/limits. Falls back to top-level `resources` if unset. + resources: {} + # Falls back to top-level `affinity` if unset. + affinity: {} + # Annotations/labels applied to the git-server pod template. + annotations: {} + labels: {} + # Annotations/labels applied to the git-server Service (kept separate from + # the pod ones above). + service: + annotations: {} + labels: {} + + # Shared blob-storage config used by git_server (and other features that + # need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. + # Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT__* env vars on + # the backend deployment. + # + # This block can be omitted entirely if RR_BLOB_STORAGE_PROVIDER and the + # RR_DEFAULT_*_* env vars are provided directly via environmentVariables / + # environmentSecrets above — the chart detects that and skips this guard. + blobStorage: {} + # s3: + # bucket: my-rr-bucket + # region: us-east-1 + # endpoint: "" # optional, for S3-compatible (MinIO, R2, etc.) + # accessKeyId: AKIA... + # # Provide secretAccessKey OR the secretName/secretKey pair below. + # secretAccessKey: "" + # secretAccessKeySecretName: "" + # secretAccessKeySecretKey: secret-access-key + # + # gcs: + # bucket: my-rr-bucket + # # Provide credentials (JSON string) OR the secretName/secretKey pair below. + # credentials: "" + # credentialsSecretName: "" + # credentialsSecretKey: credentials.json + # + # azure: + # container: my-rr-container + # # Provide connectionString OR the secretName/secretKey pair below. + # connectionString: "" + # connectionStringSecretName: "" + # connectionStringSecretKey: connection-string agents: # Enable AI Agents @@ -939,301 +1236,6 @@ agents: # Annotations for agent worker pods annotations: {} -# R2 Agent: server-side agent loop worker (independent from agents above). 
-r2Agent: - # Inherits .Values.r2.enabled when left unset (null); set true/false to override. - enabled: null - - # Labels for R2 agent worker pods - labels: {} - - # R2 agent configuration - config: {} - - # Annotations for R2 agent worker pods - annotations: {} - - # R2 agent worker configuration - worker: - replicaCount: 1 - - resources: - limits: - cpu: 2000m - memory: 4096Mi - requests: - cpu: 1000m - memory: 2048Mi - -# Agent Sandbox Service: sandboxed code execution for AI agents. -# Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), -# and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. -agentSandbox: - # Inherits .Values.r2.enabled when left unset (null); set true/false to override. - enabled: null - - image: - repository: tryretool/agent-sandbox-service - # defaults to top level image.tag - tag: null - pullPolicy: IfNotPresent - - # Lightweight init image used by the prepuller and seccomp DaemonSets. - # Pinning by digest is recommended for production. - initImage: - repository: busybox - tag: '1.37.0' - # Manifest list digest — set to '' in test environments where images are - # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). - digest: '' - - # Annotations for agent sandbox pods - annotations: {} - - # Labels for agent sandbox pods - labels: {} - - # === Secrets ============================================================ - # Provide each secret as a plaintext value below, OR set externalSecret.name - # to a pre-existing Secret with keys jwt-public-key, jwt-private-key, - # encryption-key, api-secret, postgres-url. A plaintext value always wins over - # the external secret for that key. 
- externalSecret: - name: '' # optional: existing Secret holding all keys below - - jwtPublicKey: '' # REQUIRED (ES256) unless provided via externalSecret - jwtPrivateKey: '' # REQUIRED (ES256) unless provided via externalSecret - encryptionKey: '' # optional: hex 256-bit; must match backend AGENT_SANDBOX_ENCRYPTION_KEY - apiSecret: '' # optional: admin/test endpoints - - # === Postgres state backend ============================================= - # By DEFAULT (all options below left blank) the agent sandbox reuses the - # backend's Postgres connection from config.postgresql / the postgresql - # subchart -- same instance and database, separate schema (see schema below). - # So enabling it on an existing deployment needs nothing here. (Exception: - # if the backend's DB password is supplied via external secrets / envFrom, it - # can't be inherited by a separate pod -- set an option below in that case.) - # To point the sandbox at a different database, set exactly ONE option: - postgres: - # -- Option 1: plaintext DSN -- - url: '' - - # -- Option 2: assemble from fields -- - # The password is passed via PGPASSWORD (never embedded in the URL), so any - # characters are safe and a password-only secret can be reused as-is. - # Set either password or passwordSecretName. - # user/database are embedded in the DSN verbatim (user may contain '@', e.g. - # Azure user@servername); for values with : / ? # use Option 1 or 3. - host: '' - port: 5432 - database: '' - user: '' - password: '' - passwordSecretName: '' - passwordSecretKey: 'password' - - # -- Option 3: existing Secret holding the full DSN -- - urlSecretName: '' - urlSecretKey: 'postgres-url' - - # -- Option 4: reuse externalSecret.name (its postgres-url key) -- - # Selected by setting agentSandbox.externalSecret.name (in the Secrets - # section above), not by anything here. Used when options 1-3 are blank. 
- # - # If options 1-4 are ALL unset, the default (inherit config.postgresql) - # applies -- see the note at the top of this block. - - # -- Optional tuning (defaults shown) -- - schema: 'agent_executor' - poolMax: 10 - sweeperIntervalMs: 60000 - - # Sandbox network access via pasta userspace networking. - # When enabled, sandboxes get isolated outbound access with L7 filtering. - sandboxNetwork: - enabled: true - # Request smarter-devices/net_tun via resources.limits on sandbox pods. - # When true, the kubelet grants /dev/net/tun device cgroup access without - # privileged mode. Requires smarter-device-manager to be running on each - # node (see deployDaemonSet below). - devicePlugin: true - # Deploy the smarter-device-manager DaemonSet from this Helm release. - # Set to false when another release (or external process) already manages - # the DaemonSet — only one instance should run per node. - deployDaemonSet: true - # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster - # agent-sandbox-proxy service URL when empty. - httpProxy: '' - - # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox - # pods can request it via resources.limits. - devicePlugin: - image: - repository: ghcr.io/smarter-project/smarter-device-manager - tag: v1.20.12 - # Number of /dev/net/tun device slots to register. - # Set high enough to accommodate maxTotalJobs + prewarm pool. - maxDevices: 130 - - # When possible, we want the devicePlugin daemonset to preempt normal pods. - # Note: in some cases this is inconvenient or unsupported, i.e. in GKE which - # requires a custom ResourceQuota to use the `system-node-critical` - # PriorityClass in user namespaces. In those cases, set this to `null`. - priorityClassName: system-node-critical - - # Seccomp profile path relative to /var/lib/kubelet/seccomp/. - # The seccomp node-installer DaemonSet copies the profile to this path - # on every node automatically. 
- seccompProfile: retool/gvisor-seccomp.json - - # S3-compatible snapshot storage. - # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. - snapshotStorage: - s3Bucket: '' - s3Endpoint: '' - s3Region: 'us-east-1' - # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. - # If empty, falls back to the main agent sandbox secret. - credentialsSecretName: '' - - # Sandbox (Job) configuration - sandbox: - port: 3017 - resources: - requests: - cpu: 1 - memory: 2Gi - limits: - cpu: '2' - memory: 4Gi - # Idle timeout (ms) before an unassigned sandbox self-terminates. - sandboxIdleTimeoutMs: 600000 - # Hard ceiling (ms) on total sandbox lifetime, regardless of activity. When - # reached, the sandbox is destroyed (deferred until the current agent loop - # ends). Defaults to 2.5 hours. - sandboxGlobalLifetimeMs: 9000000 - tmpDirSizeLimit: 20Gi - # Separate limit for the rootfs-appjob volume — the sandbox root filesystem - # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi - # allocated for /tmp. - rootfsSizeLimit: 2Gi - # Additional environment variables for sandbox containers. - extraEnv: [] - - # Controller: tracks capacity, assigns sandbox pods, manages scaling - controller: - replicaCount: 1 - port: 3018 - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - scaling: - prewarmPoolSize: 5 - maxTotalJobs: 50 - maxConcurrentCreates: 3 - jobRetentionSeconds: 300 - assignedSandboxTtlSeconds: 3600 - reconcileIntervalMs: 5000 - leaderTtlMs: 10000 - leaderRenewMs: 3000 - perUserSandboxLimit: 5 - - # Proxy: HTTP proxy for sandbox egress with credential injection. - # The proxy must be reachable by frontend browsers for WebSocket connections. - proxy: - replicaCount: 1 - port: 3019 - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - allowedDomains: '' - # URL the proxy uses to reach the Retool backend for token exchange. 
- # Defaults to http://:3000 (same-cluster backend service). - backendUrl: '' - backendDomainSuffixes: '' - sandboxProxyTimeoutMs: '180000' # 3 minutes - service: - # Set to LoadBalancer or NodePort to expose the proxy externally. - type: ClusterIP - annotations: {} - # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. - # This is separate from the main Retool ingress since the proxy typically runs on its own domain. - ingress: - enabled: false - # ingressClassName: - annotations: {} - # kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" - # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - host: '' - # e.g. sandbox.yourdomain.com - tls: [] - # - secretName: sandbox-tls - # hosts: - # - sandbox.yourdomain.com - - # Backend integration: these tell the Retool backend how to reach agent executor. - # controllerUrl and proxyUrl default to internal service URLs when empty. - controllerUrl: '' - proxyUrl: '' - # Public URL for frontend browsers to reach the proxy via WebSocket. - # Leave EMPTY for self-hosted: the backend then serves the sandbox same-origin - # as the editor (your Retool base URL) and the front server reverse-proxies the - # /sandbox/* WS+Vite paths to the in-cluster proxy Service — so no dedicated - # proxy domain or ingress is required, and your catch-all ingress is untouched. - # Only set this (e.g. https://sandbox.yourdomain.com) if you deliberately want - # the proxy on a separate domain, in which case also enable proxy.ingress above. - frontendWsProxyDomain: '' - # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. - proxyDomain: '' - - # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. - # Strongly recommended for production to isolate sandbox egress. - networkPolicy: - enabled: false - # CIDR ranges to block in proxy egress rules. Must stay in sync with - # DEFAULT_BLOCKED_CIDRS in the agent-executor source. 
- blockedRanges: - - 169.254.0.0/16 # link-local / cloud metadata - - 10.0.0.0/8 # private (RFC 1918) - - 172.16.0.0/12 # private (RFC 1918) - - 192.168.0.0/16 # private (RFC 1918) - - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) - - 127.0.0.0/8 # loopback - - 0.0.0.0/8 # "this network" (RFC 791) - blockedRanges6: - - fc00::/7 # IPv6 unique local addresses - - fe80::/10 # IPv6 link-local - - '::1/128' # IPv6 loopback - # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). - # Set to empty to allow DNS to any destination (not recommended). - dnsSelector: - namespaceSelector: - matchLabels: - kubernetes.io/metadata.name: kube-system - podSelector: - matchLabels: - k8s-app: kube-dns - extraEgress: [] - # CIDRs allowed in proxy egress even if they fall within blockedRanges - # (e.g. private backend endpoint). - backendAllowlist: [] - # Override sandbox ingress source selector (defaults to retool backend pods). - ingressFrom: [] - - # Node placement overrides (falls back to global nodeSelector/tolerations if empty) - nodeSelector: {} - tolerations: [] - affinity: {} - # SHARED TEMPORAL CONFIGURATION # This configuration is shared between all workers. # In order to use workers, temporal must be configured. diff --git a/values.yaml b/values.yaml index 533441d..9f803b4 100644 --- a/values.yaml +++ b/values.yaml @@ -565,7 +565,7 @@ multiplayer: mcp: # Run Retool's MCP server as a separate deployment. Independent of the - # .Values.r2.enabled master switch (the MCP server needs its own OAuth + # .Values.rr.enabled master switch (the MCP server needs its own OAuth # introspection config, so it is opt-in): set true to enable. enabled: false @@ -715,89 +715,6 @@ mcp: annotations: {} labels: {} -rrGitServer: - # Runs the React Retool Git Server in-process on the main backend pod - # (SERVICE_TYPE=...,RR_GIT_SERVER). 
The main backend internally proxies - # /api/ai/rr/git/v2/* to localhost:RR_GIT_SERVER_PORT, so no extra ingress - # routing is required. Required for the r2 / React Retool app pipeline. - # - # When enabled, exactly one of blobStorage.s3, blobStorage.gcs, or - # blobStorage.azure must be configured below — git_server stores all - # objects/packs in blob storage. - enabled: false - - # Optional: number of loose objects before git_server triggers a repack. - # Backend default is 100; unset to inherit it. - repackThreshold: ~ - - # Escape hatch for the blob-storage validation below. The chart can only - # inspect blobStorage, env, environmentVariables, and environmentSecrets at - # template time; it cannot see env vars injected via envFrom (Secret/ConfigMap - # splat). Set this to true to bypass the check when RR_BLOB_STORAGE_PROVIDER / - # RR_DEFAULT_* are provided that way. - skipBlobStorageValidation: false - - # Optionally split the git server out of the main backend into its own - # deployment + service (mirrors how the workload is split in Retool Cloud). - # Requires rrGitServer.enabled: true. When enabled: - # - a dedicated -git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER - # - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git - # traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT - # - the MCP server (if enabled) is auto-pointed at the same service unless - # mcp.config.retoolGitServerUrl is set explicitly - # The blobStorage config below is rendered onto the git-server pod instead of - # the main backend in this mode. - separate: - enabled: false - replicaCount: 1 - # Port the git server listens on (RR_GIT_SERVER_PORT) and that its service exposes. - port: 3010 - # Pod resource requests/limits. Falls back to top-level `resources` if unset. - resources: {} - # Falls back to top-level `affinity` if unset. - affinity: {} - # Annotations/labels applied to the git-server pod template. 
- annotations: {} - labels: {} - # Annotations/labels applied to the git-server Service (kept separate from - # the pod ones above). - service: - annotations: {} - labels: {} - -# Shared blob-storage config used by git_server (and other features that -# need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. -# Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT__* env vars on -# the backend deployment. -# -# This block can be omitted entirely if RR_BLOB_STORAGE_PROVIDER and the -# RR_DEFAULT_*_* env vars are provided directly via environmentVariables / -# environmentSecrets above — the chart detects that and skips this guard. -blobStorage: {} - # s3: - # bucket: my-rr-bucket - # region: us-east-1 - # endpoint: "" # optional, for S3-compatible (MinIO, R2, etc.) - # accessKeyId: AKIA... - # # Provide secretAccessKey OR the secretName/secretKey pair below. - # secretAccessKey: "" - # secretAccessKeySecretName: "" - # secretAccessKeySecretKey: secret-access-key - # - # gcs: - # bucket: my-rr-bucket - # # Provide credentials (JSON string) OR the secretName/secretKey pair below. - # credentials: "" - # credentialsSecretName: "" - # credentialsSecretKey: credentials.json - # - # azure: - # container: my-rr-container - # # Provide connectionString OR the secretName/secretKey pair below. - # connectionString: "" - # connectionStringSecretName: "" - # connectionStringSecretKey: connection-string - codeExecutor: # as of Chart version 6.7.0, code-executor image version must align with the top-level `image` parameters # explicitly set other fields as needed @@ -848,59 +765,439 @@ codeExecutor: useSeccompProfile: false seccompLocalhostProfile: profiles/nsjail-seccomp.json -# === R2 (Retool agent runtime) ============================================= -# Master switch for the whole R2 stack: the r2Agent worker, jsExecutor, -# agentSandbox, and mcp server. Set `r2.enabled: true` to turn them all on with -# one line. 
Each component's own `enabled` (left null by default) inherits this -# switch; set a component's `enabled` to true/false to override the master for -# that component only. Shared R2 configuration can be added under this block -# later. -r2: +# === RR (Retool agent runtime) ============================================= +# Master switch for the whole RR stack. Set `rr.enabled: true` to turn on the +# components nested below — jsExecutor, agent, and agentSandbox — with a +# single line. Each component's own `enabled` (left null by default) inherits +# this switch; set a component's `enabled` to true/false to override the master +# for that component only. The gitServer and blobStorage blocks below +# provide the React Retool git server and the object storage the stack needs. +# (The MCP server is configured separately at the top level: it needs its own +# OAuth opt-in and is intentionally independent of this master switch.) +rr: enabled: false -# JS Executor -jsExecutor: - # Inherits .Values.r2.enabled when left unset (null); set true/false to override. - enabled: null + # JS Executor + jsExecutor: + # Inherits .Values.rr.enabled when left unset (null); set true/false to override. + enabled: null - image: - repository: tryretool/js-executor-service - # defaults to top level image.tag - tag: null - pullPolicy: IfNotPresent + image: + repository: tryretool/js-executor-service + # defaults to top level image.tag + tag: null + pullPolicy: IfNotPresent - replicaCount: 1 + replicaCount: 1 - seccompLocalhostProfile: profiles/nsjail-seccomp.json + seccompLocalhostProfile: profiles/nsjail-seccomp.json - # JS-executor-specific environment; not inherited from the top-level - # .Values.env / .Values.environmentSecrets / .Values.environmentVariables. - env: {} - environmentSecrets: [] - environmentVariables: [] + # JS-executor-specific environment; not inherited from the top-level + # .Values.env / .Values.environmentSecrets / .Values.environmentVariables. 
+ env: {} + environmentSecrets: [] + environmentVariables: [] - # Annotations for JS executor pods - annotations: {} + # Annotations for JS executor pods + annotations: {} - # Labels for JS executor pods - labels: {} + # Labels for JS executor pods + labels: {} - volumes: {} - volumeMounts: {} + volumes: {} + volumeMounts: {} - # Config affinity and anti-affinity rules for the JS executor pods - affinity: {} + # Config affinity and anti-affinity rules for the JS executor pods + affinity: {} - # Resources for the JS executor. Memory request and limit are kept equal: - # JSE reads its memory limit and rejects requests at 80% of it, so the - # request must reserve the full amount to avoid premature rejections. - resources: - limits: - cpu: 6000m - memory: 6Gi - requests: - cpu: 6000m - memory: 6Gi + # Resources for the JS executor. Memory request and limit are kept equal: + # JSE reads its memory limit and rejects requests at 80% of it, so the + # request must reserve the full amount to avoid premature rejections. + resources: + limits: + cpu: 6000m + memory: 6Gi + requests: + cpu: 6000m + memory: 6Gi + + # RR Agent: server-side agent loop worker (independent from agents above). + agent: + # Inherits .Values.rr.enabled when left unset (null); set true/false to override. + enabled: null + + # Labels for RR agent worker pods + labels: {} + + # RR agent configuration + config: {} + + # Annotations for RR agent worker pods + annotations: {} + + # RR agent worker configuration + worker: + replicaCount: 1 + + resources: + limits: + cpu: 2000m + memory: 4096Mi + requests: + cpu: 1000m + memory: 2048Mi + + # Agent Sandbox Service: sandboxed code execution for AI agents. + # Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), + # and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. + agentSandbox: + # Inherits .Values.rr.enabled when left unset (null); set true/false to override. 
+ enabled: null + + image: + repository: tryretool/agent-sandbox-service + # defaults to top level image.tag + tag: null + pullPolicy: IfNotPresent + + # Lightweight init image used by the prepuller and seccomp DaemonSets. + # Pinning by digest is recommended for production. + initImage: + repository: busybox + tag: '1.37.0' + # Manifest list digest — set to '' in test environments where images are + # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). + digest: '' + + # Annotations for agent sandbox pods + annotations: {} + + # Labels for agent sandbox pods + labels: {} + + # === Secrets ============================================================ + # Provide each secret as a plaintext value below, OR set externalSecret.name + # to a pre-existing Secret with keys jwt-public-key, jwt-private-key, + # encryption-key, api-secret, postgres-url. A plaintext value always wins over + # the external secret for that key. + externalSecret: + name: '' # optional: existing Secret holding all keys below + + jwtPublicKey: '' # REQUIRED (ES256) unless provided via externalSecret + jwtPrivateKey: '' # REQUIRED (ES256) unless provided via externalSecret + encryptionKey: '' # optional: hex 256-bit; must match backend AGENT_SANDBOX_ENCRYPTION_KEY + apiSecret: '' # optional: admin/test endpoints + + # === Postgres state backend ============================================= + # By DEFAULT (all options below left blank) the agent sandbox reuses the + # backend's Postgres connection from config.postgresql / the postgresql + # subchart -- same instance and database, separate schema (see schema below). + # So enabling it on an existing deployment needs nothing here. (Exception: + # if the backend's DB password is supplied via external secrets / envFrom, it + # can't be inherited by a separate pod -- set an option below in that case.) 
+ # To point the sandbox at a different database, set exactly ONE option: + postgres: + # -- Option 1: plaintext DSN -- + url: '' + + # -- Option 2: assemble from fields -- + # The password is passed via PGPASSWORD (never embedded in the URL), so any + # characters are safe and a password-only secret can be reused as-is. + # Set either password or passwordSecretName. + # user/database are embedded in the DSN verbatim (user may contain '@', e.g. + # Azure user@servername); for values with : / ? # use Option 1 or 3. + host: '' + port: 5432 + database: '' + user: '' + password: '' + passwordSecretName: '' + passwordSecretKey: 'password' + + # -- Option 3: existing Secret holding the full DSN -- + urlSecretName: '' + urlSecretKey: 'postgres-url' + + # -- Option 4: reuse externalSecret.name (its postgres-url key) -- + # Selected by setting rr.agentSandbox.externalSecret.name (in the Secrets + # section above), not by anything here. Used when options 1-3 are blank. + # + # If options 1-4 are ALL unset, the default (inherit config.postgresql) + # applies -- see the note at the top of this block. + + # -- Optional tuning (defaults shown) -- + schema: 'agent_executor' + poolMax: 10 + sweeperIntervalMs: 60000 + + # Sandbox network access via pasta userspace networking. + # When enabled, sandboxes get isolated outbound access with L7 filtering. + sandboxNetwork: + enabled: true + # Request smarter-devices/net_tun via resources.limits on sandbox pods. + # When true, the kubelet grants /dev/net/tun device cgroup access without + # privileged mode. Requires smarter-device-manager to be running on each + # node (see deployDaemonSet below). + devicePlugin: true + # Deploy the smarter-device-manager DaemonSet from this Helm release. + # Set to false when another release (or external process) already manages + # the DaemonSet — only one instance should run per node. + deployDaemonSet: true + # HTTP proxy for sandbox egress L7 filtering. 
Defaults to the in-cluster + # agent-sandbox-proxy service URL when empty. + httpProxy: '' + + # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox + # pods can request it via resources.limits. + devicePlugin: + image: + repository: ghcr.io/smarter-project/smarter-device-manager + tag: v1.20.12 + # Number of /dev/net/tun device slots to register. + # Set high enough to accommodate maxTotalJobs + prewarm pool. + maxDevices: 130 + + # When possible, we want the devicePlugin daemonset to preempt normal pods. + # Note: in some cases this is inconvenient or unsupported, i.e. in GKE which + # requires a custom ResourceQuota to use the `system-node-critical` + # PriorityClass in user namespaces. In those cases, set this to `null`. + priorityClassName: system-node-critical + + # Seccomp profile path relative to /var/lib/kubelet/seccomp/. + # The seccomp node-installer DaemonSet copies the profile to this path + # on every node automatically. + seccompProfile: retool/gvisor-seccomp.json + + # S3-compatible snapshot storage. + # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. + snapshotStorage: + s3Bucket: '' + s3Endpoint: '' + s3Region: 'us-east-1' + # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. + # If empty, falls back to the main agent sandbox secret. + credentialsSecretName: '' + + # Sandbox (Job) configuration + sandbox: + port: 3017 + resources: + requests: + cpu: 1 + memory: 2Gi + limits: + cpu: '2' + memory: 4Gi + # Idle timeout (ms) before an unassigned sandbox self-terminates. + sandboxIdleTimeoutMs: 600000 + # Hard ceiling (ms) on total sandbox lifetime, regardless of activity. When + # reached, the sandbox is destroyed (deferred until the current agent loop + # ends). Defaults to 2.5 hours. 
+ sandboxGlobalLifetimeMs: 9000000 + tmpDirSizeLimit: 20Gi + # Separate limit for the rootfs-appjob volume — the sandbox root filesystem + # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi + # allocated for /tmp. + rootfsSizeLimit: 2Gi + # Additional environment variables for sandbox containers. + extraEnv: [] + + # Controller: tracks capacity, assigns sandbox pods, manages scaling + controller: + replicaCount: 1 + port: 3018 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + scaling: + prewarmPoolSize: 5 + maxTotalJobs: 50 + maxConcurrentCreates: 3 + jobRetentionSeconds: 300 + assignedSandboxTtlSeconds: 3600 + reconcileIntervalMs: 5000 + leaderTtlMs: 10000 + leaderRenewMs: 3000 + perUserSandboxLimit: 5 + + # Proxy: HTTP proxy for sandbox egress with credential injection. + # The proxy must be reachable by frontend browsers for WebSocket connections. + proxy: + replicaCount: 1 + port: 3019 + resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + allowedDomains: '' + # URL the proxy uses to reach the Retool backend for token exchange. + # Defaults to http://:3000 (same-cluster backend service). + backendUrl: '' + backendDomainSuffixes: '' + sandboxProxyTimeoutMs: '180000' # 3 minutes + service: + # Set to LoadBalancer or NodePort to expose the proxy externally. + type: ClusterIP + annotations: {} + # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. + # This is separate from the main Retool ingress since the proxy typically runs on its own domain. + ingress: + enabled: false + # ingressClassName: + annotations: {} + # kubernetes.io/ingress.class: nginx + # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + host: '' + # e.g. 
sandbox.yourdomain.com + tls: [] + # - secretName: sandbox-tls + # hosts: + # - sandbox.yourdomain.com + + # Backend integration: these tell the Retool backend how to reach agent executor. + # controllerUrl and proxyUrl default to internal service URLs when empty. + controllerUrl: '' + proxyUrl: '' + # Public URL for frontend browsers to reach the proxy via WebSocket. + # Leave EMPTY for self-hosted: the backend then serves the sandbox same-origin + # as the editor (your Retool base URL) and the front server reverse-proxies the + # /sandbox/* WS+Vite paths to the in-cluster proxy Service — so no dedicated + # proxy domain or ingress is required, and your catch-all ingress is untouched. + # Only set this (e.g. https://sandbox.yourdomain.com) if you deliberately want + # the proxy on a separate domain, in which case also enable proxy.ingress above. + frontendWsProxyDomain: '' + # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. + proxyDomain: '' + + # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. + # Strongly recommended for production to isolate sandbox egress. + networkPolicy: + enabled: false + # CIDR ranges to block in proxy egress rules. Must stay in sync with + # DEFAULT_BLOCKED_CIDRS in the agent-executor source. + blockedRanges: + - 169.254.0.0/16 # link-local / cloud metadata + - 10.0.0.0/8 # private (RFC 1918) + - 172.16.0.0/12 # private (RFC 1918) + - 192.168.0.0/16 # private (RFC 1918) + - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) + - 127.0.0.0/8 # loopback + - 0.0.0.0/8 # "this network" (RFC 791) + blockedRanges6: + - fc00::/7 # IPv6 unique local addresses + - fe80::/10 # IPv6 link-local + - '::1/128' # IPv6 loopback + # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). + # Set to empty to allow DNS to any destination (not recommended). 
+ dnsSelector: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + extraEgress: [] + # CIDRs allowed in proxy egress even if they fall within blockedRanges + # (e.g. private backend endpoint). + backendAllowlist: [] + # Override sandbox ingress source selector (defaults to retool backend pods). + ingressFrom: [] + + # Node placement overrides (falls back to global nodeSelector/tolerations if empty) + nodeSelector: {} + tolerations: [] + affinity: {} + + gitServer: + # Runs the React Retool Git Server in-process on the main backend pod + # (SERVICE_TYPE=...,RR_GIT_SERVER). The main backend internally proxies + # /api/ai/rr/git/v2/* to localhost:RR_GIT_SERVER_PORT, so no extra ingress + # routing is required. Required for the rr / React Retool app pipeline. + # + # When enabled, exactly one of blobStorage.s3, blobStorage.gcs, or + # blobStorage.azure must be configured below — git_server stores all + # objects/packs in blob storage. + enabled: false + + # Optional: number of loose objects before git_server triggers a repack. + # Backend default is 100; unset to inherit it. + repackThreshold: ~ + + # Escape hatch for the blob-storage validation below. The chart can only + # inspect blobStorage, env, environmentVariables, and environmentSecrets at + # template time; it cannot see env vars injected via envFrom (Secret/ConfigMap + # splat). Set this to true to bypass the check when RR_BLOB_STORAGE_PROVIDER / + # RR_DEFAULT_* are provided that way. + skipBlobStorageValidation: false + + # Optionally split the git server out of the main backend into its own + # deployment + service (mirrors how the workload is split in Retool Cloud). + # Requires rr.gitServer.enabled: true. 
When enabled: + # - a dedicated -git-server Deployment runs SERVICE_TYPE=RR_GIT_SERVER + # - the main backend drops RR_GIT_SERVER from its SERVICE_TYPE and proxies git + # traffic to the service via RR_GIT_SERVER_HOST / RR_GIT_SERVER_PORT + # - the MCP server (if enabled) is auto-pointed at the same service unless + # mcp.config.retoolGitServerUrl is set explicitly + # The blobStorage config below is rendered onto the git-server pod instead of + # the main backend in this mode. + separate: + enabled: false + replicaCount: 1 + # Port the git server listens on (RR_GIT_SERVER_PORT) and that its service exposes. + port: 3010 + # Pod resource requests/limits. Falls back to top-level `resources` if unset. + resources: {} + # Falls back to top-level `affinity` if unset. + affinity: {} + # Annotations/labels applied to the git-server pod template. + annotations: {} + labels: {} + # Annotations/labels applied to the git-server Service (kept separate from + # the pod ones above). + service: + annotations: {} + labels: {} + + # Shared blob-storage config used by git_server (and other features that + # need object storage, e.g. snapshots). Set exactly one of s3, gcs, azure. + # Renders RR_BLOB_STORAGE_PROVIDER + RR_DEFAULT__* env vars on + # the backend deployment. + # + # This block can be omitted entirely if RR_BLOB_STORAGE_PROVIDER and the + # RR_DEFAULT_*_* env vars are provided directly via environmentVariables / + # environmentSecrets above — the chart detects that and skips this guard. + blobStorage: {} + # s3: + # bucket: my-rr-bucket + # region: us-east-1 + # endpoint: "" # optional, for S3-compatible (MinIO, R2, etc.) + # accessKeyId: AKIA... + # # Provide secretAccessKey OR the secretName/secretKey pair below. + # secretAccessKey: "" + # secretAccessKeySecretName: "" + # secretAccessKeySecretKey: secret-access-key + # + # gcs: + # bucket: my-rr-bucket + # # Provide credentials (JSON string) OR the secretName/secretKey pair below. 
+ # credentials: "" + # credentialsSecretName: "" + # credentialsSecretKey: credentials.json + # + # azure: + # container: my-rr-container + # # Provide connectionString OR the secretName/secretKey pair below. + # connectionString: "" + # connectionStringSecretName: "" + # connectionStringSecretKey: connection-string agents: # Enable AI Agents @@ -939,301 +1236,6 @@ agents: # Annotations for agent worker pods annotations: {} -# R2 Agent: server-side agent loop worker (independent from agents above). -r2Agent: - # Inherits .Values.r2.enabled when left unset (null); set true/false to override. - enabled: null - - # Labels for R2 agent worker pods - labels: {} - - # R2 agent configuration - config: {} - - # Annotations for R2 agent worker pods - annotations: {} - - # R2 agent worker configuration - worker: - replicaCount: 1 - - resources: - limits: - cpu: 2000m - memory: 4096Mi - requests: - cpu: 1000m - memory: 2048Mi - -# Agent Sandbox Service: sandboxed code execution for AI agents. -# Deploys a controller (manages sandbox lifecycle), proxy (HTTP proxy for sandbox egress), -# and ephemeral Job-based sandboxes. Uses Postgres for controller/proxy state. -agentSandbox: - # Inherits .Values.r2.enabled when left unset (null); set true/false to override. - enabled: null - - image: - repository: tryretool/agent-sandbox-service - # defaults to top level image.tag - tag: null - pullPolicy: IfNotPresent - - # Lightweight init image used by the prepuller and seccomp DaemonSets. - # Pinning by digest is recommended for production. - initImage: - repository: busybox - tag: '1.37.0' - # Manifest list digest — set to '' in test environments where images are - # pre-loaded (containerd 2.0 can't resolve digest references for side-loaded images). 
- digest: '' - - # Annotations for agent sandbox pods - annotations: {} - - # Labels for agent sandbox pods - labels: {} - - # === Secrets ============================================================ - # Provide each secret as a plaintext value below, OR set externalSecret.name - # to a pre-existing Secret with keys jwt-public-key, jwt-private-key, - # encryption-key, api-secret, postgres-url. A plaintext value always wins over - # the external secret for that key. - externalSecret: - name: '' # optional: existing Secret holding all keys below - - jwtPublicKey: '' # REQUIRED (ES256) unless provided via externalSecret - jwtPrivateKey: '' # REQUIRED (ES256) unless provided via externalSecret - encryptionKey: '' # optional: hex 256-bit; must match backend AGENT_SANDBOX_ENCRYPTION_KEY - apiSecret: '' # optional: admin/test endpoints - - # === Postgres state backend ============================================= - # By DEFAULT (all options below left blank) the agent sandbox reuses the - # backend's Postgres connection from config.postgresql / the postgresql - # subchart -- same instance and database, separate schema (see schema below). - # So enabling it on an existing deployment needs nothing here. (Exception: - # if the backend's DB password is supplied via external secrets / envFrom, it - # can't be inherited by a separate pod -- set an option below in that case.) - # To point the sandbox at a different database, set exactly ONE option: - postgres: - # -- Option 1: plaintext DSN -- - url: '' - - # -- Option 2: assemble from fields -- - # The password is passed via PGPASSWORD (never embedded in the URL), so any - # characters are safe and a password-only secret can be reused as-is. - # Set either password or passwordSecretName. - # user/database are embedded in the DSN verbatim (user may contain '@', e.g. - # Azure user@servername); for values with : / ? # use Option 1 or 3. 
- host: '' - port: 5432 - database: '' - user: '' - password: '' - passwordSecretName: '' - passwordSecretKey: 'password' - - # -- Option 3: existing Secret holding the full DSN -- - urlSecretName: '' - urlSecretKey: 'postgres-url' - - # -- Option 4: reuse externalSecret.name (its postgres-url key) -- - # Selected by setting agentSandbox.externalSecret.name (in the Secrets - # section above), not by anything here. Used when options 1-3 are blank. - # - # If options 1-4 are ALL unset, the default (inherit config.postgresql) - # applies -- see the note at the top of this block. - - # -- Optional tuning (defaults shown) -- - schema: 'agent_executor' - poolMax: 10 - sweeperIntervalMs: 60000 - - # Sandbox network access via pasta userspace networking. - # When enabled, sandboxes get isolated outbound access with L7 filtering. - sandboxNetwork: - enabled: true - # Request smarter-devices/net_tun via resources.limits on sandbox pods. - # When true, the kubelet grants /dev/net/tun device cgroup access without - # privileged mode. Requires smarter-device-manager to be running on each - # node (see deployDaemonSet below). - devicePlugin: true - # Deploy the smarter-device-manager DaemonSet from this Helm release. - # Set to false when another release (or external process) already manages - # the DaemonSet — only one instance should run per node. - deployDaemonSet: true - # HTTP proxy for sandbox egress L7 filtering. Defaults to the in-cluster - # agent-sandbox-proxy service URL when empty. - httpProxy: '' - - # smarter-device-manager: registers /dev/net/tun with the kubelet so sandbox - # pods can request it via resources.limits. - devicePlugin: - image: - repository: ghcr.io/smarter-project/smarter-device-manager - tag: v1.20.12 - # Number of /dev/net/tun device slots to register. - # Set high enough to accommodate maxTotalJobs + prewarm pool. - maxDevices: 130 - - # When possible, we want the devicePlugin daemonset to preempt normal pods. 
- # Note: in some cases this is inconvenient or unsupported, i.e. in GKE which - # requires a custom ResourceQuota to use the `system-node-critical` - # PriorityClass in user namespaces. In those cases, set this to `null`. - priorityClassName: system-node-critical - - # Seccomp profile path relative to /var/lib/kubelet/seccomp/. - # The seccomp node-installer DaemonSet copies the profile to this path - # on every node automatically. - seccompProfile: retool/gvisor-seccomp.json - - # S3-compatible snapshot storage. - # When s3Bucket is set, snapshots are persisted to S3 and survive pod restarts. - snapshotStorage: - s3Bucket: '' - s3Endpoint: '' - s3Region: 'us-east-1' - # Name of a K8s Secret containing keys awsAccessKeyId and awsSecretAccessKey. - # If empty, falls back to the main agent sandbox secret. - credentialsSecretName: '' - - # Sandbox (Job) configuration - sandbox: - port: 3017 - resources: - requests: - cpu: 1 - memory: 2Gi - limits: - cpu: '2' - memory: 4Gi - # Idle timeout (ms) before an unassigned sandbox self-terminates. - sandboxIdleTimeoutMs: 600000 - # Hard ceiling (ms) on total sandbox lifetime, regardless of activity. When - # reached, the sandbox is destroyed (deferred until the current agent loop - # ends). Defaults to 2.5 hours. - sandboxGlobalLifetimeMs: 9000000 - tmpDirSizeLimit: 20Gi - # Separate limit for the rootfs-appjob volume — the sandbox root filesystem - # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi - # allocated for /tmp. - rootfsSizeLimit: 2Gi - # Additional environment variables for sandbox containers. 
- extraEnv: [] - - # Controller: tracks capacity, assigns sandbox pods, manages scaling - controller: - replicaCount: 1 - port: 3018 - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - scaling: - prewarmPoolSize: 5 - maxTotalJobs: 50 - maxConcurrentCreates: 3 - jobRetentionSeconds: 300 - assignedSandboxTtlSeconds: 3600 - reconcileIntervalMs: 5000 - leaderTtlMs: 10000 - leaderRenewMs: 3000 - perUserSandboxLimit: 5 - - # Proxy: HTTP proxy for sandbox egress with credential injection. - # The proxy must be reachable by frontend browsers for WebSocket connections. - proxy: - replicaCount: 1 - port: 3019 - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - allowedDomains: '' - # URL the proxy uses to reach the Retool backend for token exchange. - # Defaults to http://:3000 (same-cluster backend service). - backendUrl: '' - backendDomainSuffixes: '' - sandboxProxyTimeoutMs: '180000' # 3 minutes - service: - # Set to LoadBalancer or NodePort to expose the proxy externally. - type: ClusterIP - annotations: {} - # Optional ingress to expose the proxy to frontend browsers for WebSocket connections. - # This is separate from the main Retool ingress since the proxy typically runs on its own domain. - ingress: - enabled: false - # ingressClassName: - annotations: {} - # kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" - # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - host: '' - # e.g. sandbox.yourdomain.com - tls: [] - # - secretName: sandbox-tls - # hosts: - # - sandbox.yourdomain.com - - # Backend integration: these tell the Retool backend how to reach agent executor. - # controllerUrl and proxyUrl default to internal service URLs when empty. - controllerUrl: '' - proxyUrl: '' - # Public URL for frontend browsers to reach the proxy via WebSocket. 
- # Leave EMPTY for self-hosted: the backend then serves the sandbox same-origin - # as the editor (your Retool base URL) and the front server reverse-proxies the - # /sandbox/* WS+Vite paths to the in-cluster proxy Service — so no dedicated - # proxy domain or ingress is required, and your catch-all ingress is untouched. - # Only set this (e.g. https://sandbox.yourdomain.com) if you deliberately want - # the proxy on a separate domain, in which case also enable proxy.ingress above. - frontendWsProxyDomain: '' - # Public URL for proxy domain. Defaults to frontendWsProxyDomain if empty. - proxyDomain: '' - - # NetworkPolicy: restrict sandbox, controller, and proxy pod traffic. - # Strongly recommended for production to isolate sandbox egress. - networkPolicy: - enabled: false - # CIDR ranges to block in proxy egress rules. Must stay in sync with - # DEFAULT_BLOCKED_CIDRS in the agent-executor source. - blockedRanges: - - 169.254.0.0/16 # link-local / cloud metadata - - 10.0.0.0/8 # private (RFC 1918) - - 172.16.0.0/12 # private (RFC 1918) - - 192.168.0.0/16 # private (RFC 1918) - - 100.64.0.0/10 # carrier-grade NAT (RFC 6598) - - 127.0.0.0/8 # loopback - - 0.0.0.0/8 # "this network" (RFC 791) - blockedRanges6: - - fc00::/7 # IPv6 unique local addresses - - fe80::/10 # IPv6 link-local - - '::1/128' # IPv6 loopback - # Restrict DNS egress to pods matching this selector (typically kube-dns/coredns). - # Set to empty to allow DNS to any destination (not recommended). - dnsSelector: - namespaceSelector: - matchLabels: - kubernetes.io/metadata.name: kube-system - podSelector: - matchLabels: - k8s-app: kube-dns - extraEgress: [] - # CIDRs allowed in proxy egress even if they fall within blockedRanges - # (e.g. private backend endpoint). - backendAllowlist: [] - # Override sandbox ingress source selector (defaults to retool backend pods). 
- ingressFrom: [] - - # Node placement overrides (falls back to global nodeSelector/tolerations if empty) - nodeSelector: {} - tolerations: [] - affinity: {} - # SHARED TEMPORAL CONFIGURATION # This configuration is shared between all workers. # In order to use workers, temporal must be configured. From 1bb57404e04b1b56f66b72a8488f692346bf2eb5 Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 11 Jun 2026 12:13:11 -0400 Subject: [PATCH 32/37] [agent-sandbox] expose sandboxReadyTimeoutMs -> SANDBOX_READY_TIMEOUT_MS (#322) The agent-executor sandbox connect timeout (config.ts readyTimeoutMs) is now env-configurable via SANDBOX_READY_TIMEOUT_MS, but the chart never set it, so the job-template fell back to the image default (20s). Interactive sandbox boot (gVisor + bundle load) can exceed that, surfacing "did not connect within 20000ms". Add an agentSandbox.sandbox.sandboxReadyTimeoutMs knob (default 20000, matching the code default) and emit SANDBOX_READY_TIMEOUT_MS in the job-template env next to SANDBOX_IDLE_TIMEOUT_MS / SANDBOX_GLOBAL_LIFETIME_MS, so operators can raise it (e.g. 45000) without manual job-template patching. 
Co-authored-by: Claude Opus 4.8 (1M context) --- charts/retool/templates/deployment_agent_sandbox.yaml | 1 + charts/retool/values.yaml | 5 +++++ values.yaml | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 138fdba..0e9ac57 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -176,6 +176,7 @@ data: ,{"name": "SANDBOX_NETWORK_ENABLED", "value": "{{ $as.sandboxNetwork.enabled }}"} ,{"name": "SANDBOX_IDLE_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxIdleTimeoutMs }}"} ,{"name": "SANDBOX_GLOBAL_LIFETIME_MS", "value": "{{ $as.sandbox.sandboxGlobalLifetimeMs }}"} + ,{"name": "SANDBOX_READY_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxReadyTimeoutMs }}"} {{- if $as.jwtPublicKey }} ,{"name": "AGENT_SANDBOX_JWT_PUBLIC_KEY", "value": "{{ $as.jwtPublicKey }}"} {{- else if $as.externalSecret.name }} diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 9f803b4..7f4b338 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -994,6 +994,11 @@ rr: # reached, the sandbox is destroyed (deferred until the current agent loop # ends). Defaults to 2.5 hours. sandboxGlobalLifetimeMs: 9000000 + # Time (ms) the controller waits for a freshly-started sandbox to connect + # before failing the request. Interactive sandbox boot (gVisor + bundle + # load) can take ~20-25s; raise this (e.g. 45000) if you see "did not + # connect within ms" errors on cold starts. 
+ sandboxReadyTimeoutMs: 20000 tmpDirSizeLimit: 20Gi # Separate limit for the rootfs-appjob volume — the sandbox root filesystem # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi diff --git a/values.yaml b/values.yaml index 9f803b4..7f4b338 100644 --- a/values.yaml +++ b/values.yaml @@ -994,6 +994,11 @@ rr: # reached, the sandbox is destroyed (deferred until the current agent loop # ends). Defaults to 2.5 hours. sandboxGlobalLifetimeMs: 9000000 + # Time (ms) the controller waits for a freshly-started sandbox to connect + # before failing the request. Interactive sandbox boot (gVisor + bundle + # load) can take ~20-25s; raise this (e.g. 45000) if you see "did not + # connect within ms" errors on cold starts. + sandboxReadyTimeoutMs: 20000 tmpDirSizeLimit: 20Gi # Separate limit for the rootfs-appjob volume — the sandbox root filesystem # is a static ~600MB extraction, so 2Gi provides headroom without the 20Gi From 971a1b48115ac1676d6e7810bd6cb1547b44bf7c Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 11 Jun 2026 12:50:37 -0400 Subject: [PATCH 33/37] [rr] make legacy-values guard message spell out the fix (#323) The pre-rename (`r2.*` / top-level component) values guard added in #321 already fails loud, but the message buried the call to action. Lead with "ACTION REQUIRED: update your Helm values file", state the deploy is blocked, and give an explicit "edit your values file and rename these keys" instruction before the key-move list. Message-only change inside the existing fail string: no values.yaml or CI changes. Verified the guard still fires (now with the clearer text) on a legacy key and stays silent on a clean rr.* render. 
Co-authored-by: Claude Opus 4.8 (1M context) --- charts/retool/templates/_helpers.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/retool/templates/_helpers.tpl b/charts/retool/templates/_helpers.tpl index 261f6fd..5d1d787 100644 --- a/charts/retool/templates/_helpers.tpl +++ b/charts/retool/templates/_helpers.tpl @@ -1025,7 +1025,7 @@ Two classes of stale config are caught: {{- end -}} {{- end -}} {{- if $found -}} -{{- fail (printf "\n\nThe RR (formerly \"r2\") values layout changed: the master switch and every component it needs now live under the top-level `rr:` block. These keys in your values are NO LONGER READ and would silently disable RR. Rename / move them as shown:\n\n%s\n\nThe master switch is now `rr.enabled`. See values.yaml for the new layout." (join "\n" $found)) -}} +{{- fail (printf "\n\nACTION REQUIRED: update your Helm values file.\n\nThe RR (formerly \"r2\") values layout changed: the master switch and every component it needs now live under the top-level `rr:` block. The keys below are still set in your values but are NO LONGER READ, which would silently disable RR. This deploy is blocked until you fix it.\n\nTo fix: edit your values file (values.yaml / your Helm values overrides) and rename / move these keys:\n\n%s\n\nThe master switch is now `rr.enabled`. See the chart's values.yaml for the full new layout." (join "\n" $found)) -}} {{- end -}} {{- end -}} From ef28c2db411ba7ae84fd81334a06f5e3beb0afea Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 11 Jun 2026 13:59:08 -0400 Subject: [PATCH 34/37] [agent-sandbox] fix jwtPublicKey breaking job-template JSON (use toJson) (#325) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sandbox job-template ConfigMap embedded jwtPublicKey into a JSON string literal as `"value": "{{ $as.jwtPublicKey }}"`. 
ES256 keys are normally multi-line PEM (BEGIN/END headers + newlines); a real newline inside a JSON string literal is invalid JSON, so the controller failed to read the job-template and could not spawn sandbox Jobs. (A compact JWK would break it too — embedded double-quotes.) Fix: `"value": {{ $as.jwtPublicKey | toJson }}` — toJson emits the quoted, fully-escaped JSON string (newlines -> \n, quotes -> \"). This also makes the JSON path consistent with the env-var paths, which already use `| quote`. Until now this only worked if the operator pre-flattened the key to a single `\n`-escaped line (the workaround the inline-secrets CI fixture relied on). Updated that fixture to a genuine multi-line PEM block scalar so it exercises the escaping, and corrected its comment. Verified: rendered the inline-secrets fixture and parsed the embedded job-template.json — VALID with the fix, JSONDecodeError without it. Co-authored-by: Claude Opus 4.8 (1M context) --- ...t-agent-sandbox-inline-secrets-option.yaml | 19 ++++++++++++++++--- .../templates/deployment_agent_sandbox.yaml | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml b/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml index 2ac11c6..35c172a 100644 --- a/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml +++ b/charts/retool/ci/test-agent-sandbox-inline-secrets-option.yaml @@ -6,7 +6,9 @@ rr: # proxy ingress). Here we exercise the *other* halves of those branches: # - Secrets inline (no externalSecret.name) → the chart renders its own Secret # (jwt-public-key / jwt-private-key / encryption-key / api-secret). jwtPublicKey - # MUST be single-line: it is injected raw into the sandbox job-template JSON. + # is injected into the sandbox job-template JSON via `toJson`, so a genuine + # multi-line PEM (real newlines, as below) is escaped correctly — no need to + # pre-flatten it to a single `\n`-escaped line. 
# - Postgres sourcing OPTION 1: plaintext DSN via postgres.url. # - Same-origin proxy: no dedicated proxy domain and no proxy ingress — the # backend reverse-proxies /sandbox/* (frontendWsProxyDomain left empty). @@ -21,8 +23,19 @@ rr: tag: 3.123.4 pullPolicy: IfNotPresent - jwtPublicKey: '-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI\nY+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END PUBLIC KEY-----' - jwtPrivateKey: '-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49\nAwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB\nQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA==\n-----END EC PRIVATE KEY-----' + # Real multi-line PEM (block scalar) — exercises the toJson newline escaping in + # the job-template JSON. A raw "{{ . }}" would produce invalid JSON here. + jwtPublicKey: |- + -----BEGIN PUBLIC KEY----- + MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AI + Y+QUCicYtfv9wLGcEGPQuXoBQtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA== + -----END PUBLIC KEY----- + jwtPrivateKey: |- + -----BEGIN EC PRIVATE KEY----- + MHcCAQEEIMFXLiN/YsJv89D2YkEZ6/Dj5fujghENmYTOilwdChU3oAoGCCqGSM49 + AwEHoUQDQgAEljtqa2nhBwe/PqNhWgPHhj0jv8AIY+QUCicYtfv9wLGcEGPQuXoB + QtuoIuOwXOdbEWgrQyLdIEb0YjegAW3miA== + -----END EC PRIVATE KEY----- encryptionKey: a12b01429fe0fe69a80da94e9e837ab2f1e9bda378ed8a25905a238f6fea6b7a apiSecret: test-agent-sandbox-api-secret diff --git a/charts/retool/templates/deployment_agent_sandbox.yaml b/charts/retool/templates/deployment_agent_sandbox.yaml index 0e9ac57..2dd1039 100644 --- a/charts/retool/templates/deployment_agent_sandbox.yaml +++ b/charts/retool/templates/deployment_agent_sandbox.yaml @@ -178,7 +178,7 @@ data: ,{"name": "SANDBOX_GLOBAL_LIFETIME_MS", "value": "{{ $as.sandbox.sandboxGlobalLifetimeMs }}"} ,{"name": "SANDBOX_READY_TIMEOUT_MS", "value": "{{ $as.sandbox.sandboxReadyTimeoutMs }}"} {{- if $as.jwtPublicKey }} - ,{"name": 
"AGENT_SANDBOX_JWT_PUBLIC_KEY", "value": "{{ $as.jwtPublicKey }}"} + ,{"name": "AGENT_SANDBOX_JWT_PUBLIC_KEY", "value": {{ $as.jwtPublicKey | toJson }}} {{- else if $as.externalSecret.name }} ,{"name": "AGENT_SANDBOX_JWT_PUBLIC_KEY", "valueFrom": {"secretKeyRef": {"name": "{{ $defaultSecretName }}", "key": "jwt-public-key"}}} {{- end }} From 1292382faf14891f27bcbe58b02220fe78d5b228 Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 11 Jun 2026 14:41:39 -0400 Subject: [PATCH 35/37] [agent-sandbox] guard device-plugin priorityClassName (fix null opt-out) (#327) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit agent_sandbox_device_plugin.yaml rendered `priorityClassName: {{ $as.devicePlugin.priorityClassName }}` unconditionally. The documented GKE opt-out sets `rr.agentSandbox.devicePlugin.priorityClassName: null` (GKE rejects system-node-critical in user namespaces). A bare {{ ... }} on a nil value emits the literal string ``, so the DaemonSet was submitted with `priorityClassName: ` — a nonexistent class the kubelet rejects, which blocks the whole agent sandbox from scheduling. Wrap it in `{{- if $as.devicePlugin.priorityClassName }}` so the field is omitted when null, matching how every other workload guards .Values.priorityClassName. Adds ci/test-agent-sandbox-deviceplugin-no-priorityclass-option.yaml — device-plugin DaemonSet with priorityClassName: null. kubeconform rejects ``, so this guards the regression. Verified: null -> field omitted (no ); default -> priorityClassName: system-node-critical still renders; lint clean. 
Co-authored-by: Claude Opus 4.8 (1M context) --- ...-deviceplugin-no-priorityclass-option.yaml | 27 +++++++++++++++++++ .../agent_sandbox_device_plugin.yaml | 2 ++ 2 files changed, 29 insertions(+) create mode 100644 charts/retool/ci/test-agent-sandbox-deviceplugin-no-priorityclass-option.yaml diff --git a/charts/retool/ci/test-agent-sandbox-deviceplugin-no-priorityclass-option.yaml b/charts/retool/ci/test-agent-sandbox-deviceplugin-no-priorityclass-option.yaml new file mode 100644 index 0000000..75b0ac6 --- /dev/null +++ b/charts/retool/ci/test-agent-sandbox-deviceplugin-no-priorityclass-option.yaml @@ -0,0 +1,27 @@ +rr: + + # Agent Sandbox — device-plugin DaemonSet with priorityClassName opted OUT. + # + # Regression guard for the GKE path documented in values.yaml: GKE doesn't allow + # the `system-node-critical` PriorityClass in user namespaces, so operators set + # rr.agentSandbox.devicePlugin.priorityClassName: null + # to drop it. The DaemonSet template must OMIT the priorityClassName field in that + # case — a bare `{{ ... }}` on a nil value would render the literal `<no value>`, + # producing an invalid manifest that the kubelet rejects (and that kubeconform + # rejects here in CI). + # + # Overlaid on test-install-values.yaml. + agentSandbox: + enabled: true + + externalSecret: + name: agent-sandbox-secrets + + sandboxNetwork: + enabled: true + devicePlugin: true + deployDaemonSet: true + + devicePlugin: + # The GKE opt-out — DaemonSet must render with no priorityClassName field.
+ priorityClassName: null diff --git a/charts/retool/templates/agent_sandbox_device_plugin.yaml b/charts/retool/templates/agent_sandbox_device_plugin.yaml index f1f8664..aa46d3f 100644 --- a/charts/retool/templates/agent_sandbox_device_plugin.yaml +++ b/charts/retool/templates/agent_sandbox_device_plugin.yaml @@ -39,7 +39,9 @@ spec: {{- end }} spec: automountServiceAccountToken: false + {{- if $as.devicePlugin.priorityClassName }} priorityClassName: {{ $as.devicePlugin.priorityClassName }} + {{- end }} {{- if $nodeSelector }} nodeSelector: {{ toYaml $nodeSelector | indent 8 }} From 2c6fc90ee0058e14fa92ea6b8b23dbeb3ebdafa7 Mon Sep 17 00:00:00 2001 From: jatin Date: Thu, 11 Jun 2026 15:01:33 -0400 Subject: [PATCH 36/37] set chart version to 6.11.0 (#329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One minor bump after the latest public release on charts.retool.com (6.10.5 stable; 6.11.0-rc1 pre-release). The R² feature set ships as 6.11.0, graduating the existing 6.11.0-rc1 to final. (The 6.12.0 carried on the branch was an internal number that was never published.) Minor release: additive for existing consumers, all R² switches default off. 
Co-authored-by: Claude Opus 4.8 (1M context) --- charts/retool/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/retool/Chart.yaml b/charts/retool/Chart.yaml index 7d8ed5e..dbedb80 100644 --- a/charts/retool/Chart.yaml +++ b/charts/retool/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: retool description: A Helm chart for Kubernetes type: application -version: 6.12.0 +version: 6.11.0 maintainers: - name: Retool Engineering email: engineering+helm@retool.com From b052ff480f6064222981f5e00f0254572bc0bdcb Mon Sep 17 00:00:00 2001 From: Ryan Artecona Date: Thu, 11 Jun 2026 12:04:49 -0700 Subject: [PATCH 37/37] reduce JSE default CPU request/limit from 6 => 2 --- charts/retool/values.yaml | 4 ++-- values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/retool/values.yaml b/charts/retool/values.yaml index 7f4b338..0b3b5eb 100644 --- a/charts/retool/values.yaml +++ b/charts/retool/values.yaml @@ -815,10 +815,10 @@ rr: # request must reserve the full amount to avoid premature rejections. resources: limits: - cpu: 6000m + cpu: '2' memory: 6Gi requests: - cpu: 6000m + cpu: '2' memory: 6Gi # RR Agent: server-side agent loop worker (independent from agents above). diff --git a/values.yaml b/values.yaml index 7f4b338..0b3b5eb 100644 --- a/values.yaml +++ b/values.yaml @@ -815,10 +815,10 @@ rr: # request must reserve the full amount to avoid premature rejections. resources: limits: - cpu: 6000m + cpu: '2' memory: 6Gi requests: - cpu: 6000m + cpu: '2' memory: 6Gi # RR Agent: server-side agent loop worker (independent from agents above).