From bcdbc28ede3c639038f1b754179364d4417b5c79 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Sat, 2 May 2026 15:46:16 +0000 Subject: [PATCH 1/4] Rewrite chromium-headful + chromium-headless wrapper as a Go binary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the bash wrapper.sh shipped in both browser images with a single Go binary (server/cmd/wrapper) that detects the headful vs headless profile from supervisor's conf.d at boot. The Go wrapper preserves behaviour parity with the bash scripts but removes serial dead time on the boot path: - Phase A starts xorg/xvfb, dbus, and chromedriver in a single supervisorctl invocation; readiness is then probed concurrently. - Envoy bootstrap (cert generation, NSS DB, template render) runs in a goroutine alongside Phase A. Phase B gates on it because chromium reads the system CA trust store at process start. - Phase B starts chromium, kernel-images-api, and (headful) mutter + optional neko in one call so their bring-up overlaps chromium boot. - Final readiness waits on the union of CDP, chromedriver, forward-proxy, and (when enabled) neko + envoy in parallel. Per-service tweaks: - supervisor confs: startsecs=2 → 0 so supervisorctl start returns as soon as the program is launched (Go probes readiness directly). - init-envoy.sh: drop the trailing 50-iteration port poll and curl-through-proxy test; the Go wrapper's waitAllReady covers it. - Kraftfile cmd updated from /wrapper.sh to /wrapper. Cosmetic + non-critical work (pulseaudio, --no-sandbox infobar dismissal) runs off the hot path. 
--- README.md | 2 +- images/chromium-headful/Dockerfile | 10 +- images/chromium-headful/Kraftfile | 2 +- .../supervisor/services/chromedriver.conf | 2 +- .../supervisor/services/dbus.conf | 2 +- .../services/kernel-images-api.conf | 2 +- .../supervisor/services/mutter.conf | 2 +- .../supervisor/services/neko.conf | 2 +- .../supervisor/services/xorg.conf | 2 +- images/chromium-headful/wrapper.sh | 338 ---------- images/chromium-headless/image/Dockerfile | 12 +- images/chromium-headless/image/Kraftfile | 2 +- .../supervisor/services/chromedriver.conf | 2 +- .../image/supervisor/services/dbus.conf | 2 +- .../services/kernel-images-api.conf | 2 +- .../image/supervisor/services/xvfb.conf | 2 +- images/chromium-headless/image/wrapper.sh | 294 --------- server/cmd/wrapper/main.go | 591 ++++++++++++++++++ shared/envoy/init-envoy.sh | 38 +- shared/envoy/supervisor-envoy.conf | 2 +- 20 files changed, 626 insertions(+), 685 deletions(-) delete mode 100755 images/chromium-headful/wrapper.sh delete mode 100755 images/chromium-headless/image/wrapper.sh create mode 100644 server/cmd/wrapper/main.go diff --git a/README.md b/README.md index 50fbe2db..22ef9bc0 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Deployed successfully! 
├────── service: ├─ private fqdn: ├─── private ip: - └───────── args: /wrapper.sh + └───────── args: /wrapper ``` ### Unikernel Notes diff --git a/images/chromium-headful/Dockerfile b/images/chromium-headful/Dockerfile index 84ecfd9d..89907f16 100644 --- a/images/chromium-headful/Dockerfile +++ b/images/chromium-headful/Dockerfile @@ -27,6 +27,12 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher +# Build container entrypoint wrapper (replaces wrapper.sh) +RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ + --mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \ + GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/wrapper ./cmd/wrapper + # webrtc client FROM node:22-bullseye-slim AS client WORKDIR /src @@ -348,7 +354,6 @@ COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xor COPY images/chromium-headful/image-chromium/ / COPY images/chromium-headful/start-pulseaudio.sh /images/chromium-headful/start-pulseaudio.sh RUN chmod +x /images/chromium-headful/start-pulseaudio.sh -COPY images/chromium-headful/wrapper.sh /wrapper.sh COPY images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf COPY images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/ COPY shared/envoy/supervisor-envoy.conf /etc/supervisor/conf.d/services/envoy.conf @@ -365,6 +370,7 @@ RUN chmod +x /usr/local/bin/init-envoy.sh # copy the kernel-images API binary built in the builder stage COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api COPY --from=server-builder /out/chromium-launcher /usr/local/bin/chromium-launcher +COPY --from=server-builder /out/wrapper /wrapper # Copy and compile the Playwright daemon COPY server/runtime/playwright-daemon.ts 
/tmp/playwright-daemon.ts @@ -381,4 +387,4 @@ RUN esbuild /tmp/playwright-daemon.ts \ RUN useradd -m -s /bin/bash kernel -ENTRYPOINT [ "/wrapper.sh" ] +ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headful/Kraftfile b/images/chromium-headful/Kraftfile index 18af1a0b..9bb12637 100644 --- a/images/chromium-headful/Kraftfile +++ b/images/chromium-headful/Kraftfile @@ -9,4 +9,4 @@ labels: rootfs: ./initrd -cmd: ["/wrapper.sh"] +cmd: ["/wrapper"] diff --git a/images/chromium-headful/supervisor/services/chromedriver.conf b/images/chromium-headful/supervisor/services/chromedriver.conf index 9bca5365..7d8d3812 100644 --- a/images/chromium-headful/supervisor/services/chromedriver.conf +++ b/images/chromium-headful/supervisor/services/chromedriver.conf @@ -2,6 +2,6 @@ command=/usr/local/bin/chromedriver --port=9225 --allowed-ips=127.0.0.1 --log-level=INFO autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/chromedriver redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/dbus.conf b/images/chromium-headful/supervisor/services/dbus.conf index 7edc479c..fcab6898 100644 --- a/images/chromium-headful/supervisor/services/dbus.conf +++ b/images/chromium-headful/supervisor/services/dbus.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p /run/dbus && dbus-uuidgen --ensure && dbus-daemon --system --address=unix:path=/run/dbus/system_bus_socket --nopidfile --nosyslog --nofork' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/dbus redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/kernel-images-api.conf b/images/chromium-headful/supervisor/services/kernel-images-api.conf index e57d30a8..0638dea8 100644 --- a/images/chromium-headful/supervisor/services/kernel-images-api.conf +++ b/images/chromium-headful/supervisor/services/kernel-images-api.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p 
"${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" exec /usr/local/bin/kernel-images-api' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/kernel-images-api redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/mutter.conf b/images/chromium-headful/supervisor/services/mutter.conf index 5de00213..3fac9ea7 100644 --- a/images/chromium-headful/supervisor/services/mutter.conf +++ b/images/chromium-headful/supervisor/services/mutter.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'XDG_SESSION_TYPE=x11 mutter --replace --sm-disable' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/mutter redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/neko.conf b/images/chromium-headful/supervisor/services/neko.conf index c30c8b46..9662df02 100644 --- a/images/chromium-headful/supervisor/services/neko.conf +++ b/images/chromium-headful/supervisor/services/neko.conf @@ -2,6 +2,6 @@ command=/usr/bin/neko serve --server.static /var/www --server.bind 0.0.0.0:8080 autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/neko redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/xorg.conf b/images/chromium-headful/supervisor/services/xorg.conf index 72e515e5..5357ba7c 100644 --- a/images/chromium-headful/supervisor/services/xorg.conf +++ b/images/chromium-headful/supervisor/services/xorg.conf @@ -2,6 +2,6 @@ command=/usr/bin/Xorg :1 -config /etc/neko/xorg.conf -noreset -nolisten tcp autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/xorg 
redirect_stderr=true diff --git a/images/chromium-headful/wrapper.sh b/images/chromium-headful/wrapper.sh deleted file mode 100755 index 1e3797cb..00000000 --- a/images/chromium-headful/wrapper.sh +++ /dev/null @@ -1,338 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# If the WITHDOCKER environment variable is not set, it means we are not running inside a Docker container. -# Docker manages /dev/shm itself, and attempting to mount or modify it can cause permission or device errors. -# However, in a unikernel container environment (non-Docker), we need to manually create and mount /dev/shm as a tmpfs -# to support shared memory operations. -if [ -z "${WITHDOCKER:-}" ]; then - mkdir -p /dev/shm - chmod 777 /dev/shm - mount -t tmpfs tmpfs /dev/shm -fi - -# We disable scale-to-zero for the lifetime of this script and restore -# the original setting on exit. -SCALE_TO_ZERO_FILE="/uk/libukp/scale_to_zero_disable" -scale_to_zero_write() { - local char="$1" - # Skip when not running inside Unikraft Cloud (control file absent) - if [[ -e "$SCALE_TO_ZERO_FILE" ]]; then - # Write the character, but do not fail the whole script if this errors out - echo -n "$char" > "$SCALE_TO_ZERO_FILE" 2>/dev/null || \ - echo "[wrapper] Failed to write to scale-to-zero control file" >&2 - fi -} -disable_scale_to_zero() { scale_to_zero_write "+"; } -enable_scale_to_zero() { scale_to_zero_write "-"; } - -wait_for_tcp_port() { - local host="$1" - local port="$2" - local name="$3" - local attempts="${4:-0}" - local sleep_secs="${5:-0.5}" - local timeout_label="${6:-}" - local attempt=0 - - echo "[wrapper] Waiting for ${name} on ${host}:${port}..." 
- while true; do - if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then - echo "[wrapper] ${name} is ready on ${host}:${port}" - return 0 - fi - - if (( attempts > 0 )); then - attempt=$((attempt + 1)) - if (( attempt >= attempts )); then - if [[ -n "${timeout_label}" ]]; then - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${timeout_label}" >&2 - else - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${attempts} attempts" >&2 - fi - return 1 - fi - fi - - sleep "${sleep_secs}" - done -} - -# Disable scale-to-zero for the duration of the script when not running under Docker -if [[ -z "${WITHDOCKER:-}" ]]; then - echo "[wrapper] Disabling scale-to-zero" - disable_scale_to_zero -fi - -# ----------------------------------------------------------------------------- -# Ensure a sensible hostname --------------------------------------------------- -# ----------------------------------------------------------------------------- -# Some environments boot with an empty or \"(none)\" hostname which shows up in -# prompts. Best-effort set a friendly hostname early so services inherit it. -if h=$(cat /proc/sys/kernel/hostname 2>/dev/null); then - if [ -z "$h" ] || [ "$h" = "(none)" ]; then - if command -v hostname >/dev/null 2>&1; then - hostname kernel-vm 2>/dev/null || true - fi - echo -n "kernel-vm" > /proc/sys/kernel/hostname 2>/dev/null || true - fi -fi -# Also export HOSTNAME so shells pick it up immediately. -export HOSTNAME="${HOSTNAME:-kernel-vm}" - -# ----------------------------------------------------------------------------- -# Disable IPv6 ----------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# The VM environment has no IPv6 route, so any IPv6 connection attempt will fail -# immediately with ENETUNREACH. Chromium's built-in DNS client may attempt -# DNS-over-HTTPS to IPv6 endpoints (e.g. 
[2001:4860:4860::8888]:443), and each -# failed attempt wastes a connection slot from the MaxConnectionsPerProxy pool. -# Disabling IPv6 at the kernel level prevents these wasted attempts. -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 2>/dev/null || true -echo 1 > /proc/sys/net/ipv6/conf/default/disable_ipv6 2>/dev/null || true - -# ----------------------------------------------------------------------------- -# House-keeping for the unprivileged "kernel" user -------------------------------- -# Some Chromium subsystems want to create files under $HOME (NSS cert DB, dconf -# cache). If those directories are missing or owned by root Chromium emits -# noisy error messages such as: -# [ERROR:crypto/nss_util.cc:48] Failed to create /home/kernel/.pki/nssdb ... -# dconf-CRITICAL **: unable to create directory '/home/kernel/.cache/dconf' -# Pre-create them and hand ownership to the user so the messages disappear. -# When RUN_AS_ROOT is true, we skip ownership changes since we're running as root. - -if [[ "${RUN_AS_ROOT:-}" != "true" ]]; then - dirs=( - /home/kernel/user-data - /home/kernel/.config/chromium - /home/kernel/.pki/nssdb - /home/kernel/.cache/dconf - /tmp - /var/log - /var/log/supervisord - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done - - # Ensure correct ownership (ignore errors if already correct) - chown -R kernel:kernel /home/kernel /home/kernel/user-data /home/kernel/.config /home/kernel/.pki /home/kernel/.cache 2>/dev/null || true - # Make policy directory writable for runtime updates - chown -R kernel:kernel /etc/chromium/policies 2>/dev/null || true -else - # When running as root, just create the necessary directories without ownership changes - dirs=( - /tmp - /var/log - /var/log/supervisord - /home/kernel - /home/kernel/user-data - ) - - for dir in "${dirs[@]}"; do - if [ ! 
-d "$dir" ]; then - mkdir -p "$dir" - fi - done -fi - -# ----------------------------------------------------------------------------- -# Dynamic log aggregation for /var/log/supervisord ----------------------------- -# ----------------------------------------------------------------------------- -# Tails any existing and future files under /var/log/supervisord, -# prefixing each line with the relative filepath, e.g. [chromium] ... -start_dynamic_log_aggregator() { - echo "[wrapper] Starting dynamic log aggregator for /var/log/supervisord" - ( - declare -A tailed_files=() - start_tail() { - local f="$1" - [[ -f "$f" ]] || return 0 - [[ -n "${tailed_files[$f]:-}" ]] && return 0 - local label="${f#/var/log/supervisord/}" - # Tie tails to this subshell lifetime so they exit when we stop it - tail --pid="$$" -n +1 -F "$f" 2>/dev/null | sed -u "s/^/[${label}] /" & - tailed_files[$f]=1 - } - # Periodically scan for new *.log files without extra dependencies - while true; do - while IFS= read -r -d '' f; do - start_tail "$f" - done < <(find /var/log/supervisord -type f -print0 2>/dev/null || true) - sleep 1 - done - ) & - tail_pids+=("$!") -} - -# Start log aggregator early so we see supervisor and service logs as they appear -start_dynamic_log_aggregator - -export DISPLAY=:1 - -# Predefine ports and export for services -export INTERNAL_PORT="${INTERNAL_PORT:-9223}" -export CHROME_PORT="${CHROME_PORT:-9222}" - -# Track background tailing processes for cleanup -tail_pids=() - -# Cleanup handler (set early so we catch early failures) -cleanup () { - echo "[wrapper] Cleaning up..." 
- # Re-enable scale-to-zero if the script terminates early - enable_scale_to_zero - supervisorctl -c /etc/supervisor/supervisord.conf stop chromedriver || true - supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true - supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true - supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true - # Stop log tailers - if [[ -n "${tail_pids[*]:-}" ]]; then - for tp in "${tail_pids[@]}"; do - kill -TERM "$tp" 2>/dev/null || true - done - fi -} -trap cleanup TERM INT - -# Start supervisord early so it can manage Xorg and Mutter -echo "[wrapper] Starting supervisord" -supervisord -c /etc/supervisor/supervisord.conf -echo "[wrapper] Waiting for supervisord socket..." -for i in {1..30}; do -if [ -S /var/run/supervisor.sock ]; then - break -fi -sleep 0.2 -done - -init-envoy.sh - -echo "[wrapper] Starting Xorg via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start xorg -echo "[wrapper] Waiting for Xorg to open display $DISPLAY..." -for i in {1..50}; do - if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then - break - fi - sleep 0.2 -done - -echo "[wrapper] Starting Mutter via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start mutter -echo "[wrapper] Waiting for Mutter to be ready..." -timeout=30 -while [ $timeout -gt 0 ]; do - if xdotool search --class "mutter" >/dev/null 2>&1; then - break - fi - sleep 1 - ((timeout--)) -done - -# ----------------------------------------------------------------------------- -# System-bus setup via supervisord -------------------------------------------- -# ----------------------------------------------------------------------------- -echo "[wrapper] Starting system D-Bus daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start dbus -echo "[wrapper] Waiting for D-Bus system bus socket..." 
-for i in {1..50}; do - if [ -S /run/dbus/system_bus_socket ]; then - break - fi - sleep 0.2 -done - -# We will point DBUS_SESSION_BUS_ADDRESS at the system bus socket to suppress -# autolaunch attempts that failed and spammed logs. -export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" - -# Start Chromium with display :1 and remote debugging, loading our recorder extension. -echo "[wrapper] Starting Chromium via supervisord on internal port $INTERNAL_PORT" -supervisorctl -c /etc/supervisor/supervisord.conf start chromium -wait_for_tcp_port 127.0.0.1 "$INTERNAL_PORT" "Chromium remote debugging" 100 0.2 "20s" || true - -if [[ "${ENABLE_WEBRTC:-}" == "true" ]]; then - # use webrtc - echo "[wrapper] ✨ Starting neko (webrtc server) via supervisord." - supervisorctl -c /etc/supervisor/supervisord.conf start neko - - # Wait for neko to be ready. - wait_for_tcp_port 127.0.0.1 8080 "neko" -fi - -echo "[wrapper] ✨ Starting kernel-images API." - -API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" -API_FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" -API_DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" -API_MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" -API_OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" - -# Start via supervisord (env overrides are read by the service's command) -supervisorctl -c /etc/supervisor/supervisord.conf start kernel-images-api -wait_for_tcp_port 127.0.0.1 "${API_PORT}" "kernel-images API" - -echo "[wrapper] Starting ChromeDriver via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start chromedriver -wait_for_tcp_port 127.0.0.1 9225 "ChromeDriver" 50 0.2 "10s" || true - -echo "[wrapper] Starting PulseAudio daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start pulseaudio - -# close the "--no-sandbox unsupported flag" warning when running as root -# in the unikernel runtime we haven't been able to get chromium to launch as non-root without cryptic crashpad errors -# 
and when running as root you must use the --no-sandbox flag, which generates a warning -if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then - echo "[wrapper] Running as root, attempting to dismiss the --no-sandbox unsupported flag warning" - if read -r WIDTH HEIGHT <<< "$(xdotool getdisplaygeometry 2>/dev/null)"; then - # Work out an x-coordinate slightly inside the right-hand edge of the - OFFSET_X=$(( WIDTH - 30 )) - if (( OFFSET_X < 0 )); then - OFFSET_X=0 - fi - - # Wait for Chromium window to open before dismissing the --no-sandbox warning. - target='New Tab - Chromium' - echo "[wrapper] Waiting for Chromium window \"${target}\" to appear and become active..." - while :; do - win_id=$(xwininfo -root -tree 2>/dev/null | awk -v t="$target" '$0 ~ t {print $1; exit}') - if [[ -n $win_id ]]; then - win_id=${win_id%:} - if xdotool windowactivate --sync "$win_id"; then - echo "[wrapper] Focused window $win_id ($target) on $DISPLAY" - break - fi - fi - sleep 0.5 - done - - # wait... not sure but this just increases the likelihood of success - # without the sleep you often open the live view and see the mouse hovering over the "X" to dismiss the warning, suggesting that it clicked before the warning or chromium appeared - sleep 5 - - # Attempt to click the warning's close button - echo "[wrapper] Clicking the warning's close button at x=$OFFSET_X y=115" - if curl -s -o /dev/null -X POST \ - http://localhost:${API_PORT}/computer/click_mouse \ - -H "Content-Type: application/json" \ - -d "{\"x\":${OFFSET_X},\"y\":115}"; then - echo "[wrapper] Successfully clicked the warning's close button" - else - echo "[wrapper] Failed to click the warning's close button" >&2 - fi - else - echo "[wrapper] xdotool failed to obtain display geometry; skipping sandbox warning dismissal." 
>&2 - fi -fi - -if [[ -z "${WITHDOCKER:-}" ]]; then - enable_scale_to_zero -fi - -# Keep the container running while streaming logs -wait diff --git a/images/chromium-headless/image/Dockerfile b/images/chromium-headless/image/Dockerfile index aa7d17ea..b9a3462a 100644 --- a/images/chromium-headless/image/Dockerfile +++ b/images/chromium-headless/image/Dockerfile @@ -28,6 +28,12 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher +# Build container entrypoint wrapper (replaces wrapper.sh) +RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ + --mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \ + GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/wrapper ./cmd/wrapper + FROM docker.io/ubuntu:22.04 AS ffmpeg-downloader # Allow cross-compilation when building with BuildKit platforms @@ -217,8 +223,8 @@ RUN useradd -m -s /bin/bash kernel COPY images/chromium-headless/image/start-xvfb.sh /images/chromium-headless/image/start-xvfb.sh RUN chmod +x /images/chromium-headless/image/start-xvfb.sh -# Wrapper script to set environment -COPY images/chromium-headless/image/wrapper.sh /usr/bin/wrapper.sh +# Container entrypoint wrapper (Go binary, replaces wrapper.sh) +COPY --from=server-builder /out/wrapper /wrapper # Supervisord configuration COPY images/chromium-headless/image/supervisord.conf /etc/supervisor/supervisord.conf @@ -251,4 +257,4 @@ RUN esbuild /tmp/playwright-daemon.ts \ --external:esbuild \ && rm /tmp/playwright-daemon.ts -ENTRYPOINT [ "/usr/bin/wrapper.sh" ] +ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headless/image/Kraftfile b/images/chromium-headless/image/Kraftfile index b11a88c2..9bb12637 100644 --- a/images/chromium-headless/image/Kraftfile +++ b/images/chromium-headless/image/Kraftfile @@ -9,4 +9,4 @@ 
labels: rootfs: ./initrd -cmd: ["/usr/bin/wrapper.sh"] +cmd: ["/wrapper"] diff --git a/images/chromium-headless/image/supervisor/services/chromedriver.conf b/images/chromium-headless/image/supervisor/services/chromedriver.conf index 9bca5365..7d8d3812 100644 --- a/images/chromium-headless/image/supervisor/services/chromedriver.conf +++ b/images/chromium-headless/image/supervisor/services/chromedriver.conf @@ -2,6 +2,6 @@ command=/usr/local/bin/chromedriver --port=9225 --allowed-ips=127.0.0.1 --log-level=INFO autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/chromedriver redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/dbus.conf b/images/chromium-headless/image/supervisor/services/dbus.conf index 7edc479c..fcab6898 100644 --- a/images/chromium-headless/image/supervisor/services/dbus.conf +++ b/images/chromium-headless/image/supervisor/services/dbus.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p /run/dbus && dbus-uuidgen --ensure && dbus-daemon --system --address=unix:path=/run/dbus/system_bus_socket --nopidfile --nosyslog --nofork' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/dbus redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/kernel-images-api.conf b/images/chromium-headless/image/supervisor/services/kernel-images-api.conf index e57d30a8..0638dea8 100644 --- a/images/chromium-headless/image/supervisor/services/kernel-images-api.conf +++ b/images/chromium-headless/image/supervisor/services/kernel-images-api.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" 
LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" exec /usr/local/bin/kernel-images-api' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/kernel-images-api redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/xvfb.conf b/images/chromium-headless/image/supervisor/services/xvfb.conf index 5279bda4..28974551 100644 --- a/images/chromium-headless/image/supervisor/services/xvfb.conf +++ b/images/chromium-headless/image/supervisor/services/xvfb.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc '/images/chromium-headless/image/start-xvfb.sh' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/xvfb redirect_stderr=true diff --git a/images/chromium-headless/image/wrapper.sh b/images/chromium-headless/image/wrapper.sh deleted file mode 100755 index 6a1935b9..00000000 --- a/images/chromium-headless/image/wrapper.sh +++ /dev/null @@ -1,294 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# If we are outside Docker-in-Docker make sure /dev/shm exists -if [ -z "${WITHDOCKER:-}" ]; then - mkdir -p /dev/shm - chmod 777 /dev/shm - mount -t tmpfs tmpfs /dev/shm -fi - -# We disable scale-to-zero for the lifetime of this script and restore -# the original setting on exit. 
-SCALE_TO_ZERO_FILE="/uk/libukp/scale_to_zero_disable" -scale_to_zero_write() { - local char="$1" - # Skip when not running inside Unikraft Cloud (control file absent) - if [[ -e "$SCALE_TO_ZERO_FILE" ]]; then - # Write the character, but do not fail the whole script if this errors out - echo -n "$char" > "$SCALE_TO_ZERO_FILE" 2>/dev/null || \ - echo "[wrapper] Failed to write to scale-to-zero control file" >&2 - fi -} -disable_scale_to_zero() { scale_to_zero_write "+"; } -enable_scale_to_zero() { scale_to_zero_write "-"; } - -wait_for_tcp_port() { - local host="$1" - local port="$2" - local name="$3" - local attempts="${4:-0}" - local sleep_secs="${5:-0.5}" - local timeout_label="${6:-}" - local attempt=0 - - echo "[wrapper] Waiting for ${name} on ${host}:${port}..." - while true; do - if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then - echo "[wrapper] ${name} is ready on ${host}:${port}" - return 0 - fi - - if (( attempts > 0 )); then - attempt=$((attempt + 1)) - if (( attempt >= attempts )); then - if [[ -n "${timeout_label}" ]]; then - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${timeout_label}" >&2 - else - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${attempts} attempts" >&2 - fi - return 1 - fi - fi - - sleep "${sleep_secs}" - done -} - -# Disable scale-to-zero for the duration of the script when not running under Docker -if [[ -z "${WITHDOCKER:-}" ]]; then - echo "[wrapper] Disabling scale-to-zero" - disable_scale_to_zero -fi - -# ----------------------------------------------------------------------------- -# Ensure a sensible hostname --------------------------------------------------- -# ----------------------------------------------------------------------------- -if h=$(cat /proc/sys/kernel/hostname 2>/dev/null); then - if [ -z "$h" ] || [ "$h" = "(none)" ]; then - if command -v hostname >/dev/null 2>&1; then - hostname kernel-vm 2>/dev/null || true - fi - echo -n "kernel-vm" > 
/proc/sys/kernel/hostname 2>/dev/null || true - fi -fi -export HOSTNAME="${HOSTNAME:-kernel-vm}" - -# ----------------------------------------------------------------------------- -# Disable IPv6 ----------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# The VM environment has no IPv6 route, so any IPv6 connection attempt will fail -# immediately with ENETUNREACH. Chromium's built-in DNS client may attempt -# DNS-over-HTTPS to IPv6 endpoints (e.g. [2001:4860:4860::8888]:443), and each -# failed attempt wastes a connection slot from the MaxConnectionsPerProxy pool. -# Disabling IPv6 at the kernel level prevents these wasted attempts. -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 2>/dev/null || true -echo 1 > /proc/sys/net/ipv6/conf/default/disable_ipv6 2>/dev/null || true - -# if CHROMIUM_FLAGS is not set, default to the flags used in playwright_stealth -# NOTE: --disable-background-networking was intentionally removed because it prevents -# Chrome from fetching extensions via ExtensionInstallForcelist enterprise policy. -# Enterprise extensions require Chrome to make HTTP requests to fetch update.xml and .crx files. 
-if [ -z "${CHROMIUM_FLAGS:-}" ]; then - CHROMIUM_FLAGS="--accept-lang=en-US,en \ - --allow-pre-commit-input \ - --blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4 \ - --crash-dumps-dir=/tmp/chromium-dumps \ - --disable-back-forward-cache \ - --disable-background-timer-throttling \ - --disable-backgrounding-occluded-windows \ - --disable-blink-features=AutomationControlled \ - --disable-breakpad \ - --disable-client-side-phishing-detection \ - --disable-component-extensions-with-background-pages \ - --disable-crash-reporter \ - --disable-crashpad \ - --disable-dev-shm-usage \ - --disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate \ - --disable-field-trial-config \ - --disable-gcm-registration \ - --disable-gpu \ - --disable-gpu-compositing \ - --disable-hang-monitor \ - --disable-ipc-flooding-protection \ - --disable-notifications \ - --disable-popup-blocking \ - --disable-prompt-on-repost \ - --disable-renderer-backgrounding \ - --disable-search-engine-choice-screen \ - --disable-software-rasterizer \ - --enable-use-zoom-for-dsf=false \ - --export-tagged-pdf \ - --force-color-profile=srgb \ - --hide-crash-restore-bubble \ - --hide-scrollbars \ - --metrics-recording-only \ - --mute-audio \ - --no-default-browser-check \ - --no-first-run \ - --no-sandbox \ - --no-service-autorun \ - --ozone-platform=headless \ - --password-store=basic \ - --unsafely-disable-devtools-self-xss-warnings \ - --use-angle=swiftshader \ - --use-gl=angle \ - --use-mock-keychain" -fi -export CHROMIUM_FLAGS - -# ----------------------------------------------------------------------------- -# House-keeping 
for the unprivileged "kernel" user ---------------------------- -# When RUN_AS_ROOT is true, we skip ownership changes since we're running as root. -# ----------------------------------------------------------------------------- -if [[ "${RUN_AS_ROOT:-}" != "true" ]]; then - dirs=( - /home/kernel/user-data - /home/kernel/.config/chromium - /home/kernel/.pki/nssdb - /home/kernel/.cache/dconf - /tmp - /var/log - /var/log/supervisord - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done - - # Ensure correct ownership (ignore errors if already correct) - chown -R kernel:kernel /home/kernel /home/kernel/user-data /home/kernel/.config /home/kernel/.pki /home/kernel/.cache 2>/dev/null || true - # Make policy directory writable for runtime updates - chown -R kernel:kernel /etc/chromium/policies 2>/dev/null || true -else - # When running as root, just create the necessary directories without ownership changes - dirs=( - /tmp - /var/log - /var/log/supervisord - /home/kernel - /home/kernel/user-data - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done -fi - -# ----------------------------------------------------------------------------- -# Dynamic log aggregation for /var/log/supervisord ----------------------------- -# ----------------------------------------------------------------------------- -# Tails any existing and future files under /var/log/supervisord, -# prefixing each line with the relative filepath, e.g. [chromium] ... 
-start_dynamic_log_aggregator() { - echo "[wrapper] Starting dynamic log aggregator for /var/log/supervisord" - ( - declare -A tailed_files=() - start_tail() { - local f="$1" - [[ -f "$f" ]] || return 0 - [[ -n "${tailed_files[$f]:-}" ]] && return 0 - local label="${f#/var/log/supervisord/}" - # Tie tails to this subshell lifetime so they exit when we stop it - tail --pid="$$" -n +1 -F "$f" 2>/dev/null | sed -u "s/^/[${label}] /" & - tailed_files[$f]=1 - } - # Periodically scan for new *.log files without extra dependencies - while true; do - while IFS= read -r -d '' f; do - start_tail "$f" - done < <(find /var/log/supervisord -type f -print0 2>/dev/null || true) - sleep 1 - done - ) & - tail_pids+=("$!") -} - -# Track background tailing processes for cleanup -tail_pids=() - -# Start log aggregator early so we see supervisor and service logs as they appear -start_dynamic_log_aggregator - -# Export common env used by services -export DISPLAY=:1 -export HEIGHT=${HEIGHT:-1080} -export WIDTH=${WIDTH:-1920} -export INTERNAL_PORT="${INTERNAL_PORT:-9223}" -export CHROME_PORT="${CHROME_PORT:-9222}" - -# Cleanup handler -cleanup () { - echo "[wrapper] Cleaning up..." - # Re-enable scale-to-zero if the script terminates early - enable_scale_to_zero - supervisorctl -c /etc/supervisor/supervisord.conf stop chromedriver || true - supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true - supervisorctl -c /etc/supervisor/supervisord.conf stop xvfb || true - supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true - supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true - # Stop log tailers - if [[ -n "${tail_pids[*]:-}" ]]; then - for tp in "${tail_pids[@]}"; do - kill -TERM "$tp" 2>/dev/null || true - done - fi -} -trap cleanup TERM INT - -echo "[wrapper] Starting supervisord" -supervisord -c /etc/supervisor/supervisord.conf -echo "[wrapper] Waiting for supervisord socket..." 
-for i in {1..30}; do - if [ -S /var/run/supervisor.sock ]; then - break - fi - sleep 0.2 -done - -init-envoy.sh - -echo "[wrapper] Starting system D-Bus daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start dbus -for i in {1..50}; do - if [ -S /run/dbus/system_bus_socket ]; then - break - fi - sleep 0.2 -done -export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" - -echo "[wrapper] Starting Xvfb via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start xvfb -for i in {1..50}; do - if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then - break - fi - sleep 0.2 -done - -echo "[wrapper] Starting Chromium via supervisord on internal port $INTERNAL_PORT" -supervisorctl -c /etc/supervisor/supervisord.conf start chromium -wait_for_tcp_port 127.0.0.1 "$INTERNAL_PORT" "Chromium remote debugging" 100 0.2 "20s" || true - -echo "[wrapper] ✨ Starting kernel-images API via supervisord." -supervisorctl -c /etc/supervisor/supervisord.conf start kernel-images-api -API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" -wait_for_tcp_port 127.0.0.1 "${API_PORT}" "kernel-images API" - -echo "[wrapper] Starting ChromeDriver via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start chromedriver -wait_for_tcp_port 127.0.0.1 9225 "ChromeDriver" 50 0.2 "10s" || true - -echo "[wrapper] startup complete!" 
-# Re-enable scale-to-zero once startup has completed (when not under Docker) -if [[ -z "${WITHDOCKER:-}" ]]; then - enable_scale_to_zero -fi -# Keep the container running while streaming logs -wait diff --git a/server/cmd/wrapper/main.go b/server/cmd/wrapper/main.go new file mode 100644 index 00000000..01374063 --- /dev/null +++ b/server/cmd/wrapper/main.go @@ -0,0 +1,591 @@ +// wrapper boots the chromium-headful and chromium-headless containers: +// prepares the environment, starts supervisord, brings services up in parallel +// where the dependency graph allows, and waits for CDP to be reachable through +// kernel-images-api. +// +// Replaces the legacy /wrapper.sh shipped in both images. Behavior parity is +// intentional — we still rely on supervisord, sysctl, dbus, etc. The only goal +// beyond parity is minimizing time-to-CDP-ready by removing serial dead time. +// +// The headful vs headless profile is detected at boot from supervisor's conf.d +// (xorg.conf → headful, xvfb.conf → headless), which keeps a single binary +// usable in both images without Dockerfile coordination. +package main + +import ( + "bufio" + "context" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" +) + +const ( + supervisorConf = "/etc/supervisor/supervisord.conf" + supervisorConfD = "/etc/supervisor/conf.d/services" + supervisorSock = "/var/run/supervisor.sock" + supervisordLogD = "/var/log/supervisord" + scaleToZeroFile = "/uk/libukp/scale_to_zero_disable" + dbusSocket = "/run/dbus/system_bus_socket" + defaultDisplay = ":1" + defaultIntPort = "9223" + defaultAPIPort = "10001" +) + +type profile int + +const ( + profileHeadful profile = iota + profileHeadless +) + +// detectProfile keys off whichever X server's supervisor conf is present. +// The image build is what writes these files, so this is deterministic. 
+func detectProfile() profile { + if _, err := os.Stat(filepath.Join(supervisorConfD, "xvfb.conf")); err == nil { + return profileHeadless + } + return profileHeadful +} + +func main() { + t0 := time.Now() + prof := detectProfile() + logf("starting wrapper (profile=%s)", profileName(prof)) + + // /dev/shm: only mount when not running under Docker (Docker manages it). + if os.Getenv("WITHDOCKER") == "" { + _ = os.MkdirAll("/dev/shm", 0o1777) + _ = os.Chmod("/dev/shm", 0o1777) + _ = exec.Command("mount", "-t", "tmpfs", "tmpfs", "/dev/shm").Run() + } + + // Disable scale-to-zero for the duration of startup; restored on exit. + disableScaleToZero() + defer enableScaleToZero() + + // Headless ships a default CHROMIUM_FLAGS list (headless+stealth flags) + // when callers don't set one. Headful's defaults are caller-supplied. + if prof == profileHeadless { + applyHeadlessDefaultFlags() + } + + // Hostname: some envs boot with empty/(none); pick a friendly default. + if h, err := os.ReadFile("/proc/sys/kernel/hostname"); err == nil { + if v := strings.TrimSpace(string(h)); v == "" || v == "(none)" { + _ = exec.Command("hostname", "kernel-vm").Run() + _ = os.WriteFile("/proc/sys/kernel/hostname", []byte("kernel-vm"), 0o644) + } + } + if os.Getenv("HOSTNAME") == "" { + _ = os.Setenv("HOSTNAME", "kernel-vm") + } + + // Disable IPv6 — Chromium DOH wastes connection slots on unreachable v6 endpoints. + _ = os.WriteFile("/proc/sys/net/ipv6/conf/all/disable_ipv6", []byte("1"), 0o644) + _ = os.WriteFile("/proc/sys/net/ipv6/conf/default/disable_ipv6", []byte("1"), 0o644) + + // Pre-create per-user dirs so chromium subsystems don't error. + prepareUserDirs(os.Getenv("RUN_AS_ROOT") == "true") + + // Tail aggregator for service logs. + startLogAggregator() + + // Default env that downstream services expect. 
+ _ = os.Setenv("DISPLAY", defaultDisplay) + if os.Getenv("INTERNAL_PORT") == "" { + _ = os.Setenv("INTERNAL_PORT", defaultIntPort) + } + if os.Getenv("CHROME_PORT") == "" { + _ = os.Setenv("CHROME_PORT", "9222") + } + // Point dbus clients at the system bus socket. Set before supervisord + // starts so it captures the env for child services (notably chromium, + // which would otherwise spam autolaunch errors). + _ = os.Setenv("DBUS_SESSION_BUS_ADDRESS", "unix:path="+dbusSocket) + + // Stale X locks from prior runs. + _ = os.Remove("/tmp/.X1-lock") + _ = os.Remove("/tmp/.X11-unix/X1") + + // supervisord — start in nodaemon mode so we own its lifecycle. + // Without -n it forks and the parent exits with code 0, which would + // drop us out of supCmd.Wait() and the container would stop. + logf("starting supervisord") + supCmd := exec.Command("supervisord", "-n", "-c", supervisorConf) + supCmd.Stdout = os.Stdout + supCmd.Stderr = os.Stderr + if err := supCmd.Start(); err != nil { + fatalf("supervisord start: %v", err) + } + waitForSocket(supervisorSock, 10*time.Second) + + // Envoy bootstrap: cert generation, NSS DB, template render, and + // `supervisorctl start envoy`. Run concurrently with Phase A so the + // shell-out work (openssl, certutil, update-ca-certificates) overlaps + // xorg/dbus/chromedriver bring-up. Phase B (chromium) gates on this + // because chromium reads the system CA trust store at process start + // and needs the envoy self-signed cert in place. The envoy listener + // itself (port 3128) is probed in waitAllReady, not here. + envoyDone := make(chan struct{}) + if isExecutable("/usr/local/bin/init-envoy.sh") { + go func() { + defer close(envoyDone) + runStream("envoy-init", "/usr/local/bin/init-envoy.sh") + }() + } else { + close(envoyDone) + } + + // Phase A: services with no X/dbus/chromium dependency. 
chromedriver + // listens on 9225 immediately and only attaches to chromium on session + // creation, so it can come up alongside the display stack. + xServer := "xorg" + if prof == profileHeadless { + xServer = "xvfb" + } + startAll(xServer, "dbus", "chromedriver") + waitForX(defaultDisplay, 20*time.Second) + waitForSocket(dbusSocket, 10*time.Second) + + // Pre-touch chromium's supervisord log so kernel-images-api's `tail -f` + // doesn't bail out and enter its 250ms retry backoff when started in + // parallel with chromium. + _ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644) + + // Gate chromium on envoy cert/template work being done. + <-envoyDone + + // Phase B: everything that needs X+dbus, started in a single supervisorctl + // invocation. On headful, mutter is the compositor and neko/api come up + // alongside chromium so their bring-up overlaps with chromium boot rather + // than trailing CDP. Headless has no compositor and no neko. + webrtc := prof == profileHeadful && os.Getenv("ENABLE_WEBRTC") == "true" + var phaseB []string + if prof == profileHeadful { + phaseB = []string{"mutter", "chromium", "kernel-images-api"} + if webrtc { + phaseB = append(phaseB, "neko") + } + } else { + phaseB = []string{"chromium", "kernel-images-api"} + } + startAll(phaseB...) + + // Wait for the union of caller-visible ready signals. Each probe runs + // concurrently and logs as soon as its target is reachable. + waitAllReady(t0, webrtc) + logf("ready in %s", since(t0)) + + // Cosmetic + non-critical services come up off the hot path. Headless has + // no audio stack and no UI to dismiss. + if prof == profileHeadful { + go func() { + startAll("pulseaudio") + if os.Getenv("RUN_AS_ROOT") == "true" { + dismissNoSandboxWarning() + } + }() + } + + // Re-enable scale-to-zero now that the hot path is up. + enableScaleToZero() + + // Forward signals so cleanup runs and supervisord is taken down cleanly. 
+ sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT) + go func() { + <-sigs + logf("shutdown: stopping services") + _ = exec.Command("supervisorctl", "-c", supervisorConf, "stop", "all").Run() + _ = supCmd.Process.Signal(syscall.SIGTERM) + }() + + // Block on supervisord; container exits when it does. + if err := supCmd.Wait(); err != nil { + logf("supervisord exited: %v", err) + } +} + +// startAll asks supervisord to start the given programs. We invoke +// supervisorctl once (it accepts multiple args) so we don't pay python +// cold-start costs per service. +func startAll(progs ...string) { + if len(progs) == 0 { + return + } + args := append([]string{"-c", supervisorConf, "start"}, progs...) + cmd := exec.Command("supervisorctl", args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + _ = cmd.Run() // a service that fails to come up will surface via readiness checks +} + +func waitForSocket(path string, timeout time.Duration) { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if fi, err := os.Stat(path); err == nil && fi.Mode()&os.ModeSocket != 0 { + return + } + time.Sleep(20 * time.Millisecond) + } + logf("WARNING: socket %s not ready after %s", path, timeout) +} + +// waitAllReady gates on all caller-visible ready signals concurrently: +// - CDP : HTTP /json/version on the public CDP port (proves api proxy is wired +// through to chromium's DevTools server) +// - cd : TCP on chromedriver's internal port 9225 (api on 9224 is bound when +// api itself is up, which CDP readiness already implies) +// - proxy : TCP on chromium's --forward-proxy-port (8888) +// - neko : TCP on neko's HTTP port (8080), only when ENABLE_WEBRTC=true +// - envoy : TCP on envoy's listener (3128), only when envoy is enabled +func waitAllReady(t0 time.Time, webrtc bool) { + chromePort := os.Getenv("CHROME_PORT") + if chromePort == "" { + chromePort = "9222" + } + probes := []struct { + name string + fn func() bool + }{ + 
{"cdp", func() bool { return httpProbeOK("http://127.0.0.1:" + chromePort + "/json/version") }}, + {"chromedriver", func() bool { return tcpOK("127.0.0.1", "9225") }}, + {"forward-proxy", func() bool { return tcpOK("127.0.0.1", "8888") }}, + } + if webrtc { + probes = append(probes, struct { + name string + fn func() bool + }{"neko", func() bool { return tcpOK("127.0.0.1", "8080") }}) + } + if envoyEnabled() { + probes = append(probes, struct { + name string + fn func() bool + }{"envoy", func() bool { return tcpOK("127.0.0.1", "3128") }}) + } + + done := make(chan string, len(probes)) + for _, p := range probes { + go func(name string, fn func() bool) { + deadline := time.Now().Add(60 * time.Second) + for time.Now().Before(deadline) { + if fn() { + logf("[ready] %s in %s", name, since(t0)) + done <- name + return + } + time.Sleep(20 * time.Millisecond) + } + logf("[ready] WARNING: %s never became ready", name) + done <- name + }(p.name, p.fn) + } + for range probes { + <-done + } +} + +func tcpOK(host, port string) bool { + c, err := net.DialTimeout("tcp4", net.JoinHostPort(host, port), 200*time.Millisecond) + if err != nil { + return false + } + _ = c.Close() + return true +} + +var probeClient = &http.Client{Timeout: time.Second} + +func httpProbeOK(url string) bool { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return false + } + resp, err := probeClient.Do(req) + if err != nil { + return false + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return false + } + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return strings.Contains(string(body), `"Browser"`) +} + +// waitForX waits until the X server is reachable on display :N. 
We try both +// the named unix socket (Xorg, headful) and the abstract namespace socket +// (Xvfb runs with -nolisten unix, which disables the named socket but leaves +// the abstract one). Cheaper than spawning xdpyinfo in a loop. +func waitForX(display string, timeout time.Duration) { + num := strings.TrimPrefix(display, ":") + named := "/tmp/.X11-unix/X" + num + abstract := "@/tmp/.X11-unix/X" + num // Linux abstract namespace + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if c, err := net.DialTimeout("unix", named, 200*time.Millisecond); err == nil { + _ = c.Close() + return + } + if c, err := net.DialTimeout("unix", abstract, 200*time.Millisecond); err == nil { + _ = c.Close() + return + } + time.Sleep(20 * time.Millisecond) + } + logf("WARNING: X display %s not responsive after %s", display, timeout) +} + +func disableScaleToZero() { writeScaleToZero("+") } +func enableScaleToZero() { writeScaleToZero("-") } + +func writeScaleToZero(c string) { + if _, err := os.Stat(scaleToZeroFile); err != nil { + return // not running on Unikraft Cloud + } + _ = os.WriteFile(scaleToZeroFile, []byte(c), 0o644) +} + +// applyHeadlessDefaultFlags mirrors the legacy headless wrapper.sh: when +// CHROMIUM_FLAGS is unset, fill in a curated headless+stealth flag list. +// --disable-background-networking is intentionally omitted: it prevents +// Chrome from fetching ExtensionInstallForcelist managed extensions. 
+func applyHeadlessDefaultFlags() { + if strings.TrimSpace(os.Getenv("CHROMIUM_FLAGS")) != "" { + return + } + flags := strings.Join([]string{ + "--accept-lang=en-US,en", + "--allow-pre-commit-input", + "--blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4", + "--crash-dumps-dir=/tmp/chromium-dumps", + "--disable-back-forward-cache", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-blink-features=AutomationControlled", + "--disable-breakpad", + "--disable-client-side-phishing-detection", + "--disable-component-extensions-with-background-pages", + "--disable-crash-reporter", + "--disable-crashpad", + "--disable-dev-shm-usage", + "--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate", + "--disable-field-trial-config", + "--disable-gcm-registration", + "--disable-gpu", + "--disable-gpu-compositing", + "--disable-hang-monitor", + "--disable-ipc-flooding-protection", + "--disable-notifications", + "--disable-popup-blocking", + "--disable-prompt-on-repost", + "--disable-renderer-backgrounding", + "--disable-search-engine-choice-screen", + "--disable-software-rasterizer", + "--enable-use-zoom-for-dsf=false", + "--export-tagged-pdf", + "--force-color-profile=srgb", + "--hide-crash-restore-bubble", + "--hide-scrollbars", + "--metrics-recording-only", + "--mute-audio", + "--no-default-browser-check", + "--no-first-run", + "--no-sandbox", + "--no-service-autorun", + "--ozone-platform=headless", + "--password-store=basic", + "--unsafely-disable-devtools-self-xss-warnings", + "--use-angle=swiftshader", + "--use-gl=angle", + 
"--use-mock-keychain", + }, " ") + _ = os.Setenv("CHROMIUM_FLAGS", flags) +} + +func profileName(p profile) string { + if p == profileHeadless { + return "headless" + } + return "headful" +} + +// envoyEnabled mirrors init-envoy.sh's gate: when any of these are unset +// the script exits early without starting envoy, so we should skip the +// readiness probe too (otherwise it would just time out at 60s). +func envoyEnabled() bool { + return os.Getenv("INST_NAME") != "" && + os.Getenv("METRO_NAME") != "" && + os.Getenv("XDS_SERVER") != "" && + os.Getenv("KERNEL_INSTANCE_JWT") != "" +} + +func prepareUserDirs(asRoot bool) { + if asRoot { + for _, d := range []string{"/tmp", "/var/log", supervisordLogD, "/home/kernel", "/home/kernel/user-data"} { + _ = os.MkdirAll(d, 0o755) + } + return + } + dirs := []string{ + "/home/kernel/user-data", + "/home/kernel/.config/chromium", + "/home/kernel/.pki/nssdb", + "/home/kernel/.cache/dconf", + "/tmp", + "/var/log", + supervisordLogD, + } + for _, d := range dirs { + _ = os.MkdirAll(d, 0o755) + } + _ = exec.Command("chown", "-R", "kernel:kernel", + "/home/kernel", "/home/kernel/user-data", "/home/kernel/.config", + "/home/kernel/.pki", "/home/kernel/.cache").Run() + _ = exec.Command("chown", "-R", "kernel:kernel", "/etc/chromium/policies").Run() +} + +// startLogAggregator tails any file under /var/log/supervisord, prefixing +// each line with the relative path so the container log stream remains +// readable. 
+func startLogAggregator() { + _ = os.MkdirAll(supervisordLogD, 0o755) + go func() { + seen := map[string]bool{} + for { + entries, _ := os.ReadDir(supervisordLogD) + for _, e := range entries { + path := filepath.Join(supervisordLogD, e.Name()) + if seen[path] { + continue + } + if fi, err := os.Stat(path); err == nil && fi.Mode().IsRegular() { + seen[path] = true + go tailFile(path) + } + } + time.Sleep(500 * time.Millisecond) + } + }() +} + +func tailFile(path string) { + cmd := exec.Command("tail", "-n", "+1", "-F", path) + stdout, err := cmd.StdoutPipe() + if err != nil { + return + } + cmd.Stderr = nil + if err := cmd.Start(); err != nil { + return + } + label := filepath.Base(path) + scanner := bufio.NewScanner(stdout) + scanner.Buffer(make([]byte, 64*1024), 1024*1024) + for scanner.Scan() { + fmt.Printf("[%s] %s\n", label, scanner.Text()) + } +} + +// dismissNoSandboxWarning replicates the wrapper.sh behaviour of clicking the +// "X" on the --no-sandbox infobar. Cosmetic; runs off the hot path. 
+func dismissNoSandboxWarning() { + out, err := exec.Command("xdotool", "getdisplaygeometry").Output() + if err != nil { + return + } + parts := strings.Fields(strings.TrimSpace(string(out))) + if len(parts) != 2 { + return + } + width := parts[0] + x := width + if w := atoi(width); w > 30 { + x = fmt.Sprintf("%d", w-30) + } + target := "New Tab - Chromium" + deadline := time.Now().Add(30 * time.Second) + for time.Now().Before(deadline) { + out, err := exec.Command("xdotool", "search", "--name", target).Output() + if err == nil && len(strings.TrimSpace(string(out))) > 0 { + id := strings.Fields(string(out))[0] + if exec.Command("xdotool", "windowactivate", "--sync", id).Run() == nil { + break + } + } + time.Sleep(100 * time.Millisecond) + } + port := os.Getenv("KERNEL_IMAGES_API_PORT") + if port == "" { + port = defaultAPIPort + } + body := fmt.Sprintf(`{"x":%s,"y":115}`, x) + _ = exec.Command("curl", "-s", "-o", "/dev/null", "-X", "POST", + "http://localhost:"+port+"/computer/click_mouse", + "-H", "Content-Type: application/json", + "-d", body).Run() +} + +func atoi(s string) int { + n := 0 + for _, c := range s { + if c < '0' || c > '9' { + return 0 + } + n = n*10 + int(c-'0') + } + return n +} + +func isExecutable(path string) bool { + fi, err := os.Stat(path) + return err == nil && fi.Mode().IsRegular() && fi.Mode().Perm()&0o111 != 0 +} + +func runStream(label, name string, args ...string) { + cmd := exec.Command(name, args...) + cmd.Stdout = prefixWriter{label: label, w: os.Stdout} + cmd.Stderr = prefixWriter{label: label, w: os.Stderr} + _ = cmd.Run() +} + +type prefixWriter struct { + label string + w *os.File +} + +func (p prefixWriter) Write(b []byte) (int, error) { + for _, line := range strings.Split(strings.TrimRight(string(b), "\n"), "\n") { + if line == "" { + continue + } + fmt.Fprintf(p.w, "[%s] %s\n", p.label, line) + } + return len(b), nil +} + +// logf writes one wrapper log line to stdout (no timestamp is added); the prefix mirrors the bash script's [wrapper] tag. 
+func logf(format string, args ...any) { + fmt.Fprintf(os.Stdout, "[wrapper] "+format+"\n", args...) +} + +func since(t time.Time) time.Duration { + return time.Since(t).Truncate(time.Millisecond) +} + +func fatalf(format string, args ...any) { + logf(format, args...) + os.Exit(1) +} diff --git a/shared/envoy/init-envoy.sh b/shared/envoy/init-envoy.sh index c27a3f25..cedbdd2c 100644 --- a/shared/envoy/init-envoy.sh +++ b/shared/envoy/init-envoy.sh @@ -73,37 +73,7 @@ sed -e "s|{INST_NAME}|$inst_esc|g" \ echo "[envoy-init] Starting Envoy via supervisord" supervisorctl -c /etc/supervisor/supervisord.conf start envoy -# Wait for Envoy port to be open -echo "[envoy-init] Waiting for Envoy port to open..." -port_open=false -for i in {1..50}; do - if nc -z 127.0.0.1 "3128" 2>/dev/null; then - echo "[envoy-init] Envoy port confirmed open" - port_open=true - break - fi - sleep 0.2 -done - -if [[ "$port_open" != "true" ]]; then - echo "[envoy-init] ERROR: Envoy port 3128 failed to open after 10 seconds" - exit 1 -fi - -# Test proxy functionality -echo "[envoy-init] Testing proxy functionality..." -proxy_working=false -for i in {1..50}; do - if curl -s -f -x https://127.0.0.1:3128 --max-time 2 https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html >/dev/null 2>&1; then - echo "[envoy-init] Confirmed a request is proxied" - proxy_working=true - break - fi - echo "[envoy-init] Check failed, trying again..." - sleep 0.2 -done - -if [[ "$proxy_working" != "true" ]]; then - echo "[envoy-init] ERROR: Envoy proxy test failed after 10 seconds" - exit 1 -fi +# Readiness (port 3128 reachable) is now probed by the Go wrapper's +# waitAllReady alongside CDP/chromedriver, so this script returns as soon +# as the start request has been issued. Removing the in-script poll lets +# init-envoy.sh run concurrently with Phase A bring-up. 
diff --git a/shared/envoy/supervisor-envoy.conf b/shared/envoy/supervisor-envoy.conf index 4da59010..ae18726f 100644 --- a/shared/envoy/supervisor-envoy.conf +++ b/shared/envoy/supervisor-envoy.conf @@ -2,6 +2,6 @@ command=envoy -c /etc/envoy/bootstrap.yaml --log-level warn --drain-time-s 1 --drain-strategy immediate autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/envoy redirect_stderr=true From 747c4ec0aff25491d4d492e880ae61069fa5893c Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Fri, 8 May 2026 18:26:26 +0000 Subject: [PATCH 2/4] Split wrapper into identity-free and identity-bound phases Boot now layers as: Phase A: xorg/xvfb, dbus, chromedriver + envoy cert install (cert generation/CA trust/NSS DB - no per-instance envs read) Phase B: mutter, chromium, neko (X/dbus consumers, identity-free) Phase C: envoy template render + envoy and kernel-images-api restart (gated on INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT) init-envoy.sh now takes a phase argument (certs|config|all). The wrapper calls certs early so chromium can boot with the proxy cert in trust, and defers config (template render + supervisorctl start) until Phase C. A FORK HOOK comment marks the Phase B/C boundary: post-snapshot restore, fresh identity envs need to be received from the host before Phase C runs, and `supervisorctl restart` makes the same Phase C code path safe on both boot (start cold service) and fork (stop+start to drop stale identity). 
Co-Authored-By: Claude Opus 4.7 --- server/cmd/wrapper/main.go | 81 ++++++++++++++----- shared/envoy/init-envoy.sh | 162 ++++++++++++++++++++++--------------- 2 files changed, 159 insertions(+), 84 deletions(-) diff --git a/server/cmd/wrapper/main.go b/server/cmd/wrapper/main.go index 01374063..92f8719a 100644 --- a/server/cmd/wrapper/main.go +++ b/server/cmd/wrapper/main.go @@ -128,24 +128,24 @@ func main() { } waitForSocket(supervisorSock, 10*time.Second) - // Envoy bootstrap: cert generation, NSS DB, template render, and - // `supervisorctl start envoy`. Run concurrently with Phase A so the - // shell-out work (openssl, certutil, update-ca-certificates) overlaps - // xorg/dbus/chromedriver bring-up. Phase B (chromium) gates on this - // because chromium reads the system CA trust store at process start - // and needs the envoy self-signed cert in place. The envoy listener - // itself (port 3128) is probed in waitAllReady, not here. - envoyDone := make(chan struct{}) + // Envoy cert work (openssl, update-ca-certificates, certutil) is the + // only piece of Envoy bring-up that's identity-free, and it has to land + // before chromium starts because chromium reads the system CA trust + // store at process start. Run it concurrently with Phase A so the + // shell-out cost overlaps xorg/dbus/chromedriver bring-up. Template + // render and `supervisorctl start envoy` happen later in Phase C — + // those depend on INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT. + envoyCertsDone := make(chan struct{}) if isExecutable("/usr/local/bin/init-envoy.sh") { go func() { - defer close(envoyDone) - runStream("envoy-init", "/usr/local/bin/init-envoy.sh") + defer close(envoyCertsDone) + runStream("envoy-init", "/usr/local/bin/init-envoy.sh", "certs") }() } else { - close(envoyDone) + close(envoyCertsDone) } - // Phase A: services with no X/dbus/chromium dependency. chromedriver + // Phase A: identity-free services with no X/dbus dependency. 
chromedriver // listens on 9225 immediately and only attaches to chromium on session // creation, so it can come up alongside the display stack. xServer := "xorg" @@ -161,25 +161,52 @@ func main() { // parallel with chromium. _ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644) - // Gate chromium on envoy cert/template work being done. - <-envoyDone + // Gate chromium on the envoy cert being installed in the trust store. + <-envoyCertsDone - // Phase B: everything that needs X+dbus, started in a single supervisorctl - // invocation. On headful, mutter is the compositor and neko/api come up - // alongside chromium so their bring-up overlaps with chromium boot rather - // than trailing CDP. Headless has no compositor and no neko. + // Phase B: identity-free X/dbus consumers. Chromium itself doesn't read + // any per-instance identity envs — it just needs the envoy cert (Phase A) + // in trust. mutter is the compositor on headful; neko is the WebRTC + // streamer when ENABLE_WEBRTC=true. webrtc := prof == profileHeadful && os.Getenv("ENABLE_WEBRTC") == "true" var phaseB []string if prof == profileHeadful { - phaseB = []string{"mutter", "chromium", "kernel-images-api"} + phaseB = []string{"mutter", "chromium"} if webrtc { phaseB = append(phaseB, "neko") } } else { - phaseB = []string{"chromium", "kernel-images-api"} + phaseB = []string{"chromium"} } startAll(phaseB...) + // FORK HOOK: + // When this binary runs as a forked snapshot restore, the per-fork + // identity envs (INST_NAME, METRO_NAME, XDS_SERVER, KERNEL_INSTANCE_JWT, + // plus any future per-tenant secrets) won't be set yet at this point — + // the snapshot was taken from a different instance. Insert the + // following sequence here once the env-delivery channel exists: + // 1. Block on the host-pushed env bundle (vsock socket, virtio-fs + // drop file, or whatever transport the control plane settles on). + // 2. 
Apply the bundle to this process's environ via os.Setenv so + // Phase C below picks them up via the existing $VAR expansion in + // init-envoy.sh and the supervisorctl-spawned services inherit + // them. + // 3. Phase C uses `supervisorctl restart envoy` (idempotent — start + // on first boot, stop+start on a re-render after fork) so a + // restored snapshot drops its stale identity cleanly. + // Boot path keeps running through unchanged: the wait simply no-ops + // when there's no fork bundle to receive. + + // Phase C: identity-bound. Render envoy bootstrap with INST_NAME/JWT/etc + // and (re)start envoy + kernel-images-api. Both services use `restart` + // so the same code path works for boot (start a stopped service) and + // post-fork (stop+start to force a re-read of refreshed envs). + if isExecutable("/usr/local/bin/init-envoy.sh") { + runStream("envoy-init", "/usr/local/bin/init-envoy.sh", "config") + } + restartAll("kernel-images-api") + // Wait for the union of caller-visible ready signals. Each probe runs // concurrently and logs as soon as its target is reachable. waitAllReady(t0, webrtc) @@ -219,10 +246,22 @@ func main() { // supervisorctl once (it accepts multiple args) so we don't pay python // cold-start costs per service. func startAll(progs ...string) { + supervisorctl("start", progs...) +} + +// restartAll is the start-or-stop+start variant. It's used for services +// that may already be running from a snapshot restore (post-fork, see the +// FORK HOOK in main) so they pick up refreshed envs cleanly. supervisorctl +// `restart` is a no-op stop on cold programs followed by a normal start. +func restartAll(progs ...string) { + supervisorctl("restart", progs...) +} + +func supervisorctl(verb string, progs ...string) { if len(progs) == 0 { return } - args := append([]string{"-c", supervisorConf, "start"}, progs...) + args := append([]string{"-c", supervisorConf, verb}, progs...) cmd := exec.Command("supervisorctl", args...) 
cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr diff --git a/shared/envoy/init-envoy.sh b/shared/envoy/init-envoy.sh index cedbdd2c..928cd938 100644 --- a/shared/envoy/init-envoy.sh +++ b/shared/envoy/init-envoy.sh @@ -2,30 +2,37 @@ set -o pipefail -o errexit -o nounset -# The browser instance JWT is the sole token contract for xDS and host-local -# services in the image runtime. -INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}" - -# Check for required environment variables, to see if envoy is enabled -if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then - echo "[envoy-init] Required environment variables not set. Skipping Envoy initialization." - exit 0 -fi - -# Also check for template file -if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then - echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy initialization." - exit 0 -fi - -echo "[envoy-init] Preparing Envoy bootstrap configuration" -mkdir -p /etc/envoy - -# Generate self-signed certificates for TLS forward proxy -echo "[envoy-init] Generating self-signed certificates for TLS forward proxy" -mkdir -p /etc/envoy/certs - -if [[ ! -f /etc/envoy/certs/proxy.crt || ! -f /etc/envoy/certs/proxy.key ]]; then +# Phase argument lets the Go wrapper split the script into an identity-free +# stage (certs/CA trust/NSS DB — runs early so chromium boots with the cert +# already trusted) and an identity-bound stage (template render with +# INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT, then envoy start). +# certs — generate self-signed cert and install it in trust stores +# config — render bootstrap template and start envoy via supervisord +# all — both phases (default; preserves legacy single-call behavior) +PHASE="${1:-all}" + +case "$PHASE" in + certs|config|all) ;; + *) + echo "[envoy-init] Unknown phase: $PHASE (expected certs|config|all)" >&2 + exit 2 + ;; +esac + +run_certs() { + if [[ ! 
-f /etc/envoy/templates/bootstrap.yaml ]]; then + echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping cert generation." + return 0 + fi + + echo "[envoy-init] Generating self-signed certificates for TLS forward proxy" + mkdir -p /etc/envoy/certs + + if [[ -f /etc/envoy/certs/proxy.crt && -f /etc/envoy/certs/proxy.key ]]; then + echo "[envoy-init] Certificates already exist, skipping generation" + return 0 + fi + echo "[envoy-init] Creating new self-signed certificate" openssl req -x509 -nodes -days 3650 -newkey rsa:2048 \ -keyout /etc/envoy/certs/proxy.key \ @@ -34,46 +41,75 @@ if [[ ! -f /etc/envoy/certs/proxy.crt || ! -f /etc/envoy/certs/proxy.key ]]; the -addext "subjectAltName = DNS:localhost,IP:127.0.0.1" \ 2>&1 | sed 's/^/[envoy-init] /' echo "[envoy-init] Certificate generated successfully" - - # Add certificate to system trust store for Chrome/Chromium - echo "[envoy-init] Adding certificate to system trust store" - cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt - cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt - update-ca-certificates 2>&1 | sed 's/^/[envoy-init] /' - echo "[envoy-init] Certificate added to system trust store" -if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then + + echo "[envoy-init] Adding certificate to system trust store" + cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt + cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt + update-ca-certificates 2>&1 | sed 's/^/[envoy-init] /' + echo "[envoy-init] Certificate added to system trust store" + + if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then mkdir -p /root/.pki/nssdb certutil -d /root/.pki/nssdb -N --empty-password 2>/dev/null || true certutil -d /root/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt echo "[envoy-init] Certificate added to nssdb as root" - else - mkdir -p /home/kernel/.pki/nssdb - certutil -d /home/kernel/.pki/nssdb -N 
--empty-password 2>/dev/null || true - certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt - chown -R kernel:kernel /home/kernel/.pki - echo "[envoy-init] Certificate added to nssdb as kernel" - fi - echo "[envoy-init] Certificate added to nssdb" -else - echo "[envoy-init] Certificates already exist, skipping generation" -fi - -# Render template with provided environment variables -echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***" -inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g') -metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g') -xds_esc=$(printf '%s' "$XDS_SERVER" | sed -e 's/[\/&]/\\&/g') -jwt_esc=$(printf '%s' "$INSTANCE_JWT" | sed -e 's/[\/&]/\\&/g') -sed -e "s|{INST_NAME}|$inst_esc|g" \ - -e "s|{METRO_NAME}|$metro_esc|g" \ - -e "s|{XDS_SERVER}|$xds_esc|g" \ - -e "s|{KERNEL_INSTANCE_JWT}|$jwt_esc|g" \ - /etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml - -echo "[envoy-init] Starting Envoy via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start envoy - -# Readiness (port 3128 reachable) is now probed by the Go wrapper's -# waitAllReady alongside CDP/chromedriver, so this script returns as soon -# as the start request has been issued. Removing the in-script poll lets -# init-envoy.sh run concurrently with Phase A bring-up. + else + mkdir -p /home/kernel/.pki/nssdb + certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true + certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt + chown -R kernel:kernel /home/kernel/.pki + echo "[envoy-init] Certificate added to nssdb as kernel" + fi +} + +run_config() { + # Identity envs gate the config phase: without them xDS can't bind, so + # render+start is a no-op on images that don't run with a JWT. 
+ INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}" + if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then + echo "[envoy-init] Required environment variables not set. Skipping Envoy config/start." + return 0 + fi + + if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then + echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy config/start." + return 0 + fi + + echo "[envoy-init] Preparing Envoy bootstrap configuration" + mkdir -p /etc/envoy + + echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***" + inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g') + metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g') + xds_esc=$(printf '%s' "$XDS_SERVER" | sed -e 's/[\/&]/\\&/g') + jwt_esc=$(printf '%s' "$INSTANCE_JWT" | sed -e 's/[\/&]/\\&/g') + sed -e "s|{INST_NAME}|$inst_esc|g" \ + -e "s|{METRO_NAME}|$metro_esc|g" \ + -e "s|{XDS_SERVER}|$xds_esc|g" \ + -e "s|{KERNEL_INSTANCE_JWT}|$jwt_esc|g" \ + /etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml + + echo "[envoy-init] Starting Envoy via supervisord" + # `restart` is start-or-stop+start: on first boot this just starts envoy, + # on a re-render (e.g. post-fork env refresh) it forces a clean re-read + # of the rendered bootstrap. Either way no callers see stale identity. + supervisorctl -c /etc/supervisor/supervisord.conf restart envoy + + # Readiness (port 3128 reachable) is probed by the Go wrapper's + # waitAllReady alongside CDP/chromedriver, so this script returns as soon + # as the start request has been issued. 
+} + +case "$PHASE" in + certs) + run_certs + ;; + config) + run_config + ;; + all) + run_certs + run_config + ;; +esac From ac8c716280fb22ffcbfad70cbec92315b02d57d2 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Fri, 8 May 2026 19:30:17 +0000 Subject: [PATCH 3/4] wrapper: settle delay before infobar dismiss click Co-Authored-By: Claude Opus 4.7 --- server/cmd/wrapper/main.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/cmd/wrapper/main.go b/server/cmd/wrapper/main.go index 92f8719a..01772d9b 100644 --- a/server/cmd/wrapper/main.go +++ b/server/cmd/wrapper/main.go @@ -566,6 +566,11 @@ func dismissNoSandboxWarning() { } time.Sleep(100 * time.Millisecond) } + // Without a settle delay the click can land before the --no-sandbox infobar + // has finished painting, leaving the warning on screen. The legacy + // wrapper.sh slept 5s here for the same reason. Runs off the hot path + // (goroutine fired post-readiness) so this doesn't extend time-to-CDP. + time.Sleep(5 * time.Second) port := os.Getenv("KERNEL_IMAGES_API_PORT") if port == "" { port = defaultAPIPort From 60ae285b3261683c3c645d36c72a9897de8a2921 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Fri, 8 May 2026 21:08:24 +0000 Subject: [PATCH 4/4] images: bake envoy proxy CA cert at build time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves openssl gen + system trust store install + NSS DB seed (for both root and kernel users) from runtime (init-envoy.sh) to image build time. The certs are static — CN=localhost, SANs localhost/127.0.0.1 — and trusted only by this image's CA store and chromium NSS DB, so sharing them across containers built from the same image does not widen the threat model. Full security rationale in bake-certs.sh. 
Net effect: Phase A no longer waits on a concurrent openssl/certutil shell-out, which was contending with xorg/dbus/chromedriver bring-up and adding ~1.7s to chromedriver readiness on test-mode boots. Removes the envoyCertsDone goroutine and the `init-envoy.sh certs` invocation entirely; init-envoy.sh now only renders the bootstrap template and starts envoy. --- images/chromium-headful/Dockerfile | 6 + images/chromium-headless/image/Dockerfile | 6 + server/cmd/wrapper/main.go | 33 ++--- shared/envoy/bake-certs.sh | 65 +++++++++ shared/envoy/init-envoy.sh | 152 ++++++---------------- 5 files changed, 126 insertions(+), 136 deletions(-) create mode 100644 shared/envoy/bake-certs.sh diff --git a/images/chromium-headful/Dockerfile b/images/chromium-headful/Dockerfile index 89907f16..838be4b2 100644 --- a/images/chromium-headful/Dockerfile +++ b/images/chromium-headful/Dockerfile @@ -387,4 +387,10 @@ RUN esbuild /tmp/playwright-daemon.ts \ RUN useradd -m -s /bin/bash kernel +# Bake the envoy forward-proxy CA cert into the image (system trust store + +# NSS DB for both root and kernel users). See bake-certs.sh for the security +# rationale on sharing the cert across containers built from this image. +COPY shared/envoy/bake-certs.sh /usr/local/bin/bake-certs.sh +RUN chmod +x /usr/local/bin/bake-certs.sh && /usr/local/bin/bake-certs.sh && rm /usr/local/bin/bake-certs.sh + ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headless/image/Dockerfile b/images/chromium-headless/image/Dockerfile index b9a3462a..4bd91746 100644 --- a/images/chromium-headless/image/Dockerfile +++ b/images/chromium-headless/image/Dockerfile @@ -240,6 +240,12 @@ COPY shared/envoy/bootstrap.yaml /etc/envoy/templates/bootstrap.yaml COPY shared/envoy/init-envoy.sh /usr/local/bin/init-envoy.sh RUN chmod +x /usr/local/bin/init-envoy.sh +# Bake the envoy forward-proxy CA cert into the image (system trust store + +# NSS DB for both root and kernel users). 
See bake-certs.sh for the security +# rationale on sharing the cert across containers built from this image. +COPY shared/envoy/bake-certs.sh /usr/local/bin/bake-certs.sh +RUN chmod +x /usr/local/bin/bake-certs.sh && /usr/local/bin/bake-certs.sh && rm /usr/local/bin/bake-certs.sh + # Copy the kernel-images API binary built in the builder stage COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api COPY --from=server-builder /out/chromium-launcher /usr/local/bin/chromium-launcher diff --git a/server/cmd/wrapper/main.go b/server/cmd/wrapper/main.go index 01772d9b..6976f827 100644 --- a/server/cmd/wrapper/main.go +++ b/server/cmd/wrapper/main.go @@ -128,26 +128,12 @@ func main() { } waitForSocket(supervisorSock, 10*time.Second) - // Envoy cert work (openssl, update-ca-certificates, certutil) is the - // only piece of Envoy bring-up that's identity-free, and it has to land - // before chromium starts because chromium reads the system CA trust - // store at process start. Run it concurrently with Phase A so the - // shell-out cost overlaps xorg/dbus/chromedriver bring-up. Template - // render and `supervisorctl start envoy` happen later in Phase C — - // those depend on INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT. - envoyCertsDone := make(chan struct{}) - if isExecutable("/usr/local/bin/init-envoy.sh") { - go func() { - defer close(envoyCertsDone) - runStream("envoy-init", "/usr/local/bin/init-envoy.sh", "certs") - }() - } else { - close(envoyCertsDone) - } - // Phase A: identity-free services with no X/dbus dependency. chromedriver // listens on 9225 immediately and only attaches to chromium on session - // creation, so it can come up alongside the display stack. + // creation, so it can come up alongside the display stack. The envoy + // forward-proxy CA cert is baked into the image at build time (see + // shared/envoy/bake-certs.sh), so chromium trusts it on first start with + // no runtime cert work to wait on. 
xServer := "xorg" if prof == profileHeadless { xServer = "xvfb" @@ -161,13 +147,10 @@ func main() { // parallel with chromium. _ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644) - // Gate chromium on the envoy cert being installed in the trust store. - <-envoyCertsDone - // Phase B: identity-free X/dbus consumers. Chromium itself doesn't read - // any per-instance identity envs — it just needs the envoy cert (Phase A) - // in trust. mutter is the compositor on headful; neko is the WebRTC - // streamer when ENABLE_WEBRTC=true. + // any per-instance identity envs — it just needs the envoy cert (baked + // into the image) in trust. mutter is the compositor on headful; neko is + // the WebRTC streamer when ENABLE_WEBRTC=true. webrtc := prof == profileHeadful && os.Getenv("ENABLE_WEBRTC") == "true" var phaseB []string if prof == profileHeadful { @@ -203,7 +186,7 @@ func main() { // so the same code path works for boot (start a stopped service) and // post-fork (stop+start to force a re-read of refreshed envs). if isExecutable("/usr/local/bin/init-envoy.sh") { - runStream("envoy-init", "/usr/local/bin/init-envoy.sh", "config") + runStream("envoy-init", "/usr/local/bin/init-envoy.sh") } restartAll("kernel-images-api") diff --git a/shared/envoy/bake-certs.sh b/shared/envoy/bake-certs.sh new file mode 100644 index 00000000..6a987cd9 --- /dev/null +++ b/shared/envoy/bake-certs.sh @@ -0,0 +1,65 @@ +#!/bin/bash +set -eux + +# Generate the self-signed cert envoy presents on the localhost forward proxy, +# install it into the system CA trust store, and seed the NSS DBs for both the +# root and `kernel` users so chromium trusts it regardless of which user the +# wrapper runs as. Runs once at image build time so container startup pays +# zero cost — no openssl invocation, no certutil shell-outs. +# +# Safety of a shared (per-image-tag) cert across customer instances: +# - The cert's only Subject Alternative Names are `DNS:localhost` and +# `IP:127.0.0.1`. 
A TLS client only accepts it for connections to +# localhost, so the cert (and its private key) are useless for MITMing +# any traffic the cert holder doesn't already control. +# - The cert is trusted only by this image's system CA store and chromium +# NSS DB. It is not trusted by customer machines, the host, or anything +# outside this container. +# - The forward proxy listens on 127.0.0.1 inside a network-isolated +# container. One customer's container has no path to another customer's +# localhost. Even an attacker holding the private key would need code +# execution inside a sibling container to use it, at which point they +# have everything anyway. +# - The cert never leaves the container — no customer SDK, no browser +# extension, no host service ever sees it. +# Bottom line: this CA is an in-container trust anchor for a localhost-only +# TLS listener. Sharing the key across containers built from the same image +# does not widen the threat model. + +mkdir -p /etc/envoy/certs +openssl req -x509 -nodes -days 3650 -newkey rsa:2048 \ + -keyout /etc/envoy/certs/proxy.key \ + -out /etc/envoy/certs/proxy.crt \ + -subj "/C=US/ST=CA/O=Kernel/CN=localhost" \ + -addext "subjectAltName = DNS:localhost,IP:127.0.0.1" + +# System trust store — picked up by curl, openssl, Go's net/http, etc. +cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt +cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt + +# Seed both NSS DBs so chromium trusts the cert under either user. The +# wrapper's RUN_AS_ROOT branch chooses which DB chromium reads from at +# runtime; seeding both at build time means we don't need to know yet. 
+mkdir -p /root/.pki/nssdb /home/kernel/.pki/nssdb +certutil -d /root/.pki/nssdb -N --empty-password 2>/dev/null || true +certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true +certutil -d /root/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt +certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt + +# Install any pre-baked CA certs (BrightData certs are downloaded into +# /etc/envoy/brightdata by install-proxy.sh in private images). Same +# identity-free trust-store work as the self-signed cert above — moving it +# here means runtime sees an already-populated trust store. +if [ -d /etc/envoy/brightdata ]; then + for cert in /etc/envoy/brightdata/*.crt; do + [ -f "$cert" ] || continue + cert_name=$(basename "$cert" .crt) + cp "$cert" "/usr/local/share/ca-certificates/brightdata-${cert_name}.crt" + certutil -d /root/.pki/nssdb -A -t "C,," -n "BrightData $cert_name" -i "$cert" + certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "BrightData $cert_name" -i "$cert" + done +fi + +chown -R kernel:kernel /home/kernel/.pki + +update-ca-certificates diff --git a/shared/envoy/init-envoy.sh b/shared/envoy/init-envoy.sh index 928cd938..c831a062 100644 --- a/shared/envoy/init-envoy.sh +++ b/shared/envoy/init-envoy.sh @@ -2,114 +2,44 @@ set -o pipefail -o errexit -o nounset -# Phase argument lets the Go wrapper split the script into an identity-free -# stage (certs/CA trust/NSS DB — runs early so chromium boots with the cert -# already trusted) and an identity-bound stage (template render with -# INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT, then envoy start). 
-# certs — generate self-signed cert and install it in trust stores -# config — render bootstrap template and start envoy via supervisord -# all — both phases (default; preserves legacy single-call behavior) -PHASE="${1:-all}" - -case "$PHASE" in - certs|config|all) ;; - *) - echo "[envoy-init] Unknown phase: $PHASE (expected certs|config|all)" >&2 - exit 2 - ;; -esac - -run_certs() { - if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then - echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping cert generation." - return 0 - fi - - echo "[envoy-init] Generating self-signed certificates for TLS forward proxy" - mkdir -p /etc/envoy/certs - - if [[ -f /etc/envoy/certs/proxy.crt && -f /etc/envoy/certs/proxy.key ]]; then - echo "[envoy-init] Certificates already exist, skipping generation" - return 0 - fi - - echo "[envoy-init] Creating new self-signed certificate" - openssl req -x509 -nodes -days 3650 -newkey rsa:2048 \ - -keyout /etc/envoy/certs/proxy.key \ - -out /etc/envoy/certs/proxy.crt \ - -subj "/C=US/ST=CA/O=Kernel/CN=localhost" \ - -addext "subjectAltName = DNS:localhost,IP:127.0.0.1" \ - 2>&1 | sed 's/^/[envoy-init] /' - echo "[envoy-init] Certificate generated successfully" - - echo "[envoy-init] Adding certificate to system trust store" - cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt - cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt - update-ca-certificates 2>&1 | sed 's/^/[envoy-init] /' - echo "[envoy-init] Certificate added to system trust store" - - if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then - mkdir -p /root/.pki/nssdb - certutil -d /root/.pki/nssdb -N --empty-password 2>/dev/null || true - certutil -d /root/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt - echo "[envoy-init] Certificate added to nssdb as root" - else - mkdir -p /home/kernel/.pki/nssdb - certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true - certutil -d 
/home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt - chown -R kernel:kernel /home/kernel/.pki - echo "[envoy-init] Certificate added to nssdb as kernel" - fi -} - -run_config() { - # Identity envs gate the config phase: without them xDS can't bind, so - # render+start is a no-op on images that don't run with a JWT. - INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}" - if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then - echo "[envoy-init] Required environment variables not set. Skipping Envoy config/start." - return 0 - fi - - if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then - echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy config/start." - return 0 - fi - - echo "[envoy-init] Preparing Envoy bootstrap configuration" - mkdir -p /etc/envoy - - echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***" - inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g') - metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g') - xds_esc=$(printf '%s' "$XDS_SERVER" | sed -e 's/[\/&]/\\&/g') - jwt_esc=$(printf '%s' "$INSTANCE_JWT" | sed -e 's/[\/&]/\\&/g') - sed -e "s|{INST_NAME}|$inst_esc|g" \ - -e "s|{METRO_NAME}|$metro_esc|g" \ - -e "s|{XDS_SERVER}|$xds_esc|g" \ - -e "s|{KERNEL_INSTANCE_JWT}|$jwt_esc|g" \ - /etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml - - echo "[envoy-init] Starting Envoy via supervisord" - # `restart` is start-or-stop+start: on first boot this just starts envoy, - # on a re-render (e.g. post-fork env refresh) it forces a clean re-read - # of the rendered bootstrap. Either way no callers see stale identity. 
- supervisorctl -c /etc/supervisor/supervisord.conf restart envoy - - # Readiness (port 3128 reachable) is probed by the Go wrapper's - # waitAllReady alongside CDP/chromedriver, so this script returns as soon - # as the start request has been issued. -} - -case "$PHASE" in - certs) - run_certs - ;; - config) - run_config - ;; - all) - run_certs - run_config - ;; -esac +# Runtime config for envoy. Cert generation and CA trust install ran at image +# build time (see shared/envoy/bake-certs.sh) so this script only does the +# identity-bound work: render the bootstrap template with the per-instance +# envs and start envoy via supervisord. + +# Identity envs gate this script: without them xDS can't bind, so this is a +# no-op on images that don't run with a JWT. +INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}" +if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then + echo "[envoy-init] Required environment variables not set. Skipping Envoy config/start." + exit 0 +fi + +if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then + echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy config/start." 
+ exit 0 +fi + +echo "[envoy-init] Preparing Envoy bootstrap configuration" +mkdir -p /etc/envoy + +echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***" +inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g') +metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g') +xds_esc=$(printf '%s' "$XDS_SERVER" | sed -e 's/[\/&]/\\&/g') +jwt_esc=$(printf '%s' "$INSTANCE_JWT" | sed -e 's/[\/&]/\\&/g') +sed -e "s|{INST_NAME}|$inst_esc|g" \ + -e "s|{METRO_NAME}|$metro_esc|g" \ + -e "s|{XDS_SERVER}|$xds_esc|g" \ + -e "s|{KERNEL_INSTANCE_JWT}|$jwt_esc|g" \ + /etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml + +echo "[envoy-init] Starting Envoy via supervisord" +# `restart` is start-or-stop+start: on first boot this just starts envoy, +# on a re-render (e.g. post-fork env refresh) it forces a clean re-read +# of the rendered bootstrap. Either way no callers see stale identity. +supervisorctl -c /etc/supervisor/supervisord.conf restart envoy + +# Readiness (port 3128 reachable) is probed by the Go wrapper's +# waitAllReady alongside CDP/chromedriver, so this script returns as soon +# as the start request has been issued.