diff --git a/README.md b/README.md index 50fbe2db..22ef9bc0 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Deployed successfully! ├────── service: ├─ private fqdn: ├─── private ip: - └───────── args: /wrapper.sh + └───────── args: /wrapper ``` ### Unikernel Notes diff --git a/images/chromium-headful/Dockerfile b/images/chromium-headful/Dockerfile index 84ecfd9d..838be4b2 100644 --- a/images/chromium-headful/Dockerfile +++ b/images/chromium-headful/Dockerfile @@ -27,6 +27,12 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher +# Build container entrypoint wrapper (replaces wrapper.sh) +RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ + --mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \ + GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/wrapper ./cmd/wrapper + # webrtc client FROM node:22-bullseye-slim AS client WORKDIR /src @@ -348,7 +354,6 @@ COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xor COPY images/chromium-headful/image-chromium/ / COPY images/chromium-headful/start-pulseaudio.sh /images/chromium-headful/start-pulseaudio.sh RUN chmod +x /images/chromium-headful/start-pulseaudio.sh -COPY images/chromium-headful/wrapper.sh /wrapper.sh COPY images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf COPY images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/ COPY shared/envoy/supervisor-envoy.conf /etc/supervisor/conf.d/services/envoy.conf @@ -365,6 +370,7 @@ RUN chmod +x /usr/local/bin/init-envoy.sh # copy the kernel-images API binary built in the builder stage COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api COPY --from=server-builder /out/chromium-launcher /usr/local/bin/chromium-launcher +COPY 
--from=server-builder /out/wrapper /wrapper # Copy and compile the Playwright daemon COPY server/runtime/playwright-daemon.ts /tmp/playwright-daemon.ts @@ -381,4 +387,10 @@ RUN esbuild /tmp/playwright-daemon.ts \ RUN useradd -m -s /bin/bash kernel -ENTRYPOINT [ "/wrapper.sh" ] +# Bake the envoy forward-proxy CA cert into the image (system trust store + +# NSS DB for both root and kernel users). See bake-certs.sh for the security +# rationale on sharing the cert across containers built from this image. +COPY shared/envoy/bake-certs.sh /usr/local/bin/bake-certs.sh +RUN chmod +x /usr/local/bin/bake-certs.sh && /usr/local/bin/bake-certs.sh && rm /usr/local/bin/bake-certs.sh + +ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headful/Kraftfile b/images/chromium-headful/Kraftfile index 18af1a0b..9bb12637 100644 --- a/images/chromium-headful/Kraftfile +++ b/images/chromium-headful/Kraftfile @@ -9,4 +9,4 @@ labels: rootfs: ./initrd -cmd: ["/wrapper.sh"] +cmd: ["/wrapper"] diff --git a/images/chromium-headful/supervisor/services/chromedriver.conf b/images/chromium-headful/supervisor/services/chromedriver.conf index 9bca5365..7d8d3812 100644 --- a/images/chromium-headful/supervisor/services/chromedriver.conf +++ b/images/chromium-headful/supervisor/services/chromedriver.conf @@ -2,6 +2,6 @@ command=/usr/local/bin/chromedriver --port=9225 --allowed-ips=127.0.0.1 --log-level=INFO autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/chromedriver redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/dbus.conf b/images/chromium-headful/supervisor/services/dbus.conf index 7edc479c..fcab6898 100644 --- a/images/chromium-headful/supervisor/services/dbus.conf +++ b/images/chromium-headful/supervisor/services/dbus.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p /run/dbus && dbus-uuidgen --ensure && dbus-daemon --system --address=unix:path=/run/dbus/system_bus_socket --nopidfile --nosyslog --nofork' 
autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/dbus redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/kernel-images-api.conf b/images/chromium-headful/supervisor/services/kernel-images-api.conf index e57d30a8..0638dea8 100644 --- a/images/chromium-headful/supervisor/services/kernel-images-api.conf +++ b/images/chromium-headful/supervisor/services/kernel-images-api.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" exec /usr/local/bin/kernel-images-api' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/kernel-images-api redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/mutter.conf b/images/chromium-headful/supervisor/services/mutter.conf index 5de00213..3fac9ea7 100644 --- a/images/chromium-headful/supervisor/services/mutter.conf +++ b/images/chromium-headful/supervisor/services/mutter.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'XDG_SESSION_TYPE=x11 mutter --replace --sm-disable' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/mutter redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/neko.conf b/images/chromium-headful/supervisor/services/neko.conf index c30c8b46..9662df02 100644 --- a/images/chromium-headful/supervisor/services/neko.conf +++ b/images/chromium-headful/supervisor/services/neko.conf @@ -2,6 +2,6 @@ command=/usr/bin/neko serve --server.static /var/www --server.bind 0.0.0.0:8080 autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/neko 
redirect_stderr=true diff --git a/images/chromium-headful/supervisor/services/xorg.conf b/images/chromium-headful/supervisor/services/xorg.conf index 72e515e5..5357ba7c 100644 --- a/images/chromium-headful/supervisor/services/xorg.conf +++ b/images/chromium-headful/supervisor/services/xorg.conf @@ -2,6 +2,6 @@ command=/usr/bin/Xorg :1 -config /etc/neko/xorg.conf -noreset -nolisten tcp autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/xorg redirect_stderr=true diff --git a/images/chromium-headful/wrapper.sh b/images/chromium-headful/wrapper.sh deleted file mode 100755 index 1e3797cb..00000000 --- a/images/chromium-headful/wrapper.sh +++ /dev/null @@ -1,338 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# If the WITHDOCKER environment variable is not set, it means we are not running inside a Docker container. -# Docker manages /dev/shm itself, and attempting to mount or modify it can cause permission or device errors. -# However, in a unikernel container environment (non-Docker), we need to manually create and mount /dev/shm as a tmpfs -# to support shared memory operations. -if [ -z "${WITHDOCKER:-}" ]; then - mkdir -p /dev/shm - chmod 777 /dev/shm - mount -t tmpfs tmpfs /dev/shm -fi - -# We disable scale-to-zero for the lifetime of this script and restore -# the original setting on exit. 
-SCALE_TO_ZERO_FILE="/uk/libukp/scale_to_zero_disable" -scale_to_zero_write() { - local char="$1" - # Skip when not running inside Unikraft Cloud (control file absent) - if [[ -e "$SCALE_TO_ZERO_FILE" ]]; then - # Write the character, but do not fail the whole script if this errors out - echo -n "$char" > "$SCALE_TO_ZERO_FILE" 2>/dev/null || \ - echo "[wrapper] Failed to write to scale-to-zero control file" >&2 - fi -} -disable_scale_to_zero() { scale_to_zero_write "+"; } -enable_scale_to_zero() { scale_to_zero_write "-"; } - -wait_for_tcp_port() { - local host="$1" - local port="$2" - local name="$3" - local attempts="${4:-0}" - local sleep_secs="${5:-0.5}" - local timeout_label="${6:-}" - local attempt=0 - - echo "[wrapper] Waiting for ${name} on ${host}:${port}..." - while true; do - if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then - echo "[wrapper] ${name} is ready on ${host}:${port}" - return 0 - fi - - if (( attempts > 0 )); then - attempt=$((attempt + 1)) - if (( attempt >= attempts )); then - if [[ -n "${timeout_label}" ]]; then - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${timeout_label}" >&2 - else - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${attempts} attempts" >&2 - fi - return 1 - fi - fi - - sleep "${sleep_secs}" - done -} - -# Disable scale-to-zero for the duration of the script when not running under Docker -if [[ -z "${WITHDOCKER:-}" ]]; then - echo "[wrapper] Disabling scale-to-zero" - disable_scale_to_zero -fi - -# ----------------------------------------------------------------------------- -# Ensure a sensible hostname --------------------------------------------------- -# ----------------------------------------------------------------------------- -# Some environments boot with an empty or \"(none)\" hostname which shows up in -# prompts. Best-effort set a friendly hostname early so services inherit it. 
-if h=$(cat /proc/sys/kernel/hostname 2>/dev/null); then - if [ -z "$h" ] || [ "$h" = "(none)" ]; then - if command -v hostname >/dev/null 2>&1; then - hostname kernel-vm 2>/dev/null || true - fi - echo -n "kernel-vm" > /proc/sys/kernel/hostname 2>/dev/null || true - fi -fi -# Also export HOSTNAME so shells pick it up immediately. -export HOSTNAME="${HOSTNAME:-kernel-vm}" - -# ----------------------------------------------------------------------------- -# Disable IPv6 ----------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# The VM environment has no IPv6 route, so any IPv6 connection attempt will fail -# immediately with ENETUNREACH. Chromium's built-in DNS client may attempt -# DNS-over-HTTPS to IPv6 endpoints (e.g. [2001:4860:4860::8888]:443), and each -# failed attempt wastes a connection slot from the MaxConnectionsPerProxy pool. -# Disabling IPv6 at the kernel level prevents these wasted attempts. -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 2>/dev/null || true -echo 1 > /proc/sys/net/ipv6/conf/default/disable_ipv6 2>/dev/null || true - -# ----------------------------------------------------------------------------- -# House-keeping for the unprivileged "kernel" user -------------------------------- -# Some Chromium subsystems want to create files under $HOME (NSS cert DB, dconf -# cache). If those directories are missing or owned by root Chromium emits -# noisy error messages such as: -# [ERROR:crypto/nss_util.cc:48] Failed to create /home/kernel/.pki/nssdb ... -# dconf-CRITICAL **: unable to create directory '/home/kernel/.cache/dconf' -# Pre-create them and hand ownership to the user so the messages disappear. -# When RUN_AS_ROOT is true, we skip ownership changes since we're running as root. 
- -if [[ "${RUN_AS_ROOT:-}" != "true" ]]; then - dirs=( - /home/kernel/user-data - /home/kernel/.config/chromium - /home/kernel/.pki/nssdb - /home/kernel/.cache/dconf - /tmp - /var/log - /var/log/supervisord - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done - - # Ensure correct ownership (ignore errors if already correct) - chown -R kernel:kernel /home/kernel /home/kernel/user-data /home/kernel/.config /home/kernel/.pki /home/kernel/.cache 2>/dev/null || true - # Make policy directory writable for runtime updates - chown -R kernel:kernel /etc/chromium/policies 2>/dev/null || true -else - # When running as root, just create the necessary directories without ownership changes - dirs=( - /tmp - /var/log - /var/log/supervisord - /home/kernel - /home/kernel/user-data - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done -fi - -# ----------------------------------------------------------------------------- -# Dynamic log aggregation for /var/log/supervisord ----------------------------- -# ----------------------------------------------------------------------------- -# Tails any existing and future files under /var/log/supervisord, -# prefixing each line with the relative filepath, e.g. [chromium] ... 
-start_dynamic_log_aggregator() { - echo "[wrapper] Starting dynamic log aggregator for /var/log/supervisord" - ( - declare -A tailed_files=() - start_tail() { - local f="$1" - [[ -f "$f" ]] || return 0 - [[ -n "${tailed_files[$f]:-}" ]] && return 0 - local label="${f#/var/log/supervisord/}" - # Tie tails to this subshell lifetime so they exit when we stop it - tail --pid="$$" -n +1 -F "$f" 2>/dev/null | sed -u "s/^/[${label}] /" & - tailed_files[$f]=1 - } - # Periodically scan for new *.log files without extra dependencies - while true; do - while IFS= read -r -d '' f; do - start_tail "$f" - done < <(find /var/log/supervisord -type f -print0 2>/dev/null || true) - sleep 1 - done - ) & - tail_pids+=("$!") -} - -# Start log aggregator early so we see supervisor and service logs as they appear -start_dynamic_log_aggregator - -export DISPLAY=:1 - -# Predefine ports and export for services -export INTERNAL_PORT="${INTERNAL_PORT:-9223}" -export CHROME_PORT="${CHROME_PORT:-9222}" - -# Track background tailing processes for cleanup -tail_pids=() - -# Cleanup handler (set early so we catch early failures) -cleanup () { - echo "[wrapper] Cleaning up..." - # Re-enable scale-to-zero if the script terminates early - enable_scale_to_zero - supervisorctl -c /etc/supervisor/supervisord.conf stop chromedriver || true - supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true - supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true - supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true - # Stop log tailers - if [[ -n "${tail_pids[*]:-}" ]]; then - for tp in "${tail_pids[@]}"; do - kill -TERM "$tp" 2>/dev/null || true - done - fi -} -trap cleanup TERM INT - -# Start supervisord early so it can manage Xorg and Mutter -echo "[wrapper] Starting supervisord" -supervisord -c /etc/supervisor/supervisord.conf -echo "[wrapper] Waiting for supervisord socket..." 
-for i in {1..30}; do -if [ -S /var/run/supervisor.sock ]; then - break -fi -sleep 0.2 -done - -init-envoy.sh - -echo "[wrapper] Starting Xorg via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start xorg -echo "[wrapper] Waiting for Xorg to open display $DISPLAY..." -for i in {1..50}; do - if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then - break - fi - sleep 0.2 -done - -echo "[wrapper] Starting Mutter via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start mutter -echo "[wrapper] Waiting for Mutter to be ready..." -timeout=30 -while [ $timeout -gt 0 ]; do - if xdotool search --class "mutter" >/dev/null 2>&1; then - break - fi - sleep 1 - ((timeout--)) -done - -# ----------------------------------------------------------------------------- -# System-bus setup via supervisord -------------------------------------------- -# ----------------------------------------------------------------------------- -echo "[wrapper] Starting system D-Bus daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start dbus -echo "[wrapper] Waiting for D-Bus system bus socket..." -for i in {1..50}; do - if [ -S /run/dbus/system_bus_socket ]; then - break - fi - sleep 0.2 -done - -# We will point DBUS_SESSION_BUS_ADDRESS at the system bus socket to suppress -# autolaunch attempts that failed and spammed logs. -export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" - -# Start Chromium with display :1 and remote debugging, loading our recorder extension. -echo "[wrapper] Starting Chromium via supervisord on internal port $INTERNAL_PORT" -supervisorctl -c /etc/supervisor/supervisord.conf start chromium -wait_for_tcp_port 127.0.0.1 "$INTERNAL_PORT" "Chromium remote debugging" 100 0.2 "20s" || true - -if [[ "${ENABLE_WEBRTC:-}" == "true" ]]; then - # use webrtc - echo "[wrapper] ✨ Starting neko (webrtc server) via supervisord." 
- supervisorctl -c /etc/supervisor/supervisord.conf start neko - - # Wait for neko to be ready. - wait_for_tcp_port 127.0.0.1 8080 "neko" -fi - -echo "[wrapper] ✨ Starting kernel-images API." - -API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" -API_FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" -API_DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" -API_MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" -API_OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" - -# Start via supervisord (env overrides are read by the service's command) -supervisorctl -c /etc/supervisor/supervisord.conf start kernel-images-api -wait_for_tcp_port 127.0.0.1 "${API_PORT}" "kernel-images API" - -echo "[wrapper] Starting ChromeDriver via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start chromedriver -wait_for_tcp_port 127.0.0.1 9225 "ChromeDriver" 50 0.2 "10s" || true - -echo "[wrapper] Starting PulseAudio daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start pulseaudio - -# close the "--no-sandbox unsupported flag" warning when running as root -# in the unikernel runtime we haven't been able to get chromium to launch as non-root without cryptic crashpad errors -# and when running as root you must use the --no-sandbox flag, which generates a warning -if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then - echo "[wrapper] Running as root, attempting to dismiss the --no-sandbox unsupported flag warning" - if read -r WIDTH HEIGHT <<< "$(xdotool getdisplaygeometry 2>/dev/null)"; then - # Work out an x-coordinate slightly inside the right-hand edge of the - OFFSET_X=$(( WIDTH - 30 )) - if (( OFFSET_X < 0 )); then - OFFSET_X=0 - fi - - # Wait for Chromium window to open before dismissing the --no-sandbox warning. - target='New Tab - Chromium' - echo "[wrapper] Waiting for Chromium window \"${target}\" to appear and become active..." 
- while :; do - win_id=$(xwininfo -root -tree 2>/dev/null | awk -v t="$target" '$0 ~ t {print $1; exit}') - if [[ -n $win_id ]]; then - win_id=${win_id%:} - if xdotool windowactivate --sync "$win_id"; then - echo "[wrapper] Focused window $win_id ($target) on $DISPLAY" - break - fi - fi - sleep 0.5 - done - - # wait... not sure but this just increases the likelihood of success - # without the sleep you often open the live view and see the mouse hovering over the "X" to dismiss the warning, suggesting that it clicked before the warning or chromium appeared - sleep 5 - - # Attempt to click the warning's close button - echo "[wrapper] Clicking the warning's close button at x=$OFFSET_X y=115" - if curl -s -o /dev/null -X POST \ - http://localhost:${API_PORT}/computer/click_mouse \ - -H "Content-Type: application/json" \ - -d "{\"x\":${OFFSET_X},\"y\":115}"; then - echo "[wrapper] Successfully clicked the warning's close button" - else - echo "[wrapper] Failed to click the warning's close button" >&2 - fi - else - echo "[wrapper] xdotool failed to obtain display geometry; skipping sandbox warning dismissal." 
>&2 - fi -fi - -if [[ -z "${WITHDOCKER:-}" ]]; then - enable_scale_to_zero -fi - -# Keep the container running while streaming logs -wait diff --git a/images/chromium-headless/image/Dockerfile b/images/chromium-headless/image/Dockerfile index aa7d17ea..4bd91746 100644 --- a/images/chromium-headless/image/Dockerfile +++ b/images/chromium-headless/image/Dockerfile @@ -28,6 +28,12 @@ RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ go build -ldflags="-s -w" -o /out/chromium-launcher ./cmd/chromium-launcher +# Build container entrypoint wrapper (replaces wrapper.sh) +RUN --mount=type=cache,target=/root/.cache/go-build,id=$CACHEIDPREFIX-go-build \ + --mount=type=cache,target=/go/pkg/mod,id=$CACHEIDPREFIX-go-pkg-mod \ + GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/wrapper ./cmd/wrapper + FROM docker.io/ubuntu:22.04 AS ffmpeg-downloader # Allow cross-compilation when building with BuildKit platforms @@ -217,8 +223,8 @@ RUN useradd -m -s /bin/bash kernel COPY images/chromium-headless/image/start-xvfb.sh /images/chromium-headless/image/start-xvfb.sh RUN chmod +x /images/chromium-headless/image/start-xvfb.sh -# Wrapper script to set environment -COPY images/chromium-headless/image/wrapper.sh /usr/bin/wrapper.sh +# Container entrypoint wrapper (Go binary, replaces wrapper.sh) +COPY --from=server-builder /out/wrapper /wrapper # Supervisord configuration COPY images/chromium-headless/image/supervisord.conf /etc/supervisor/supervisord.conf @@ -234,6 +240,12 @@ COPY shared/envoy/bootstrap.yaml /etc/envoy/templates/bootstrap.yaml COPY shared/envoy/init-envoy.sh /usr/local/bin/init-envoy.sh RUN chmod +x /usr/local/bin/init-envoy.sh +# Bake the envoy forward-proxy CA cert into the image (system trust store + +# NSS DB for both root and kernel users). 
See bake-certs.sh for the security +# rationale on sharing the cert across containers built from this image. +COPY shared/envoy/bake-certs.sh /usr/local/bin/bake-certs.sh +RUN chmod +x /usr/local/bin/bake-certs.sh && /usr/local/bin/bake-certs.sh && rm /usr/local/bin/bake-certs.sh + # Copy the kernel-images API binary built in the builder stage COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api COPY --from=server-builder /out/chromium-launcher /usr/local/bin/chromium-launcher @@ -251,4 +263,4 @@ RUN esbuild /tmp/playwright-daemon.ts \ --external:esbuild \ && rm /tmp/playwright-daemon.ts -ENTRYPOINT [ "/usr/bin/wrapper.sh" ] +ENTRYPOINT [ "/wrapper" ] diff --git a/images/chromium-headless/image/Kraftfile b/images/chromium-headless/image/Kraftfile index b11a88c2..9bb12637 100644 --- a/images/chromium-headless/image/Kraftfile +++ b/images/chromium-headless/image/Kraftfile @@ -9,4 +9,4 @@ labels: rootfs: ./initrd -cmd: ["/usr/bin/wrapper.sh"] +cmd: ["/wrapper"] diff --git a/images/chromium-headless/image/supervisor/services/chromedriver.conf b/images/chromium-headless/image/supervisor/services/chromedriver.conf index 9bca5365..7d8d3812 100644 --- a/images/chromium-headless/image/supervisor/services/chromedriver.conf +++ b/images/chromium-headless/image/supervisor/services/chromedriver.conf @@ -2,6 +2,6 @@ command=/usr/local/bin/chromedriver --port=9225 --allowed-ips=127.0.0.1 --log-level=INFO autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/chromedriver redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/dbus.conf b/images/chromium-headless/image/supervisor/services/dbus.conf index 7edc479c..fcab6898 100644 --- a/images/chromium-headless/image/supervisor/services/dbus.conf +++ b/images/chromium-headless/image/supervisor/services/dbus.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p /run/dbus && dbus-uuidgen --ensure && dbus-daemon --system 
--address=unix:path=/run/dbus/system_bus_socket --nopidfile --nosyslog --nofork' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/dbus redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/kernel-images-api.conf b/images/chromium-headless/image/supervisor/services/kernel-images-api.conf index e57d30a8..0638dea8 100644 --- a/images/chromium-headless/image/supervisor/services/kernel-images-api.conf +++ b/images/chromium-headless/image/supervisor/services/kernel-images-api.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc 'mkdir -p "${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" && PORT="${KERNEL_IMAGES_API_PORT:-10001}" FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" LOG_CDP_MESSAGES="${LOG_CDP_MESSAGES:-false}" exec /usr/local/bin/kernel-images-api' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/kernel-images-api redirect_stderr=true diff --git a/images/chromium-headless/image/supervisor/services/xvfb.conf b/images/chromium-headless/image/supervisor/services/xvfb.conf index 5279bda4..28974551 100644 --- a/images/chromium-headless/image/supervisor/services/xvfb.conf +++ b/images/chromium-headless/image/supervisor/services/xvfb.conf @@ -2,6 +2,6 @@ command=/bin/bash -lc '/images/chromium-headless/image/start-xvfb.sh' autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/xvfb redirect_stderr=true diff --git a/images/chromium-headless/image/wrapper.sh b/images/chromium-headless/image/wrapper.sh deleted file mode 100755 index 6a1935b9..00000000 --- a/images/chromium-headless/image/wrapper.sh +++ /dev/null @@ -1,294 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# If we are outside Docker-in-Docker make sure /dev/shm exists 
-if [ -z "${WITHDOCKER:-}" ]; then - mkdir -p /dev/shm - chmod 777 /dev/shm - mount -t tmpfs tmpfs /dev/shm -fi - -# We disable scale-to-zero for the lifetime of this script and restore -# the original setting on exit. -SCALE_TO_ZERO_FILE="/uk/libukp/scale_to_zero_disable" -scale_to_zero_write() { - local char="$1" - # Skip when not running inside Unikraft Cloud (control file absent) - if [[ -e "$SCALE_TO_ZERO_FILE" ]]; then - # Write the character, but do not fail the whole script if this errors out - echo -n "$char" > "$SCALE_TO_ZERO_FILE" 2>/dev/null || \ - echo "[wrapper] Failed to write to scale-to-zero control file" >&2 - fi -} -disable_scale_to_zero() { scale_to_zero_write "+"; } -enable_scale_to_zero() { scale_to_zero_write "-"; } - -wait_for_tcp_port() { - local host="$1" - local port="$2" - local name="$3" - local attempts="${4:-0}" - local sleep_secs="${5:-0.5}" - local timeout_label="${6:-}" - local attempt=0 - - echo "[wrapper] Waiting for ${name} on ${host}:${port}..." - while true; do - if (echo >/dev/tcp/"${host}"/"${port}") >/dev/null 2>&1; then - echo "[wrapper] ${name} is ready on ${host}:${port}" - return 0 - fi - - if (( attempts > 0 )); then - attempt=$((attempt + 1)) - if (( attempt >= attempts )); then - if [[ -n "${timeout_label}" ]]; then - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${timeout_label}" >&2 - else - echo "[wrapper] WARNING: ${name} not ready on ${host}:${port} after ${attempts} attempts" >&2 - fi - return 1 - fi - fi - - sleep "${sleep_secs}" - done -} - -# Disable scale-to-zero for the duration of the script when not running under Docker -if [[ -z "${WITHDOCKER:-}" ]]; then - echo "[wrapper] Disabling scale-to-zero" - disable_scale_to_zero -fi - -# ----------------------------------------------------------------------------- -# Ensure a sensible hostname --------------------------------------------------- -# ----------------------------------------------------------------------------- -if h=$(cat 
/proc/sys/kernel/hostname 2>/dev/null); then - if [ -z "$h" ] || [ "$h" = "(none)" ]; then - if command -v hostname >/dev/null 2>&1; then - hostname kernel-vm 2>/dev/null || true - fi - echo -n "kernel-vm" > /proc/sys/kernel/hostname 2>/dev/null || true - fi -fi -export HOSTNAME="${HOSTNAME:-kernel-vm}" - -# ----------------------------------------------------------------------------- -# Disable IPv6 ----------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# The VM environment has no IPv6 route, so any IPv6 connection attempt will fail -# immediately with ENETUNREACH. Chromium's built-in DNS client may attempt -# DNS-over-HTTPS to IPv6 endpoints (e.g. [2001:4860:4860::8888]:443), and each -# failed attempt wastes a connection slot from the MaxConnectionsPerProxy pool. -# Disabling IPv6 at the kernel level prevents these wasted attempts. -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 2>/dev/null || true -echo 1 > /proc/sys/net/ipv6/conf/default/disable_ipv6 2>/dev/null || true - -# if CHROMIUM_FLAGS is not set, default to the flags used in playwright_stealth -# NOTE: --disable-background-networking was intentionally removed because it prevents -# Chrome from fetching extensions via ExtensionInstallForcelist enterprise policy. -# Enterprise extensions require Chrome to make HTTP requests to fetch update.xml and .crx files. 
-if [ -z "${CHROMIUM_FLAGS:-}" ]; then - CHROMIUM_FLAGS="--accept-lang=en-US,en \ - --allow-pre-commit-input \ - --blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4 \ - --crash-dumps-dir=/tmp/chromium-dumps \ - --disable-back-forward-cache \ - --disable-background-timer-throttling \ - --disable-backgrounding-occluded-windows \ - --disable-blink-features=AutomationControlled \ - --disable-breakpad \ - --disable-client-side-phishing-detection \ - --disable-component-extensions-with-background-pages \ - --disable-crash-reporter \ - --disable-crashpad \ - --disable-dev-shm-usage \ - --disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate \ - --disable-field-trial-config \ - --disable-gcm-registration \ - --disable-gpu \ - --disable-gpu-compositing \ - --disable-hang-monitor \ - --disable-ipc-flooding-protection \ - --disable-notifications \ - --disable-popup-blocking \ - --disable-prompt-on-repost \ - --disable-renderer-backgrounding \ - --disable-search-engine-choice-screen \ - --disable-software-rasterizer \ - --enable-use-zoom-for-dsf=false \ - --export-tagged-pdf \ - --force-color-profile=srgb \ - --hide-crash-restore-bubble \ - --hide-scrollbars \ - --metrics-recording-only \ - --mute-audio \ - --no-default-browser-check \ - --no-first-run \ - --no-sandbox \ - --no-service-autorun \ - --ozone-platform=headless \ - --password-store=basic \ - --unsafely-disable-devtools-self-xss-warnings \ - --use-angle=swiftshader \ - --use-gl=angle \ - --use-mock-keychain" -fi -export CHROMIUM_FLAGS - -# ----------------------------------------------------------------------------- -# House-keeping 
for the unprivileged "kernel" user ---------------------------- -# When RUN_AS_ROOT is true, we skip ownership changes since we're running as root. -# ----------------------------------------------------------------------------- -if [[ "${RUN_AS_ROOT:-}" != "true" ]]; then - dirs=( - /home/kernel/user-data - /home/kernel/.config/chromium - /home/kernel/.pki/nssdb - /home/kernel/.cache/dconf - /tmp - /var/log - /var/log/supervisord - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done - - # Ensure correct ownership (ignore errors if already correct) - chown -R kernel:kernel /home/kernel /home/kernel/user-data /home/kernel/.config /home/kernel/.pki /home/kernel/.cache 2>/dev/null || true - # Make policy directory writable for runtime updates - chown -R kernel:kernel /etc/chromium/policies 2>/dev/null || true -else - # When running as root, just create the necessary directories without ownership changes - dirs=( - /tmp - /var/log - /var/log/supervisord - /home/kernel - /home/kernel/user-data - ) - - for dir in "${dirs[@]}"; do - if [ ! -d "$dir" ]; then - mkdir -p "$dir" - fi - done -fi - -# ----------------------------------------------------------------------------- -# Dynamic log aggregation for /var/log/supervisord ----------------------------- -# ----------------------------------------------------------------------------- -# Tails any existing and future files under /var/log/supervisord, -# prefixing each line with the relative filepath, e.g. [chromium] ... 
-start_dynamic_log_aggregator() { - echo "[wrapper] Starting dynamic log aggregator for /var/log/supervisord" - ( - declare -A tailed_files=() - start_tail() { - local f="$1" - [[ -f "$f" ]] || return 0 - [[ -n "${tailed_files[$f]:-}" ]] && return 0 - local label="${f#/var/log/supervisord/}" - # Tie tails to this subshell lifetime so they exit when we stop it - tail --pid="$$" -n +1 -F "$f" 2>/dev/null | sed -u "s/^/[${label}] /" & - tailed_files[$f]=1 - } - # Periodically scan for new *.log files without extra dependencies - while true; do - while IFS= read -r -d '' f; do - start_tail "$f" - done < <(find /var/log/supervisord -type f -print0 2>/dev/null || true) - sleep 1 - done - ) & - tail_pids+=("$!") -} - -# Track background tailing processes for cleanup -tail_pids=() - -# Start log aggregator early so we see supervisor and service logs as they appear -start_dynamic_log_aggregator - -# Export common env used by services -export DISPLAY=:1 -export HEIGHT=${HEIGHT:-1080} -export WIDTH=${WIDTH:-1920} -export INTERNAL_PORT="${INTERNAL_PORT:-9223}" -export CHROME_PORT="${CHROME_PORT:-9222}" - -# Cleanup handler -cleanup () { - echo "[wrapper] Cleaning up..." - # Re-enable scale-to-zero if the script terminates early - enable_scale_to_zero - supervisorctl -c /etc/supervisor/supervisord.conf stop chromedriver || true - supervisorctl -c /etc/supervisor/supervisord.conf stop chromium || true - supervisorctl -c /etc/supervisor/supervisord.conf stop xvfb || true - supervisorctl -c /etc/supervisor/supervisord.conf stop dbus || true - supervisorctl -c /etc/supervisor/supervisord.conf stop kernel-images-api || true - # Stop log tailers - if [[ -n "${tail_pids[*]:-}" ]]; then - for tp in "${tail_pids[@]}"; do - kill -TERM "$tp" 2>/dev/null || true - done - fi -} -trap cleanup TERM INT - -echo "[wrapper] Starting supervisord" -supervisord -c /etc/supervisor/supervisord.conf -echo "[wrapper] Waiting for supervisord socket..." 
-for i in {1..30}; do - if [ -S /var/run/supervisor.sock ]; then - break - fi - sleep 0.2 -done - -init-envoy.sh - -echo "[wrapper] Starting system D-Bus daemon via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start dbus -for i in {1..50}; do - if [ -S /run/dbus/system_bus_socket ]; then - break - fi - sleep 0.2 -done -export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" - -echo "[wrapper] Starting Xvfb via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start xvfb -for i in {1..50}; do - if xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; then - break - fi - sleep 0.2 -done - -echo "[wrapper] Starting Chromium via supervisord on internal port $INTERNAL_PORT" -supervisorctl -c /etc/supervisor/supervisord.conf start chromium -wait_for_tcp_port 127.0.0.1 "$INTERNAL_PORT" "Chromium remote debugging" 100 0.2 "20s" || true - -echo "[wrapper] ✨ Starting kernel-images API via supervisord." -supervisorctl -c /etc/supervisor/supervisord.conf start kernel-images-api -API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" -wait_for_tcp_port 127.0.0.1 "${API_PORT}" "kernel-images API" - -echo "[wrapper] Starting ChromeDriver via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start chromedriver -wait_for_tcp_port 127.0.0.1 9225 "ChromeDriver" 50 0.2 "10s" || true - -echo "[wrapper] startup complete!" 
-# Re-enable scale-to-zero once startup has completed (when not under Docker) -if [[ -z "${WITHDOCKER:-}" ]]; then - enable_scale_to_zero -fi -# Keep the container running while streaming logs -wait diff --git a/server/cmd/wrapper/main.go b/server/cmd/wrapper/main.go new file mode 100644 index 00000000..6976f827 --- /dev/null +++ b/server/cmd/wrapper/main.go @@ -0,0 +1,618 @@ +// wrapper boots the chromium-headful and chromium-headless containers: +// prepares the environment, starts supervisord, brings services up in parallel +// where the dependency graph allows, and waits for CDP to be reachable through +// kernel-images-api. +// +// Replaces the legacy /wrapper.sh shipped in both images. Behavior parity is +// intentional — we still rely on supervisord, sysctl, dbus, etc. The only goal +// beyond parity is minimizing time-to-CDP-ready by removing serial dead time. +// +// The headful vs headless profile is detected at boot from supervisor's conf.d +// (xorg.conf → headful, xvfb.conf → headless), which keeps a single binary +// usable in both images without Dockerfile coordination. +package main + +import ( + "bufio" + "context" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" +) + +const ( + supervisorConf = "/etc/supervisor/supervisord.conf" + supervisorConfD = "/etc/supervisor/conf.d/services" + supervisorSock = "/var/run/supervisor.sock" + supervisordLogD = "/var/log/supervisord" + scaleToZeroFile = "/uk/libukp/scale_to_zero_disable" + dbusSocket = "/run/dbus/system_bus_socket" + defaultDisplay = ":1" + defaultIntPort = "9223" + defaultAPIPort = "10001" +) + +type profile int + +const ( + profileHeadful profile = iota + profileHeadless +) + +// detectProfile keys off whichever X server's supervisor conf is present. +// The image build is what writes these files, so this is deterministic. 
+func detectProfile() profile {
+	if _, err := os.Stat(filepath.Join(supervisorConfD, "xvfb.conf")); err == nil {
+		return profileHeadless
+	}
+	return profileHeadful
+}
+
+// main runs the boot sequence: prepare the environment, start supervisord in
+// the foreground, bring services up in dependency phases (A: X server, dbus,
+// chromedriver; B: chromium and, on headful, mutter/neko; C: envoy init and
+// kernel-images-api), wait for the readiness probes, then block until
+// supervisord exits.
+//
+// NOTE(review): the SIGTERM/SIGINT handler is only installed after
+// waitAllReady returns, so a signal that arrives during startup is not
+// forwarded to supervisord — confirm that is acceptable.
+func main() {
+	t0 := time.Now()
+	prof := detectProfile()
+	logf("starting wrapper (profile=%s)", profileName(prof))
+
+	// /dev/shm: only mount when not running under Docker (Docker manages it).
+	if os.Getenv("WITHDOCKER") == "" {
+		// os.FileMode carries the sticky bit as os.ModeSticky; the octal
+		// 0o1000 position is not part of Perm() and is silently dropped by
+		// MkdirAll/Chmod, so spell the mode with the named bit.
+		const shmMode = os.ModeSticky | 0o777
+		_ = os.MkdirAll("/dev/shm", shmMode)
+		_ = os.Chmod("/dev/shm", shmMode)
+		_ = exec.Command("mount", "-t", "tmpfs", "tmpfs", "/dev/shm").Run()
+	}
+
+	// Disable scale-to-zero for the duration of startup; restored on exit.
+	disableScaleToZero()
+	defer enableScaleToZero()
+
+	// Headless ships a default CHROMIUM_FLAGS list (headless+stealth flags)
+	// when callers don't set one. Headful's defaults are caller-supplied.
+	if prof == profileHeadless {
+		applyHeadlessDefaultFlags()
+	}
+
+	// Hostname: some envs boot with empty/(none); pick a friendly default.
+	if h, err := os.ReadFile("/proc/sys/kernel/hostname"); err == nil {
+		if v := strings.TrimSpace(string(h)); v == "" || v == "(none)" {
+			_ = exec.Command("hostname", "kernel-vm").Run()
+			_ = os.WriteFile("/proc/sys/kernel/hostname", []byte("kernel-vm"), 0o644)
+		}
+	}
+	if os.Getenv("HOSTNAME") == "" {
+		_ = os.Setenv("HOSTNAME", "kernel-vm")
+	}
+
+	// Disable IPv6 — Chromium DOH wastes connection slots on unreachable v6 endpoints.
+	_ = os.WriteFile("/proc/sys/net/ipv6/conf/all/disable_ipv6", []byte("1"), 0o644)
+	_ = os.WriteFile("/proc/sys/net/ipv6/conf/default/disable_ipv6", []byte("1"), 0o644)
+
+	// Pre-create per-user dirs so chromium subsystems don't error.
+	prepareUserDirs(os.Getenv("RUN_AS_ROOT") == "true")
+
+	// Tail aggregator for service logs.
+	startLogAggregator()
+
+	// Default env that downstream services expect.
+	_ = os.Setenv("DISPLAY", defaultDisplay)
+	if os.Getenv("INTERNAL_PORT") == "" {
+		_ = os.Setenv("INTERNAL_PORT", defaultIntPort)
+	}
+	if os.Getenv("CHROME_PORT") == "" {
+		_ = os.Setenv("CHROME_PORT", "9222")
+	}
+	// Point dbus clients at the system bus socket. Set before supervisord
+	// starts so it captures the env for child services (notably chromium,
+	// which would otherwise spam autolaunch errors).
+	_ = os.Setenv("DBUS_SESSION_BUS_ADDRESS", "unix:path="+dbusSocket)
+
+	// Stale X locks from prior runs.
+	_ = os.Remove("/tmp/.X1-lock")
+	_ = os.Remove("/tmp/.X11-unix/X1")
+
+	// supervisord — start in nodaemon mode so we own its lifecycle.
+	// Without -n it forks and the parent exits with code 0, which would
+	// drop us out of supCmd.Wait() and the container would stop.
+	logf("starting supervisord")
+	supCmd := exec.Command("supervisord", "-n", "-c", supervisorConf)
+	supCmd.Stdout = os.Stdout
+	supCmd.Stderr = os.Stderr
+	if err := supCmd.Start(); err != nil {
+		// fatalf exits via os.Exit, which skips deferred calls, so restore
+		// scale-to-zero explicitly before bailing out.
+		enableScaleToZero()
+		fatalf("supervisord start: %v", err)
+	}
+	waitForSocket(supervisorSock, 10*time.Second)
+
+	// Phase A: identity-free services with no X/dbus dependency. chromedriver
+	// listens on 9225 immediately and only attaches to chromium on session
+	// creation, so it can come up alongside the display stack. The envoy
+	// forward-proxy CA cert is baked into the image at build time (see
+	// shared/envoy/bake-certs.sh), so chromium trusts it on first start with
+	// no runtime cert work to wait on.
+	xServer := "xorg"
+	if prof == profileHeadless {
+		xServer = "xvfb"
+	}
+	startAll(xServer, "dbus", "chromedriver")
+	waitForX(defaultDisplay, 20*time.Second)
+	waitForSocket(dbusSocket, 10*time.Second)
+
+	// Pre-touch chromium's supervisord log so kernel-images-api's `tail -f`
+	// doesn't bail out and enter its 250ms retry backoff when started in
+	// parallel with chromium.
+	_ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644)
+
+	// Phase B: identity-free X/dbus consumers. Chromium itself doesn't read
+	// any per-instance identity envs — it just needs the envoy cert (baked
+	// into the image) in trust. mutter is the compositor on headful; neko is
+	// the WebRTC streamer when ENABLE_WEBRTC=true.
+	webrtc := prof == profileHeadful && os.Getenv("ENABLE_WEBRTC") == "true"
+	var phaseB []string
+	if prof == profileHeadful {
+		phaseB = []string{"mutter", "chromium"}
+		if webrtc {
+			phaseB = append(phaseB, "neko")
+		}
+	} else {
+		phaseB = []string{"chromium"}
+	}
+	startAll(phaseB...)
+
+	// FORK HOOK:
+	// When this binary runs as a forked snapshot restore, the per-fork
+	// identity envs (INST_NAME, METRO_NAME, XDS_SERVER, KERNEL_INSTANCE_JWT,
+	// plus any future per-tenant secrets) won't be set yet at this point —
+	// the snapshot was taken from a different instance. Insert the
+	// following sequence here once the env-delivery channel exists:
+	//   1. Block on the host-pushed env bundle (vsock socket, virtio-fs
+	//      drop file, or whatever transport the control plane settles on).
+	//   2. Apply the bundle to this process's environ via os.Setenv so
+	//      Phase C below picks them up via the existing $VAR expansion in
+	//      init-envoy.sh and the supervisorctl-spawned services inherit
+	//      them.
+	//   3. Phase C uses `supervisorctl restart envoy` (idempotent — start
+	//      on first boot, stop+start on a re-render after fork) so a
+	//      restored snapshot drops its stale identity cleanly.
+	// Boot path keeps running through unchanged: the wait simply no-ops
+	// when there's no fork bundle to receive.
+
+	// Phase C: identity-bound. Render envoy bootstrap with INST_NAME/JWT/etc
+	// and (re)start envoy + kernel-images-api. Both services use `restart`
+	// so the same code path works for boot (start a stopped service) and
+	// post-fork (stop+start to force a re-read of refreshed envs).
+	if isExecutable("/usr/local/bin/init-envoy.sh") {
+		runStream("envoy-init", "/usr/local/bin/init-envoy.sh")
+	}
+	restartAll("kernel-images-api")
+
+	// Wait for the union of caller-visible ready signals. Each probe runs
+	// concurrently and logs as soon as its target is reachable.
+	waitAllReady(t0, webrtc)
+	logf("ready in %s", since(t0))
+
+	// Cosmetic + non-critical services come up off the hot path. Headless has
+	// no audio stack and no UI to dismiss.
+	if prof == profileHeadful {
+		go func() {
+			startAll("pulseaudio")
+			if os.Getenv("RUN_AS_ROOT") == "true" {
+				dismissNoSandboxWarning()
+			}
+		}()
+	}
+
+	// Re-enable scale-to-zero now that the hot path is up.
+	enableScaleToZero()
+
+	// Forward signals so cleanup runs and supervisord is taken down cleanly.
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)
+	go func() {
+		<-sigs
+		logf("shutdown: stopping services")
+		_ = exec.Command("supervisorctl", "-c", supervisorConf, "stop", "all").Run()
+		_ = supCmd.Process.Signal(syscall.SIGTERM)
+	}()
+
+	// Block on supervisord; container exits when it does.
+	if err := supCmd.Wait(); err != nil {
+		logf("supervisord exited: %v", err)
+	}
+}
+
+// startAll asks supervisord to start the given programs. We invoke
+// supervisorctl once (it accepts multiple args) so we don't pay python
+// cold-start costs per service.
+func startAll(progs ...string) {
+	supervisorctl("start", progs...)
+}
+
+// restartAll is the start-or-stop+start variant. It's used for services
+// that may already be running from a snapshot restore (post-fork, see the
+// FORK HOOK in main) so they pick up refreshed envs cleanly. supervisorctl
+// `restart` is a no-op stop on cold programs followed by a normal start.
+func restartAll(progs ...string) {
+	supervisorctl("restart", progs...)
+}
+
+// supervisorctl runs `supervisorctl -c <conf> <verb> <progs...>` with output
+// streamed to this process's stdout/stderr. It does nothing when progs is
+// empty, and its exit status is deliberately ignored.
+func supervisorctl(verb string, progs ...string) {
+	if len(progs) == 0 {
+		return
+	}
+	args := append([]string{"-c", supervisorConf, verb}, progs...)
+	cmd := exec.Command("supervisorctl", args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	_ = cmd.Run() // a service that fails to come up will surface via readiness checks
+}
+
+// waitForSocket polls every 20ms until path exists and is a unix socket, or
+// logs a warning once timeout has elapsed. It never fails the boot.
+func waitForSocket(path string, timeout time.Duration) {
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		if fi, err := os.Stat(path); err == nil && fi.Mode()&os.ModeSocket != 0 {
+			return
+		}
+		time.Sleep(20 * time.Millisecond)
+	}
+	logf("WARNING: socket %s not ready after %s", path, timeout)
+}
+
+// waitAllReady gates on all caller-visible ready signals concurrently:
+//   - CDP   : HTTP /json/version on the public CDP port (proves api proxy is wired
+//     through to chromium's DevTools server)
+//   - cd    : TCP on chromedriver's internal port 9225 (api on 9224 is bound when
+//     api itself is up, which CDP readiness already implies)
+//   - proxy : TCP on chromium's --forward-proxy-port (8888)
+//   - neko  : TCP on neko's HTTP port (8080), only when ENABLE_WEBRTC=true
+//   - envoy : TCP on envoy's listener (3128), only when envoy is enabled
+//
+// Each probe is retried every 20ms for up to 60s; a probe that never succeeds
+// is logged as a warning and does not abort the boot.
+func waitAllReady(t0 time.Time, webrtc bool) {
+	chromePort := os.Getenv("CHROME_PORT")
+	if chromePort == "" {
+		chromePort = "9222"
+	}
+
+	type probe struct {
+		name string
+		fn   func() bool
+	}
+	// tcp builds a probe function that dials the given loopback port.
+	tcp := func(port string) func() bool {
+		return func() bool { return tcpOK("127.0.0.1", port) }
+	}
+
+	probes := []probe{
+		{"cdp", func() bool { return httpProbeOK("http://127.0.0.1:" + chromePort + "/json/version") }},
+		{"chromedriver", tcp("9225")},
+		{"forward-proxy", tcp("8888")},
+	}
+	if webrtc {
+		probes = append(probes, probe{"neko", tcp("8080")})
+	}
+	if envoyEnabled() {
+		probes = append(probes, probe{"envoy", tcp("3128")})
+	}
+
+	// Buffered to len(probes) so no probe goroutine blocks on send.
+	done := make(chan string, len(probes))
+	for _, p := range probes {
+		go func(name string, fn func() bool) {
+			deadline := time.Now().Add(60 * time.Second)
+			for time.Now().Before(deadline) {
+				if fn() {
+					logf("[ready] %s in %s", name, since(t0))
+					done <- name
+					return
+				}
+				time.Sleep(20 * time.Millisecond)
+			}
+			logf("[ready] WARNING: %s never became ready", name)
+			done <- name
+		}(p.name, p.fn)
+	}
+	for range probes {
+		<-done
+	}
+}
+
+// tcpOK reports whether a TCP (IPv4) connection to host:port can be opened
+// within 200ms.
+func tcpOK(host, port string) bool {
+	c, err := net.DialTimeout("tcp4", net.JoinHostPort(host, port), 200*time.Millisecond)
+	if err != nil {
+		return false
+	}
+	_ = c.Close()
+	return true
+}
+
+var probeClient = &http.Client{Timeout: time.Second}
+
+// httpProbeOK reports whether a GET on url returns 200 within one second and
+// the first 4 KiB of the body contain the `"Browser"` key.
+func httpProbeOK(url string) bool {
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return false
+	}
+	resp, err := probeClient.Do(req)
+	if err != nil {
+		return false
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return false
+	}
+	body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+	return strings.Contains(string(body), `"Browser"`)
+}
+
+// waitForX waits until the X server is reachable on display :N. We try both
+// the named unix socket (Xorg, headful) and the abstract namespace socket
+// (Xvfb runs with -nolisten unix, which disables the named socket but leaves
+// the abstract one). Cheaper than spawning xdpyinfo in a loop.
+func waitForX(display string, timeout time.Duration) { + num := strings.TrimPrefix(display, ":") + named := "/tmp/.X11-unix/X" + num + abstract := "@/tmp/.X11-unix/X" + num // Linux abstract namespace + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if c, err := net.DialTimeout("unix", named, 200*time.Millisecond); err == nil { + _ = c.Close() + return + } + if c, err := net.DialTimeout("unix", abstract, 200*time.Millisecond); err == nil { + _ = c.Close() + return + } + time.Sleep(20 * time.Millisecond) + } + logf("WARNING: X display %s not responsive after %s", display, timeout) +} + +func disableScaleToZero() { writeScaleToZero("+") } +func enableScaleToZero() { writeScaleToZero("-") } + +func writeScaleToZero(c string) { + if _, err := os.Stat(scaleToZeroFile); err != nil { + return // not running on Unikraft Cloud + } + _ = os.WriteFile(scaleToZeroFile, []byte(c), 0o644) +} + +// applyHeadlessDefaultFlags mirrors the legacy headless wrapper.sh: when +// CHROMIUM_FLAGS is unset, fill in a curated headless+stealth flag list. +// --disable-background-networking is intentionally omitted: it prevents +// Chrome from fetching ExtensionInstallForcelist managed extensions. 
+func applyHeadlessDefaultFlags() {
+	// A caller-provided, non-blank CHROMIUM_FLAGS always wins.
+	if current := os.Getenv("CHROMIUM_FLAGS"); strings.TrimSpace(current) != "" {
+		return
+	}
+	defaults := []string{
+		"--accept-lang=en-US,en",
+		"--allow-pre-commit-input",
+		"--blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4",
+		"--crash-dumps-dir=/tmp/chromium-dumps",
+		"--disable-back-forward-cache",
+		"--disable-background-timer-throttling",
+		"--disable-backgrounding-occluded-windows",
+		"--disable-blink-features=AutomationControlled",
+		"--disable-breakpad",
+		"--disable-client-side-phishing-detection",
+		"--disable-component-extensions-with-background-pages",
+		"--disable-crash-reporter",
+		"--disable-crashpad",
+		"--disable-dev-shm-usage",
+		"--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate",
+		"--disable-field-trial-config",
+		"--disable-gcm-registration",
+		"--disable-gpu",
+		"--disable-gpu-compositing",
+		"--disable-hang-monitor",
+		"--disable-ipc-flooding-protection",
+		"--disable-notifications",
+		"--disable-popup-blocking",
+		"--disable-prompt-on-repost",
+		"--disable-renderer-backgrounding",
+		"--disable-search-engine-choice-screen",
+		"--disable-software-rasterizer",
+		"--enable-use-zoom-for-dsf=false",
+		"--export-tagged-pdf",
+		"--force-color-profile=srgb",
+		"--hide-crash-restore-bubble",
+		"--hide-scrollbars",
+		"--metrics-recording-only",
+		"--mute-audio",
+		"--no-default-browser-check",
+		"--no-first-run",
+		"--no-sandbox",
+		"--no-service-autorun",
+		"--ozone-platform=headless",
+		"--password-store=basic",
+		"--unsafely-disable-devtools-self-xss-warnings",
+		"--use-angle=swiftshader",
+		"--use-gl=angle",
+		"--use-mock-keychain",
+	}
+	_ = os.Setenv("CHROMIUM_FLAGS", strings.Join(defaults, " "))
+}
+
+// profileName returns the log label for p: "headless" for profileHeadless,
+// "headful" for anything else.
+func profileName(p profile) string {
+	switch p {
+	case profileHeadless:
+		return "headless"
+	default:
+		return "headful"
+	}
+}
+
+// envoyEnabled mirrors init-envoy.sh's gate: when any of these are unset
+// the script exits early without starting envoy, so we should skip the
+// readiness probe too (otherwise it would just time out at 60s).
+func envoyEnabled() bool {
+	for _, key := range []string{"INST_NAME", "METRO_NAME", "XDS_SERVER", "KERNEL_INSTANCE_JWT"} {
+		if os.Getenv(key) == "" {
+			return false
+		}
+	}
+	return true
+}
+
+// prepareUserDirs creates the directories the services expect (mode 0755,
+// errors ignored). When asRoot is false it also chowns the kernel user's home
+// subtrees and /etc/chromium/policies to kernel:kernel; when asRoot is true no
+// ownership is changed.
+func prepareUserDirs(asRoot bool) {
+	required := []string{
+		"/home/kernel/user-data",
+		"/home/kernel/.config/chromium",
+		"/home/kernel/.pki/nssdb",
+		"/home/kernel/.cache/dconf",
+		"/tmp",
+		"/var/log",
+		supervisordLogD,
+	}
+	if asRoot {
+		required = []string{"/tmp", "/var/log", supervisordLogD, "/home/kernel", "/home/kernel/user-data"}
+	}
+	for _, dir := range required {
+		_ = os.MkdirAll(dir, 0o755)
+	}
+	if asRoot {
+		return
+	}
+
+	homeTargets := []string{
+		"/home/kernel", "/home/kernel/user-data", "/home/kernel/.config",
+		"/home/kernel/.pki", "/home/kernel/.cache",
+	}
+	chownArgs := append([]string{"-R", "kernel:kernel"}, homeTargets...)
+	_ = exec.Command("chown", chownArgs...).Run()
+	_ = exec.Command("chown", "-R", "kernel:kernel", "/etc/chromium/policies").Run()
+}
+
+// startLogAggregator tails any file under /var/log/supervisord, prefixing
+// each line with the relative path so the container log stream remains
+// readable.
+func startLogAggregator() {
+	_ = os.MkdirAll(supervisordLogD, 0o755)
+	go func() {
+		// seen records every path that already has a tailer so each file is
+		// tailed at most once.
+		seen := map[string]bool{}
+		for {
+			entries, _ := os.ReadDir(supervisordLogD)
+			for _, e := range entries {
+				path := filepath.Join(supervisordLogD, e.Name())
+				if seen[path] {
+					continue
+				}
+				if fi, err := os.Stat(path); err == nil && fi.Mode().IsRegular() {
+					seen[path] = true
+					go tailFile(path)
+				}
+			}
+			time.Sleep(500 * time.Millisecond)
+		}
+	}()
+}
+
+// tailFile runs `tail -n +1 -F path` and copies every line to stdout prefixed
+// with "[<basename>] ". It returns when tail's output ends or a line exceeds
+// the 1 MiB scanner limit; in both cases tail is stopped and reaped so it
+// neither blocks on a full pipe nor lingers as a zombie.
+func tailFile(path string) {
+	cmd := exec.Command("tail", "-n", "+1", "-F", path)
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return
+	}
+	cmd.Stderr = nil // discard tail's own diagnostics
+	if err := cmd.Start(); err != nil {
+		return
+	}
+	label := filepath.Base(path)
+	scanner := bufio.NewScanner(stdout)
+	scanner.Buffer(make([]byte, 64*1024), 1024*1024)
+	for scanner.Scan() {
+		fmt.Printf("[%s] %s\n", label, scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		logf("log tail for %s stopped: %v", label, err)
+	}
+	// Kill is a no-op error if tail already exited; Wait releases the pipe
+	// and the process table entry either way.
+	_ = cmd.Process.Kill()
+	_ = cmd.Wait()
+}
+
+// dismissNoSandboxWarning replicates the wrapper.sh behaviour of clicking the
+// "X" on the --no-sandbox infobar. Cosmetic; runs off the hot path.
+func dismissNoSandboxWarning() {
+	out, err := exec.Command("xdotool", "getdisplaygeometry").Output()
+	if err != nil {
+		return
+	}
+	parts := strings.Fields(strings.TrimSpace(string(out)))
+	if len(parts) != 2 {
+		return
+	}
+	// Click 30px in from the right edge when the width parses to more than
+	// 30; otherwise fall back to the raw width string.
+	width := parts[0]
+	x := width
+	if w := atoi(width); w > 30 {
+		x = fmt.Sprintf("%d", w-30)
+	}
+	// Wait up to 30s for the first "New Tab - Chromium" window and focus it.
+	target := "New Tab - Chromium"
+	deadline := time.Now().Add(30 * time.Second)
+	for time.Now().Before(deadline) {
+		out, err := exec.Command("xdotool", "search", "--name", target).Output()
+		if err == nil && len(strings.TrimSpace(string(out))) > 0 {
+			id := strings.Fields(string(out))[0]
+			if exec.Command("xdotool", "windowactivate", "--sync", id).Run() == nil {
+				break
+			}
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+	// Without a settle delay the click can land before the --no-sandbox infobar
+	// has finished painting, leaving the warning on screen. The legacy
+	// wrapper.sh slept 5s here for the same reason. Runs off the hot path
+	// (goroutine fired post-readiness) so this doesn't extend time-to-CDP.
+	time.Sleep(5 * time.Second)
+	port := os.Getenv("KERNEL_IMAGES_API_PORT")
+	if port == "" {
+		port = defaultAPIPort
+	}
+	body := fmt.Sprintf(`{"x":%s,"y":115}`, x)
+	_ = exec.Command("curl", "-s", "-o", "/dev/null", "-X", "POST",
+		"http://localhost:"+port+"/computer/click_mouse",
+		"-H", "Content-Type: application/json",
+		"-d", body).Run()
+}
+
+// atoi parses a string of ASCII digits. It returns 0 for any input containing
+// a non-digit (including a sign) and for the empty string; it does not guard
+// against overflow.
+func atoi(s string) int {
+	n := 0
+	for _, c := range s {
+		if c < '0' || c > '9' {
+			return 0
+		}
+		n = n*10 + int(c-'0')
+	}
+	return n
+}
+
+// isExecutable reports whether path is a regular file with at least one
+// execute permission bit set.
+func isExecutable(path string) bool {
+	fi, err := os.Stat(path)
+	return err == nil && fi.Mode().IsRegular() && fi.Mode().Perm()&0o111 != 0
+}
+
+// runStream runs name with args to completion, echoing its stdout and stderr
+// with a "[label] " prefix. The command's exit status is ignored.
+func runStream(label, name string, args ...string) {
+	cmd := exec.Command(name, args...)
+	cmd.Stdout = prefixWriter{label: label, w: os.Stdout}
+	cmd.Stderr = prefixWriter{label: label, w: os.Stderr}
+	_ = cmd.Run()
+}
+
+// prefixWriter is an io.Writer that re-emits each non-empty line it receives
+// on w with a "[label] " prefix.
+type prefixWriter struct {
+	label string
+	w     *os.File
+}
+
+// Write splits b on newlines and prints every non-empty line with the prefix.
+// It always reports the whole input as consumed. A line that arrives split
+// across two Write calls is printed as two prefixed lines.
+func (p prefixWriter) Write(b []byte) (int, error) {
+	for _, line := range strings.Split(strings.TrimRight(string(b), "\n"), "\n") {
+		if line == "" {
+			continue
+		}
+		fmt.Fprintf(p.w, "[%s] %s\n", p.label, line)
+	}
+	return len(b), nil
+}
+
+// logf writes a formatted line to stdout; the prefix mirrors the bash
+// script's [wrapper] tag. No timestamp is added.
+func logf(format string, args ...any) {
+	fmt.Fprintf(os.Stdout, "[wrapper] "+format+"\n", args...)
+}
+
+// since returns the time elapsed since t, truncated to whole milliseconds.
+func since(t time.Time) time.Duration {
+	return time.Since(t).Truncate(time.Millisecond)
+}
+
+// fatalf logs the message and exits with status 1. Deferred functions in the
+// caller do not run.
+func fatalf(format string, args ...any) {
+	logf(format, args...)
+	os.Exit(1)
+}
diff --git a/shared/envoy/bake-certs.sh b/shared/envoy/bake-certs.sh
new file mode 100644
index 00000000..6a987cd9
--- /dev/null
+++ b/shared/envoy/bake-certs.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+set -eux
+
+# Generate the self-signed cert envoy presents on the localhost forward proxy,
+# install it into the system CA trust store, and seed the NSS DBs for both the
+# root and `kernel` users so chromium trusts it regardless of which user the
+# wrapper runs as. Runs once at image build time so container startup pays
+# zero cost — no openssl invocation, no certutil shell-outs.
+#
+# Safety of a shared (per-image-tag) cert across customer instances:
+# - The cert's only Subject Alternative Names are `DNS:localhost` and
+# `IP:127.0.0.1`. A TLS client only accepts it for connections to
+# localhost, so the cert (and its private key) are useless for MITMing
+# any traffic the cert holder doesn't already control.
+# - The cert is trusted only by this image's system CA store and chromium
+# NSS DB. It is not trusted by customer machines, the host, or anything
+# outside this container.
+# - The forward proxy listens on 127.0.0.1 inside a network-isolated
+# container. One customer's container has no path to another customer's
+# localhost. Even an attacker holding the private key would need code
+# execution inside a sibling container to use it, at which point they
+# have everything anyway.
+# - The cert never leaves the container — no customer SDK, no browser
+# extension, no host service ever sees it.
+# Bottom line: this CA is an in-container trust anchor for a localhost-only
+# TLS listener. Sharing the key across containers built from the same image
+# does not widen the threat model.
+ +mkdir -p /etc/envoy/certs +openssl req -x509 -nodes -days 3650 -newkey rsa:2048 \ + -keyout /etc/envoy/certs/proxy.key \ + -out /etc/envoy/certs/proxy.crt \ + -subj "/C=US/ST=CA/O=Kernel/CN=localhost" \ + -addext "subjectAltName = DNS:localhost,IP:127.0.0.1" + +# System trust store — picked up by curl, openssl, Go's net/http, etc. +cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt +cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt + +# Seed both NSS DBs so chromium trusts the cert under either user. The +# wrapper's RUN_AS_ROOT branch chooses which DB chromium reads from at +# runtime; seeding both at build time means we don't need to know yet. +mkdir -p /root/.pki/nssdb /home/kernel/.pki/nssdb +certutil -d /root/.pki/nssdb -N --empty-password 2>/dev/null || true +certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true +certutil -d /root/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt +certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt + +# Install any pre-baked CA certs (BrightData certs are downloaded into +# /etc/envoy/brightdata by install-proxy.sh in private images). Same +# identity-free trust-store work as the self-signed cert above — moving it +# here means runtime sees an already-populated trust store. 
+if [ -d /etc/envoy/brightdata ]; then + for cert in /etc/envoy/brightdata/*.crt; do + [ -f "$cert" ] || continue + cert_name=$(basename "$cert" .crt) + cp "$cert" "/usr/local/share/ca-certificates/brightdata-${cert_name}.crt" + certutil -d /root/.pki/nssdb -A -t "C,," -n "BrightData $cert_name" -i "$cert" + certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "BrightData $cert_name" -i "$cert" + done +fi + +chown -R kernel:kernel /home/kernel/.pki + +update-ca-certificates diff --git a/shared/envoy/init-envoy.sh b/shared/envoy/init-envoy.sh index c27a3f25..c831a062 100644 --- a/shared/envoy/init-envoy.sh +++ b/shared/envoy/init-envoy.sh @@ -2,63 +2,27 @@ set -o pipefail -o errexit -o nounset -# The browser instance JWT is the sole token contract for xDS and host-local -# services in the image runtime. -INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}" +# Runtime config for envoy. Cert generation and CA trust install ran at image +# build time (see shared/envoy/bake-certs.sh) so this script only does the +# identity-bound work: render the bootstrap template with the per-instance +# envs and start envoy via supervisord. -# Check for required environment variables, to see if envoy is enabled +# Identity envs gate this script: without them xDS can't bind, so this is a +# no-op on images that don't run with a JWT. +INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}" if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then - echo "[envoy-init] Required environment variables not set. Skipping Envoy initialization." + echo "[envoy-init] Required environment variables not set. Skipping Envoy config/start." exit 0 fi -# Also check for template file if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then - echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy initialization." + echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy config/start." 
exit 0 fi echo "[envoy-init] Preparing Envoy bootstrap configuration" mkdir -p /etc/envoy -# Generate self-signed certificates for TLS forward proxy -echo "[envoy-init] Generating self-signed certificates for TLS forward proxy" -mkdir -p /etc/envoy/certs - -if [[ ! -f /etc/envoy/certs/proxy.crt || ! -f /etc/envoy/certs/proxy.key ]]; then - echo "[envoy-init] Creating new self-signed certificate" - openssl req -x509 -nodes -days 3650 -newkey rsa:2048 \ - -keyout /etc/envoy/certs/proxy.key \ - -out /etc/envoy/certs/proxy.crt \ - -subj "/C=US/ST=CA/O=Kernel/CN=localhost" \ - -addext "subjectAltName = DNS:localhost,IP:127.0.0.1" \ - 2>&1 | sed 's/^/[envoy-init] /' - echo "[envoy-init] Certificate generated successfully" - - # Add certificate to system trust store for Chrome/Chromium - echo "[envoy-init] Adding certificate to system trust store" - cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt - cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt - update-ca-certificates 2>&1 | sed 's/^/[envoy-init] /' - echo "[envoy-init] Certificate added to system trust store" -if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then - mkdir -p /root/.pki/nssdb - certutil -d /root/.pki/nssdb -N --empty-password 2>/dev/null || true - certutil -d /root/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt - echo "[envoy-init] Certificate added to nssdb as root" - else - mkdir -p /home/kernel/.pki/nssdb - certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true - certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt - chown -R kernel:kernel /home/kernel/.pki - echo "[envoy-init] Certificate added to nssdb as kernel" - fi - echo "[envoy-init] Certificate added to nssdb" -else - echo "[envoy-init] Certificates already exist, skipping generation" -fi - -# Render template with provided environment variables echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, 
METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***" inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g') metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g') @@ -71,39 +35,11 @@ sed -e "s|{INST_NAME}|$inst_esc|g" \ /etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml echo "[envoy-init] Starting Envoy via supervisord" -supervisorctl -c /etc/supervisor/supervisord.conf start envoy - -# Wait for Envoy port to be open -echo "[envoy-init] Waiting for Envoy port to open..." -port_open=false -for i in {1..50}; do - if nc -z 127.0.0.1 "3128" 2>/dev/null; then - echo "[envoy-init] Envoy port confirmed open" - port_open=true - break - fi - sleep 0.2 -done - -if [[ "$port_open" != "true" ]]; then - echo "[envoy-init] ERROR: Envoy port 3128 failed to open after 10 seconds" - exit 1 -fi - -# Test proxy functionality -echo "[envoy-init] Testing proxy functionality..." -proxy_working=false -for i in {1..50}; do - if curl -s -f -x https://127.0.0.1:3128 --max-time 2 https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html >/dev/null 2>&1; then - echo "[envoy-init] Confirmed a request is proxied" - proxy_working=true - break - fi - echo "[envoy-init] Check failed, trying again..." - sleep 0.2 -done - -if [[ "$proxy_working" != "true" ]]; then - echo "[envoy-init] ERROR: Envoy proxy test failed after 10 seconds" - exit 1 -fi +# `restart` is start-or-stop+start: on first boot this just starts envoy, +# on a re-render (e.g. post-fork env refresh) it forces a clean re-read +# of the rendered bootstrap. Either way no callers see stale identity. +supervisorctl -c /etc/supervisor/supervisord.conf restart envoy + +# Readiness (port 3128 reachable) is probed by the Go wrapper's +# waitAllReady alongside CDP/chromedriver, so this script returns as soon +# as the start request has been issued. 
diff --git a/shared/envoy/supervisor-envoy.conf b/shared/envoy/supervisor-envoy.conf index 4da59010..ae18726f 100644 --- a/shared/envoy/supervisor-envoy.conf +++ b/shared/envoy/supervisor-envoy.conf @@ -2,6 +2,6 @@ command=envoy -c /etc/envoy/bootstrap.yaml --log-level warn --drain-time-s 1 --drain-strategy immediate autostart=false autorestart=true -startsecs=2 +startsecs=0 stdout_logfile=/var/log/supervisord/envoy redirect_stderr=true