From dd9e4733f309643372c1d7193308f82a76ad86c4 Mon Sep 17 00:00:00 2001
From: "James N."
Date: Tue, 9 Jun 2026 16:11:04 -0700
Subject: [PATCH] ci(infra): apply per-environment tfvars via -var-file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Terraform deploy step passed only individual -var flags and never
referenced prod.tfvars / dev.tfvars, so enable_networking and
enable_private_endpoint silently fell back to their defaults (false).
As a result the pipeline deployed a PUBLIC topology even though both
tfvars declare the intended PRIVATE topology (VNet-integrated Container
Apps env + Cosmos private endpoint, public access disabled).

This left production in a broken half-state: a manual
'apply -var-file=prod.tfvars' had disabled Cosmos public access, but the
Container Apps environment was created without VNet integration (which
is immutable post-creation), so the backend egresses over the public
internet and Cosmos's firewall rejects it — every /chat returns 500.

Fix: select the per-environment var file (production or prod →
prod.tfvars, anything else → dev.tfvars) and pass it as -var-file,
listed before the explicit -var flags so those (env, subscription,
location, ACR, images, iteration) still override the file.
Networking/model config now comes from the tfvars as intended.

NOTE: applying this to an environment whose Container Apps env was
created without a VNet will FORCE-REPLACE that environment (VNet
integration is immutable), recreating the contained Container Apps.
Plan/redeploy during a maintenance window.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/infrastructure.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/infrastructure.yml b/.github/workflows/infrastructure.yml index 00a4f579b..63bcb2c63 100644 --- a/.github/workflows/infrastructure.yml +++ b/.github/workflows/infrastructure.yml @@ -82,8 +82,22 @@ jobs: export ARM_TENANT_ID="${{ vars.AZURE_TENANT_ID }}" export ARM_SUBSCRIPTION_ID="${{ vars.AZURE_SUBSCRIPTION_ID }}" - # Common -var flags used by plan and import + # Select the per-environment Terraform variable file. This is the + # single source of truth for networking, model deployments, and + # other env config (e.g. enable_networking / enable_private_endpoint). + # production → prod.tfvars ; everything else → dev.tfvars + case "${{ inputs.environment }}" in + production|prod) VAR_FILE="prod.tfvars" ;; + *) VAR_FILE="dev.tfvars" ;; + esac + echo "Using -var-file=${VAR_FILE} for environment '${{ inputs.environment }}'" + + # -var-file is listed FIRST so the explicit -var flags below (env, + # subscription, location, ACR, images, iteration) override the + # matching keys from the tfvars file. Terraform applies -var/-var-file + # in command-line order, with later values winning. TF_VARS=( + -var-file=${VAR_FILE} -var project_name=${{ github.event.repository.name }} -var environment=${{ inputs.environment }} -var tenant_id=${{ vars.AZURE_TENANT_ID }}