microsoft · james-tn · Jun 9, 2026 · Jun 8, 2026
diff --git a/.github/workflows/agent-evaluation.yml b/.github/workflows/agent-evaluation.yml
@@ -54,16 +54,17 @@ jobs:
   agent-evaluation:
     name: Agent Quality Evaluation
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     environment: ${{ inputs.environment || 'integration' }}
     permissions:
       contents: read
       id-token: write   # Needed for OIDC → DefaultAzureCredential
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.12'
 

diff --git a/.github/workflows/destroy.yml b/.github/workflows/destroy.yml
@@ -26,10 +26,17 @@ on:
         default: tf
         required: true
 
+# Serialize destroys per environment and never cancel one in progress
+# (interrupting Terraform mid-destroy is unsafe).
+concurrency:
+  group: destroy-${{ inputs.environment || 'integration' }}  # same fallback as the job's `environment:` so an empty input shares the group
+  cancel-in-progress: false
+
 jobs:
   terraform_destroy:
     name: Terraform Destroy
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     environment: ${{ inputs.environment || 'integration' }}
     permissions:
       id-token: write

diff --git a/.github/workflows/docker-application.yml b/.github/workflows/docker-application.yml
@@ -27,13 +27,14 @@ jobs:
   build:
     name: Build & Push Backend Image
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     environment: ${{ inputs.environment || 'integration' }}
     permissions:
       id-token: write
       contents: read
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Azure OIDC Login
         uses: azure/login@v2

diff --git a/.github/workflows/docker-mcp.yml b/.github/workflows/docker-mcp.yml
@@ -27,13 +27,14 @@ jobs:
   build:
     name: Build & Push MCP Image
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     environment: ${{ inputs.environment || 'integration' }}
     permissions:
       id-token: write
       contents: read
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Azure OIDC Login
         uses: azure/login@v2

diff --git a/.github/workflows/infrastructure.yml b/.github/workflows/infrastructure.yml
@@ -41,6 +41,7 @@ jobs:
   tf:
     name: Terraform Deployment
     runs-on: ubuntu-latest
+    timeout-minutes: 45
     environment: ${{ inputs.environment }}
     if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.iac-tool || 'tf') == 'tf' }}
     permissions:
@@ -166,6 +167,7 @@ jobs:
 
   bicep:
       runs-on: ubuntu-latest
+      timeout-minutes: 45
       environment: ${{ inputs.environment }}
       if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.iac-tool || 'tf') == 'bicep' }}
       permissions:

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
@@ -20,6 +20,14 @@ on:
         required: false
         default: true
         description: 'Whether MCP is internal-only (skip MCP tests)'
+      advisory:
+        type: boolean
+        required: false
+        default: false
+        description: >-
+          When true, a degraded/unreachable shared environment is reported but
+          does not fail the job (used for tests-only PRs to main, where the env
+          is pre-deployed and not built from the PR).
 
   workflow_dispatch:
     inputs:
@@ -35,45 +43,129 @@ on:
         description: 'MCP service endpoint URL (optional if internal)'
         required: false
 
+# Least-privilege: this workflow only reads the repo.
+permissions:
+  contents: read
+
 jobs:
   integration-tests:
     name: Run Integration Tests
     runs-on: ubuntu-latest
+    timeout-minutes: 20
     permissions:
       contents: read
-    # No environment needed - uses repo-level variables
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: tests/requirements.txt
 
       - name: Install test dependencies
+        run: pip install -r tests/requirements.txt
+
+      # ------------------------------------------------------------------
+      # Liveness gate: poll the backend until it responds, instead of a
+      # blind `sleep 30`. Distinguishes "environment unreachable/degraded"
+      # from "tests failed", so a broken *shared* env doesn't masquerade as
+      # a code regression.
+      # ------------------------------------------------------------------
+      - name: Wait for backend to become reachable
+        id: liveness  # later steps gate on steps.liveness.outputs.reachable
         run: |
-          pip install -r tests/requirements.txt
+          BE="${{ inputs.backend_endpoint }}"
+          if [ -z "$BE" ]; then
+            echo "reachable=false" >> "$GITHUB_OUTPUT"
+            echo "::warning::No backend endpoint provided"
+            exit 0
+          fi
+
+          echo "Polling $BE for readiness (up to ~90s)..."
+          reachable=false
+          for i in $(seq 1 18); do
+            code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "$BE/" || true)
+            # curl prints 000 itself on failure; only a real status in [100,500) counts as up.
+            if [ "${code:-0}" -ge 100 ] 2>/dev/null && [ "$code" -lt 500 ]; then
+              echo "Backend reachable (HTTP $code) on attempt $i"
+              reachable=true
+              break
+            fi
+            echo "  attempt $i: HTTP $code — retrying in 5s"
+            sleep 5
+          done
+          echo "reachable=$reachable" >> "$GITHUB_OUTPUT"
 
-      - name: Wait for Container Apps to warm up
+      - name: Skip note (environment unreachable)
+        if: steps.liveness.outputs.reachable != 'true'
         run: |
-          echo "Waiting 30 seconds for Container Apps to be ready..."
-          sleep 30
+          {
+            echo "## Integration Tests — SKIPPED (environment unreachable)"
+            echo ""
+            echo "The target backend did not return a healthy response, so the"
+            echo "integration suite was not run. This usually means the shared"
+            echo "environment is degraded (e.g. backend down, model key invalid),"
+            echo "not that this change is broken."
+            echo ""
+            echo "- Backend: \`${{ inputs.backend_endpoint }}\`"
+            echo "- Environment: \`${{ inputs.environment }}\`"
+            echo "- Advisory mode: \`${{ inputs.advisory }}\`"
+          } >> "$GITHUB_STEP_SUMMARY"
+          if [ "${{ inputs.advisory }}" = "true" ]; then
+            echo "Advisory mode: not failing the pipeline for a degraded shared env."
+            exit 0
+          fi
+          echo "::error::Backend unreachable and not in advisory mode."
+          exit 1
 
       - name: Run integration tests
+        if: steps.liveness.outputs.reachable == 'true'
+        id: pytest
+        # Advisory mode (tests-only PRs to main): report results without blocking.
+        # `== true` keeps this a boolean even when `advisory` is not supplied.
+        continue-on-error: ${{ inputs.advisory == true }}
         run: |
           cd tests
-          pytest -v -m "integration" --tb=short
+          pytest -v -m "integration" --tb=short \
+            --junitxml="junit-integration.xml"
         env:
           BACKEND_API_ENDPOINT: ${{ inputs.backend_endpoint }}
           MCP_ENDPOINT: ${{ inputs.mcp_endpoint }}
           MCP_INTERNAL_ONLY: ${{ inputs.mcp_internal_only && 'true' || 'false' }}
 
-      - name: Test Summary
+      - name: Upload test results
+        if: always() && steps.liveness.outputs.reachable == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: integration-test-results-${{ inputs.environment }}
+          path: tests/junit-integration.xml
+          if-no-files-found: ignore
+          retention-days: 14
+
+      - name: Test summary
         if: always()
         run: |
-          echo "## Integration Test Results" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "- Backend Endpoint: ${{ inputs.backend_endpoint }}" >> $GITHUB_STEP_SUMMARY
-          echo "- MCP Endpoint: ${{ inputs.mcp_endpoint || 'Internal (skipped)' }}" >> $GITHUB_STEP_SUMMARY
-          echo "- Environment: ${{ inputs.environment }}" >> $GITHUB_STEP_SUMMARY
+          {
+            echo "## Integration Test Results"
+            echo ""
+            echo "- Backend Endpoint: ${{ inputs.backend_endpoint }}"
+            echo "- MCP Endpoint: ${{ inputs.mcp_endpoint || 'Internal (skipped)' }}"
+            echo "- Environment: ${{ inputs.environment }}"
+            echo "- Reachable: ${{ steps.liveness.outputs.reachable }}"
+            echo "- Outcome: ${{ steps.pytest.outcome }}"
+            echo "- Advisory: ${{ inputs.advisory }}"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      # In advisory mode we surface failures as a warning but keep the job green
+      # so a degraded shared env can't block unrelated PRs to main.
+      - name: Advisory failure note
+        if: >-
+          steps.liveness.outputs.reachable == 'true'
+          && steps.pytest.outcome == 'failure'
+          && inputs.advisory
+        run: |
+          echo "::warning::Integration tests failed against the shared environment, but this is advisory (tests-only PR to main). Not blocking."
+
diff --git a/.github/workflows/orchestrate.yml b/.github/workflows/orchestrate.yml
@@ -95,13 +95,24 @@ permissions:
   pull-requests: write
   id-token: write
 
+# Prevent overlapping runs for the same PR/branch. Never cancel an in-flight
+# run on `main` (cancelling a production deploy mid-apply is dangerous);
+# cancel superseded runs on dev branches / PRs to save minutes and avoid
+# concurrent Terraform state access on the same environment.
+concurrency:
+  group: cicd-${{ github.event.pull_request.number || github.ref }}  # PR number when the event has one, otherwise the ref
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}  # NOTE(review): every ref except main is cancellable, possibly incl. runs that deploy — confirm
+
 
 jobs:
   # ────────────────────────────────────────────────────────────────────
   # Step 0: Determine pipeline mode and target environment
   # ────────────────────────────────────────────────────────────────────
   pipeline-config:
     runs-on: ubuntu-latest
+    timeout-minutes: 5
+    permissions:
+      contents: read
     outputs:
       environment: ${{ steps.config.outputs.environment }}
       full_deploy: ${{ steps.config.outputs.full_deploy }}
@@ -175,6 +186,50 @@ jobs:
           echo "Create PR only: $CREATE_PR_ONLY"
           echo "──────────────────────────────────────"
 
+  # ────────────────────────────────────────────────────────────────────
+  # Fast unit / regression tests (no Azure)
+  #   Deterministic, mock-based tests that validate the agent-framework
+  #   API surface used by every agent. A cheap, fast signal that catches
+  #   breakage (e.g. a dependency upgrade) long before the expensive
+  #   build/deploy path. Runs on PRs and pushes; skipped only for the
+  #   lightweight *-dev "create PR" pass (the resulting PR re-runs them).
+  # ────────────────────────────────────────────────────────────────────
+  unit-tests:
+    name: Unit & Regression Tests (no Azure)
+    needs: pipeline-config
+    if: needs.pipeline-config.outputs.create_pr_only != 'true'  # skipped only on the *-dev "create PR" pass
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    permissions:
+      contents: read  # checkout only; this job requests no id-token
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+
+      - name: Sync application environment
+        working-directory: agentic_ai/applications
+        run: uv sync
+
+      - name: Run agent-framework regression tests
+        working-directory: agentic_ai/applications  # the ../../ paths below are relative to this directory
+        run: >-
+          uv run --with pytest python -m pytest
+          ../../tests/test_agent_framework_1_2_1_regression.py
+          -v --junitxml=../../junit-unit.xml
+
+      - name: Upload results
+        if: always()  # upload the report even when the test step fails
+        uses: actions/upload-artifact@v4
+        with:
+          name: unit-test-results
+          path: junit-unit.xml  # two levels above the pytest working directory, where --junitxml writes
+          if-no-files-found: ignore  # tolerate a missing report (e.g. pytest never ran)
+          retention-days: 14
+
   # ────────────────────────────────────────────────────────────────────
   # Step 0.5: Create PR to int-agentic (*-dev push only)
   #   When a developer pushes to their *-dev branch, we just create
@@ -185,6 +240,10 @@ jobs:
     needs: pipeline-config
     if: needs.pipeline-config.outputs.create_pr_only == 'true'
     runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      pull-requests: write
     steps:
       - name: Create or update PR to int-agentic
         env:
@@ -227,6 +286,10 @@ jobs:
     needs: pipeline-config
     if: needs.pipeline-config.outputs.full_deploy == 'true'
     runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      id-token: write
     environment: ${{ needs.pipeline-config.outputs.environment }}
     steps:
       - name: Azure OIDC Login
@@ -306,6 +369,10 @@ jobs:
       needs.pipeline-config.outputs.full_deploy == 'false'
       && needs.pipeline-config.outputs.create_pr_only == 'false'
     runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      id-token: write
     environment: ${{ needs.pipeline-config.outputs.environment }}
     outputs:
       backend_endpoint: ${{ steps.lookup.outputs.backend_endpoint }}
@@ -363,6 +430,9 @@ jobs:
       backend_endpoint: ${{ needs.deploy-infrastructure.outputs.backend_endpoint || needs.resolve-endpoints.outputs.backend_endpoint }}
       mcp_endpoint: ${{ needs.deploy-infrastructure.outputs.mcp_endpoint || needs.resolve-endpoints.outputs.mcp_endpoint }}
       mcp_internal_only: true
+      # Tests-only mode (PR → main) runs against a pre-existing shared env that
+      # is NOT built from this PR, so a degraded env must not block the PR.
+      advisory: ${{ needs.pipeline-config.outputs.full_deploy == 'false' }}
     secrets: inherit
 
   # ────────────────────────────────────────────────────────────────────
@@ -396,6 +466,10 @@ jobs:
       && github.event_name == 'pull_request'
       && github.base_ref == 'int-agentic'
     runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: write
+      pull-requests: write
     steps:
       - name: Auto-merge PR into int-agentic
         env:

diff --git a/.github/workflows/promote-to-main.yml b/.github/workflows/promote-to-main.yml
@@ -36,9 +36,15 @@ permissions:
   contents: read
   pull-requests: write
 
+# Only the most recent promotion attempt matters; supersede older ones.
+concurrency:
+  group: promote-to-main
+  cancel-in-progress: true
+
 jobs:
   promote:
     runs-on: ubuntu-latest
+    timeout-minutes: 10
     steps:
       - name: Checkout
         uses: actions/checkout@v6

diff --git a/.github/workflows/update-containers.yml b/.github/workflows/update-containers.yml
@@ -35,6 +35,7 @@ jobs:
   update-containers:
     name: Update Container Apps
     runs-on: ubuntu-latest
+    timeout-minutes: 20
     environment: ${{ inputs.environment }}
     permissions:
       id-token: write