diff --git a/.github/workflows/chart.yml b/.github/workflows/chart.yml index 182ea3cfc..e5c780fc0 100644 --- a/.github/workflows/chart.yml +++ b/.github/workflows/chart.yml @@ -47,7 +47,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Login to GitHub Container Registry - uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 866369a99..b586b42c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ on: paths-ignore: [docs/**, "**.md", "**.mdx", "**.png", "**.jpg"] env: - GO_VERSION: '1.25.8' + GO_VERSION: '1.25.9' CERT_MANAGER_VERSION: 'v1.16.2' jobs: @@ -184,7 +184,7 @@ jobs: - name: Upload logs if: always() - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 with: name: e2e-logs-${{ matrix.customized-settings }} path: test/e2e/logs-${{ matrix.customized-settings }}/ diff --git a/.github/workflows/code-lint.yml b/.github/workflows/code-lint.yml index 36adee70f..e3ceb5da1 100644 --- a/.github/workflows/code-lint.yml +++ b/.github/workflows/code-lint.yml @@ -14,7 +14,7 @@ on: env: # Common versions - GO_VERSION: "1.25.8" + GO_VERSION: "1.25.9" jobs: detect-noop: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ab5b97a4e..ccf33b9b8 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -42,7 +42,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4 + uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@c10b8064de6f491fea524254123dbe5e09572f13 # v4 + uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,4 +69,4 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4 + uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4 diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index eab1c9697..8489d593d 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0 + uses: step-security/harden-runner@6c3c2f2c1c457b00c10c4848d6f5491db3b629df # v2.18.0 with: egress-policy: audit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3e43ff244..6709caf80 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ env: HUB_AGENT_IMAGE_NAME: hub-agent MEMBER_AGENT_IMAGE_NAME: member-agent REFRESH_TOKEN_IMAGE_NAME: refresh-token - GO_VERSION: "1.25.8" + GO_VERSION: "1.25.9" jobs: export-registry: @@ -46,7 +46,7 @@ jobs: ref: 
${{ needs.export-registry.outputs.tag }} - name: Login to ghcr.io - uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 with: registry: ghcr.io username: ${{ github.actor }} diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 7f9bd13de..e4836973a 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -18,7 +18,7 @@ env: MEMBER_AGENT_IMAGE_NAME: member-agent REFRESH_TOKEN_IMAGE_NAME: refresh-token - GO_VERSION: '1.25.8' + GO_VERSION: '1.25.9' jobs: export-registry: @@ -47,7 +47,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Login to ${{ env.REGISTRY }} - uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} diff --git a/.github/workflows/upgrade.yml b/.github/workflows/upgrade.yml index 8bcd6e0fd..34e4f0f3b 100644 --- a/.github/workflows/upgrade.yml +++ b/.github/workflows/upgrade.yml @@ -17,7 +17,7 @@ on: paths-ignore: [docs/**, "**.md", "**.mdx", "**.png", "**.jpg"] env: - GO_VERSION: '1.25.8' + GO_VERSION: '1.25.9' jobs: detect-noop: diff --git a/.golangci.yml b/.golangci.yml index f6d620d1d..afad58749 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,6 +1,6 @@ run: timeout: 15m - go: '1.25.8' + go: '1.25.9' linters-settings: stylecheck: diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 7afe7c346..4b0e8000f 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -1,10 +1,11 @@ # The KubeFleet Maintainers -| Maintainer | Organization | GitHub Username | -|----------------|--------------|----------------------------------------------------| -| Ryan Zhang | Microsoft | [@ryanzhang-oss](https://github.com/ryanzhang-oss) | -| Zhiying Lin | Microsoft | [@zhiying-lin](https://github.com/zhiying-lin) | -| Chen Yu | Microsoft | 
[@michaelawyu](https://github.com/michaelawyu) | -| Wei Weng | Microsoft | [@weng271190436](https://github.com/weng271190436) | -| Yetkin Timocin | Microsoft | [@ytimocin](https://github.com/ytimocin) | -| Simon Waight | Microsoft | [@sjwaight](https://github.com/sjwaight) | +| Maintainer | Organization | GitHub Username | +|------------------|--------------|----------------------------------------------------| +| Ryan Zhang | Microsoft | [@ryanzhang-oss](https://github.com/ryanzhang-oss) | +| Zhiying Lin | Microsoft | [@zhiying-lin](https://github.com/zhiying-lin) | +| Chen Yu | Microsoft | [@michaelawyu](https://github.com/michaelawyu) | +| Wei Weng | Microsoft | [@weng271190436](https://github.com/weng271190436) | +| Yetkin Timocin | Microsoft | [@ytimocin](https://github.com/ytimocin) | +| Stéphane Erbrech | Microsoft | [@serbrech](https://github.com/serbrech) | +| Simon Waight | Microsoft | [@sjwaight](https://github.com/sjwaight) | diff --git a/Makefile b/Makefile index ff95693da..e2941acc7 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,7 @@ HUB_SERVER_URL ?= https://172.19.0.2:6443 HUB_KIND_CLUSTER_NAME = hub-testing MEMBER_KIND_CLUSTER_NAME = member-testing MEMBER_CLUSTER_COUNT ?= 3 +JOIN_MEMBERS ?= false # Directories ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) @@ -217,6 +218,18 @@ e2e-tests-custom: setup-clusters ## Run custom E2E tests with labels .PHONY: setup-clusters setup-clusters: ## Set up Kind clusters for E2E testing cd ./test/e2e && chmod +x ./setup.sh && ./setup.sh $(MEMBER_CLUSTER_COUNT) +ifeq ($(JOIN_MEMBERS),true) + $(MAKE) join-members +else + @echo "" + @echo "Clusters are ready but member clusters have not been joined to the hub." 
+ @echo "To join them, run: make join-members" + @echo "Or re-run with: JOIN_MEMBERS=true make setup-clusters" +endif + +.PHONY: join-members +join-members: ## Join member clusters to the hub cluster (run after setup-clusters) + cd ./test/e2e && chmod +x ./join.sh && ./join.sh $(MEMBER_CLUSTER_COUNT) .PHONY: collect-e2e-logs collect-e2e-logs: ## Collect logs from hub and member agent pods after e2e tests diff --git a/apis/placement/v1beta1/commons.go b/apis/placement/v1beta1/commons.go index 3d118dff0..3800d8817 100644 --- a/apis/placement/v1beta1/commons.go +++ b/apis/placement/v1beta1/commons.go @@ -74,6 +74,11 @@ const ( // MemberClusterFinalizer is used to make sure that we handle gc of all the member cluster resources on the hub cluster. MemberClusterFinalizer = FleetPrefix + "membercluster-finalizer" + // MemberNameLabel is a label automatically added to MemberCluster objects + // with the value set to the MemberCluster's name. This enables selecting clusters + // by name in ResourceOverride and ClusterResourceOverride via labelSelector. + MemberNameLabel = FleetPrefix + "member-name" + // WorkFinalizer is used by the work generator to make sure that the binding is not deleted until the work objects // it generates are all deleted, or used by the work controller to make sure the work has been deleted in the member // cluster. diff --git a/charts/README.md b/charts/README.md index ab47fd547..da0068a4b 100644 --- a/charts/README.md +++ b/charts/README.md @@ -9,11 +9,11 @@ This directory contains Helm charts for deploying KubeFleet components. ## Chart Versioning -**Important:** Chart versions match the KubeFleet release versions. When a KubeFleet release is tagged (e.g., `v0.2.1`), the Helm charts are published with the same version (`0.2.1`). +**Important:** Chart versions match the KubeFleet release versions. When a KubeFleet release is tagged (e.g., `v0.3.0`), the Helm charts are published with the same version (`0.3.0`). 
-**Example:** To install KubeFleet v0.2.1, use: +**Example:** To install KubeFleet v0.3.0, use: ```bash -helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent --version 0.2.1 +helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent --version 0.3.0 --namespace fleet-system --create-namespace ``` This ensures consistency between the application version and the chart version, making it easy to know which chart version to use with each KubeFleet release. @@ -43,7 +43,10 @@ helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ helm install member-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/member-agent \ --version VERSION \ --namespace fleet-system \ - --create-namespace + --create-namespace \ + --set config.hubURL=https://<hub-api-server-address> \ + --set config.hubCA=<base64-encoded-hub-ca> \ + --set config.memberClusterName=<member-cluster-name> ``` ### Option 2: Traditional Helm Repository @@ -65,7 +68,10 @@ helm install hub-agent kubefleet/hub-agent \ # Install member-agent helm install member-agent kubefleet/member-agent \ --namespace fleet-system \ - --create-namespace + --create-namespace \ + --set config.hubURL=https://<hub-api-server-address> \ + --set config.hubCA=<base64-encoded-hub-ca> \ + --set config.memberClusterName=<member-cluster-name> ``` ### Installing Specific Versions @@ -73,9 +79,9 @@ helm install member-agent kubefleet/member-agent \ #### OCI Registry ```bash -# Install a specific version from OCI registry (e.g., v0.2.1 release) +# Install a specific version from OCI registry (e.g., v0.3.0 release) helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ - --version 0.2.1 \ + --version 0.3.0 \ --namespace fleet-system \ --create-namespace ``` @@ -86,9 +92,9 @@ helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ # List available versions helm search repo kubefleet --versions -# Install a specific version (e.g., v0.2.1 release) +# Install a specific version (e.g., v0.3.0 release) helm install hub-agent kubefleet/hub-agent \ - --version 0.2.1 \ + 
--version 0.3.0 \ --namespace fleet-system \ --create-namespace ``` @@ -98,13 +104,13 @@ helm install hub-agent kubefleet/hub-agent \ #### OCI Registry ```bash -# Upgrade to a specific version (e.g., v0.2.1) +# Upgrade to a specific version (e.g., v0.3.0) helm upgrade hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ - --version 0.2.1 \ + --version 0.3.0 \ --namespace fleet-system helm upgrade member-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/member-agent \ - --version 0.2.1 \ + --version 0.3.0 \ --namespace fleet-system ``` @@ -137,7 +143,12 @@ For development and testing, you can install charts directly from the local repo ```bash # Install from local path helm install hub-agent ./charts/hub-agent --namespace fleet-system --create-namespace -helm install member-agent ./charts/member-agent --namespace fleet-system --create-namespace +helm install member-agent ./charts/member-agent \ + --namespace fleet-system \ + --create-namespace \ + --set config.hubURL=https://<hub-api-server-address> \ + --set config.hubCA=<base64-encoded-hub-ca> \ + --set config.memberClusterName=<member-cluster-name> ``` ### Linting diff --git a/charts/hub-agent/README.md b/charts/hub-agent/README.md index 11eb8ffed..b49e1bd15 100644 --- a/charts/hub-agent/README.md +++ b/charts/hub-agent/README.md @@ -2,7 +2,7 @@ ## Chart Versioning -Chart versions match the KubeFleet release versions. For example, to install KubeFleet v0.2.1, use chart version `0.2.1`. +Chart versions match the KubeFleet release versions. For example, to install KubeFleet v0.3.0, use chart version `0.3.0`. 
## Install Chart @@ -54,12 +54,16 @@ helm install cert-manager jetstack/cert-manager \ # Then install hub-agent with cert-manager enabled (OCI, specify VERSION) helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ --version VERSION \ + --namespace fleet-system \ + --create-namespace \ --set useCertManager=true \ --set enableWorkload=true \ --set enableWebhook=true # Or using traditional repository helm install hub-agent kubefleet/hub-agent \ + --namespace fleet-system \ + --create-namespace \ --set useCertManager=true \ --set enableWorkload=true \ --set enableWebhook=true @@ -156,12 +160,16 @@ helm install cert-manager jetstack/cert-manager \ # Then install hub-agent with cert-manager enabled (OCI, specify VERSION) helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ --version VERSION \ + --namespace fleet-system \ + --create-namespace \ --set useCertManager=true \ --set enableWorkload=true \ --set enableWebhook=true # Or using traditional repository helm install hub-agent kubefleet/hub-agent \ + --namespace fleet-system \ + --create-namespace \ --set useCertManager=true \ --set enableWorkload=true \ --set enableWebhook=true @@ -177,12 +185,16 @@ Example with custom secret name: # Using OCI registry (specify VERSION) helm install hub-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/hub-agent \ --version VERSION \ + --namespace fleet-system \ + --create-namespace \ --set useCertManager=true \ --set enableWorkload=true \ --set webhookCertSecretName=my-webhook-secret # Using traditional repository helm install hub-agent kubefleet/hub-agent \ + --namespace fleet-system \ + --create-namespace \ --set useCertManager=true \ --set enableWorkload=true \ --set webhookCertSecretName=my-webhook-secret diff --git a/charts/member-agent/README.md b/charts/member-agent/README.md index 6a25c6333..b5429039b 100644 --- a/charts/member-agent/README.md +++ b/charts/member-agent/README.md @@ -2,10 +2,34 @@ ## Chart Versioning 
-Chart versions match the KubeFleet release versions. For example, to install KubeFleet v0.2.1, use chart version `0.2.1`. +Chart versions match the KubeFleet release versions. For example, to install KubeFleet v0.3.0, use chart version `0.3.0`. ## Install Chart +### Prerequisites + +Before installing, collect the following values from the **hub** cluster: + +**`config.hubURL`** — the hub cluster's API server endpoint: + +```console +kubectl config view --raw -o jsonpath='{.clusters[0].cluster.server}' +``` + +**`config.hubCA`** — the hub cluster's certificate authority data (base64-encoded): + +```console +kubectl config view --raw -o jsonpath='{.clusters[0].cluster.certificate-authority-data}' +``` + +If your hub kubeconfig uses a CA file path instead of inline data, encode the file: + +```console +cat /path/to/hub-ca.crt | base64 -w0 +``` + +**`config.memberClusterName`** — the name you want this member cluster to be registered as in the hub. This must match the `MemberCluster` resource name on the hub. + ### Using Published Chart (Recommended) The member-agent chart is published to both GitHub Container Registry (OCI) and GitHub Pages. 
@@ -17,7 +41,10 @@ The member-agent chart is published to both GitHub Container Registry (OCI) and helm install member-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/member-agent \ --version VERSION \ --namespace fleet-system \ - --create-namespace + --create-namespace \ + --set config.hubURL=https://<hub-api-server-address> \ + --set config.hubCA=<base64-encoded-hub-ca> \ + --set config.memberClusterName=<member-cluster-name> ``` #### Option 2: Traditional Helm Repository @@ -28,13 +55,23 @@ helm repo add kubefleet https://kubefleet-dev.github.io/kubefleet/charts helm repo update # Install member-agent (specify --version to pin to a specific release) -helm install member-agent kubefleet/member-agent --namespace fleet-system --create-namespace +helm install member-agent kubefleet/member-agent \ + --namespace fleet-system \ + --create-namespace \ + --set config.hubURL=https://<hub-api-server-address> \ + --set config.hubCA=<base64-encoded-hub-ca> \ + --set config.memberClusterName=<member-cluster-name> ``` ### From Local Source ```console -helm install member-agent ./charts/member-agent/ --namespace fleet-system --create-namespace +helm install member-agent ./charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ + --set config.hubURL=https://<hub-api-server-address> \ + --set config.hubCA=<base64-encoded-hub-ca> \ + --set config.memberClusterName=<member-cluster-name> ``` _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ @@ -64,6 +101,8 @@ helm upgrade member-agent kubefleet/member-agent --namespace fleet-system | resources | The resource request/limits for the container image | limits: "2" CPU, 4Gi, requests: 100m CPU, 128Mi | | namespace | Namespace that this Helm chart is installed on. | `fleet-system` | | logVerbosity | Log level. Uses V logs (klog) | `3` | +| tlsClientInsecure | Skip TLS server certificate verification when the member agent connects to the hub cluster. Leave this `false` unless you explicitly trust the endpoint and understand the risk. | `false` | +| useCAAuth | Use certificate-based authentication for the hub connection instead of the token-based path. 
| `false` | | propertyProvider | The property provider to use with the member agent; if none is specified, the Fleet member agent will start with no property provider (i.e., the agent will expose no cluster properties, and collect only limited resource usage information) | `` | | region | The region where the member cluster resides | `` | | enableNamespaceCollectionInPropertyProvider | Enable namespace collection in the property provider; when enabled, the member agent will collect and report the list of namespaces present in the member cluster to the hub cluster for use in scheduling decisions | `false` | @@ -78,6 +117,12 @@ helm upgrade member-agent kubefleet/member-agent --namespace fleet-system | config.azureCloudConfig | The cloud provider configuration | **required if property provider is set to azure** | +## Hub TLS configuration + +By default, the chart keeps TLS server certificate verification enabled for the member agent's connection to the hub API server (`tlsClientInsecure=false`). This requires a valid `config.hubCA` value — the placeholder in `values.yaml` will cause the agent to fail at startup. See [Prerequisites](#prerequisites) for how to obtain the hub CA data. + +Set `tlsClientInsecure=true` only for explicitly trusted test environments where certificate verification cannot be configured. 
+ ## Override Azure cloud config **If PropertyProvider feature is set to azure, then a cloud configuration is required.** diff --git a/charts/member-agent/values.yaml b/charts/member-agent/values.yaml index 31ca2383c..686f36d6b 100644 --- a/charts/member-agent/values.yaml +++ b/charts/member-agent/values.yaml @@ -65,7 +65,7 @@ secret: azure: clientid: -tlsClientInsecure: true #TODO should be false in the production +tlsClientInsecure: false useCAAuth: false enablePprof: true diff --git a/docker/crd-installer.Dockerfile b/docker/crd-installer.Dockerfile index 302c6e0a1..a8355aa8a 100644 --- a/docker/crd-installer.Dockerfile +++ b/docker/crd-installer.Dockerfile @@ -1,5 +1,5 @@ # Build the crdinstaller binary -FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.8 AS builder +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.9 AS builder ARG GOOS=linux ARG GOARCH=amd64 diff --git a/docker/hub-agent.Dockerfile b/docker/hub-agent.Dockerfile index 5c751257d..cd5253fe5 100644 --- a/docker/hub-agent.Dockerfile +++ b/docker/hub-agent.Dockerfile @@ -1,5 +1,5 @@ # Build the hubagent binary -FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.8 AS builder +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.9 AS builder ARG GOOS=linux ARG GOARCH=amd64 diff --git a/docker/member-agent.Dockerfile b/docker/member-agent.Dockerfile index 33e122e1b..74b19f6a5 100644 --- a/docker/member-agent.Dockerfile +++ b/docker/member-agent.Dockerfile @@ -1,5 +1,5 @@ # Build the memberagent binary -FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.8 AS builder +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.9 AS builder ARG GOOS=linux ARG GOARCH=amd64 diff --git a/docker/refresh-token.Dockerfile b/docker/refresh-token.Dockerfile index f688c9e68..d62a3e1df 100644 --- a/docker/refresh-token.Dockerfile +++ b/docker/refresh-token.Dockerfile @@ -1,5 +1,5 @@ # Build the refreshtoken binary -FROM mcr.microsoft.com/oss/go/microsoft/golang:1.25.8 AS builder +FROM 
mcr.microsoft.com/oss/go/microsoft/golang:1.25.9 AS builder ARG GOOS="linux" ARG GOARCH="amd64" @@ -16,8 +16,10 @@ RUN go mod download COPY cmd/authtoken/main.go main.go COPY pkg/authtoken pkg/authtoken -# Build with CGO enabled and GOEXPERIMENT=systemcrypto for internal usage -RUN echo "Building for GOOS=${GOOS} GOARCH=${GOARCH}" +ARG TARGETARCH + +# Build +RUN echo "Building images with GOOS=${GOOS} GOARCH=${GOARCH}" RUN CGO_ENABLED=1 GOOS=$GOOS GOARCH=$GOARCH GOEXPERIMENT=systemcrypto GO111MODULE=on go build -o refreshtoken main.go # Use Azure Linux distroless base image to package the refreshtoken binary diff --git a/go.mod b/go.mod index 68d96d644..e22d7a8d6 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module go.goms.io/fleet -go 1.25.8 +go 1.25.9 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 diff --git a/hack/Azure/property-based-scheduling.md b/hack/Azure/property-based-scheduling.md index bedb04d42..41645abe9 100644 --- a/hack/Azure/property-based-scheduling.md +++ b/hack/Azure/property-based-scheduling.md @@ -116,6 +116,8 @@ kubectl config use-context $HUB_CLUSTER-admin export REGISTRY=fleetdemo.azurecr.io export TAG=demo helm install hub-agent charts/hub-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set image.pullPolicy=Always \ --set image.repository=$REGISTRY/hub-agent \ --set image.tag=$TAG \ @@ -170,11 +172,15 @@ do done # Install the member agent. 
+export HUB_CA=$(kubectl config view --raw -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER\")].cluster.certificate-authority-data}") for (( i=0; i<3; i++ )); do kubectl config use-context "${MEMBER_CLUSTERS[$i]}-admin" helm install member-agent charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_SERVER_ADDR \ + --set config.hubCA=$HUB_CA \ --set image.repository=$REGISTRY/member-agent \ --set image.tag=$TAG \ --set refreshtoken.repository=$REGISTRY/refresh-token \ diff --git a/hack/Azure/setup/createHubCluster.sh b/hack/Azure/setup/createHubCluster.sh index 9d37a1bc8..7e0fa16f1 100755 --- a/hack/Azure/setup/createHubCluster.sh +++ b/hack/Azure/setup/createHubCluster.sh @@ -23,6 +23,8 @@ export OUTPUT_TYPE="${OUTPUT_TYPE:-type=docker}" echo "Installing hub-agent..." # Install the hub agent helm chart on the hub cluster helm install hub-agent charts/hub-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set image.pullPolicy=Always \ --set image.repository=$REGISTRY/hub-agent \ --set image.tag=$TAG \ diff --git a/hack/Azure/setup/joinMC.sh b/hack/Azure/setup/joinMC.sh index f45b83559..7f4deed70 100755 --- a/hack/Azure/setup/joinMC.sh +++ b/hack/Azure/setup/joinMC.sh @@ -4,6 +4,7 @@ export HUB_CLUSTER="$1" export HUB_CLUSTER_CONTEXT=$(kubectl config view -o jsonpath="{.contexts[?(@.context.cluster==\"$HUB_CLUSTER\")].name}") export HUB_CLUSTER_ADDRESS=$(kubectl config view -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER\")].cluster.server}") +export HUB_CA=$(kubectl config view --raw -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER\")].cluster.certificate-authority-data}") for MC in "${@:2}"; do @@ -79,7 +80,10 @@ helm uninstall member-agent --wait echo "Installing member-agent..." 
helm install member-agent charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_CLUSTER_ADDRESS \ + --set config.hubCA=$HUB_CA \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$FLEET_VERSION \ --set refreshtoken.repository=$REGISTRY/$REFRESH_TOKEN_IMAGE \ diff --git a/hack/membership/join.sh b/hack/membership/join.sh index f84765d51..dd97ed16e 100755 --- a/hack/membership/join.sh +++ b/hack/membership/join.sh @@ -49,6 +49,10 @@ fi [ -z "$HUB_CLUSTER_ADDRESS" ] && echo "Environment variable HUB_CLUSTER_ADDRESS is not set." #&& exit 1 +# Extract the hub cluster CA for secure TLS verification +HUB_CLUSTER_NAME=$(kubectl config view -o jsonpath="{.contexts[?(@.name==\"$HUB_CLUSTER_CONTEXT\")].context.cluster}") +HUB_CA=$(kubectl config view --raw -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER_NAME\")].cluster.certificate-authority-data}") + [ -z "$MEMBER_CLUSTER" ] && echo "Environment variable MEMBER_CLUSTER is not set." #&& exit 1 [ -z "$MEMBER_CLUSTER_CONTEXT" ] && echo "Environment variable MEMBER_CLUSTER_CONTEXT is not set; will use the value of MEMBER_CLUSTER instead." #&& exit 1 @@ -117,7 +121,10 @@ kubectl delete secret hub-kubeconfig-secret --ignore-not-found --wait kubectl create secret generic hub-kubeconfig-secret --from-literal=token=$TOKEN helm uninstall member-agent --ignore-not-found --wait helm install member-agent charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_CLUSTER_ADDRESS \ + --set config.hubCA=$HUB_CA \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$FLEET_VERSION \ --set image.pullPolicy=Never \ diff --git a/hack/membership/joinMC.sh b/hack/membership/joinMC.sh index 38c524fda..0caf2a90f 100755 --- a/hack/membership/joinMC.sh +++ b/hack/membership/joinMC.sh @@ -113,6 +113,8 @@ kubectl create secret generic hub-kubeconfig-secret --from-literal=token=$TOKEN echo "Installing member-agent..." 
helm upgrade --install member-agent charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_CLUSTER_ADDRESS \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$IMAGE_TAG \ diff --git a/hack/perftest/fleetbuilder/deploy-azure-resources.sh b/hack/perftest/fleetbuilder/deploy-azure-resources.sh new file mode 100644 index 000000000..404ca232c --- /dev/null +++ b/hack/perftest/fleetbuilder/deploy-azure-resources.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -e + +RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:?Environment variable RESOURCE_GROUP_NAME is not set} +LOCATION=${LOCATION:?Environment variable LOCATION is not set} +REGISTRY_NAME_WO_SUFFIX=${REGISTRY_NAME_WO_SUFFIX:?Environment variable REGISTRY_NAME_WO_SUFFIX is not set} +VNET_NAME=${VNET_NAME:?Environment variable VNET_NAME is not set} +STORAGE_ACCOUNT_NAME=${STORAGE_ACCOUNT_NAME:?Environment variable STORAGE_ACCOUNT_NAME is not set} +QUEUE_NAME=${QUEUE_NAME:?Environment variable QUEUE_NAME is not set} +CUSTOM_TAGS=${CUSTOM_TAGS:-perf_test=true} + +# Create an Azure resource group. +echo "Creating resource group $RESOURCE_GROUP_NAME in location $LOCATION..." +az group create \ + -n "$RESOURCE_GROUP_NAME" \ + -l "$LOCATION" \ + --tags "$CUSTOM_TAGS" + +# Create an Azure Container Registry. +echo "Creating Azure Container Registry $REGISTRY_NAME_WO_SUFFIX in resource group $RESOURCE_GROUP_NAME..." +az acr create \ + -n "$REGISTRY_NAME_WO_SUFFIX" \ + -g "$RESOURCE_GROUP_NAME" \ + -l "$LOCATION" \ + --sku Basic \ + --tags "$CUSTOM_TAGS" + +# Create an Azure VNet for the host clusters. +echo "Creating VNet $VNET_NAME in resource group $RESOURCE_GROUP_NAME..." +az network vnet create \ + -g "$RESOURCE_GROUP_NAME" \ + -n "$VNET_NAME" \ + --location "$LOCATION" \ + --address-prefixes "10.0.0.0/8" \ + --subnet-name "default" \ + --subnet-prefixes "10.0.0.0/16" \ + --tags "$CUSTOM_TAGS" + +# Create an Azure storage account. 
+echo "Creating storage account $STORAGE_ACCOUNT_NAME in resource group $RESOURCE_GROUP_NAME..." +az storage account create \ + -n "$STORAGE_ACCOUNT_NAME" \ + -g "$RESOURCE_GROUP_NAME" \ + -l "$LOCATION" \ + --sku Standard_LRS \ + --tags "$CUSTOM_TAGS" + +# Create an Azure storage queue. +echo "Creating storage queue $QUEUE_NAME in storage account $STORAGE_ACCOUNT_NAME..." +az storage queue create \ + -n "$QUEUE_NAME" \ + --account-name "$STORAGE_ACCOUNT_NAME" diff --git a/hack/perftest/fleetbuilder/deploy-member-clusters.sh b/hack/perftest/fleetbuilder/deploy-member-clusters.sh new file mode 100644 index 000000000..574f85886 --- /dev/null +++ b/hack/perftest/fleetbuilder/deploy-member-clusters.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -e + +# Check the required environment variables. +RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:?Environment variable RESOURCE_GROUP_NAME is not set} +LOCATION=${LOCATION:?Environment variable LOCATION is not set} +REGISTRY_NAME_WO_SUFFIX=${REGISTRY_NAME_WO_SUFFIX:?Environment variable REGISTRY_NAME_WO_SUFFIX is not set} +MEMBER_CLUSTER_NODE_COUNT=${MEMBER_CLUSTER_NODE_COUNT:-2} +MEMBER_CLUSTER_VM_SIZE=${MEMBER_CLUSTER_VM_SIZE:-Standard_D4s_v3} +CUSTOM_TAGS=${CUSTOM_TAGS:-perf_test=true} + +while true; do + # Retrieve a cluster name from the work queue. + echo "Retrieving cluster name from the work queue..." + CLUSTER_IDX=$(python3 dequeue.py) + if [ -z "$CLUSTER_IDX" ]; then + echo "No more clusters to create. Exiting." + break + fi + CLUSTER_NAME="cluster-$CLUSTER_IDX" + + # Create the AKS cluster. + echo "Creating AKS cluster $CLUSTER_NAME..." + az aks create \ + -g "$RESOURCE_GROUP_NAME" \ + -n "$CLUSTER_NAME" \ + --location "$LOCATION" \ + --node-count "$MEMBER_CLUSTER_NODE_COUNT" \ + --node-vm-size "$MEMBER_CLUSTER_VM_SIZE" \ + --enable-aad \ + --enable-azure-rbac \ + --tier standard \ + --network-plugin azure \ + --attach-acr "$REGISTRY_NAME_WO_SUFFIX" \ + --tags "$CUSTOM_TAGS" + + # Sleep for a short while. 
+ echo "Cluster $CLUSTER_NAME created. Sleeping for 15 seconds before processing the next cluster..." + sleep 15 +done diff --git a/hack/perftest/fleetbuilder/deploy-setup-hub-cluster.sh b/hack/perftest/fleetbuilder/deploy-setup-hub-cluster.sh new file mode 100644 index 000000000..c9fd42966 --- /dev/null +++ b/hack/perftest/fleetbuilder/deploy-setup-hub-cluster.sh @@ -0,0 +1,140 @@ +#!/bin/bash +set -e + +# Check the required environment variables. +RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:?Environment variable RESOURCE_GROUP_NAME is not set} +LOCATION=${LOCATION:?Environment variable LOCATION is not set} +REGISTRY_NAME=${REGISTRY_NAME:?Environment variable REGISTRY_NAME is not set} +REGISTRY_NAME_WO_SUFFIX=${REGISTRY_NAME_WO_SUFFIX:?Environment variable REGISTRY_NAME_WO_SUFFIX is not set} +HUB_CLUSTER_NAME=${HUB_CLUSTER_NAME:-hub} +HUB_CLUSTER_NODE_COUNT=${HUB_CLUSTER_NODE_COUNT:-2} +HUB_CLUSTER_VM_SIZE=${HUB_CLUSTER_VM_SIZE:-Standard_D16s_v3} +CUSTOM_TAGS=${CUSTOM_TAGS:-perf_test=true} + +HUB_AGENT_IMAGE_NAME=${HUB_AGENT_IMAGE_NAME:-hub-agent} +HUB_AGENT_CRD_INSTALLER_IMAGE_NAME=${HUB_AGENT_CRD_INSTALLER_IMAGE_NAME:-crd-installer} +COMMON_CORE_IMAGE_TAG=${COMMON_CORE_IMAGE_TAG:-experimental} + +KUBEFLEET_SRC_REPO=${KUBEFLEET_SRC_REPO:?Environment variable KUBEFLEET_SRC_REPO is not set} + +INSTALL_NETWORKING_AGENTS=${INSTALL_NETWORKING_AGENTS:-true} +INSTALL_NETWORKING_AGENTS_HELM_FLAG_VALUE="false" +if [ "$INSTALL_NETWORKING_AGENTS" = "true" ]; then + FLEET_NETWORKING_SRC_REPO=${FLEET_NETWORKING_SRC_REPO:?Environment variable FLEET_NETWORKING_SRC_REPO is not set} + HUB_NET_AGENT_IMAGE_NAME=${HUB_NET_AGENT_IMAGE_NAME:-hub-net-controller-manager} + HUB_NET_AGENT_CRD_INSTALLER_IMAGE_NAME=${HUB_NET_AGENT_CRD_INSTALLER_IMAGE_NAME:-net-crd-installer} + COMMON_NETWORKING_IMAGE_TAG=${COMMON_NETWORKING_IMAGE_TAG:-experimental} + + INSTALL_NETWORKING_AGENTS_HELM_FLAG_VALUE="true" +fi + + +echo "Creating the AKS cluster $HUB_CLUSTER_NAME in resource group 
$RESOURCE_GROUP_NAME..." +az aks create \ + --resource-group "$RESOURCE_GROUP_NAME" \ + --name "$HUB_CLUSTER_NAME" \ + --location "$LOCATION" \ + --node-count "$HUB_CLUSTER_NODE_COUNT" \ + --node-vm-size "$HUB_CLUSTER_VM_SIZE" \ + --enable-aad \ + --enable-azure-rbac \ + --tier standard \ + --network-plugin azure \ + --attach-acr "$REGISTRY_NAME_WO_SUFFIX" \ + --tags "$CUSTOM_TAGS" + +# Retrieve the hub cluster credential. +echo "Retrieving the credential for hub cluster $HUB_CLUSTER_NAME..." +az aks get-credentials --resource-group "$RESOURCE_GROUP_NAME" --name "$HUB_CLUSTER_NAME" + +# Install the hub agent. +kubectl config use-context "$HUB_CLUSTER_NAME" + +echo "Installing the hub agent in cluster $HUB_CLUSTER_NAME..." +pushd "$KUBEFLEET_SRC_REPO" +helm upgrade hub-agent charts/hub-agent/ \ + --install \ + --set image.pullPolicy=Always \ + --set "image.repository=$REGISTRY_NAME/$HUB_AGENT_IMAGE_NAME" \ + --set "image.tag=$COMMON_CORE_IMAGE_TAG" \ + --set resources.requests.cpu=1 \ + --set resources.requests.memory=1Gi \ + --set resources.limits.cpu=12 \ + --set resources.limits.memory=24Gi \ + --set namespace=fleet-system \ + --set logVerbosity=2 \ + --set logFileMaxSize=100000 \ + --set enableWebhook=true \ + --set enableGuardRail=true \ + --set enableWorkload=false \ + --set webhookClientConnectionType=service \ + --set forceDeleteWaitTime="5m0s" \ + --set clusterUnhealthyThreshold="3m0s" \ + --set networkingAgentsEnabled="$INSTALL_NETWORKING_AGENTS_HELM_FLAG_VALUE" + +popd + +# Install the Kubernetes Prometheus monitoring stack. +# +# Note: if you see 401 Forbidden errors when trying to access the chart, add the Helm chart repository +# with the command `helm repo add prometheus-community https://prometheus-community.github.io/helm-charts` +# and install the chart with `helm upgrade kube-prometheus-stack prometheus-community/kube-prometheus-stack ...` +# instead. +echo "Installing the Kubernetes Prometheus monitoring stack in cluster $HUB_CLUSTER_NAME..." 
+helm upgrade kube-prometheus-stack oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack \ + --version 82.15.1 \ + --install \ + -n monitoring \ + --create-namespace \ + --set prometheus.prometheusSpec.scrapeConfigSelectorNilUsesHelmValues=true \ + --set-json 'prometheus.prometheusSpec.scrapeConfigSelector={"matchLabels":{"prom": "monitoring"}}' + +cat </dev/null | grep -q 'GNU'; then + sed -i "$@" + else + sed -i '' "$@" + fi +} + +# Check the required environment variables. +RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:?Environment variable RESOURCE_GROUP_NAME is not set} +KUBECONFIG_DIR=${KUBECONFIG_DIR:?Environment variable KUBECONFIG_DIR is not set} + +PER_HOST_VCLUSTER_COUNT=${PER_HOST_VCLUSTER_COUNT:-25} + +while true; do + # Retrieve a cluster name from the work queue. + echo "Retrieving cluster name from the work queue..." + CLUSTER_IDX=$(python3 dequeue.py) + if [ -z "$CLUSTER_IDX" ]; then + echo "No more clusters to create. Exiting." + break + fi + CLUSTER_NAME="host-cluster-$CLUSTER_IDX" + + # Retrieve the cluster credential. + az aks get-credentials --resource-group "$RESOURCE_GROUP_NAME" --name "$CLUSTER_NAME" --file "$KUBECONFIG_DIR/$CLUSTER_NAME.kubeconfig" + export KUBECONFIG="$KUBECONFIG_DIR/$CLUSTER_NAME.kubeconfig" + + VCLUSTER_START_IDX=$(( (CLUSTER_IDX - 1) * PER_HOST_VCLUSTER_COUNT + 1 )) + VCLUSTER_END_IDX=$(( CLUSTER_IDX * PER_HOST_VCLUSTER_COUNT )) + echo "Deploying vclusters with indices from $VCLUSTER_START_IDX to $VCLUSTER_END_IDX in cluster $CLUSTER_NAME..." + for i in $(seq $VCLUSTER_START_IDX $VCLUSTER_END_IDX); do + VCLUSTER_NAME="vcluster-$i" + echo "Deploying vcluster $VCLUSTER_NAME in cluster $CLUSTER_NAME..." + vcluster describe "$VCLUSTER_NAME" -n "$VCLUSTER_NAME" > /dev/null 2>&1 || vcluster create "$VCLUSTER_NAME" -n "$VCLUSTER_NAME" --values vcluster.yaml --add=false --connect=false + + echo "Patching the vcluster API server service to use the LoadBalancer type with an internal IP assigned..." 
+ # Each vcluster API service is exposed via an internal LB; this is to conserve public IPs and comply with best + # security practices. The vcluster API server must be accessed via a jumpbox. + kubectl patch svc "$VCLUSTER_NAME" -n "$VCLUSTER_NAME" --type=merge --patch '{"spec": {"type": "LoadBalancer"}, "metadata": {"annotations": {"service.beta.kubernetes.io/azure-load-balancer-internal": "true"}}}' + + echo "Retrieving the internal IP address of the vcluster API server service..." + kubectl wait svc "$VCLUSTER_NAME" -n "$VCLUSTER_NAME" --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=300s + VCLUSTER_API_SERVER_IP=$(kubectl get svc "$VCLUSTER_NAME" -n "$VCLUSTER_NAME" -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + + echo "Retrieving the KUBECONFIG of the vcluster..." + # Wait until the secret appears. + kubectl wait secret "vc-$VCLUSTER_NAME" -n "$VCLUSTER_NAME" --for=create --timeout=300s + kubectl get secret "vc-$VCLUSTER_NAME" -n "$VCLUSTER_NAME" -o jsonpath='{.data.config}' | base64 --decode > "$KUBECONFIG_DIR/$VCLUSTER_NAME.kubeconfig" + + echo "Patching the KUBECONFIG to use the internal IP address of the vcluster API server service..." 
+ sed_in_place "s/https:\/\/localhost:8443/https:\/\/$VCLUSTER_API_SERVER_IP:443/g" "$KUBECONFIG_DIR/$VCLUSTER_NAME.kubeconfig" + done +done diff --git a/hack/perftest/fleetbuilder/dequeue.py b/hack/perftest/fleetbuilder/dequeue.py new file mode 100644 index 000000000..8919d6918 --- /dev/null +++ b/hack/perftest/fleetbuilder/dequeue.py @@ -0,0 +1,24 @@ +import os +from azure.identity import DefaultAzureCredential +from azure.storage.queue import QueueClient + +STORAGE_ACCOUNT_NAME = os.getenv("STORAGE_ACCOUNT_NAME") +if not STORAGE_ACCOUNT_NAME or len(STORAGE_ACCOUNT_NAME) == 0: + raise Exception("Missing environment variable: STORAGE_ACCOUNT_NAME") + +QUEUE_NAME = os.getenv("QUEUE_NAME") +if not QUEUE_NAME or len(QUEUE_NAME) == 0: + raise Exception("Missing environment variable: QUEUE_NAME") + +visibility_timeout = 600 # 10 minutes + +account_url= f"https://{STORAGE_ACCOUNT_NAME}.queue.core.windows.net" +default_cred = DefaultAzureCredential() +queue_client = QueueClient(account_url=account_url, + queue_name=QUEUE_NAME, + credential=default_cred) + +received_msgs = queue_client.receive_messages(max_messages=1, visibility_timeout=visibility_timeout) +for msg in received_msgs: + print(f"{msg.content}") + queue_client.delete_message(msg) diff --git a/hack/perftest/fleetbuilder/enqueue.py b/hack/perftest/fleetbuilder/enqueue.py new file mode 100644 index 000000000..76437370f --- /dev/null +++ b/hack/perftest/fleetbuilder/enqueue.py @@ -0,0 +1,31 @@ +import os +from azure.identity import DefaultAzureCredential +from azure.storage.queue import QueueClient + +STORAGE_ACCOUNT_NAME = os.getenv("STORAGE_ACCOUNT_NAME") +if not STORAGE_ACCOUNT_NAME or len(STORAGE_ACCOUNT_NAME) == 0: + raise Exception("Missing environment variable: STORAGE_ACCOUNT_NAME") + +QUEUE_NAME = os.getenv("QUEUE_NAME") +if not QUEUE_NAME or len(QUEUE_NAME) == 0: + raise Exception("Missing environment variable: QUEUE_NAME") + +START_IDX = os.getenv("START_IDX") +if not START_IDX or len(START_IDX) == 0 
or not START_IDX.isdecimal(): + raise Exception("Missing or invalid environment variable: START_IDX") + +END_IDX = os.getenv("END_IDX") +if not END_IDX or len(END_IDX) == 0 or not END_IDX.isdecimal(): + raise Exception("Missing or invalid environment variable: END_IDX") + +if int(START_IDX) > int(END_IDX): + raise Exception("Invalid environment variables: START_IDX should be less than or equal to END_IDX") + +account_url = f"https://{STORAGE_ACCOUNT_NAME}.queue.core.windows.net" +default_cred = DefaultAzureCredential() +queue_client = QueueClient(account_url=account_url, + queue_name=QUEUE_NAME, + credential=default_cred) + +for i in range(int(START_IDX), int(END_IDX) + 1): + queue_client.send_message(f"{i}") diff --git a/hack/perftest/fleetbuilder/join-member-clusters.sh b/hack/perftest/fleetbuilder/join-member-clusters.sh new file mode 100644 index 000000000..b2e3927aa --- /dev/null +++ b/hack/perftest/fleetbuilder/join-member-clusters.sh @@ -0,0 +1,148 @@ +#!/bin/bash +set -e + +# Check the required environment variables. 
+RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:?Environment variable RESOURCE_GROUP_NAME is not set} +REGISTRY_NAME=${REGISTRY_NAME:?Environment variable REGISTRY_NAME is not set} +KUBECONFIG_DIR=${KUBECONFIG_DIR:?Environment variable KUBECONFIG_DIR is not set} +KUBEFLEET_SRC_REPO=${KUBEFLEET_SRC_REPO:?Environment variable KUBEFLEET_SRC_REPO is not set} + +HUB_CLUSTER_NAME=${HUB_CLUSTER_NAME:-hub} +HUB_CLUSTER_API_SERVER_ADDR=${HUB_CLUSTER_API_SERVER_ADDR:?Environment variable HUB_CLUSTER_API_SERVER_ADDR is not set} + +MEMBER_AGENT_IMAGE_NAME="${MEMBER_AGENT_IMAGE_NAME:-member-agent}" +REFRESH_TOKEN_IMAGE_NAME="${REFRESH_TOKEN_IMAGE_NAME:-refresh-token}" +PROPERTY_PROVIDER="${PROPERTY_PROVIDER:-azure}" +CRD_INSTALLER_IMAGE_NAME="${CRD_INSTALLER_IMAGE_NAME:-crd-installer}" +COMMON_CORE_IMAGE_TAG=${COMMON_CORE_IMAGE_TAG:-experimental} + +INSTALL_NETWORKING_AGENTS=${INSTALL_NETWORKING_AGENTS:-true} +if [ "$INSTALL_NETWORKING_AGENTS" = "true" ]; then + FLEET_NETWORKING_SRC_REPO=${FLEET_NETWORKING_SRC_REPO:?Environment variable FLEET_NETWORKING_SRC_REPO is not set} + MEMBER_NET_AGENT_IMAGE_NAME=${MEMBER_NET_AGENT_IMAGE_NAME:-member-net-controller-manager} + MCS_AGENT_IMAGE_NAME=${MCS_AGENT_IMAGE_NAME:-mcs-controller-manager} + MEMBER_NET_AGENT_CRD_INSTALLER_IMAGE_NAME=${MEMBER_NET_AGENT_CRD_INSTALLER_IMAGE_NAME:-net-crd-installer} + COMMON_NETWORKING_IMAGE_TAG=${COMMON_NETWORKING_IMAGE_TAG:-experimental} +fi + +while true; do + # Retrieve a cluster name from the work queue. + echo "Retrieving cluster name from the work queue..." + CLUSTER_IDX=$(python3 dequeue.py) + if [ -z "$CLUSTER_IDX" ]; then + echo "No more clusters to join. Exiting." + break + fi + CLUSTER_NAME="host-cluster-$CLUSTER_IDX" + + # Retrieve the member cluster credential. 
+ az aks get-credentials --resource-group "$RESOURCE_GROUP_NAME" --name "$CLUSTER_NAME" --file "$KUBECONFIG_DIR/$CLUSTER_NAME.kubeconfig" + KUBECONFIG_PATH="$KUBECONFIG_DIR/$CLUSTER_NAME.kubeconfig" + + # Set up a service account for the member cluster in the hub cluster. + echo "Setting up service account for member cluster $CLUSTER_NAME in the hub cluster..." + kubectl --context "$HUB_CLUSTER_NAME" create serviceaccount "fleet-member-agent-$CLUSTER_NAME" -n fleet-system +cat <= '3.9' +azure-identity==1.25.3; python_version >= '3.9' diff --git a/hack/perftest/fleetbuilder/vcluster.yaml b/hack/perftest/fleetbuilder/vcluster.yaml new file mode 100644 index 000000000..fbcb10de2 --- /dev/null +++ b/hack/perftest/fleetbuilder/vcluster.yaml @@ -0,0 +1,18 @@ +controlPlane: + # For security and simplicity reasons, do NOT expose the API server. + ingress: + enabled: false + host: "" + +# Resource synchronization settings when using host cluster's worker nodes. +sync: + # Resources that sync from virtual cluster to host. + toHost: + serviceAccounts: + enabled: true # Sync ServiceAccounts for operators/controllers. + + # Resources that sync from host to virtual cluster. + fromHost: + nodes: + enabled: true # Sync real nodes instead of fake nodes. + clearImageStatus: true # Remove image data to save resources. 
diff --git a/hack/quickstart/join-member-clusters.ps1 b/hack/quickstart/join-member-clusters.ps1 index 2e229d926..66ea7c319 100644 --- a/hack/quickstart/join-member-clusters.ps1 +++ b/hack/quickstart/join-member-clusters.ps1 @@ -120,6 +120,14 @@ if ($parsedHubURL.Scheme -ne "https") { Fail-WithHelp "hub control plane URL must use https" } +# Extract the hub cluster CA for secure TLS verification +$jsonpath = "{.clusters[?(@.name==""$HubClusterName"")].cluster.certificate-authority-data}" +$HubCA = kubectl config view --raw -o "jsonpath=$jsonpath" +if ([string]::IsNullOrWhiteSpace($HubCA)) { + Write-Error "Failed to extract certificate authority data from hub cluster '$HubClusterName'" + exit 1 +} + foreach ($memberClusterName in $MemberClusterNames) { if ([string]::IsNullOrWhiteSpace($memberClusterName)) { Fail-WithHelp "member cluster name cannot be empty" @@ -196,6 +204,7 @@ spec: helm install member-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/member-agent ` --version $KubefleetVersion ` --set "config.hubURL=$HubControlPlaneURL" ` + --set "config.hubCA=$HubCA" ` --set "config.memberClusterName=$memberClusterName" ` --set logFileMaxSize=100000 ` --namespace fleet-system ` diff --git a/hack/quickstart/join-member-clusters.sh b/hack/quickstart/join-member-clusters.sh index e941dca56..27ddb25fc 100755 --- a/hack/quickstart/join-member-clusters.sh +++ b/hack/quickstart/join-member-clusters.sh @@ -95,6 +95,10 @@ export SERVICE_ACCOUNT="$MEMBER_CLUSTER_NAME-hub-cluster-access" echo "Switching into hub cluster context..." kubectl config use $HUB_CLUSTER_NAME + +# Extract the hub cluster CA for secure TLS verification +export HUB_CA=$(kubectl config view --raw -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER_NAME\")].cluster.certificate-authority-data}") + # The service account can, in theory, be created in any namespace; for simplicity reasons, # here you will use the namespace reserved by Fleet installation, `fleet-system`. 
# @@ -149,6 +153,7 @@ echo "Installing member-agent..." helm install member-agent oci://ghcr.io/kubefleet-dev/kubefleet/charts/member-agent \ --version $KUBEFLEET_VERSION \ --set config.hubURL=$HUB_CONTROL_PLANE_URL \ + --set config.hubCA=$HUB_CA \ --set config.memberClusterName=$MEMBER_CLUSTER_NAME \ --set logFileMaxSize=100000 \ --namespace fleet-system \ diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller.go index 2145931ca..d22bfc159 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller.go @@ -105,6 +105,13 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim klog.ErrorS(err, "Failed to add the finalizer to member cluster", "memberCluster", mcObjRef) return runtime.Result{}, err } + + // Ensure the member cluster has the name label for override selection by member name. + if err := r.ensureMemberNameLabel(ctx, &mc); err != nil { + klog.ErrorS(err, "Failed to ensure the name label on member cluster", "memberCluster", mcObjRef) + return runtime.Result{}, err + } + currentIMC, err := r.getInternalMemberCluster(ctx, mc.GetName()) if err != nil { return runtime.Result{}, err @@ -273,6 +280,22 @@ func (r *Reconciler) ensureFinalizer(ctx context.Context, mc *clusterv1beta1.Mem return r.Update(ctx, mc, client.FieldOwner(utils.MCControllerFieldManagerName)) } +// ensureMemberNameLabel makes sure that the member cluster has a label with its own name. +// This enables selecting clusters by name in ResourceOverride and ClusterResourceOverride via labelSelector. 
+func (r *Reconciler) ensureMemberNameLabel(ctx context.Context, mc *clusterv1beta1.MemberCluster) error { + if mc.Labels != nil && mc.Labels[placementv1beta1.MemberNameLabel] == mc.Name { + return nil + } + + if mc.Labels == nil { + mc.Labels = make(map[string]string) + } + mc.Labels[placementv1beta1.MemberNameLabel] = mc.Name + + klog.InfoS("Adding the name label to the member cluster", "memberCluster", klog.KObj(mc)) + return r.Update(ctx, mc, client.FieldOwner(utils.MCControllerFieldManagerName)) +} + // join takes the actions to make hub cluster ready for member cluster to join, including: // - Create namespace for member cluster // - Create role & role bindings for member cluster to access hub cluster diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller_integration_test.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller_integration_test.go index c2a579d32..17d194eba 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller_integration_test.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller_integration_test.go @@ -117,6 +117,28 @@ var _ = Describe("Test MemberCluster Controller", func() { Expect(joinCondition.Reason).To(Equal(reasonMemberClusterJoined)) }) + It("should add the member cluster name label", func() { + Expect(k8sClient.Get(ctx, memberClusterNamespacedName, &mc)).Should(Succeed()) + Expect(mc.Labels).NotTo(BeNil()) + Expect(mc.Labels[placementv1beta1.MemberNameLabel]).To(Equal(memberClusterName)) + }) + + It("should restore the member cluster name label if removed", func() { + Expect(k8sClient.Get(ctx, memberClusterNamespacedName, &mc)).Should(Succeed()) + delete(mc.Labels, placementv1beta1.MemberNameLabel) + Expect(k8sClient.Update(ctx, &mc)).Should(Succeed()) + + By("trigger reconcile to restore the name label") + result, err := r.Reconcile(ctx, ctrl.Request{ + NamespacedName: memberClusterNamespacedName, + }) + Expect(result).Should(Equal(ctrl.Result{})) + 
Expect(err).Should(Succeed()) + + Expect(k8sClient.Get(ctx, memberClusterNamespacedName, &mc)).Should(Succeed()) + Expect(mc.Labels[placementv1beta1.MemberNameLabel]).To(Equal(memberClusterName)) + }) + It("should relay cluster resource usage + properties, and property provider conditions", func() { Expect(k8sClient.Get(ctx, memberClusterNamespacedName, &mc)).Should(Succeed()) diff --git a/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go b/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go index 0b97522c9..f393a3623 100644 --- a/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go +++ b/pkg/controllers/membercluster/v1beta1/membercluster_controller_test.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "strings" "testing" "time" @@ -63,6 +64,157 @@ const ( propertyProviderConditionMessage2 = "property provider condition 2 message" ) +func TestEnsureMemberNameLabel(t *testing.T) { + tests := map[string]struct { + r *Reconciler + memberCluster *clusterv1beta1.MemberCluster + wantLabels map[string]string + wantErr string + }{ + "label already present with correct value": { + r: &Reconciler{ + Client: &test.MockClient{ + MockUpdate: test.NewMockUpdateFn(fmt.Errorf("update should not be called when label is already correct")), + }, + }, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mc1", + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: "mc1", + }, + }, + }, + wantLabels: map[string]string{ + placementv1beta1.MemberNameLabel: "mc1", + }, + }, + "no labels at all": { + r: &Reconciler{ + Client: &test.MockClient{ + MockUpdate: func(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return nil + }, + }, + }, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mc1", + }, + }, + wantLabels: map[string]string{ + placementv1beta1.MemberNameLabel: "mc1", + }, + }, + "labels exist but name 
label is missing": { + r: &Reconciler{ + Client: &test.MockClient{ + MockUpdate: func(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return nil + }, + }, + }, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mc1", + Labels: map[string]string{ + "existing-label": "value", + }, + }, + }, + wantLabels: map[string]string{ + "existing-label": "value", + placementv1beta1.MemberNameLabel: "mc1", + }, + }, + "label present with wrong value": { + r: &Reconciler{ + Client: &test.MockClient{ + MockUpdate: func(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return nil + }, + }, + }, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mc1", + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: "wrong-name", + }, + }, + }, + wantLabels: map[string]string{ + placementv1beta1.MemberNameLabel: "mc1", + }, + }, + "update error": { + r: &Reconciler{ + Client: &test.MockClient{ + MockUpdate: func(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return errors.New("update failed") + }, + }, + }, + memberCluster: &clusterv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mc1", + }, + }, + wantErr: "update failed", + }, + } + + for testName, tt := range tests { + t.Run(testName, func(t *testing.T) { + err := tt.r.ensureMemberNameLabel(context.Background(), tt.memberCluster) + if tt.wantErr == "" { + if err != nil { + t.Errorf("ensureMemberNameLabel() = %v, want nil", err) + } + if diff := cmp.Diff(tt.wantLabels, tt.memberCluster.Labels); diff != "" { + t.Errorf("ensureMemberNameLabel() labels mismatch (-want +got):\n%s", diff) + } + } else { + if err == nil { + t.Errorf("ensureMemberNameLabel() = nil, want error containing %q", tt.wantErr) + } else if !strings.Contains(err.Error(), tt.wantErr) { + t.Errorf("ensureMemberNameLabel() = %v, want error containing %q", err, tt.wantErr) + } + } + }) + 
} +} + +func TestReconcileEnsureMemberNameLabelError(t *testing.T) { + // This test verifies that Reconcile returns an error when ensureMemberNameLabel fails. + // The MC already has the finalizer (so ensureFinalizer is a no-op) but no label. + updateErr := errors.New("update failed") + r := &Reconciler{ + Client: &test.MockClient{ + MockGet: func(ctx context.Context, key client.ObjectKey, obj client.Object) error { + mc := obj.(*clusterv1beta1.MemberCluster) + mc.Name = "mc1" + mc.Finalizers = []string{placementv1beta1.MemberClusterFinalizer} + // No labels, so ensureMemberNameLabel will attempt an update. + return nil + }, + MockUpdate: func(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + return updateErr + }, + }, + recorder: record.NewFakeRecorder(10), + } + result, err := r.Reconcile(context.Background(), ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "mc1"}, + }) + if result != (ctrl.Result{}) { + t.Errorf("Reconcile() result = %v, want %v", result, ctrl.Result{}) + } + if err == nil || !strings.Contains(err.Error(), updateErr.Error()) { + t.Errorf("Reconcile() error = %v, want error containing %q", err, updateErr.Error()) + } +} + func TestSyncNamespace(t *testing.T) { tests := map[string]struct { r *Reconciler diff --git a/pkg/controllers/rollout/controller_integration_test.go b/pkg/controllers/rollout/controller_integration_test.go index 56c23358d..1a6582177 100644 --- a/pkg/controllers/rollout/controller_integration_test.go +++ b/pkg/controllers/rollout/controller_integration_test.go @@ -1926,7 +1926,8 @@ func generateResourceSnapshot(namespace, testRPName string, resourceIndex int, i func generateMemberCluster(idx int, clusterName string) *clusterv1beta1.MemberCluster { clusterLabels := map[string]string{ - "index": strconv.Itoa(idx), + "index": strconv.Itoa(idx), + placementv1beta1.MemberNameLabel: clusterName, } return &clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ diff --git 
a/pkg/controllers/workgenerator/controller_integration_test.go b/pkg/controllers/workgenerator/controller_integration_test.go index b76f8d945..8ed516447 100644 --- a/pkg/controllers/workgenerator/controller_integration_test.go +++ b/pkg/controllers/workgenerator/controller_integration_test.go @@ -130,7 +130,8 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, Labels: map[string]string{ - "override": "true", + "override": "true", + placementv1beta1.MemberNameLabel: memberClusterName, }, }, } @@ -1732,6 +1733,9 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: memberClusterName, + }, }, } Expect(k8sClient.Create(ctx, &memberCluster)).Should(Succeed(), "Failed to create member cluster") @@ -1854,6 +1858,9 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: memberClusterName, + }, }, } Expect(k8sClient.Create(ctx, &memberCluster)).Should(Succeed(), "Failed to create member cluster") @@ -1980,6 +1987,9 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: memberClusterName, + }, }, } Expect(k8sClient.Create(ctx, &memberCluster)).Should(Succeed(), "Failed to create member cluster") @@ -2108,6 +2118,9 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, + Labels: 
map[string]string{ + placementv1beta1.MemberNameLabel: memberClusterName, + }, }, } Expect(k8sClient.Create(ctx, &memberCluster)).Should(Succeed(), "Failed to create member cluster") @@ -2239,6 +2252,9 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: memberClusterName, + }, }, } Expect(k8sClient.Create(ctx, &memberCluster)).Should(Succeed(), "Failed to create member cluster") @@ -2613,6 +2629,9 @@ var _ = Describe("Test Work Generator Controller for clusterResourcePlacement", memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: memberClusterName, + }, }, } Expect(k8sClient.Create(ctx, &memberCluster)).Should(Succeed(), "Failed to create member cluster") @@ -4751,7 +4770,8 @@ var _ = Describe("Test Work Generator Controller for ResourcePlacement", func() ObjectMeta: metav1.ObjectMeta{ Name: memberClusterName, Labels: map[string]string{ - "override": "true", + "override": "true", + placementv1beta1.MemberNameLabel: memberClusterName, }, }, } diff --git a/pkg/scheduler/watchers/membercluster/suite_test.go b/pkg/scheduler/watchers/membercluster/suite_test.go index bf9fb2fa9..7e16c0e9d 100644 --- a/pkg/scheduler/watchers/membercluster/suite_test.go +++ b/pkg/scheduler/watchers/membercluster/suite_test.go @@ -66,6 +66,9 @@ var ( memberCluster := &clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: name, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: name, + }, }, } controllerutil.AddFinalizer(memberCluster, placementv1beta1.MemberClusterFinalizer) diff --git a/test/e2e/actuals_test.go b/test/e2e/actuals_test.go index d5d452737..6a212c07a 100644 --- a/test/e2e/actuals_test.go +++ b/test/e2e/actuals_test.go @@ -1256,8 +1256,30 @@ 
func crpStatusWithOverrideUpdatedActual( wantClusterResourceOverrides []string, wantResourceOverrides []placementv1beta1.NamespacedName) func() error { crpKey := types.NamespacedName{Name: fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess())} + croMap := make(map[string][]string, len(wantSelectedClusters)) + roMap := make(map[string][]placementv1beta1.NamespacedName, len(wantSelectedClusters)) + for _, name := range wantSelectedClusters { + croMap[name] = wantClusterResourceOverrides + roMap[name] = wantResourceOverrides + } return placementStatusWithOverrideUpdatedActual(crpKey, wantSelectedResourceIdentifiers, wantSelectedClusters, - wantObservedResourceIndex, wantClusterResourceOverrides, wantResourceOverrides) + wantObservedResourceIndex, croMap, roMap) +} + +// crpStatusWithSingleClusterOverrideUpdatedActual is like crpStatusWithOverrideUpdatedActual but +// applies the override only to overrideCluster while other selected clusters receive no override. +func crpStatusWithSingleClusterOverrideUpdatedActual( + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantSelectedClusters []string, + wantObservedResourceIndex string, + overrideCluster string, + wantClusterResourceOverrides []string, + wantResourceOverrides []placementv1beta1.NamespacedName) func() error { + crpKey := types.NamespacedName{Name: fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess())} + croMap := map[string][]string{overrideCluster: wantClusterResourceOverrides} + roMap := map[string][]placementv1beta1.NamespacedName{overrideCluster: wantResourceOverrides} + return placementStatusWithOverrideUpdatedActual(crpKey, wantSelectedResourceIdentifiers, wantSelectedClusters, + wantObservedResourceIndex, croMap, roMap) } func rpStatusWithOverrideUpdatedActual( @@ -1267,8 +1289,14 @@ func rpStatusWithOverrideUpdatedActual( wantClusterResourceOverrides []string, wantResourceOverrides []placementv1beta1.NamespacedName) func() error { rpKey := types.NamespacedName{Name: 
fmt.Sprintf(rpNameTemplate, GinkgoParallelProcess()), Namespace: appNamespace().Name} + croMap := make(map[string][]string, len(wantSelectedClusters)) + roMap := make(map[string][]placementv1beta1.NamespacedName, len(wantSelectedClusters)) + for _, name := range wantSelectedClusters { + croMap[name] = wantClusterResourceOverrides + roMap[name] = wantResourceOverrides + } return placementStatusWithOverrideUpdatedActual(rpKey, wantSelectedResourceIdentifiers, wantSelectedClusters, - wantObservedResourceIndex, wantClusterResourceOverrides, wantResourceOverrides) + wantObservedResourceIndex, croMap, roMap) } func placementStatusWithOverrideUpdatedActual( @@ -1276,22 +1304,24 @@ func placementStatusWithOverrideUpdatedActual( wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, wantSelectedClusters []string, wantObservedResourceIndex string, - wantClusterResourceOverrides []string, - wantResourceOverrides []placementv1beta1.NamespacedName, + wantClusterResourceOverrides map[string][]string, + wantResourceOverrides map[string][]placementv1beta1.NamespacedName, ) func() error { return func() error { placement, err := retrievePlacement(placementKey) if err != nil { return err } - hasOverride := len(wantResourceOverrides) > 0 || len(wantClusterResourceOverrides) > 0 + hasOverride := false var wantPlacementStatus []placementv1beta1.PerClusterPlacementStatus for _, name := range wantSelectedClusters { + perClusterHasOverride := len(wantClusterResourceOverrides[name]) > 0 || len(wantResourceOverrides[name]) > 0 + hasOverride = hasOverride || perClusterHasOverride wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.PerClusterPlacementStatus{ ClusterName: name, - Conditions: perClusterRolloutCompletedConditions(placement.GetGeneration(), true, hasOverride), - ApplicableResourceOverrides: wantResourceOverrides, - ApplicableClusterResourceOverrides: wantClusterResourceOverrides, + Conditions: 
perClusterRolloutCompletedConditions(placement.GetGeneration(), true, perClusterHasOverride), + ApplicableResourceOverrides: wantResourceOverrides[name], + ApplicableClusterResourceOverrides: wantClusterResourceOverrides[name], ObservedResourceIndex: wantObservedResourceIndex, }) } diff --git a/test/e2e/join.sh b/test/e2e/join.sh new file mode 100755 index 000000000..9046d3718 --- /dev/null +++ b/test/e2e/join.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +# This script joins member clusters to the hub cluster by creating MemberCluster CRs. +# It should be run after setup.sh has completed. + +set -o errexit +set -o nounset +set -o pipefail + +export KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}" +MEMBER_CLUSTER_COUNT=$1 + +HUB_CLUSTER="hub" +declare -a MEMBER_CLUSTERS=() + +for (( i=1;i<=MEMBER_CLUSTER_COUNT;i++ )) +do + MEMBER_CLUSTERS+=("cluster-$i") +done + +# Verify that the hub cluster exists. +if ! kind get clusters 2>/dev/null | grep -q "^${HUB_CLUSTER}$"; then + echo "Error: Hub cluster '${HUB_CLUSTER}' not found. Run 'make setup-clusters' first." + exit 1 +fi + +# Verify that the member clusters exist. +for i in "${MEMBER_CLUSTERS[@]}" +do + if ! kind get clusters 2>/dev/null | grep -q "^${i}$"; then + echo "Error: Member cluster '${i}' not found. Run 'make setup-clusters' first." + exit 1 + fi +done + +# Switch to the hub cluster context. +kind export kubeconfig --name "$HUB_CLUSTER" + +# Verify that fleet-system namespace exists on the hub cluster. +if ! kubectl get namespace fleet-system &>/dev/null; then + echo "Error: Namespace 'fleet-system' not found on hub cluster. Run 'make setup-clusters' first." + exit 1 +fi + +# Create MemberCluster CRs for each member cluster. +echo "Creating MemberCluster CRs on the hub cluster..." + +for i in "${MEMBER_CLUSTERS[@]}" +do + echo "Creating MemberCluster CR for kind-${i}..." + cat </dev/null || echo "") + if [ "$JOINED" = "True" ]; then + echo "kind-${i} has joined successfully." 
+ break + fi + if [ $SECONDS -ge $TIMEOUT ]; then + echo "Error: Timed out waiting for kind-${i} to join after ${TIMEOUT}s." + kubectl get membercluster "kind-${i}" -o yaml 2>/dev/null || true + exit 1 + fi + sleep 2 + done +done + +echo "All member clusters have joined the hub cluster." diff --git a/test/e2e/placement_cro_test.go b/test/e2e/placement_cro_test.go index e61a2861f..6c32034e2 100644 --- a/test/e2e/placement_cro_test.go +++ b/test/e2e/placement_cro_test.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/types" placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/test/e2e/framework" ) // TODO: Add more tests to cover the negative cases that override failed, need to make sure @@ -952,3 +953,110 @@ var _ = Context("creating clusterResourceOverride for a namespace-only CRP", Ord } }) }) + +// This test verifies that the automatic member-name label can be used to target +// a specific cluster by name in a ClusterResourceOverride via labelSelector. +var _ = Context("creating clusterResourceOverride selecting a single cluster by the member-name label", Ordered, func() { + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + croName := fmt.Sprintf(croNameTemplate, GinkgoParallelProcess()) + croSnapShotName := fmt.Sprintf(placementv1beta1.OverrideSnapshotNameFmt, croName, 0) + + // Target only the first member cluster by its name label. + var targetCluster *framework.Cluster + + BeforeAll(func() { + targetCluster = allMemberClusters[0] + + By("creating work resources") + createWorkResources() + + // Create the CRO that selects a single cluster using the member-name label. 
+ cro := &placementv1beta1.ClusterResourceOverride{ + ObjectMeta: metav1.ObjectMeta{ + Name: croName, + }, + Spec: placementv1beta1.ClusterResourceOverrideSpec{ + ClusterResourceSelectors: workResourceSelector(), + Policy: &placementv1beta1.OverridePolicy{ + OverrideRules: []placementv1beta1.OverrideRule{ + { + ClusterSelector: &placementv1beta1.ClusterSelector{ + ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + placementv1beta1.MemberNameLabel: targetCluster.ClusterName, + }, + }, + }, + }, + }, + JSONPatchOverrides: []placementv1beta1.JSONPatchOverride{ + { + Operator: placementv1beta1.JSONPatchOverrideOpAdd, + Path: "/metadata/annotations", + Value: apiextensionsv1.JSON{Raw: []byte(fmt.Sprintf(`{"%s": "%s"}`, croTestAnnotationKey, croTestAnnotationValue))}, + }, + }, + }, + }, + }, + }, + } + By(fmt.Sprintf("creating clusterResourceOverride %s targeting cluster %s", croName, targetCluster.ClusterName)) + Expect(hubClient.Create(ctx, cro)).To(Succeed(), "Failed to create clusterResourceOverride %s", croName) + + // Wait for the CRO snapshot to be created before the CRP. + Eventually(func() error { + croSnap := &placementv1beta1.ClusterResourceOverrideSnapshot{} + return hubClient.Get(ctx, types.NamespacedName{Name: croSnapShotName}, croSnap) + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create CRO snapshot") + + // Create the CRP. 
+ createCRP(crpName) + }) + + AfterAll(func() { + By(fmt.Sprintf("deleting placement %s and related resources", crpName)) + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + + By(fmt.Sprintf("deleting clusterResourceOverride %s", croName)) + cleanupClusterResourceOverride(croName) + }) + + It("should update CRP status as expected", func() { + wantCRONames := []string{croSnapShotName} + // The CRO targets only one cluster via clusterSelector, so only the targeted + // cluster should have ApplicableClusterResourceOverrides set. + crpStatusUpdatedActual := crpStatusWithSingleClusterOverrideUpdatedActual( + workResourceIdentifiers(), allMemberClusterNames, "0", + targetCluster.ClusterName, wantCRONames, nil) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) + }) + + It("should place the selected resources on all member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters) + + It("should have override annotation only on the targeted cluster", func() { + wantAnnotations := map[string]string{croTestAnnotationKey: croTestAnnotationValue} + Expect(validateAnnotationOfWorkNamespaceOnCluster(targetCluster, wantAnnotations)).Should(Succeed(), + "Failed to override annotation on targeted cluster %s", targetCluster.ClusterName) + Expect(validateAnnotationOfConfigMapOnCluster(targetCluster, wantAnnotations)).Should(Succeed(), + "Failed to override configmap annotation on targeted cluster %s", targetCluster.ClusterName) + }) + + It("should not have override annotation on other clusters", func() { + for _, cluster := range allMemberClusters { + if cluster.ClusterName == targetCluster.ClusterName { + continue + } + Consistently(func() error { + return validateNamespaceNoAnnotationOnCluster(cluster, croTestAnnotationKey) + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), + "Override annotation should not be present on non-targeted cluster %s", 
cluster.ClusterName) + Consistently(func() error { + return validateConfigMapNoAnnotationKeyOnCluster(cluster, croTestAnnotationKey) + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), + "Override annotation should not be present on non-targeted cluster %s", cluster.ClusterName) + } + }) +}) diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index ab87a9fce..937757a4a 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -205,6 +205,9 @@ done kind export kubeconfig --name $HUB_CLUSTER HUB_SERVER_URL="https://$(docker inspect $HUB_CLUSTER-control-plane --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}'):6443" +# Extract the hub cluster CA for secure TLS verification +HUB_CA=$(kubectl config view --raw -o jsonpath='{.clusters[?(@.name=="kind-'$HUB_CLUSTER'")].cluster.certificate-authority-data}') + # Install the member agents and related components # Note that the work applier in the member agent are set to requeue at max. every 5 seconds instead of using the default # exponential backoff behavior; this is to accommodate some of the timeout settings in the E2E test specs. 
@@ -216,6 +219,7 @@ do --namespace fleet-system \ --create-namespace \ --set config.hubURL=$HUB_SERVER_URL \ + --set config.hubCA=$HUB_CA \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$TAG \ --set refreshtoken.repository=$REGISTRY/$REFRESH_TOKEN_IMAGE \ @@ -241,6 +245,7 @@ do --namespace fleet-system \ --create-namespace \ --set config.hubURL=$HUB_SERVER_URL \ + --set config.hubCA=$HUB_CA \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$TAG \ --set refreshtoken.repository=$REGISTRY/$REFRESH_TOKEN_IMAGE \ diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index 7fd410a78..4f0432bcf 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -285,6 +285,11 @@ func checkIfMemberClusterHasJoined(memberCluster *framework.Cluster) { return fmt.Errorf("agent status diff (-got, +want): %s", diff) } + // Verify the member cluster name label is set. + if mcObj.Labels[placementv1beta1.MemberNameLabel] != memberCluster.ClusterName { + return fmt.Errorf("member cluster name label = %q, want %q", mcObj.Labels[placementv1beta1.MemberNameLabel], memberCluster.ClusterName) + } + return nil }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Member cluster has not joined yet") } diff --git a/test/scheduler/utils_test.go b/test/scheduler/utils_test.go index caecae117..7a07004cd 100644 --- a/test/scheduler/utils_test.go +++ b/test/scheduler/utils_test.go @@ -217,6 +217,9 @@ func createMemberCluster(name string, taints []clusterv1beta1.Taint) { memberCluster := clusterv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: name, + Labels: map[string]string{ + placementv1beta1.MemberNameLabel: name, + }, }, Spec: clusterv1beta1.MemberClusterSpec{ Identity: rbacv1.Subject{ diff --git a/test/upgrade/setup.sh b/test/upgrade/setup.sh index 7f5d31576..93e03658a 100755 --- a/test/upgrade/setup.sh +++ b/test/upgrade/setup.sh @@ -68,6 +68,8 @@ done # Install the hub agent to the hub cluster. 
kind export kubeconfig --name $HUB_CLUSTER helm install hub-agent charts/hub-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set image.pullPolicy=Never \ --set image.repository=$REGISTRY/$HUB_AGENT_IMAGE \ --set image.tag=$IMAGE_TAG \ @@ -119,12 +121,18 @@ done kind export kubeconfig --name $HUB_CLUSTER HUB_SERVER_URL="https://$(docker inspect $HUB_CLUSTER-control-plane --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}'):6443" +# Extract the hub cluster CA for secure TLS verification +HUB_CA=$(kubectl config view --raw -o jsonpath='{.clusters[?(@.name=="kind-'$HUB_CLUSTER'")].cluster.certificate-authority-data}') + # Install the member agents and related components. for (( i=0; i<${MEMBER_CLUSTER_COUNT}; i++ )); do kind export kubeconfig --name "${MEMBER_CLUSTERS[$i]}" helm install member-agent charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_SERVER_URL \ + --set config.hubCA=$HUB_CA \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$IMAGE_TAG \ --set crdInstaller.enabled=true \ diff --git a/test/upgrade/upgrade.sh b/test/upgrade/upgrade.sh index 831e45255..0e420e630 100755 --- a/test/upgrade/upgrade.sh +++ b/test/upgrade/upgrade.sh @@ -57,6 +57,7 @@ if [ -n "$UPGRADE_HUB_SIDE" ]; then echo "Upgrading the hub agent in the hub cluster..." kind export kubeconfig --name $HUB_CLUSTER helm upgrade hub-agent charts/hub-agent/ \ + --namespace fleet-system \ --set image.pullPolicy=Never \ --set image.repository=$REGISTRY/$HUB_AGENT_IMAGE \ --set image.tag=$IMAGE_TAG \ @@ -86,6 +87,7 @@ if [ -n "$UPGRADE_MEMBER_SIDE" ]; then do kind export kubeconfig --name "${MEMBER_CLUSTERS[$i]}" helm upgrade member-agent charts/member-agent/ \ + --namespace fleet-system \ --set config.hubURL=$HUB_SERVER_URL \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$IMAGE_TAG \