diff --git a/api/v1alpha1/clickhousecluster_types.go b/api/v1alpha1/clickhousecluster_types.go
index ab38a7f0..9cabf663 100644
--- a/api/v1alpha1/clickhousecluster_types.go
+++ b/api/v1alpha1/clickhousecluster_types.go
@@ -199,6 +199,11 @@ type ClickHouseClusterStatus struct {
// +optional
// +operator-sdk:csv:customresourcedefinitions:type=status
Version string `json:"version,omitempty"`
+ // VersionProbeRevision is the image hash of the last successful version probe.
+ // When this matches the current image hash, the cached Version is used directly.
+ // +optional
+ // +operator-sdk:csv:customresourcedefinitions:type=status
+ VersionProbeRevision string `json:"versionProbeRevision,omitempty"`
}
// ClickHouseCluster is the Schema for the `clickhouseclusters` API.
diff --git a/api/v1alpha1/keepercluster_types.go b/api/v1alpha1/keepercluster_types.go
index bfa74fed..0ec022a7 100644
--- a/api/v1alpha1/keepercluster_types.go
+++ b/api/v1alpha1/keepercluster_types.go
@@ -160,6 +160,11 @@ type KeeperClusterStatus struct {
// +optional
// +operator-sdk:csv:customresourcedefinitions:type=status
Version string `json:"version,omitempty"`
+ // VersionProbeRevision is the image hash of the last successful version probe.
+ // When this matches the current image hash, the cached Version is used directly.
+ // +optional
+ // +operator-sdk:csv:customresourcedefinitions:type=status
+ VersionProbeRevision string `json:"versionProbeRevision,omitempty"`
}
// KeeperCluster is the Schema for the `keeperclusters` API.
diff --git a/config/crd/bases/clickhouse.com_clickhouseclusters.yaml b/config/crd/bases/clickhouse.com_clickhouseclusters.yaml
index 1b46c3b1..befc2e87 100644
--- a/config/crd/bases/clickhouse.com_clickhouseclusters.yaml
+++ b/config/crd/bases/clickhouse.com_clickhouseclusters.yaml
@@ -6963,6 +6963,11 @@ spec:
description: Version indicates the version reported by the container
image.
type: string
+ versionProbeRevision:
+ description: |-
+ VersionProbeRevision is the image hash of the last successful version probe.
+ When this matches the current image hash, the cached Version is used directly.
+ type: string
type: object
type: object
served: true
diff --git a/config/crd/bases/clickhouse.com_keeperclusters.yaml b/config/crd/bases/clickhouse.com_keeperclusters.yaml
index de7907b7..66e7383d 100644
--- a/config/crd/bases/clickhouse.com_keeperclusters.yaml
+++ b/config/crd/bases/clickhouse.com_keeperclusters.yaml
@@ -6875,6 +6875,11 @@ spec:
description: Version indicates the version reported by the container
image.
type: string
+ versionProbeRevision:
+ description: |-
+ VersionProbeRevision is the image hash of the last successful version probe.
+ When this matches the current image hash, the cached Version is used directly.
+ type: string
type: object
type: object
served: true
diff --git a/dist/chart/templates/crd/clickhouseclusters.clickhouse.com.yaml b/dist/chart/templates/crd/clickhouseclusters.clickhouse.com.yaml
index 0a1f1a65..3e92b3cf 100644
--- a/dist/chart/templates/crd/clickhouseclusters.clickhouse.com.yaml
+++ b/dist/chart/templates/crd/clickhouseclusters.clickhouse.com.yaml
@@ -6966,6 +6966,11 @@ spec:
description: Version indicates the version reported by the container
image.
type: string
+ versionProbeRevision:
+ description: |-
+ VersionProbeRevision is the image hash of the last successful version probe.
+ When this matches the current image hash, the cached Version is used directly.
+ type: string
type: object
type: object
served: true
diff --git a/dist/chart/templates/crd/keeperclusters.clickhouse.com.yaml b/dist/chart/templates/crd/keeperclusters.clickhouse.com.yaml
index e20b0c8c..71b32265 100644
--- a/dist/chart/templates/crd/keeperclusters.clickhouse.com.yaml
+++ b/dist/chart/templates/crd/keeperclusters.clickhouse.com.yaml
@@ -6878,6 +6878,11 @@ spec:
description: Version indicates the version reported by the container
image.
type: string
+ versionProbeRevision:
+ description: |-
+ VersionProbeRevision is the image hash of the last successful version probe.
+ When this matches the current image hash, the cached Version is used directly.
+ type: string
type: object
type: object
served: true
diff --git a/docs/api_reference.md b/docs/api_reference.md
index fa6f5a90..ae28df20 100644
--- a/docs/api_reference.md
+++ b/docs/api_reference.md
@@ -78,6 +78,7 @@ ClickHouseClusterStatus defines the observed state of ClickHouseCluster.
| `updateRevision` | string | UpdateRevision indicates latest requested ClickHouseCluster spec revision. | true | |
| `observedGeneration` | integer | ObservedGeneration indicates latest generation observed by controller. | true | |
| `version` | string | Version indicates the version reported by the container image. | false | |
+| `versionProbeRevision` | string | VersionProbeRevision is the image hash of the last successful version probe.
When this matches the current image hash, the cached Version is used directly. | false | |
Appears in:
- [ClickHouseCluster](#clickhousecluster)
@@ -278,6 +279,7 @@ KeeperClusterStatus defines the observed state of KeeperCluster.
| `updateRevision` | string | CurrentRevision indicates latest requested KeeperCluster spec revision. | true | |
| `observedGeneration` | integer | ObservedGeneration indicates latest generation observed by controller. | true | |
| `version` | string | Version indicates the version reported by the container image. | false | |
+| `versionProbeRevision` | string | VersionProbeRevision is the image hash of the last successful version probe.
When this matches the current image hash, the cached Version is used directly. | false | |
Appears in:
- [KeeperCluster](#keepercluster)
diff --git a/internal/controller/clickhouse/controller_test.go b/internal/controller/clickhouse/controller_test.go
index 6b9fe73e..4b3a19b6 100644
--- a/internal/controller/clickhouse/controller_test.go
+++ b/internal/controller/clickhouse/controller_test.go
@@ -182,6 +182,12 @@ var _ = When("reconciling ClickHouseCluster", Ordered, func() {
}
Expect(suite.Client.Update(ctx, updatedCR)).To(Succeed())
+ // Clear the cached version probe revision to force the probe to re-run,
+ // since the image didn't change but the overrides did.
+ Expect(suite.Client.Get(ctx, cr.NamespacedName(), updatedCR)).To(Succeed())
+ updatedCR.Status.VersionProbeRevision = ""
+ Expect(suite.Client.Status().Update(ctx, updatedCR)).To(Succeed())
+
// Delete old job so new one is created with overrides.
for _, j := range jobs.Items {
Expect(suite.Client.Delete(ctx, &j, client.PropagationPolicy(metav1.DeletePropagationBackground))).To(Succeed())
diff --git a/internal/controller/clickhouse/sync.go b/internal/controller/clickhouse/sync.go
index 9323ec69..1ff56f0d 100644
--- a/internal/controller/clickhouse/sync.go
+++ b/internal/controller/clickhouse/sync.go
@@ -414,6 +414,8 @@ func (r *clickhouseReconciler) reconcileVersionProbe(ctx context.Context, log ct
PodTemplate: r.Cluster.Spec.PodTemplate,
ContainerTemplate: r.Cluster.Spec.ContainerTemplate,
VersionProbe: r.Cluster.Spec.VersionProbeTemplate,
+ CachedVersion: r.Cluster.Status.Version,
+ CachedRevision: r.Cluster.Status.VersionProbeRevision,
})
if err != nil {
return chctrl.StepResult{}, fmt.Errorf("run version probe: %w", err)
@@ -422,6 +424,7 @@ func (r *clickhouseReconciler) reconcileVersionProbe(ctx context.Context, log ct
r.versionProbe = probeResult
if probeResult.Completed() {
r.Cluster.Status.Version = probeResult.Version
+ r.Cluster.Status.VersionProbeRevision = probeResult.Revision
}
return chctrl.StepContinue(), nil
diff --git a/internal/controller/keeper/sync.go b/internal/controller/keeper/sync.go
index 72d69f90..a4385d2a 100644
--- a/internal/controller/keeper/sync.go
+++ b/internal/controller/keeper/sync.go
@@ -216,13 +216,18 @@ func (r *keeperReconciler) reconcileClusterRevisions(ctx context.Context, log ct
PodTemplate: r.Cluster.Spec.PodTemplate,
ContainerTemplate: r.Cluster.Spec.ContainerTemplate,
VersionProbe: r.Cluster.Spec.VersionProbeTemplate,
+ CachedVersion: r.Cluster.Status.Version,
+ CachedRevision: r.Cluster.Status.VersionProbeRevision,
})
if err != nil {
return chctrl.StepResult{}, fmt.Errorf("run version probe: %w", err)
}
r.versionProbe = probeResult
- r.Cluster.Status.Version = r.versionProbe.Version
+ if probeResult.Completed() {
+ r.Cluster.Status.Version = probeResult.Version
+ r.Cluster.Status.VersionProbeRevision = probeResult.Revision
+ }
if r.Checker != nil {
cond, event := chctrl.GetUpgradeCondition(*r.Checker, r.versionProbe, r.Cluster.Spec.UpgradeChannel)
diff --git a/internal/controller/versionprobe.go b/internal/controller/versionprobe.go
index 379fb502..f6e71163 100644
--- a/internal/controller/versionprobe.go
+++ b/internal/controller/versionprobe.go
@@ -42,6 +42,10 @@ type VersionProbeConfig struct {
ContainerTemplate v1.ContainerTemplateSpec
// VersionProbe is the user-provided override for the version probe Job.
VersionProbe *v1.VersionProbeTemplate
+ // CachedVersion is the previously detected version stored in CR Status.
+ CachedVersion string
+ // CachedRevision is the image hash that produced CachedVersion.
+ CachedRevision string
}
// VersionProbeResult holds the outcome of a version probe reconciliation.
@@ -52,6 +56,8 @@ type VersionProbeResult struct {
Pending bool
// Err if version probe failed it contains the error.
Err error
+ // Revision is the image hash of the probe, set on successful completion.
+ Revision string
}
// Completed returns true if probe completed successfully with a detected version, false otherwise.
@@ -61,12 +67,27 @@ func (r *VersionProbeResult) Completed() bool {
// VersionProbe manages a one-time Job to detect the version from a container image.
// Returns the version string when available, or empty string if the Job is pending/running.
+// When CachedRevision matches the current image hash and CachedVersion is non-empty,
+// returns the cached version immediately without creating or reading a Job.
func (rm *ResourceManager) VersionProbe(
ctx context.Context,
log controllerutil.Logger,
cfg VersionProbeConfig,
) (VersionProbeResult, error) {
- job, err := rm.buildVersionProbeJob(cfg)
+ // Compute revision once — used for cache check, Job name, and result.
+ revision, err := imageRevision(cfg)
+ if err != nil {
+ return VersionProbeResult{}, fmt.Errorf("compute image revision: %w", err)
+ }
+
+ // Fast path: return cached version when image hasn't changed.
+ // Intentionally skips cleanupVersionProbeJobs and Job/Pod API calls.
+ // Orphaned Jobs from previous images are cleaned on next cache miss.
+ if cfg.CachedRevision == revision && cfg.CachedVersion != "" {
+ return VersionProbeResult{Version: cfg.CachedVersion, Revision: revision}, nil
+ }
+
+ job, err := rm.buildVersionProbeJob(cfg, revision)
if err != nil {
return VersionProbeResult{}, fmt.Errorf("build version probe job: %w", err)
}
@@ -124,7 +145,7 @@ func (rm *ResourceManager) VersionProbe(
return VersionProbeResult{Err: err}, nil
}
- return VersionProbeResult{Version: version}, nil
+ return VersionProbeResult{Version: version, Revision: revision}, nil
}
// GetVersionSyncCondition evaluates the VersionInSync condition based on the probe result and replica versions.
@@ -209,7 +230,27 @@ func (rm *ResourceManager) cleanupVersionProbeJobs(ctx context.Context, log cont
}
}
-func (rm *ResourceManager) buildVersionProbeJob(cfg VersionProbeConfig) (batchv1.Job, error) {
+// imageRevision computes a hash of the image and pull policy to use as the cache key
+// and Job name suffix. Extracted from buildVersionProbeJob to allow VersionProbe to
+// compute it once and pass it through.
+func imageRevision(cfg VersionProbeConfig) (string, error) {
+ type imageKey struct {
+ Image string
+ PullPolicy corev1.PullPolicy
+ }
+
+ hash, err := controllerutil.DeepHashObject(imageKey{
+ Image: cfg.ContainerTemplate.Image.String(),
+ PullPolicy: cfg.ContainerTemplate.ImagePullPolicy,
+ })
+ if err != nil {
+ return "", fmt.Errorf("hash image key: %w", err)
+ }
+
+ return hash, nil
+}
+
+func (rm *ResourceManager) buildVersionProbeJob(cfg VersionProbeConfig, revision string) (batchv1.Job, error) {
job := batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Namespace: rm.owner.GetNamespace(),
@@ -269,21 +310,7 @@ func (rm *ResourceManager) buildVersionProbeJob(cfg VersionProbeConfig) (batchv1
return batchv1.Job{}, fmt.Errorf("set version probe job controller reference: %w", err)
}
- // Recreate successful probe only on Image or PullPolicy changes
- type imageKey struct {
- Image string
- PullPolicy corev1.PullPolicy
- }
-
- imageHash, err := controllerutil.DeepHashObject(imageKey{
- Image: cfg.ContainerTemplate.Image.String(),
- PullPolicy: cfg.ContainerTemplate.ImagePullPolicy,
- })
- if err != nil {
- return batchv1.Job{}, fmt.Errorf("hash version probe job image: %w", err)
- }
-
- job.Name = fmt.Sprintf("%s-version-probe-%s", rm.specificName, imageHash[:8])
+ job.Name = fmt.Sprintf("%s-version-probe-%s", rm.specificName, revision[:8])
// Set reserved labels after overrides to ensure they are not modified by user overrides.
job.Labels = controllerutil.MergeMaps(job.Labels, map[string]string{
diff --git a/internal/controller/versionprobe_test.go b/internal/controller/versionprobe_test.go
index 73ffc440..2c6e73f4 100644
--- a/internal/controller/versionprobe_test.go
+++ b/internal/controller/versionprobe_test.go
@@ -1,6 +1,8 @@
package controller
import (
+ "context"
+
"github.com/google/go-cmp/cmp"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -8,8 +10,15 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+ "k8s.io/client-go/tools/events"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/fake"
+ "sigs.k8s.io/controller-runtime/pkg/log/zap"
v1 "github.com/ClickHouse/clickhouse-operator/api/v1alpha1"
+ "github.com/ClickHouse/clickhouse-operator/internal/controllerutil"
)
// baseJob builds a minimal operator-generated Job for testing overrides.
@@ -261,3 +270,135 @@ var _ = Describe("patchResource with jobSchema (version probe overrides)", func(
}))
})
})
+
+// setupProbeTest creates a fake Controller, owner CR, and ResourceManager
+// for testing VersionProbe() with a fake Kubernetes client.
+func setupProbeTest() (ResourceManager, controllerutil.Logger) {
+ scheme := runtime.NewScheme()
+ Expect(clientgoscheme.AddToScheme(scheme)).To(Succeed())
+ Expect(v1.AddToScheme(scheme)).To(Succeed())
+
+ builder := fake.NewClientBuilder().WithScheme(scheme)
+
+ fakeClient := builder.Build()
+ recorder := events.NewFakeRecorder(32)
+ log := controllerutil.NewLogger(zap.NewRaw(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
+
+ owner := &v1.ClickHouseCluster{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: "default",
+ Name: "test",
+ UID: "test-uid",
+ },
+ }
+
+ cc := &fakeController{client: fakeClient, scheme: scheme, recorder: recorder}
+
+ return NewResourceManager(cc, owner), log
+}
+
+// fakeController implements the Controller interface for unit tests.
+type fakeController struct {
+ client client.Client
+ scheme *runtime.Scheme
+ recorder events.EventRecorder
+}
+
+func (f *fakeController) GetClient() client.Client { return f.client }
+func (f *fakeController) GetScheme() *runtime.Scheme { return f.scheme }
+func (f *fakeController) GetRecorder() events.EventRecorder { return f.recorder }
+
+// probeCfg returns a VersionProbeConfig with the given image and cache values.
+func probeCfg(image, cachedVersion, cachedRevision string) VersionProbeConfig {
+ return VersionProbeConfig{
+ Binary: "clickhouse-server",
+ ContainerTemplate: v1.ContainerTemplateSpec{
+ Image: v1.ContainerImage{Repository: image, Tag: "latest"},
+ },
+ CachedVersion: cachedVersion,
+ CachedRevision: cachedRevision,
+ }
+}
+
+var _ = Describe("VersionProbe caching", func() {
+ It("should return cached version on cache hit without creating a Job", func(ctx context.Context) {
+ rm, log := setupProbeTest()
+
+ cfg := probeCfg("clickhouse/clickhouse-server", "", "")
+
+ By("running the first probe to get the revision")
+
+ revision, err := imageRevision(cfg)
+ Expect(err).NotTo(HaveOccurred())
+
+ By("setting up cache fields as if a previous probe succeeded")
+
+ cfg.CachedVersion = "25.3.1.1"
+ cfg.CachedRevision = revision
+
+ result, err := rm.VersionProbe(ctx, log, cfg)
+ Expect(err).NotTo(HaveOccurred())
+
+ By("verifying it returned the cached version without creating a Job")
+ Expect(result.Version).To(Equal("25.3.1.1"))
+ Expect(result.Revision).To(Equal(revision))
+ Expect(result.Pending).To(BeFalse())
+ Expect(result.Completed()).To(BeTrue())
+
+ By("verifying no Jobs were created")
+
+ var jobs batchv1.JobList
+ Expect(rm.ctrl.GetClient().List(ctx, &jobs, client.InNamespace("default"))).To(Succeed())
+ Expect(jobs.Items).To(BeEmpty())
+ })
+
+ It("should miss cache when CachedVersion is empty even if revision matches", func(ctx context.Context) {
+ rm, log := setupProbeTest()
+
+ cfg := probeCfg("clickhouse/clickhouse-server", "", "")
+ revision, err := imageRevision(cfg)
+ Expect(err).NotTo(HaveOccurred())
+
+ By("setting revision but leaving version empty (fresh CR)")
+
+ cfg.CachedRevision = revision
+ cfg.CachedVersion = ""
+
+ result, err := rm.VersionProbe(ctx, log, cfg)
+ Expect(err).NotTo(HaveOccurred())
+
+ By("verifying probe was created (cache miss)")
+ Expect(result.Pending).To(BeTrue())
+ Expect(result.Version).To(BeEmpty())
+
+ By("verifying a Job was created")
+
+ var jobs batchv1.JobList
+ Expect(rm.ctrl.GetClient().List(ctx, &jobs, client.InNamespace("default"))).To(Succeed())
+ Expect(jobs.Items).To(HaveLen(1))
+ })
+
+ It("should miss cache when image changes", func(ctx context.Context) {
+ rm, log := setupProbeTest()
+
+ By("computing revision for the original image")
+
+ originalCfg := probeCfg("clickhouse/clickhouse-server", "", "")
+ originalRevision, err := imageRevision(originalCfg)
+ Expect(err).NotTo(HaveOccurred())
+
+ By("creating config with a different image but the old revision cached")
+
+ cfg := probeCfg("custom-registry/clickhouse-server", "25.3.1.1", originalRevision)
+
+ result, err := rm.VersionProbe(ctx, log, cfg)
+ Expect(err).NotTo(HaveOccurred())
+
+ By("verifying a new probe Job was created (cache miss due to image change)")
+ Expect(result.Pending).To(BeTrue())
+
+ var jobs batchv1.JobList
+ Expect(rm.ctrl.GetClient().List(ctx, &jobs, client.InNamespace("default"))).To(Succeed())
+ Expect(jobs.Items).To(HaveLen(1))
+ })
+})