diff --git a/oeps/0010-kv-cache-server-component/README.md b/oeps/0010-kv-cache-server-component/README.md index 27bc3721..864d8962 100644 --- a/oeps/0010-kv-cache-server-component/README.md +++ b/oeps/0010-kv-cache-server-component/README.md @@ -18,7 +18,7 @@ to the pool. - [Naming](#naming) - [Deployment Modes](#deployment-modes) - [Provider and Backend Model](#provider-and-backend-model) - - [External Provider Inputs](#external-provider-inputs) + - [Provider Design Inputs](#provider-design-inputs) - [User Stories](#user-stories) - [Story 1: Pre-create a Node-local LMCache Pool](#story-1-pre-create-a-node-local-lmcache-pool) - [Story 2: Bind an InferenceService to an Existing Pool](#story-2-bind-an-inferenceservice-to-an-existing-pool) @@ -95,11 +95,12 @@ The initial alpha supports four `KVCachePool` deployment modes: 3. `DistributedStore` 4. `ProviderManaged` -`NodeLocal` covers LMCache-style one-cache-server-per-node deployments. +`RawDeployment` covers simple OME-managed Kubernetes Deployment-backed pool +workloads. `NodeLocal` covers LMCache-style one-cache-server-per-node +deployments. `DistributedStore` covers coordinated store systems such as Mooncake master/store deployments. `ProviderManaged` covers providers that expose their -own controller and CRD, such as LMCache's `LMCacheEngine`. `External` binding -to infrastructure that OME does not manage is intentionally deferred. +own controller and CRD, such as LMCache's `LMCacheEngine`. ## Motivation @@ -135,10 +136,12 @@ namespace. alpha. 4. Add `ServingRuntime.spec.kvCacheConnectors` for runtime-side connector support. -5. Keep the pool API provider-neutral and extensible to LMCache, Mooncake, - NIXL-backed configurations, and future providers. -6. Reuse existing OME workload configuration types where appropriate, - including `PodSpec`, `RunnerSpec`, and `ComponentExtensionSpec`. +5. 
Keep the pool API provider-neutral and extensible to LMCache CPU RAM, + Mooncake, NIXL-backed configurations, and future providers. +6. Reuse existing OME workload configuration patterns where appropriate, + especially `PodSpec`, `ComponentExtensionSpec`, and Kubernetes container + fields, without embedding serving-runner-only abstractions such as + `RunnerSpec`. 7. Keep all pool pod/container configuration under `spec.workloads[]`. 8. Publish normalized connection information in `KVCachePool.status`. 9. Preserve existing `InferenceService` behavior when `spec.kvCachePool` is @@ -244,9 +247,6 @@ Mode meanings: - `ProviderManaged`: OME owns the `KVCachePool` intent but delegates provider implementation resources to a provider controller or provider CRD. -`External` is deferred until OME defines a clear status, ownership, and -validation contract for infrastructure it does not manage. - ### Provider and Backend Model The API distinguishes provider from backend. @@ -256,14 +256,14 @@ provider may expose connection metadata, create provider workloads, or translate pool intent into a provider-native CR. `provider.backends[]` identifies storage or transfer backends used underneath -the provider. For example, LMCache may be the provider while Mooncake or NIXL -is configured as a backend. +the provider. For example, LMCache may be the provider while CPU RAM, Mooncake, +or NIXL is configured as a backend. Provider-specific configuration belongs under `provider.config`. Backend-specific configuration belongs under `provider.backends[].config`. There is no giant top-level `providerConfig` bag. -### External Provider Inputs +### Provider Design Inputs The API shape is influenced by these provider designs: @@ -272,9 +272,9 @@ The API shape is influenced by these provider designs: 2. [LMCache operator](https://docs.lmcache.ai/mp/operator.html) motivates `ProviderManaged`, where OME reconciles provider-native resources and reflects their connection status. -3. 
[LMCache storage backends](https://docs.lmcache.ai/kv_cache/storage_backends/mooncake.html) +3. [LMCache storage backends](https://docs.lmcache.ai/kv_cache/storage_backends/local_storage.html) motivate separating **provider** from **backend** because LMCache can use - Mooncake or NIXL beneath the LMCache integration layer. + CPU RAM, Mooncake, or NIXL beneath the LMCache integration layer. 4. [Mooncake Store](https://kvcache-ai.github.io/Mooncake/design/mooncake-store.html) motivates `DistributedStore` and named pool workloads such as `master` and `store`. @@ -437,10 +437,6 @@ type KVCacheProviderSpec struct { // +required Name KVCacheProvider `json:"name"` - // Version optionally constrains the provider implementation version. - // +optional - Version *string `json:"version,omitempty"` - // Backends identifies storage or transfer backends used by the provider. // +optional // +listType=map @@ -452,7 +448,7 @@ type KVCacheProviderSpec struct { Config *runtime.RawExtension `json:"config,omitempty"` } -// +kubebuilder:validation:Enum=Local;Mooncake;NIXL;Redis;Filesystem +// +kubebuilder:validation:Enum=Local;CPURAM;Mooncake;NIXL;Redis type KVCacheBackendType string type KVCacheBackendSpec struct { @@ -473,7 +469,9 @@ type KVCacheBackendSpec struct { Cache policy: ```go -// +kubebuilder:validation:Enum=LRU;LFU;FIFO;ProviderDefault +// KVCacheEvictionPolicy is the desired cache eviction behavior. Leaving the +// field unset selects the provider's default policy. +// +kubebuilder:validation:Enum=LRU;LFU;FIFO type KVCacheEvictionPolicy string type KVCachePolicySpec struct { @@ -482,10 +480,6 @@ type KVCachePolicySpec struct { // +optional Capacity *resource.Quantity `json:"capacity,omitempty"` - // TTLSeconds optionally limits cache entry lifetime. - // +optional - TTLSeconds *int64 `json:"ttlSeconds,omitempty"` - // EvictionPolicy is the desired eviction behavior. 
// +optional EvictionPolicy *KVCacheEvictionPolicy `json:"evictionPolicy,omitempty"` @@ -493,10 +487,6 @@ type KVCachePolicySpec struct { // ChunkSize is a provider-neutral chunk/page/block size hint. // +optional ChunkSize *resource.Quantity `json:"chunkSize,omitempty"` - - // Keyspace optionally scopes generated cache keys. - // +optional - Keyspace *string `json:"keyspace,omitempty"` } ``` @@ -510,20 +500,23 @@ type KVCachePoolWorkloadSpec struct { Name string `json:"name"` // PodSpec provides pod-level customization for this pool workload. + // Container configuration is expressed through PodSpec.Containers. // +optional PodSpec `json:",inline"` - // ComponentExtensionSpec reuses OME workload knobs such as replicas, - // autoscaling, labels, annotations, PDB, and deployment strategy. - // +optional + // ComponentExtensionSpec provides replicas, autoscaling, labels, + // annotations, and PodDisruptionBudget configuration for this workload. ComponentExtensionSpec `json:",inline"` - - // Runner customizes the primary container for this workload. - // +optional - Runner *RunnerSpec `json:"runner,omitempty"` } ``` +`KVCachePoolWorkloadSpec` intentionally composes the existing +`ComponentExtensionSpec` instead of adding a parallel pool-specific scaling +shape. That keeps `minReplicas`, `maxReplicas`, `scaleMetric`, `scaleTarget`, +`kedaConfig`, labels, annotations, PDB, and deployment strategy behavior aligned +with OME's existing component API. Provider adapters remain responsible for +validating which extension fields are meaningful for each deployment mode. + #### InferenceService Extension Add `KVCachePool *KVCachePoolRef` to `InferenceServiceSpec`: @@ -589,8 +582,11 @@ type KVCacheConnectorSpec struct { // +listType=atomic DeploymentModes []KVCachePoolDeploymentMode `json:"deploymentModes,omitempty"` - // Components provides component-specific connector configuration. + // Components provides component-specific connector configuration. 
Keys + // must be one of the InferenceService ComponentType values (engine, + // decoder, router, predictor); other keys are rejected at admission. // +optional + // +kubebuilder:validation:XValidation:rule="self.all(k, k in ['engine','decoder','router','predictor'])",message="components key must be one of engine, decoder, router, predictor" Components map[ComponentType]KVCacheConnectorComponentSpec `json:"components,omitempty"` } @@ -612,26 +608,33 @@ type KVCacheConnectorComponentSpec struct { } type KVCacheConnectorConfig struct { - // ConnectorClass names the runtime connector implementation, such as - // LMCacheConnectorV1. + // ConnectorClass maps to "kv_connector". // +optional ConnectorClass *string `json:"connectorClass,omitempty"` - // Role describes the component role understood by the runtime adapter, such - // as kv_both, kv_producer, or kv_consumer. + // Role maps to "kv_role". // +optional Role *string `json:"role,omitempty"` - // ConnectionRefName optionally selects a named connection entry published - // by KVCachePool status. + // ExtraConfig maps to "kv_connector_extra_config". // +optional - ConnectionRefName *string `json:"connectionRefName,omitempty"` + // +kubebuilder:pruning:PreserveUnknownFields + ExtraConfig *runtime.RawExtension `json:"extraConfig,omitempty"` + + // ConfigMapRef sources the full --kv-transfer-config JSON from a + // ConfigMap; when set, the inline fields are ignored. + // +optional + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` } ``` -`KVCacheConnectorConfig` is intentionally typed. It is not a -`runtime.RawExtension`. Provider-specific escaping remains on the pool provider -and backend specs. +`KVCacheConnectorConfig` keeps common runtime connector intent typed while +allowing narrowly scoped escape hatches for runtime-native transfer config. +`extraConfig` is limited to the nested connector extra-config payload. 
+`configMapRef` is for advanced runtimes that need to own the complete +`--kv-transfer-config` JSON; when it is set, the inline fields are ignored. +Provider-specific pool configuration still belongs under `provider.config` or +`provider.backends[].config`. #### Status @@ -641,10 +644,6 @@ and backend specs. type KVCachePoolStatus struct { duckv1.Status `json:",inline"` - // Phase is a coarse lifecycle summary. - // +optional - Phase KVCachePoolPhase `json:"phase,omitempty"` - // Connection contains normalized connection information consumed by // ServingRuntime connector adapters. // +optional @@ -689,8 +688,8 @@ type KVCachePoolPortStatus struct { type KVCachePoolWorkloadStatus struct { Name string `json:"name"` - ReadyReplicas int32 `json:"readyReplicas,omitempty"` - DesiredReplicas int32 `json:"desiredReplicas,omitempty"` + ReadyReplicas int32 `json:"readyReplicas"` + DesiredReplicas int32 `json:"desiredReplicas"` } ``` @@ -712,7 +711,7 @@ spec: name: LMCache backends: - name: local-memory - type: Local + type: CPURAM config: mode: Multiprocess endpointDiscovery: NodeHostIP @@ -731,33 +730,34 @@ spec: - name: shm hostPath: path: /dev/shm - runner: - image: lmcache/standalone:nightly - command: - - /opt/venv/bin/lmcache - args: - - server - ports: - - name: transfer - containerPort: 6555 - hostPort: 6555 - - name: http - containerPort: 8080 - hostPort: 8080 - - name: metrics - containerPort: 9090 - hostPort: 9090 - readinessProbe: - httpGet: - path: /healthcheck - port: http - resources: - requests: - cpu: "4" - memory: 64Gi - limits: - cpu: "8" - memory: 80Gi + containers: + - name: server + image: lmcache/standalone:nightly + command: + - /opt/venv/bin/lmcache + args: + - server + ports: + - name: transfer + containerPort: 6555 + hostPort: 6555 + - name: http + containerPort: 8080 + hostPort: 8080 + - name: metrics + containerPort: 9090 + hostPort: 9090 + readinessProbe: + httpGet: + path: /healthcheck + port: http + resources: + requests: + cpu: "4" + memory: 
64Gi + limits: + cpu: "8" + memory: 80Gi ``` #### LMCache ProviderManaged Pool @@ -783,12 +783,13 @@ spec: - name: server nodeSelector: node-type: gpu - runner: - image: lmcache/standalone:nightly - resources: - requests: - cpu: "4" - memory: 64Gi + containers: + - name: server + image: lmcache/standalone:nightly + resources: + requests: + cpu: "4" + memory: 64Gi ``` The provider adapter translates the generic pool and workload intent into the @@ -816,40 +817,42 @@ spec: workloads: - name: master minReplicas: 1 - runner: - image: mooncake/mooncake-transfer-engine:latest - command: - - mooncake_master - args: - - --enable_http_metadata_server=true - - --http_metadata_server_host=0.0.0.0 - - --http_metadata_server_port=8080 - - --rpc_port=50051 - - --metrics_port=9003 - ports: - - name: rpc - containerPort: 50051 - - name: metadata - containerPort: 8080 - - name: metrics - containerPort: 9003 - resources: - requests: - cpu: "4" - memory: 8Gi + containers: + - name: master + image: mooncake/mooncake-transfer-engine:latest + command: + - mooncake_master + args: + - --enable_http_metadata_server=true + - --http_metadata_server_host=0.0.0.0 + - --http_metadata_server_port=8080 + - --rpc_port=50051 + - --metrics_port=9003 + ports: + - name: rpc + containerPort: 50051 + - name: metadata + containerPort: 8080 + - name: metrics + containerPort: 9003 + resources: + requests: + cpu: "4" + memory: 8Gi - name: store minReplicas: 4 - runner: - image: mooncake/mooncake-transfer-engine:latest - command: - - mooncake_client - ports: - - name: rpc - containerPort: 50052 - resources: - requests: - cpu: "8" - memory: 180Gi + containers: + - name: store + image: mooncake/mooncake-transfer-engine:latest + command: + - mooncake_client + ports: + - name: rpc + containerPort: 50052 + resources: + requests: + cpu: "8" + memory: 180Gi volumes: - name: cache-data emptyDir: {} @@ -911,8 +914,6 @@ spec: name: vllm-lmcache kvCachePool: name: lmcache-node-pool - kind: KVCachePool - apiGroup: 
ome.io engine: minReplicas: 2 ``` @@ -957,7 +958,7 @@ When `spec.kvCachePool` is present: ### Connector Merge Rules -Connector injection uses a three-way merge: +Connector injection uses this ordered merge: ```text ServingRuntime component config @@ -978,6 +979,11 @@ Environment merge should preserve the same precedence. Generated connector env and connector `EnvironmentOverride` should not overwrite explicit `InferenceService` component env values. +When `connectorConfig.configMapRef` is set, the runtime connector adapter uses +that referenced full transfer-config payload instead of generating one from +`connectorClass`, `role`, and `extraConfig`. The adapter still merges +`runtimeArgsOverride` and `environmentOverride` with the same precedence rules. + ### Provider Adapter Contracts Introduce two internal adapter boundaries. @@ -1044,7 +1050,7 @@ Provider validation should fail fast for: 2. unsupported deployment mode for provider; 3. duplicate workload names; 4. missing required workload roles for a mode; -5. missing runner image when a workload requires one; +5. missing container image when OME must create a workload; 6. provider config that cannot be decoded by the selected adapter; and 7. missing connection status from provider-managed resources. @@ -1060,7 +1066,7 @@ Controller-level metrics: 6. `ome_kvcache_provider_errors_total` Pool workloads should preserve provider metrics ports configured in -`workloads[].runner.ports`. +`workloads[].containers[].ports`. Recommended labels: @@ -1223,6 +1229,10 @@ references without changing behavior when the field is absent. - 2026-05-10: Reworked design to introduce namespace-scoped `KVCachePool` CRD, reference-only `InferenceService.spec.kvCachePool`, and runtime-side `ServingRuntime.spec.kvCacheConnectors`. 
+- 2026-05-11: Aligned the OEP with the alpha implementation by trimming + speculative cache-policy fields, using provider defaults by omission, + composing `ComponentExtensionSpec` for workload extensions, and adding + runtime connector `extraConfig`/`configMapRef` escape hatches. ## Drawbacks @@ -1267,14 +1277,15 @@ OME could add both namespaced and cluster-scoped variants. This proposal defers cluster scope. The initial resource is namespaced to keep ownership, RBAC, service discovery, and connection status straightforward. -### Top-level Runner Fields +### Top-level Pod or Container Fields -OME could put `runner`, `PodSpec`, and scaling fields directly on -`KVCachePoolSpec`. +OME could put pod, container, and scaling fields directly on +`KVCachePoolSpec`, or reuse the `InferenceService` `runner` shape. This proposal rejects that shape because it conflicts with multi-role providers such as Mooncake. All pod and container configuration lives under -`spec.workloads[]`. +`spec.workloads[]`, and pool workloads use Kubernetes `containers` rather than +the serving-component-specific `runner` field. ### Separate `managementMode` diff --git a/pkg/apis/ome/v1beta1/inference_service.go b/pkg/apis/ome/v1beta1/inference_service.go index 04832cbb..7e039b1c 100644 --- a/pkg/apis/ome/v1beta1/inference_service.go +++ b/pkg/apis/ome/v1beta1/inference_service.go @@ -53,6 +53,13 @@ type InferenceServiceSpec struct { // AcceleratorSelector specifies accelerator selection preferences // +optional AcceleratorSelector *AcceleratorSelector `json:"acceleratorSelector,omitempty"` + + // KVCachePool references a namespace-scoped KVCachePool that this service + // should attach to for distributed KV cache reuse. Connector behavior is + // derived from the referenced pool and from the selected ServingRuntime's + // kvCacheConnectors. 
+ // +optional + KVCachePool *KVCachePoolRef `json:"kvCachePool,omitempty"` } // AcceleratorSelector defines how to select accelerators for the InferenceService diff --git a/pkg/apis/ome/v1beta1/kvcachepool_types.go b/pkg/apis/ome/v1beta1/kvcachepool_types.go new file mode 100644 index 00000000..c393981a --- /dev/null +++ b/pkg/apis/ome/v1beta1/kvcachepool_types.go @@ -0,0 +1,321 @@ +package v1beta1 + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "knative.dev/pkg/apis" + duckv1 "knative.dev/pkg/apis/duck/v1" +) + +// KVCachePoolDeploymentMode describes how OME reconciles a KVCachePool. +// +kubebuilder:validation:Enum=RawDeployment;NodeLocal;DistributedStore;ProviderManaged +type KVCachePoolDeploymentMode string + +const ( + KVCachePoolRawDeployment KVCachePoolDeploymentMode = "RawDeployment" + KVCachePoolNodeLocal KVCachePoolDeploymentMode = "NodeLocal" + KVCachePoolDistributedStore KVCachePoolDeploymentMode = "DistributedStore" + KVCachePoolProviderManaged KVCachePoolDeploymentMode = "ProviderManaged" +) + +// KVCacheProvider identifies the primary KV cache integration layer. +// +kubebuilder:validation:Enum=LMCache;Mooncake;NIXL +type KVCacheProvider string + +const ( + KVCacheProviderLMCache KVCacheProvider = "LMCache" + KVCacheProviderMooncake KVCacheProvider = "Mooncake" + KVCacheProviderNIXL KVCacheProvider = "NIXL" +) + +// KVCacheBackendType identifies a storage or transfer backend used underneath a provider. 
+// +kubebuilder:validation:Enum=Local;CPURAM;Mooncake;NIXL;Redis +type KVCacheBackendType string + +const ( + KVCacheBackendLocal KVCacheBackendType = "Local" + KVCacheBackendCPURAM KVCacheBackendType = "CPURAM" + KVCacheBackendMooncake KVCacheBackendType = "Mooncake" + KVCacheBackendNIXL KVCacheBackendType = "NIXL" + KVCacheBackendRedis KVCacheBackendType = "Redis" +) + +// KVCacheEvictionPolicy is the desired cache eviction behavior. Leaving the +// field unset selects the provider's default policy. +// +kubebuilder:validation:Enum=LRU;IsolatedLRU;noop +type KVCacheEvictionPolicy string + +const ( + KVCacheEvictionLRU KVCacheEvictionPolicy = "LRU" + KVCacheEvictionIsolatedLRU KVCacheEvictionPolicy = "IsolatedLRU" + KVCacheEvictionNoop KVCacheEvictionPolicy = "noop" +) + +// KVCachePoolSpec defines the desired state of a KVCachePool. +type KVCachePoolSpec struct { + // Provider identifies the primary KV cache integration layer. + // +required + Provider KVCacheProviderSpec `json:"provider"` + + // DeploymentMode describes how OME reconciles this pool. + // +required + DeploymentMode KVCachePoolDeploymentMode `json:"deploymentMode"` + + // Cache describes provider-neutral cache policy. + // +optional + Cache *KVCachePolicySpec `json:"cache,omitempty"` + + // Workloads contains pod and container configuration for OME-managed pool + // roles. This is the only place where pool pod/container config appears. + // +optional + // +listType=map + // +listMapKey=name + Workloads []KVCachePoolWorkloadSpec `json:"workloads,omitempty"` +} + +// KVCacheProviderSpec identifies the primary provider for a KVCachePool and any +// storage or transfer backends used underneath the provider. +type KVCacheProviderSpec struct { + // +required + Name KVCacheProvider `json:"name"` + + // Backends identifies storage or transfer backends used by the provider. 
+ // +optional + // +listType=map + // +listMapKey=name + Backends []KVCacheBackendSpec `json:"backends,omitempty"` + + // Config contains provider-scoped settings that are not portable OME API. + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + Config *runtime.RawExtension `json:"config,omitempty"` +} + +// KVCacheBackendSpec identifies a storage or transfer backend used by a provider. +type KVCacheBackendSpec struct { + // +required + Name string `json:"name"` + + // Type identifies the backend implementation. + // +required + Type KVCacheBackendType `json:"type"` + + // Config contains backend-scoped settings that are not portable OME API. + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + Config *runtime.RawExtension `json:"config,omitempty"` +} + +// KVCachePolicySpec captures provider-neutral cache policy. Providers translate +// these settings to their native size, memory, segment, or storage knobs. +type KVCachePolicySpec struct { + // Capacity is the intended total pool capacity. + // +optional + Capacity *resource.Quantity `json:"capacity,omitempty"` + + // EvictionPolicy is the desired eviction behavior. Leaving it unset selects + // the provider's default policy, so no kubebuilder default is applied. + // +optional + EvictionPolicy *KVCacheEvictionPolicy `json:"evictionPolicy,omitempty"` + + // ChunkSize is a provider-neutral chunk/page/block size hint. + // +optional + ChunkSize *resource.Quantity `json:"chunkSize,omitempty"` +} + +// KVCachePoolWorkloadSpec describes pod and container configuration for an +// OME-managed pool role such as server, master, or store. It composes the same +// PodSpec and ComponentExtensionSpec used by engine, decoder, and router +// components so pool workloads inherit consistent replica, autoscaling, label, +// and PDB semantics. +type KVCachePoolWorkloadSpec struct { + // Name identifies the provider or backend role, such as server, master, or + // store. 
+ // +required + Name string `json:"name"` + + // PodSpec provides pod-level customization for this pool workload. + // Container configuration is expressed through PodSpec.Containers. + // +optional + PodSpec `json:",inline"` + + // ComponentExtensionSpec provides replicas, autoscaling, labels, + // annotations, and PodDisruptionBudget configuration for this workload. + ComponentExtensionSpec `json:",inline"` +} + +// KVCachePoolRef is a reference from another resource to a KVCachePool. +type KVCachePoolRef struct { + // Name of the KVCachePool being referenced. + // +required + Name string `json:"name"` + + // Kind of the referenced resource. Defaults to KVCachePool. + // +optional + // +kubebuilder:default="KVCachePool" + Kind *string `json:"kind,omitempty"` + + // APIGroup of the referenced resource. Defaults to ome.io. + // +optional + // +kubebuilder:default="ome.io" + APIGroup *string `json:"apiGroup,omitempty"` +} + +// KVCacheConnectorSpec describes runtime-side support for attaching serving +// components to a KVCachePool with a specific provider. +type KVCacheConnectorSpec struct { + // Provider identifies the pool provider this connector supports. + // +required + Provider KVCacheProvider `json:"provider"` + + // DeploymentModes lists supported pool deployment modes. An empty list + // means all modes supported by this provider adapter. + // +optional + // +listType=atomic + DeploymentModes []KVCachePoolDeploymentMode `json:"deploymentModes,omitempty"` + + // Components provides component-specific connector configuration. Keys + // must be one of the InferenceService ComponentType values (engine, + // decoder, router, predictor); other keys are rejected at admission. 
+ // +optional + // +kubebuilder:validation:XValidation:rule="self.all(k, k in ['engine','decoder','router','predictor'])",message="components key must be one of engine, decoder, router, predictor" + Components map[ComponentType]KVCacheConnectorComponentSpec `json:"components,omitempty"` +} + +// KVCacheConnectorComponentSpec configures runtime-side connector behavior for +// a single serving component. +type KVCacheConnectorComponentSpec struct { + // ConnectorConfig is typed connector intent interpreted by the + // provider/runtime adapter. + // +optional + ConnectorConfig *KVCacheConnectorConfig `json:"connectorConfig,omitempty"` + + // RuntimeArgsOverride provides connector-specific runtime args. Matching + // args replace existing values; missing args are appended. + // +optional + // +listType=atomic + RuntimeArgsOverride []string `json:"runtimeArgsOverride,omitempty"` + + // EnvironmentOverride provides connector-specific environment variables. + // +optional + EnvironmentOverride map[string]string `json:"environmentOverride,omitempty"` +} + +// KVCacheConnectorConfig mirrors the runtime --kv-transfer-config JSON +// payload. Either set the inline fields or reference a ConfigMap holding the +// full JSON. +type KVCacheConnectorConfig struct { + // ConnectorClass maps to "kv_connector". + // +optional + ConnectorClass *string `json:"connectorClass,omitempty"` + + // Role maps to "kv_role". + // +optional + Role *string `json:"role,omitempty"` + + // ExtraConfig maps to "kv_connector_extra_config". + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + ExtraConfig *runtime.RawExtension `json:"extraConfig,omitempty"` + + // ConfigMapRef sources the full --kv-transfer-config JSON from a + // ConfigMap; when set, the inline fields are ignored. + // +optional + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` +} + +// KVCachePoolStatus describes the observed state of a KVCachePool. 
+type KVCachePoolStatus struct { + // Conditions for the KVCachePool. The controller sets the standard `Ready` + // condition along with any provider- or workload-specific conditions. + duckv1.Status `json:",inline"` + + // Connection contains normalized connection information consumed by + // ServingRuntime connector adapters. + // +optional + Connection *KVCachePoolConnectionStatus `json:"connection,omitempty"` + + // Workloads reports provider workload status. + // +optional + // +listType=map + // +listMapKey=name + Workloads []KVCachePoolWorkloadStatus `json:"workloads,omitempty"` +} + +// KVCachePoolConnectionStatus normalizes connection information for +// runtime-side connector injection. +type KVCachePoolConnectionStatus struct { + // Endpoint is the primary in-cluster endpoint when one exists. + // +optional + Endpoint *apis.URL `json:"endpoint,omitempty"` + + // Ports lists named connection ports. + // +optional + // +listType=map + // +listMapKey=name + Ports []KVCachePoolPortStatus `json:"ports,omitempty"` + + // ConfigMapRef points to provider-generated connection config when needed. + // +optional + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` + + // SecretRef points to provider-generated credentials when needed. + // +optional + SecretRef *corev1.LocalObjectReference `json:"secretRef,omitempty"` + + // ProviderStatus contains provider-scoped observed state, not desired + // configuration. + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + ProviderStatus *runtime.RawExtension `json:"providerStatus,omitempty"` +} + +// KVCachePoolPortStatus is a named connection port published by a pool. +type KVCachePoolPortStatus struct { + // +required + Name string `json:"name"` + // +required + Port int32 `json:"port"` +} + +// KVCachePoolWorkloadStatus reports the observed state of a pool workload +// role. 
+type KVCachePoolWorkloadStatus struct { + // +required + Name string `json:"name"` + ReadyReplicas int32 `json:"readyReplicas"` + DesiredReplicas int32 `json:"desiredReplicas"` +} + +// KVCachePool is the Schema for distributed KV cache pools. +// +k8s:openapi-gen=true +// +genclient +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Provider",type="string",JSONPath=".spec.provider.name" +// +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.deploymentMode" +// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:resource:path=kvcachepools,shortName=kvcp +type KVCachePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec KVCachePoolSpec `json:"spec,omitempty"` + Status KVCachePoolStatus `json:"status,omitempty"` +} + +// KVCachePoolList contains a list of KVCachePool. +// +k8s:openapi-gen=true +// +kubebuilder:object:root=true +type KVCachePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []KVCachePool `json:"items"` +} + +func init() { + SchemeBuilder.Register(&KVCachePool{}, &KVCachePoolList{}) +} diff --git a/pkg/apis/ome/v1beta1/servingruntime_types.go b/pkg/apis/ome/v1beta1/servingruntime_types.go index 952e8d9c..3707c35f 100644 --- a/pkg/apis/ome/v1beta1/servingruntime_types.go +++ b/pkg/apis/ome/v1beta1/servingruntime_types.go @@ -226,6 +226,15 @@ type ServingRuntimeSpec struct { // AcceleratorRequirements specifies the accelerator requirements for this runtime // +optional AcceleratorRequirements *AcceleratorRequirements `json:"acceleratorRequirements,omitempty"` + + // KVCacheConnectors describes runtime-side support for attaching serving + // components to referenced KVCachePools. 
The InferenceService controller + // uses these entries to inject provider-specific connector args, env, and + // configuration when a service references a KVCachePool. + // +optional + // +listType=map + // +listMapKey=provider + KVCacheConnectors []KVCacheConnectorSpec `json:"kvCacheConnectors,omitempty"` } // AcceleratorRequirements specifies the accelerator requirements for this runtime diff --git a/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go b/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go index bb366602..9d00afc2 100644 --- a/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go @@ -1480,6 +1480,11 @@ func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) { *out = new(AcceleratorSelector) (*in).DeepCopyInto(*out) } + if in.KVCachePool != nil { + in, out := &in.KVCachePool, &out.KVCachePool + *out = new(KVCachePoolRef) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec. @@ -1526,6 +1531,402 @@ func (in *InferenceServiceStatus) DeepCopy() *InferenceServiceStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCacheBackendSpec) DeepCopyInto(out *KVCacheBackendSpec) { + *out = *in + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheBackendSpec. +func (in *KVCacheBackendSpec) DeepCopy() *KVCacheBackendSpec { + if in == nil { + return nil + } + out := new(KVCacheBackendSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCacheConnectorComponentSpec) DeepCopyInto(out *KVCacheConnectorComponentSpec) { + *out = *in + if in.ConnectorConfig != nil { + in, out := &in.ConnectorConfig, &out.ConnectorConfig + *out = new(KVCacheConnectorConfig) + (*in).DeepCopyInto(*out) + } + if in.RuntimeArgsOverride != nil { + in, out := &in.RuntimeArgsOverride, &out.RuntimeArgsOverride + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.EnvironmentOverride != nil { + in, out := &in.EnvironmentOverride, &out.EnvironmentOverride + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheConnectorComponentSpec. +func (in *KVCacheConnectorComponentSpec) DeepCopy() *KVCacheConnectorComponentSpec { + if in == nil { + return nil + } + out := new(KVCacheConnectorComponentSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCacheConnectorConfig) DeepCopyInto(out *KVCacheConnectorConfig) { + *out = *in + if in.ConnectorClass != nil { + in, out := &in.ConnectorClass, &out.ConnectorClass + *out = new(string) + **out = **in + } + if in.Role != nil { + in, out := &in.Role, &out.Role + *out = new(string) + **out = **in + } + if in.ExtraConfig != nil { + in, out := &in.ExtraConfig, &out.ExtraConfig + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(v1.LocalObjectReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheConnectorConfig. 
+func (in *KVCacheConnectorConfig) DeepCopy() *KVCacheConnectorConfig { + if in == nil { + return nil + } + out := new(KVCacheConnectorConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCacheConnectorSpec) DeepCopyInto(out *KVCacheConnectorSpec) { + *out = *in + if in.DeploymentModes != nil { + in, out := &in.DeploymentModes, &out.DeploymentModes + *out = make([]KVCachePoolDeploymentMode, len(*in)) + copy(*out, *in) + } + if in.Components != nil { + in, out := &in.Components, &out.Components + *out = make(map[ComponentType]KVCacheConnectorComponentSpec, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheConnectorSpec. +func (in *KVCacheConnectorSpec) DeepCopy() *KVCacheConnectorSpec { + if in == nil { + return nil + } + out := new(KVCacheConnectorSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePolicySpec) DeepCopyInto(out *KVCachePolicySpec) { + *out = *in + if in.Capacity != nil { + in, out := &in.Capacity, &out.Capacity + x := (*in).DeepCopy() + *out = &x + } + if in.EvictionPolicy != nil { + in, out := &in.EvictionPolicy, &out.EvictionPolicy + *out = new(KVCacheEvictionPolicy) + **out = **in + } + if in.ChunkSize != nil { + in, out := &in.ChunkSize, &out.ChunkSize + x := (*in).DeepCopy() + *out = &x + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePolicySpec. +func (in *KVCachePolicySpec) DeepCopy() *KVCachePolicySpec { + if in == nil { + return nil + } + out := new(KVCachePolicySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *KVCachePool) DeepCopyInto(out *KVCachePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePool. +func (in *KVCachePool) DeepCopy() *KVCachePool { + if in == nil { + return nil + } + out := new(KVCachePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KVCachePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolConnectionStatus) DeepCopyInto(out *KVCachePoolConnectionStatus) { + *out = *in + if in.Endpoint != nil { + in, out := &in.Endpoint, &out.Endpoint + *out = new(apis.URL) + (*in).DeepCopyInto(*out) + } + if in.Ports != nil { + in, out := &in.Ports, &out.Ports + *out = make([]KVCachePoolPortStatus, len(*in)) + copy(*out, *in) + } + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(v1.LocalObjectReference) + **out = **in + } + if in.SecretRef != nil { + in, out := &in.SecretRef, &out.SecretRef + *out = new(v1.LocalObjectReference) + **out = **in + } + if in.ProviderStatus != nil { + in, out := &in.ProviderStatus, &out.ProviderStatus + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolConnectionStatus. 
+func (in *KVCachePoolConnectionStatus) DeepCopy() *KVCachePoolConnectionStatus { + if in == nil { + return nil + } + out := new(KVCachePoolConnectionStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolList) DeepCopyInto(out *KVCachePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]KVCachePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolList. +func (in *KVCachePoolList) DeepCopy() *KVCachePoolList { + if in == nil { + return nil + } + out := new(KVCachePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KVCachePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolPortStatus) DeepCopyInto(out *KVCachePoolPortStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolPortStatus. +func (in *KVCachePoolPortStatus) DeepCopy() *KVCachePoolPortStatus { + if in == nil { + return nil + } + out := new(KVCachePoolPortStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCachePoolRef) DeepCopyInto(out *KVCachePoolRef) { + *out = *in + if in.Kind != nil { + in, out := &in.Kind, &out.Kind + *out = new(string) + **out = **in + } + if in.APIGroup != nil { + in, out := &in.APIGroup, &out.APIGroup + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolRef. +func (in *KVCachePoolRef) DeepCopy() *KVCachePoolRef { + if in == nil { + return nil + } + out := new(KVCachePoolRef) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolSpec) DeepCopyInto(out *KVCachePoolSpec) { + *out = *in + in.Provider.DeepCopyInto(&out.Provider) + if in.Cache != nil { + in, out := &in.Cache, &out.Cache + *out = new(KVCachePolicySpec) + (*in).DeepCopyInto(*out) + } + if in.Workloads != nil { + in, out := &in.Workloads, &out.Workloads + *out = make([]KVCachePoolWorkloadSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolSpec. +func (in *KVCachePoolSpec) DeepCopy() *KVCachePoolSpec { + if in == nil { + return nil + } + out := new(KVCachePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCachePoolStatus) DeepCopyInto(out *KVCachePoolStatus) { + *out = *in + in.Status.DeepCopyInto(&out.Status) + if in.Connection != nil { + in, out := &in.Connection, &out.Connection + *out = new(KVCachePoolConnectionStatus) + (*in).DeepCopyInto(*out) + } + if in.Workloads != nil { + in, out := &in.Workloads, &out.Workloads + *out = make([]KVCachePoolWorkloadStatus, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolStatus. +func (in *KVCachePoolStatus) DeepCopy() *KVCachePoolStatus { + if in == nil { + return nil + } + out := new(KVCachePoolStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolWorkloadSpec) DeepCopyInto(out *KVCachePoolWorkloadSpec) { + *out = *in + in.PodSpec.DeepCopyInto(&out.PodSpec) + in.ComponentExtensionSpec.DeepCopyInto(&out.ComponentExtensionSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolWorkloadSpec. +func (in *KVCachePoolWorkloadSpec) DeepCopy() *KVCachePoolWorkloadSpec { + if in == nil { + return nil + } + out := new(KVCachePoolWorkloadSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolWorkloadStatus) DeepCopyInto(out *KVCachePoolWorkloadStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolWorkloadStatus. +func (in *KVCachePoolWorkloadStatus) DeepCopy() *KVCachePoolWorkloadStatus { + if in == nil { + return nil + } + out := new(KVCachePoolWorkloadStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCacheProviderSpec) DeepCopyInto(out *KVCacheProviderSpec) { + *out = *in + if in.Backends != nil { + in, out := &in.Backends, &out.Backends + *out = make([]KVCacheBackendSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheProviderSpec. +func (in *KVCacheProviderSpec) DeepCopy() *KVCacheProviderSpec { + if in == nil { + return nil + } + out := new(KVCacheProviderSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KedaConfig) DeepCopyInto(out *KedaConfig) { *out = *in @@ -2430,6 +2831,13 @@ func (in *ServingRuntimeSpec) DeepCopyInto(out *ServingRuntimeSpec) { *out = new(AcceleratorRequirements) (*in).DeepCopyInto(*out) } + if in.KVCacheConnectors != nil { + in, out := &in.KVCacheConnectors, &out.KVCacheConnectors + *out = make([]KVCacheConnectorSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeSpec. 
diff --git a/pkg/apis/ome/v1beta1/zz_generated.defaults.go b/pkg/apis/ome/v1beta1/zz_generated.defaults.go index 6aa4ca7b..f9c8dbff 100644 --- a/pkg/apis/ome/v1beta1/zz_generated.defaults.go +++ b/pkg/apis/ome/v1beta1/zz_generated.defaults.go @@ -20,6 +20,8 @@ func RegisterDefaults(scheme *runtime.Scheme) error { scheme.AddTypeDefaultingFunc(&ClusterServingRuntimeList{}, func(obj interface{}) { SetObjectDefaults_ClusterServingRuntimeList(obj.(*ClusterServingRuntimeList)) }) scheme.AddTypeDefaultingFunc(&InferenceService{}, func(obj interface{}) { SetObjectDefaults_InferenceService(obj.(*InferenceService)) }) scheme.AddTypeDefaultingFunc(&InferenceServiceList{}, func(obj interface{}) { SetObjectDefaults_InferenceServiceList(obj.(*InferenceServiceList)) }) + scheme.AddTypeDefaultingFunc(&KVCachePool{}, func(obj interface{}) { SetObjectDefaults_KVCachePool(obj.(*KVCachePool)) }) + scheme.AddTypeDefaultingFunc(&KVCachePoolList{}, func(obj interface{}) { SetObjectDefaults_KVCachePoolList(obj.(*KVCachePoolList)) }) scheme.AddTypeDefaultingFunc(&ServingRuntime{}, func(obj interface{}) { SetObjectDefaults_ServingRuntime(obj.(*ServingRuntime)) }) scheme.AddTypeDefaultingFunc(&ServingRuntimeList{}, func(obj interface{}) { SetObjectDefaults_ServingRuntimeList(obj.(*ServingRuntimeList)) }) return nil @@ -3106,6 +3108,163 @@ func SetObjectDefaults_InferenceServiceList(in *InferenceServiceList) { } } +func SetObjectDefaults_KVCachePool(in *KVCachePool) { + for i := range in.Spec.Workloads { + a := &in.Spec.Workloads[i] + for j := range a.PodSpec.Volumes { + b := &a.PodSpec.Volumes[j] + if b.VolumeSource.ISCSI != nil { + if b.VolumeSource.ISCSI.ISCSIInterface == "" { + b.VolumeSource.ISCSI.ISCSIInterface = "default" + } + } + if b.VolumeSource.RBD != nil { + if b.VolumeSource.RBD.RBDPool == "" { + b.VolumeSource.RBD.RBDPool = "rbd" + } + if b.VolumeSource.RBD.RadosUser == "" { + b.VolumeSource.RBD.RadosUser = "admin" + } + if b.VolumeSource.RBD.Keyring == "" { + 
b.VolumeSource.RBD.Keyring = "/etc/ceph/keyring" + } + } + if b.VolumeSource.AzureDisk != nil { + if b.VolumeSource.AzureDisk.CachingMode == nil { + ptrVar1 := v1.AzureDataDiskCachingMode(v1.AzureDataDiskCachingReadWrite) + b.VolumeSource.AzureDisk.CachingMode = &ptrVar1 + } + if b.VolumeSource.AzureDisk.FSType == nil { + var ptrVar1 string = "ext4" + b.VolumeSource.AzureDisk.FSType = &ptrVar1 + } + if b.VolumeSource.AzureDisk.ReadOnly == nil { + var ptrVar1 bool = false + b.VolumeSource.AzureDisk.ReadOnly = &ptrVar1 + } + if b.VolumeSource.AzureDisk.Kind == nil { + ptrVar1 := v1.AzureDataDiskKind(v1.AzureSharedBlobDisk) + b.VolumeSource.AzureDisk.Kind = &ptrVar1 + } + } + if b.VolumeSource.ScaleIO != nil { + if b.VolumeSource.ScaleIO.StorageMode == "" { + b.VolumeSource.ScaleIO.StorageMode = "ThinProvisioned" + } + if b.VolumeSource.ScaleIO.FSType == "" { + b.VolumeSource.ScaleIO.FSType = "xfs" + } + } + } + for j := range a.PodSpec.InitContainers { + b := &a.PodSpec.InitContainers[j] + for k := range b.Ports { + c := &b.Ports[k] + if c.Protocol == "" { + c.Protocol = "TCP" + } + } + if b.LivenessProbe != nil { + if b.LivenessProbe.ProbeHandler.GRPC != nil { + if b.LivenessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.LivenessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.ReadinessProbe != nil { + if b.ReadinessProbe.ProbeHandler.GRPC != nil { + if b.ReadinessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.ReadinessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.StartupProbe != nil { + if b.StartupProbe.ProbeHandler.GRPC != nil { + if b.StartupProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.StartupProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + } + for j := range a.PodSpec.Containers { + b := &a.PodSpec.Containers[j] + for k := range b.Ports { + c := &b.Ports[k] + if c.Protocol == "" { + c.Protocol = "TCP" + } + } + if b.LivenessProbe != nil { + if 
b.LivenessProbe.ProbeHandler.GRPC != nil { + if b.LivenessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.LivenessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.ReadinessProbe != nil { + if b.ReadinessProbe.ProbeHandler.GRPC != nil { + if b.ReadinessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.ReadinessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.StartupProbe != nil { + if b.StartupProbe.ProbeHandler.GRPC != nil { + if b.StartupProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.StartupProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + } + for j := range a.PodSpec.EphemeralContainers { + b := &a.PodSpec.EphemeralContainers[j] + for k := range b.EphemeralContainerCommon.Ports { + c := &b.EphemeralContainerCommon.Ports[k] + if c.Protocol == "" { + c.Protocol = "TCP" + } + } + if b.EphemeralContainerCommon.LivenessProbe != nil { + if b.EphemeralContainerCommon.LivenessProbe.ProbeHandler.GRPC != nil { + if b.EphemeralContainerCommon.LivenessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.EphemeralContainerCommon.LivenessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.EphemeralContainerCommon.ReadinessProbe != nil { + if b.EphemeralContainerCommon.ReadinessProbe.ProbeHandler.GRPC != nil { + if b.EphemeralContainerCommon.ReadinessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.EphemeralContainerCommon.ReadinessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.EphemeralContainerCommon.StartupProbe != nil { + if b.EphemeralContainerCommon.StartupProbe.ProbeHandler.GRPC != nil { + if b.EphemeralContainerCommon.StartupProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.EphemeralContainerCommon.StartupProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + } + } +} + +func SetObjectDefaults_KVCachePoolList(in *KVCachePoolList) { + for i := range in.Items { + a := &in.Items[i] + 
SetObjectDefaults_KVCachePool(a) + } +} + func SetObjectDefaults_ServingRuntime(in *ServingRuntime) { if in.Spec.RouterConfig != nil { for i := range in.Spec.RouterConfig.PodSpec.Volumes { diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_kvcachepool.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_kvcachepool.go new file mode 100644 index 00000000..4e737c62 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_kvcachepool.go @@ -0,0 +1,34 @@ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + omev1beta1 "github.com/sgl-project/ome/pkg/client/clientset/versioned/typed/ome/v1beta1" + gentype "k8s.io/client-go/gentype" +) + +// fakeKVCachePools implements KVCachePoolInterface +type fakeKVCachePools struct { + *gentype.FakeClientWithList[*v1beta1.KVCachePool, *v1beta1.KVCachePoolList] + Fake *FakeOmeV1beta1 +} + +func newFakeKVCachePools(fake *FakeOmeV1beta1, namespace string) omev1beta1.KVCachePoolInterface { + return &fakeKVCachePools{ + gentype.NewFakeClientWithList[*v1beta1.KVCachePool, *v1beta1.KVCachePoolList]( + fake.Fake, + namespace, + v1beta1.SchemeGroupVersion.WithResource("kvcachepools"), + v1beta1.SchemeGroupVersion.WithKind("KVCachePool"), + func() *v1beta1.KVCachePool { return &v1beta1.KVCachePool{} }, + func() *v1beta1.KVCachePoolList { return &v1beta1.KVCachePoolList{} }, + func(dst, src *v1beta1.KVCachePoolList) { dst.ListMeta = src.ListMeta }, + func(list *v1beta1.KVCachePoolList) []*v1beta1.KVCachePool { return gentype.ToPointerSlice(list.Items) }, + func(list *v1beta1.KVCachePoolList, items []*v1beta1.KVCachePool) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go index 4f90ccaa..2b494a52 100644 --- 
a/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go @@ -36,6 +36,10 @@ func (c *FakeOmeV1beta1) InferenceServices(namespace string) v1beta1.InferenceSe return newFakeInferenceServices(c, namespace) } +func (c *FakeOmeV1beta1) KVCachePools(namespace string) v1beta1.KVCachePoolInterface { + return newFakeKVCachePools(c, namespace) +} + func (c *FakeOmeV1beta1) ServingRuntimes(namespace string) v1beta1.ServingRuntimeInterface { return newFakeServingRuntimes(c, namespace) } diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go index 8d6fa046..d87e4ee6 100644 --- a/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go @@ -14,4 +14,6 @@ type FineTunedWeightExpansion interface{} type InferenceServiceExpansion interface{} +type KVCachePoolExpansion interface{} + type ServingRuntimeExpansion interface{} diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/kvcachepool.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/kvcachepool.go new file mode 100644 index 00000000..b51919ad --- /dev/null +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/kvcachepool.go @@ -0,0 +1,54 @@ +// Code generated by client-gen. DO NOT EDIT. + +package v1beta1 + +import ( + context "context" + + omev1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + scheme "github.com/sgl-project/ome/pkg/client/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// KVCachePoolsGetter has a method to return a KVCachePoolInterface. +// A group's client should implement this interface. 
+type KVCachePoolsGetter interface { + KVCachePools(namespace string) KVCachePoolInterface +} + +// KVCachePoolInterface has methods to work with KVCachePool resources. +type KVCachePoolInterface interface { + Create(ctx context.Context, kVCachePool *omev1beta1.KVCachePool, opts v1.CreateOptions) (*omev1beta1.KVCachePool, error) + Update(ctx context.Context, kVCachePool *omev1beta1.KVCachePool, opts v1.UpdateOptions) (*omev1beta1.KVCachePool, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, kVCachePool *omev1beta1.KVCachePool, opts v1.UpdateOptions) (*omev1beta1.KVCachePool, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*omev1beta1.KVCachePool, error) + List(ctx context.Context, opts v1.ListOptions) (*omev1beta1.KVCachePoolList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *omev1beta1.KVCachePool, err error) + KVCachePoolExpansion +} + +// kVCachePools implements KVCachePoolInterface +type kVCachePools struct { + *gentype.ClientWithList[*omev1beta1.KVCachePool, *omev1beta1.KVCachePoolList] +} + +// newKVCachePools returns a KVCachePools +func newKVCachePools(c *OmeV1beta1Client, namespace string) *kVCachePools { + return &kVCachePools{ + gentype.NewClientWithList[*omev1beta1.KVCachePool, *omev1beta1.KVCachePoolList]( + "kvcachepools", + c.RESTClient(), + scheme.ParameterCodec, + namespace, + func() *omev1beta1.KVCachePool { return &omev1beta1.KVCachePool{} }, + func() *omev1beta1.KVCachePoolList { return &omev1beta1.KVCachePoolList{} }, + ), + } +} diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go 
b/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go index 30fbe169..c0db29ab 100644 --- a/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go @@ -18,6 +18,7 @@ type OmeV1beta1Interface interface { ClusterServingRuntimesGetter FineTunedWeightsGetter InferenceServicesGetter + KVCachePoolsGetter ServingRuntimesGetter } @@ -50,6 +51,10 @@ func (c *OmeV1beta1Client) InferenceServices(namespace string) InferenceServiceI return newInferenceServices(c, namespace) } +func (c *OmeV1beta1Client) KVCachePools(namespace string) KVCachePoolInterface { + return newKVCachePools(c, namespace) +} + func (c *OmeV1beta1Client) ServingRuntimes(namespace string) ServingRuntimeInterface { return newServingRuntimes(c, namespace) } diff --git a/pkg/client/informers/externalversions/generic.go b/pkg/client/informers/externalversions/generic.go index 4ca3f72c..5cda6325 100644 --- a/pkg/client/informers/externalversions/generic.go +++ b/pkg/client/informers/externalversions/generic.go @@ -49,6 +49,8 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().FineTunedWeights().Informer()}, nil case v1beta1.SchemeGroupVersion.WithResource("inferenceservices"): return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().InferenceServices().Informer()}, nil + case v1beta1.SchemeGroupVersion.WithResource("kvcachepools"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().KVCachePools().Informer()}, nil case v1beta1.SchemeGroupVersion.WithResource("servingruntimes"): return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().ServingRuntimes().Informer()}, nil diff --git a/pkg/client/informers/externalversions/ome/v1beta1/interface.go b/pkg/client/informers/externalversions/ome/v1beta1/interface.go 
index 7463f41a..2d2990ed 100644 --- a/pkg/client/informers/externalversions/ome/v1beta1/interface.go +++ b/pkg/client/informers/externalversions/ome/v1beta1/interface.go @@ -20,6 +20,8 @@ type Interface interface { FineTunedWeights() FineTunedWeightInformer // InferenceServices returns a InferenceServiceInformer. InferenceServices() InferenceServiceInformer + // KVCachePools returns a KVCachePoolInformer. + KVCachePools() KVCachePoolInformer // ServingRuntimes returns a ServingRuntimeInformer. ServingRuntimes() ServingRuntimeInformer } @@ -65,6 +67,11 @@ func (v *version) InferenceServices() InferenceServiceInformer { return &inferenceServiceInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} } +// KVCachePools returns a KVCachePoolInformer. +func (v *version) KVCachePools() KVCachePoolInformer { + return &kVCachePoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} + // ServingRuntimes returns a ServingRuntimeInformer. func (v *version) ServingRuntimes() ServingRuntimeInformer { return &servingRuntimeInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} diff --git a/pkg/client/informers/externalversions/ome/v1beta1/kvcachepool.go b/pkg/client/informers/externalversions/ome/v1beta1/kvcachepool.go new file mode 100644 index 00000000..4934b5ad --- /dev/null +++ b/pkg/client/informers/externalversions/ome/v1beta1/kvcachepool.go @@ -0,0 +1,86 @@ +// Code generated by informer-gen. DO NOT EDIT. 
+ +package v1beta1 + +import ( + context "context" + time "time" + + apisomev1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + versioned "github.com/sgl-project/ome/pkg/client/clientset/versioned" + internalinterfaces "github.com/sgl-project/ome/pkg/client/informers/externalversions/internalinterfaces" + omev1beta1 "github.com/sgl-project/ome/pkg/client/listers/ome/v1beta1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// KVCachePoolInformer provides access to a shared informer and lister for +// KVCachePools. +type KVCachePoolInformer interface { + Informer() cache.SharedIndexInformer + Lister() omev1beta1.KVCachePoolLister +} + +type kVCachePoolInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewKVCachePoolInformer constructs a new informer for KVCachePool type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewKVCachePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredKVCachePoolInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredKVCachePoolInformer constructs a new informer for KVCachePool type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredKVCachePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).List(context.Background(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).Watch(context.Background(), options) + }, + ListWithContextFunc: func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).List(ctx, options) + }, + WatchFuncWithContext: func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).Watch(ctx, options) + }, + }, + &apisomev1beta1.KVCachePool{}, + resyncPeriod, + indexers, + ) +} + +func (f *kVCachePoolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredKVCachePoolInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *kVCachePoolInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&apisomev1beta1.KVCachePool{}, f.defaultInformer) +} + +func (f *kVCachePoolInformer) Lister() omev1beta1.KVCachePoolLister { + return omev1beta1.NewKVCachePoolLister(f.Informer().GetIndexer()) +} diff --git a/pkg/client/listers/ome/v1beta1/expansion_generated.go 
b/pkg/client/listers/ome/v1beta1/expansion_generated.go index aad67952..57480c01 100644 --- a/pkg/client/listers/ome/v1beta1/expansion_generated.go +++ b/pkg/client/listers/ome/v1beta1/expansion_generated.go @@ -38,6 +38,14 @@ type InferenceServiceListerExpansion interface{} // InferenceServiceNamespaceLister. type InferenceServiceNamespaceListerExpansion interface{} +// KVCachePoolListerExpansion allows custom methods to be added to +// KVCachePoolLister. +type KVCachePoolListerExpansion interface{} + +// KVCachePoolNamespaceListerExpansion allows custom methods to be added to +// KVCachePoolNamespaceLister. +type KVCachePoolNamespaceListerExpansion interface{} + // ServingRuntimeListerExpansion allows custom methods to be added to // ServingRuntimeLister. type ServingRuntimeListerExpansion interface{} diff --git a/pkg/client/listers/ome/v1beta1/kvcachepool.go b/pkg/client/listers/ome/v1beta1/kvcachepool.go new file mode 100644 index 00000000..ae2c423e --- /dev/null +++ b/pkg/client/listers/ome/v1beta1/kvcachepool.go @@ -0,0 +1,54 @@ +// Code generated by lister-gen. DO NOT EDIT. + +package v1beta1 + +import ( + omev1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" +) + +// KVCachePoolLister helps list KVCachePools. +// All objects returned here must be treated as read-only. +type KVCachePoolLister interface { + // List lists all KVCachePools in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*omev1beta1.KVCachePool, err error) + // KVCachePools returns an object that can list and get KVCachePools. + KVCachePools(namespace string) KVCachePoolNamespaceLister + KVCachePoolListerExpansion +} + +// kVCachePoolLister implements the KVCachePoolLister interface. 
+type kVCachePoolLister struct { + listers.ResourceIndexer[*omev1beta1.KVCachePool] +} + +// NewKVCachePoolLister returns a new KVCachePoolLister. +func NewKVCachePoolLister(indexer cache.Indexer) KVCachePoolLister { + return &kVCachePoolLister{listers.New[*omev1beta1.KVCachePool](indexer, omev1beta1.Resource("kvcachepool"))} +} + +// KVCachePools returns an object that can list and get KVCachePools. +func (s *kVCachePoolLister) KVCachePools(namespace string) KVCachePoolNamespaceLister { + return kVCachePoolNamespaceLister{listers.NewNamespaced[*omev1beta1.KVCachePool](s.ResourceIndexer, namespace)} +} + +// KVCachePoolNamespaceLister helps list and get KVCachePools. +// All objects returned here must be treated as read-only. +type KVCachePoolNamespaceLister interface { + // List lists all KVCachePools in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*omev1beta1.KVCachePool, err error) + // Get retrieves the KVCachePool from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*omev1beta1.KVCachePool, error) + KVCachePoolNamespaceListerExpansion +} + +// kVCachePoolNamespaceLister implements the KVCachePoolNamespaceLister +// interface. +type kVCachePoolNamespaceLister struct { + listers.ResourceIndexer[*omev1beta1.KVCachePool] +}