diff --git a/oeps/0010-kv-cache-server-component/README.md b/oeps/0010-kv-cache-server-component/README.md index 27bc3721..864d8962 100644 --- a/oeps/0010-kv-cache-server-component/README.md +++ b/oeps/0010-kv-cache-server-component/README.md @@ -18,7 +18,7 @@ to the pool. - [Naming](#naming) - [Deployment Modes](#deployment-modes) - [Provider and Backend Model](#provider-and-backend-model) - - [External Provider Inputs](#external-provider-inputs) + - [Provider Design Inputs](#provider-design-inputs) - [User Stories](#user-stories) - [Story 1: Pre-create a Node-local LMCache Pool](#story-1-pre-create-a-node-local-lmcache-pool) - [Story 2: Bind an InferenceService to an Existing Pool](#story-2-bind-an-inferenceservice-to-an-existing-pool) @@ -95,11 +95,12 @@ The initial alpha supports four `KVCachePool` deployment modes: 3. `DistributedStore` 4. `ProviderManaged` -`NodeLocal` covers LMCache-style one-cache-server-per-node deployments. +`RawDeployment` covers simple OME-managed Kubernetes Deployment-backed pool +workloads. `NodeLocal` covers LMCache-style one-cache-server-per-node +deployments. `DistributedStore` covers coordinated store systems such as Mooncake master/store deployments. `ProviderManaged` covers providers that expose their -own controller and CRD, such as LMCache's `LMCacheEngine`. `External` binding -to infrastructure that OME does not manage is intentionally deferred. +own controller and CRD, such as LMCache's `LMCacheEngine`. ## Motivation @@ -135,10 +136,12 @@ namespace. alpha. 4. Add `ServingRuntime.spec.kvCacheConnectors` for runtime-side connector support. -5. Keep the pool API provider-neutral and extensible to LMCache, Mooncake, - NIXL-backed configurations, and future providers. -6. Reuse existing OME workload configuration types where appropriate, - including `PodSpec`, `RunnerSpec`, and `ComponentExtensionSpec`. +5. 
Keep the pool API provider-neutral and extensible to LMCache CPU RAM, + Mooncake, NIXL-backed configurations, and future providers. +6. Reuse existing OME workload configuration patterns where appropriate, + especially `PodSpec`, `ComponentExtensionSpec`, and Kubernetes container + fields, without embedding serving-runner-only abstractions such as + `RunnerSpec`. 7. Keep all pool pod/container configuration under `spec.workloads[]`. 8. Publish normalized connection information in `KVCachePool.status`. 9. Preserve existing `InferenceService` behavior when `spec.kvCachePool` is @@ -244,9 +247,6 @@ Mode meanings: - `ProviderManaged`: OME owns the `KVCachePool` intent but delegates provider implementation resources to a provider controller or provider CRD. -`External` is deferred until OME defines a clear status, ownership, and -validation contract for infrastructure it does not manage. - ### Provider and Backend Model The API distinguishes provider from backend. @@ -256,14 +256,14 @@ provider may expose connection metadata, create provider workloads, or translate pool intent into a provider-native CR. `provider.backends[]` identifies storage or transfer backends used underneath -the provider. For example, LMCache may be the provider while Mooncake or NIXL -is configured as a backend. +the provider. For example, LMCache may be the provider while CPU RAM, Mooncake, +or NIXL is configured as a backend. Provider-specific configuration belongs under `provider.config`. Backend-specific configuration belongs under `provider.backends[].config`. There is no giant top-level `providerConfig` bag. -### External Provider Inputs +### Provider Design Inputs The API shape is influenced by these provider designs: @@ -272,9 +272,9 @@ The API shape is influenced by these provider designs: 2. [LMCache operator](https://docs.lmcache.ai/mp/operator.html) motivates `ProviderManaged`, where OME reconciles provider-native resources and reflects their connection status. -3. 
[LMCache storage backends](https://docs.lmcache.ai/kv_cache/storage_backends/mooncake.html) +3. [LMCache storage backends](https://docs.lmcache.ai/kv_cache/storage_backends/local_storage.html) motivate separating **provider** from **backend** because LMCache can use - Mooncake or NIXL beneath the LMCache integration layer. + CPU RAM, Mooncake, or NIXL beneath the LMCache integration layer. 4. [Mooncake Store](https://kvcache-ai.github.io/Mooncake/design/mooncake-store.html) motivates `DistributedStore` and named pool workloads such as `master` and `store`. @@ -437,10 +437,6 @@ type KVCacheProviderSpec struct { // +required Name KVCacheProvider `json:"name"` - // Version optionally constrains the provider implementation version. - // +optional - Version *string `json:"version,omitempty"` - // Backends identifies storage or transfer backends used by the provider. // +optional // +listType=map @@ -452,7 +448,7 @@ type KVCacheProviderSpec struct { Config *runtime.RawExtension `json:"config,omitempty"` } -// +kubebuilder:validation:Enum=Local;Mooncake;NIXL;Redis;Filesystem +// +kubebuilder:validation:Enum=Local;CPURAM;Mooncake;NIXL;Redis type KVCacheBackendType string type KVCacheBackendSpec struct { @@ -473,7 +469,9 @@ type KVCacheBackendSpec struct { Cache policy: ```go -// +kubebuilder:validation:Enum=LRU;LFU;FIFO;ProviderDefault +// KVCacheEvictionPolicy is the desired cache eviction behavior. Leaving the +// field unset selects the provider's default policy. +// +kubebuilder:validation:Enum=LRU;LFU;FIFO type KVCacheEvictionPolicy string type KVCachePolicySpec struct { @@ -482,10 +480,6 @@ type KVCachePolicySpec struct { // +optional Capacity *resource.Quantity `json:"capacity,omitempty"` - // TTLSeconds optionally limits cache entry lifetime. - // +optional - TTLSeconds *int64 `json:"ttlSeconds,omitempty"` - // EvictionPolicy is the desired eviction behavior. 
// +optional EvictionPolicy *KVCacheEvictionPolicy `json:"evictionPolicy,omitempty"` @@ -493,10 +487,6 @@ type KVCachePolicySpec struct { // ChunkSize is a provider-neutral chunk/page/block size hint. // +optional ChunkSize *resource.Quantity `json:"chunkSize,omitempty"` - - // Keyspace optionally scopes generated cache keys. - // +optional - Keyspace *string `json:"keyspace,omitempty"` } ``` @@ -510,20 +500,23 @@ type KVCachePoolWorkloadSpec struct { Name string `json:"name"` // PodSpec provides pod-level customization for this pool workload. + // Container configuration is expressed through PodSpec.Containers. // +optional PodSpec `json:",inline"` - // ComponentExtensionSpec reuses OME workload knobs such as replicas, - // autoscaling, labels, annotations, PDB, and deployment strategy. - // +optional + // ComponentExtensionSpec provides replicas, autoscaling, labels, + // annotations, and PodDisruptionBudget configuration for this workload. ComponentExtensionSpec `json:",inline"` - - // Runner customizes the primary container for this workload. - // +optional - Runner *RunnerSpec `json:"runner,omitempty"` } ``` +`KVCachePoolWorkloadSpec` intentionally composes the existing +`ComponentExtensionSpec` instead of adding a parallel pool-specific scaling +shape. That keeps `minReplicas`, `maxReplicas`, `scaleMetric`, `scaleTarget`, +`kedaConfig`, labels, annotations, PDB, and deployment strategy behavior aligned +with OME's existing component API. Provider adapters remain responsible for +validating which extension fields are meaningful for each deployment mode. + #### InferenceService Extension Add `KVCachePool *KVCachePoolRef` to `InferenceServiceSpec`: @@ -589,8 +582,11 @@ type KVCacheConnectorSpec struct { // +listType=atomic DeploymentModes []KVCachePoolDeploymentMode `json:"deploymentModes,omitempty"` - // Components provides component-specific connector configuration. + // Components provides component-specific connector configuration. 
Keys + // must be one of the InferenceService ComponentType values (engine, + // decoder, router, predictor); other keys are rejected at admission. // +optional + // +kubebuilder:validation:XValidation:rule="self.all(k, k in ['engine','decoder','router','predictor'])",message="components key must be one of engine, decoder, router, predictor" Components map[ComponentType]KVCacheConnectorComponentSpec `json:"components,omitempty"` } @@ -612,26 +608,33 @@ type KVCacheConnectorComponentSpec struct { } type KVCacheConnectorConfig struct { - // ConnectorClass names the runtime connector implementation, such as - // LMCacheConnectorV1. + // ConnectorClass maps to "kv_connector". // +optional ConnectorClass *string `json:"connectorClass,omitempty"` - // Role describes the component role understood by the runtime adapter, such - // as kv_both, kv_producer, or kv_consumer. + // Role maps to "kv_role". // +optional Role *string `json:"role,omitempty"` - // ConnectionRefName optionally selects a named connection entry published - // by KVCachePool status. + // ExtraConfig maps to "kv_connector_extra_config". // +optional - ConnectionRefName *string `json:"connectionRefName,omitempty"` + // +kubebuilder:pruning:PreserveUnknownFields + ExtraConfig *runtime.RawExtension `json:"extraConfig,omitempty"` + + // ConfigMapRef sources the full --kv-transfer-config JSON from a + // ConfigMap; when set, the inline fields are ignored. + // +optional + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` } ``` -`KVCacheConnectorConfig` is intentionally typed. It is not a -`runtime.RawExtension`. Provider-specific escaping remains on the pool provider -and backend specs. +`KVCacheConnectorConfig` keeps common runtime connector intent typed while +allowing narrowly scoped escape hatches for runtime-native transfer config. +`extraConfig` is limited to the nested connector extra-config payload. 
+`configMapRef` is for advanced runtimes that need to own the complete +`--kv-transfer-config` JSON; when it is set, the inline fields are ignored. +Provider-specific pool configuration still belongs under `provider.config` or +`provider.backends[].config`. #### Status @@ -641,10 +644,6 @@ and backend specs. type KVCachePoolStatus struct { duckv1.Status `json:",inline"` - // Phase is a coarse lifecycle summary. - // +optional - Phase KVCachePoolPhase `json:"phase,omitempty"` - // Connection contains normalized connection information consumed by // ServingRuntime connector adapters. // +optional @@ -689,8 +688,8 @@ type KVCachePoolPortStatus struct { type KVCachePoolWorkloadStatus struct { Name string `json:"name"` - ReadyReplicas int32 `json:"readyReplicas,omitempty"` - DesiredReplicas int32 `json:"desiredReplicas,omitempty"` + ReadyReplicas int32 `json:"readyReplicas"` + DesiredReplicas int32 `json:"desiredReplicas"` } ``` @@ -712,7 +711,7 @@ spec: name: LMCache backends: - name: local-memory - type: Local + type: CPURAM config: mode: Multiprocess endpointDiscovery: NodeHostIP @@ -731,33 +730,34 @@ spec: - name: shm hostPath: path: /dev/shm - runner: - image: lmcache/standalone:nightly - command: - - /opt/venv/bin/lmcache - args: - - server - ports: - - name: transfer - containerPort: 6555 - hostPort: 6555 - - name: http - containerPort: 8080 - hostPort: 8080 - - name: metrics - containerPort: 9090 - hostPort: 9090 - readinessProbe: - httpGet: - path: /healthcheck - port: http - resources: - requests: - cpu: "4" - memory: 64Gi - limits: - cpu: "8" - memory: 80Gi + containers: + - name: server + image: lmcache/standalone:nightly + command: + - /opt/venv/bin/lmcache + args: + - server + ports: + - name: transfer + containerPort: 6555 + hostPort: 6555 + - name: http + containerPort: 8080 + hostPort: 8080 + - name: metrics + containerPort: 9090 + hostPort: 9090 + readinessProbe: + httpGet: + path: /healthcheck + port: http + resources: + requests: + cpu: "4" + memory: 
64Gi + limits: + cpu: "8" + memory: 80Gi ``` #### LMCache ProviderManaged Pool @@ -783,12 +783,13 @@ spec: - name: server nodeSelector: node-type: gpu - runner: - image: lmcache/standalone:nightly - resources: - requests: - cpu: "4" - memory: 64Gi + containers: + - name: server + image: lmcache/standalone:nightly + resources: + requests: + cpu: "4" + memory: 64Gi ``` The provider adapter translates the generic pool and workload intent into the @@ -816,40 +817,42 @@ spec: workloads: - name: master minReplicas: 1 - runner: - image: mooncake/mooncake-transfer-engine:latest - command: - - mooncake_master - args: - - --enable_http_metadata_server=true - - --http_metadata_server_host=0.0.0.0 - - --http_metadata_server_port=8080 - - --rpc_port=50051 - - --metrics_port=9003 - ports: - - name: rpc - containerPort: 50051 - - name: metadata - containerPort: 8080 - - name: metrics - containerPort: 9003 - resources: - requests: - cpu: "4" - memory: 8Gi + containers: + - name: master + image: mooncake/mooncake-transfer-engine:latest + command: + - mooncake_master + args: + - --enable_http_metadata_server=true + - --http_metadata_server_host=0.0.0.0 + - --http_metadata_server_port=8080 + - --rpc_port=50051 + - --metrics_port=9003 + ports: + - name: rpc + containerPort: 50051 + - name: metadata + containerPort: 8080 + - name: metrics + containerPort: 9003 + resources: + requests: + cpu: "4" + memory: 8Gi - name: store minReplicas: 4 - runner: - image: mooncake/mooncake-transfer-engine:latest - command: - - mooncake_client - ports: - - name: rpc - containerPort: 50052 - resources: - requests: - cpu: "8" - memory: 180Gi + containers: + - name: store + image: mooncake/mooncake-transfer-engine:latest + command: + - mooncake_client + ports: + - name: rpc + containerPort: 50052 + resources: + requests: + cpu: "8" + memory: 180Gi volumes: - name: cache-data emptyDir: {} @@ -911,8 +914,6 @@ spec: name: vllm-lmcache kvCachePool: name: lmcache-node-pool - kind: KVCachePool - apiGroup: 
ome.io engine: minReplicas: 2 ``` @@ -957,7 +958,7 @@ When `spec.kvCachePool` is present: ### Connector Merge Rules -Connector injection uses a three-way merge: +Connector injection uses this ordered merge: ```text ServingRuntime component config @@ -978,6 +979,11 @@ Environment merge should preserve the same precedence. Generated connector env and connector `EnvironmentOverride` should not overwrite explicit `InferenceService` component env values. +When `connectorConfig.configMapRef` is set, the runtime connector adapter uses +that referenced full transfer-config payload instead of generating one from +`connectorClass`, `role`, and `extraConfig`. The adapter still merges +`runtimeArgsOverride` and `environmentOverride` with the same precedence rules. + ### Provider Adapter Contracts Introduce two internal adapter boundaries. @@ -1044,7 +1050,7 @@ Provider validation should fail fast for: 2. unsupported deployment mode for provider; 3. duplicate workload names; 4. missing required workload roles for a mode; -5. missing runner image when a workload requires one; +5. missing container image when OME must create a workload; 6. provider config that cannot be decoded by the selected adapter; and 7. missing connection status from provider-managed resources. @@ -1060,7 +1066,7 @@ Controller-level metrics: 6. `ome_kvcache_provider_errors_total` Pool workloads should preserve provider metrics ports configured in -`workloads[].runner.ports`. +`workloads[].containers[].ports`. Recommended labels: @@ -1223,6 +1229,10 @@ references without changing behavior when the field is absent. - 2026-05-10: Reworked design to introduce namespace-scoped `KVCachePool` CRD, reference-only `InferenceService.spec.kvCachePool`, and runtime-side `ServingRuntime.spec.kvCacheConnectors`. 
+- 2026-05-11: Aligned the OEP with the alpha implementation by trimming + speculative cache-policy fields, using provider defaults by omission, + composing `ComponentExtensionSpec` for workload extensions, and adding + runtime connector `extraConfig`/`configMapRef` escape hatches. ## Drawbacks @@ -1267,14 +1277,15 @@ OME could add both namespaced and cluster-scoped variants. This proposal defers cluster scope. The initial resource is namespaced to keep ownership, RBAC, service discovery, and connection status straightforward. -### Top-level Runner Fields +### Top-level Pod or Container Fields -OME could put `runner`, `PodSpec`, and scaling fields directly on -`KVCachePoolSpec`. +OME could put pod, container, and scaling fields directly on +`KVCachePoolSpec`, or reuse the `InferenceService` `runner` shape. This proposal rejects that shape because it conflicts with multi-role providers such as Mooncake. All pod and container configuration lives under -`spec.workloads[]`. +`spec.workloads[]`, and pool workloads use Kubernetes `containers` rather than +the serving-component-specific `runner` field. ### Separate `managementMode` diff --git a/pkg/apis/ome/v1beta1/inference_service.go b/pkg/apis/ome/v1beta1/inference_service.go index 04832cbb..7e039b1c 100644 --- a/pkg/apis/ome/v1beta1/inference_service.go +++ b/pkg/apis/ome/v1beta1/inference_service.go @@ -53,6 +53,13 @@ type InferenceServiceSpec struct { // AcceleratorSelector specifies accelerator selection preferences // +optional AcceleratorSelector *AcceleratorSelector `json:"acceleratorSelector,omitempty"` + + // KVCachePool references a namespace-scoped KVCachePool that this service + // should attach to for distributed KV cache reuse. Connector behavior is + // derived from the referenced pool and from the selected ServingRuntime's + // kvCacheConnectors. 
+ // +optional + KVCachePool *KVCachePoolRef `json:"kvCachePool,omitempty"` } // AcceleratorSelector defines how to select accelerators for the InferenceService diff --git a/pkg/apis/ome/v1beta1/kvcachepool_types.go b/pkg/apis/ome/v1beta1/kvcachepool_types.go new file mode 100644 index 00000000..c393981a --- /dev/null +++ b/pkg/apis/ome/v1beta1/kvcachepool_types.go @@ -0,0 +1,321 @@ +package v1beta1 + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "knative.dev/pkg/apis" + duckv1 "knative.dev/pkg/apis/duck/v1" +) + +// KVCachePoolDeploymentMode describes how OME reconciles a KVCachePool. +// +kubebuilder:validation:Enum=RawDeployment;NodeLocal;DistributedStore;ProviderManaged +type KVCachePoolDeploymentMode string + +const ( + KVCachePoolRawDeployment KVCachePoolDeploymentMode = "RawDeployment" + KVCachePoolNodeLocal KVCachePoolDeploymentMode = "NodeLocal" + KVCachePoolDistributedStore KVCachePoolDeploymentMode = "DistributedStore" + KVCachePoolProviderManaged KVCachePoolDeploymentMode = "ProviderManaged" +) + +// KVCacheProvider identifies the primary KV cache integration layer. +// +kubebuilder:validation:Enum=LMCache;Mooncake;NIXL +type KVCacheProvider string + +const ( + KVCacheProviderLMCache KVCacheProvider = "LMCache" + KVCacheProviderMooncake KVCacheProvider = "Mooncake" + KVCacheProviderNIXL KVCacheProvider = "NIXL" +) + +// KVCacheBackendType identifies a storage or transfer backend used underneath a provider. 
+// +kubebuilder:validation:Enum=Local;CPURAM;Mooncake;NIXL;Redis +type KVCacheBackendType string + +const ( + KVCacheBackendLocal KVCacheBackendType = "Local" + KVCacheBackendCPURAM KVCacheBackendType = "CPURAM" + KVCacheBackendMooncake KVCacheBackendType = "Mooncake" + KVCacheBackendNIXL KVCacheBackendType = "NIXL" + KVCacheBackendRedis KVCacheBackendType = "Redis" +) + +// KVCacheEvictionPolicy is the desired cache eviction behavior. Leaving the +// field unset selects the provider's default policy. +// +kubebuilder:validation:Enum=LRU;IsolatedLRU;noop +type KVCacheEvictionPolicy string + +const ( + KVCacheEvictionLRU KVCacheEvictionPolicy = "LRU" + KVCacheEvictionIsolatedLRU KVCacheEvictionPolicy = "IsolatedLRU" + KVCacheEvictionNoop KVCacheEvictionPolicy = "noop" +) + +// KVCachePoolSpec defines the desired state of a KVCachePool. +type KVCachePoolSpec struct { + // Provider identifies the primary KV cache integration layer. + // +required + Provider KVCacheProviderSpec `json:"provider"` + + // DeploymentMode describes how OME reconciles this pool. + // +required + DeploymentMode KVCachePoolDeploymentMode `json:"deploymentMode"` + + // Cache describes provider-neutral cache policy. + // +optional + Cache *KVCachePolicySpec `json:"cache,omitempty"` + + // Workloads contains pod and container configuration for OME-managed pool + // roles. This is the only place where pool pod/container config appears. + // +optional + // +listType=map + // +listMapKey=name + Workloads []KVCachePoolWorkloadSpec `json:"workloads,omitempty"` +} + +// KVCacheProviderSpec identifies the primary provider for a KVCachePool and any +// storage or transfer backends used underneath the provider. +type KVCacheProviderSpec struct { + // +required + Name KVCacheProvider `json:"name"` + + // Backends identifies storage or transfer backends used by the provider. 
+ // +optional + // +listType=map + // +listMapKey=name + Backends []KVCacheBackendSpec `json:"backends,omitempty"` + + // Config contains provider-scoped settings that are not portable OME API. + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + Config *runtime.RawExtension `json:"config,omitempty"` +} + +// KVCacheBackendSpec identifies a storage or transfer backend used by a provider. +type KVCacheBackendSpec struct { + // +required + Name string `json:"name"` + + // Type identifies the backend implementation. + // +required + Type KVCacheBackendType `json:"type"` + + // Config contains backend-scoped settings that are not portable OME API. + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + Config *runtime.RawExtension `json:"config,omitempty"` +} + +// KVCachePolicySpec captures provider-neutral cache policy. Providers translate +// these settings to their native size, memory, segment, or storage knobs. +type KVCachePolicySpec struct { + // Capacity is the intended total pool capacity. + // +optional + Capacity *resource.Quantity `json:"capacity,omitempty"` + + // EvictionPolicy is the desired eviction behavior. Leaving it unset selects + // the provider's default policy, so no kubebuilder default is applied. + // +optional + EvictionPolicy *KVCacheEvictionPolicy `json:"evictionPolicy,omitempty"` + + // ChunkSize is a provider-neutral chunk/page/block size hint. + // +optional + ChunkSize *resource.Quantity `json:"chunkSize,omitempty"` +} + +// KVCachePoolWorkloadSpec describes pod and container configuration for an +// OME-managed pool role such as server, master, or store. It composes the same +// PodSpec and ComponentExtensionSpec used by engine, decoder, and router +// components so pool workloads inherit consistent replica, autoscaling, label, +// and PDB semantics. +type KVCachePoolWorkloadSpec struct { + // Name identifies the provider or backend role, such as server, master, or + // store. 
+ // +required + Name string `json:"name"` + + // PodSpec provides pod-level customization for this pool workload. + // Container configuration is expressed through PodSpec.Containers. + // +optional + PodSpec `json:",inline"` + + // ComponentExtensionSpec provides replicas, autoscaling, labels, + // annotations, and PodDisruptionBudget configuration for this workload. + ComponentExtensionSpec `json:",inline"` +} + +// KVCachePoolRef is a reference from another resource to a KVCachePool. +type KVCachePoolRef struct { + // Name of the KVCachePool being referenced. + // +required + Name string `json:"name"` + + // Kind of the referenced resource. Defaults to KVCachePool. + // +optional + // +kubebuilder:default="KVCachePool" + Kind *string `json:"kind,omitempty"` + + // APIGroup of the referenced resource. Defaults to ome.io. + // +optional + // +kubebuilder:default="ome.io" + APIGroup *string `json:"apiGroup,omitempty"` +} + +// KVCacheConnectorSpec describes runtime-side support for attaching serving +// components to a KVCachePool with a specific provider. +type KVCacheConnectorSpec struct { + // Provider identifies the pool provider this connector supports. + // +required + Provider KVCacheProvider `json:"provider"` + + // DeploymentModes lists supported pool deployment modes. An empty list + // means all modes supported by this provider adapter. + // +optional + // +listType=atomic + DeploymentModes []KVCachePoolDeploymentMode `json:"deploymentModes,omitempty"` + + // Components provides component-specific connector configuration. Keys + // must be one of the InferenceService ComponentType values (engine, + // decoder, router, predictor); other keys are rejected at admission. 
+ // +optional + // +kubebuilder:validation:XValidation:rule="self.all(k, k in ['engine','decoder','router','predictor'])",message="components key must be one of engine, decoder, router, predictor" + Components map[ComponentType]KVCacheConnectorComponentSpec `json:"components,omitempty"` +} + +// KVCacheConnectorComponentSpec configures runtime-side connector behavior for +// a single serving component. +type KVCacheConnectorComponentSpec struct { + // ConnectorConfig is typed connector intent interpreted by the + // provider/runtime adapter. + // +optional + ConnectorConfig *KVCacheConnectorConfig `json:"connectorConfig,omitempty"` + + // RuntimeArgsOverride provides connector-specific runtime args. Matching + // args replace existing values; missing args are appended. + // +optional + // +listType=atomic + RuntimeArgsOverride []string `json:"runtimeArgsOverride,omitempty"` + + // EnvironmentOverride provides connector-specific environment variables. + // +optional + EnvironmentOverride map[string]string `json:"environmentOverride,omitempty"` +} + +// KVCacheConnectorConfig mirrors the runtime --kv-transfer-config JSON +// payload. Either set the inline fields or reference a ConfigMap holding the +// full JSON. +type KVCacheConnectorConfig struct { + // ConnectorClass maps to "kv_connector". + // +optional + ConnectorClass *string `json:"connectorClass,omitempty"` + + // Role maps to "kv_role". + // +optional + Role *string `json:"role,omitempty"` + + // ExtraConfig maps to "kv_connector_extra_config". + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + ExtraConfig *runtime.RawExtension `json:"extraConfig,omitempty"` + + // ConfigMapRef sources the full --kv-transfer-config JSON from a + // ConfigMap; when set, the inline fields are ignored. + // +optional + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` +} + +// KVCachePoolStatus describes the observed state of a KVCachePool. 
+type KVCachePoolStatus struct { + // Conditions for the KVCachePool. The controller sets the standard `Ready` + // condition along with any provider- or workload-specific conditions. + duckv1.Status `json:",inline"` + + // Connection contains normalized connection information consumed by + // ServingRuntime connector adapters. + // +optional + Connection *KVCachePoolConnectionStatus `json:"connection,omitempty"` + + // Workloads reports provider workload status. + // +optional + // +listType=map + // +listMapKey=name + Workloads []KVCachePoolWorkloadStatus `json:"workloads,omitempty"` +} + +// KVCachePoolConnectionStatus normalizes connection information for +// runtime-side connector injection. +type KVCachePoolConnectionStatus struct { + // Endpoint is the primary in-cluster endpoint when one exists. + // +optional + Endpoint *apis.URL `json:"endpoint,omitempty"` + + // Ports lists named connection ports. + // +optional + // +listType=map + // +listMapKey=name + Ports []KVCachePoolPortStatus `json:"ports,omitempty"` + + // ConfigMapRef points to provider-generated connection config when needed. + // +optional + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` + + // SecretRef points to provider-generated credentials when needed. + // +optional + SecretRef *corev1.LocalObjectReference `json:"secretRef,omitempty"` + + // ProviderStatus contains provider-scoped observed state, not desired + // configuration. + // +optional + // +kubebuilder:pruning:PreserveUnknownFields + ProviderStatus *runtime.RawExtension `json:"providerStatus,omitempty"` +} + +// KVCachePoolPortStatus is a named connection port published by a pool. +type KVCachePoolPortStatus struct { + // +required + Name string `json:"name"` + // +required + Port int32 `json:"port"` +} + +// KVCachePoolWorkloadStatus reports the observed state of a pool workload +// role. 
+type KVCachePoolWorkloadStatus struct { + // +required + Name string `json:"name"` + ReadyReplicas int32 `json:"readyReplicas"` + DesiredReplicas int32 `json:"desiredReplicas"` +} + +// KVCachePool is the Schema for distributed KV cache pools. +// +k8s:openapi-gen=true +// +genclient +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Provider",type="string",JSONPath=".spec.provider.name" +// +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.deploymentMode" +// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:resource:path=kvcachepools,shortName=kvcp +type KVCachePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec KVCachePoolSpec `json:"spec,omitempty"` + Status KVCachePoolStatus `json:"status,omitempty"` +} + +// KVCachePoolList contains a list of KVCachePool. +// +k8s:openapi-gen=true +// +kubebuilder:object:root=true +type KVCachePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []KVCachePool `json:"items"` +} + +func init() { + SchemeBuilder.Register(&KVCachePool{}, &KVCachePoolList{}) +} diff --git a/pkg/apis/ome/v1beta1/servingruntime_types.go b/pkg/apis/ome/v1beta1/servingruntime_types.go index 952e8d9c..3707c35f 100644 --- a/pkg/apis/ome/v1beta1/servingruntime_types.go +++ b/pkg/apis/ome/v1beta1/servingruntime_types.go @@ -226,6 +226,15 @@ type ServingRuntimeSpec struct { // AcceleratorRequirements specifies the accelerator requirements for this runtime // +optional AcceleratorRequirements *AcceleratorRequirements `json:"acceleratorRequirements,omitempty"` + + // KVCacheConnectors describes runtime-side support for attaching serving + // components to referenced KVCachePools. 
The InferenceService controller + // uses these entries to inject provider-specific connector args, env, and + // configuration when a service references a KVCachePool. + // +optional + // +listType=map + // +listMapKey=provider + KVCacheConnectors []KVCacheConnectorSpec `json:"kvCacheConnectors,omitempty"` } // AcceleratorRequirements specifies the accelerator requirements for this runtime diff --git a/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go b/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go index bb366602..9d00afc2 100644 --- a/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/ome/v1beta1/zz_generated.deepcopy.go @@ -1480,6 +1480,11 @@ func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) { *out = new(AcceleratorSelector) (*in).DeepCopyInto(*out) } + if in.KVCachePool != nil { + in, out := &in.KVCachePool, &out.KVCachePool + *out = new(KVCachePoolRef) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec. @@ -1526,6 +1531,402 @@ func (in *InferenceServiceStatus) DeepCopy() *InferenceServiceStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCacheBackendSpec) DeepCopyInto(out *KVCacheBackendSpec) { + *out = *in + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheBackendSpec. +func (in *KVCacheBackendSpec) DeepCopy() *KVCacheBackendSpec { + if in == nil { + return nil + } + out := new(KVCacheBackendSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCacheConnectorComponentSpec) DeepCopyInto(out *KVCacheConnectorComponentSpec) { + *out = *in + if in.ConnectorConfig != nil { + in, out := &in.ConnectorConfig, &out.ConnectorConfig + *out = new(KVCacheConnectorConfig) + (*in).DeepCopyInto(*out) + } + if in.RuntimeArgsOverride != nil { + in, out := &in.RuntimeArgsOverride, &out.RuntimeArgsOverride + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.EnvironmentOverride != nil { + in, out := &in.EnvironmentOverride, &out.EnvironmentOverride + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheConnectorComponentSpec. +func (in *KVCacheConnectorComponentSpec) DeepCopy() *KVCacheConnectorComponentSpec { + if in == nil { + return nil + } + out := new(KVCacheConnectorComponentSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCacheConnectorConfig) DeepCopyInto(out *KVCacheConnectorConfig) { + *out = *in + if in.ConnectorClass != nil { + in, out := &in.ConnectorClass, &out.ConnectorClass + *out = new(string) + **out = **in + } + if in.Role != nil { + in, out := &in.Role, &out.Role + *out = new(string) + **out = **in + } + if in.ExtraConfig != nil { + in, out := &in.ExtraConfig, &out.ExtraConfig + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(v1.LocalObjectReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheConnectorConfig. 
+func (in *KVCacheConnectorConfig) DeepCopy() *KVCacheConnectorConfig { + if in == nil { + return nil + } + out := new(KVCacheConnectorConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCacheConnectorSpec) DeepCopyInto(out *KVCacheConnectorSpec) { + *out = *in + if in.DeploymentModes != nil { + in, out := &in.DeploymentModes, &out.DeploymentModes + *out = make([]KVCachePoolDeploymentMode, len(*in)) + copy(*out, *in) + } + if in.Components != nil { + in, out := &in.Components, &out.Components + *out = make(map[ComponentType]KVCacheConnectorComponentSpec, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheConnectorSpec. +func (in *KVCacheConnectorSpec) DeepCopy() *KVCacheConnectorSpec { + if in == nil { + return nil + } + out := new(KVCacheConnectorSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePolicySpec) DeepCopyInto(out *KVCachePolicySpec) { + *out = *in + if in.Capacity != nil { + in, out := &in.Capacity, &out.Capacity + x := (*in).DeepCopy() + *out = &x + } + if in.EvictionPolicy != nil { + in, out := &in.EvictionPolicy, &out.EvictionPolicy + *out = new(KVCacheEvictionPolicy) + **out = **in + } + if in.ChunkSize != nil { + in, out := &in.ChunkSize, &out.ChunkSize + x := (*in).DeepCopy() + *out = &x + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePolicySpec. +func (in *KVCachePolicySpec) DeepCopy() *KVCachePolicySpec { + if in == nil { + return nil + } + out := new(KVCachePolicySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *KVCachePool) DeepCopyInto(out *KVCachePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePool. +func (in *KVCachePool) DeepCopy() *KVCachePool { + if in == nil { + return nil + } + out := new(KVCachePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KVCachePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolConnectionStatus) DeepCopyInto(out *KVCachePoolConnectionStatus) { + *out = *in + if in.Endpoint != nil { + in, out := &in.Endpoint, &out.Endpoint + *out = new(apis.URL) + (*in).DeepCopyInto(*out) + } + if in.Ports != nil { + in, out := &in.Ports, &out.Ports + *out = make([]KVCachePoolPortStatus, len(*in)) + copy(*out, *in) + } + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(v1.LocalObjectReference) + **out = **in + } + if in.SecretRef != nil { + in, out := &in.SecretRef, &out.SecretRef + *out = new(v1.LocalObjectReference) + **out = **in + } + if in.ProviderStatus != nil { + in, out := &in.ProviderStatus, &out.ProviderStatus + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolConnectionStatus. 
+func (in *KVCachePoolConnectionStatus) DeepCopy() *KVCachePoolConnectionStatus { + if in == nil { + return nil + } + out := new(KVCachePoolConnectionStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolList) DeepCopyInto(out *KVCachePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]KVCachePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolList. +func (in *KVCachePoolList) DeepCopy() *KVCachePoolList { + if in == nil { + return nil + } + out := new(KVCachePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KVCachePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolPortStatus) DeepCopyInto(out *KVCachePoolPortStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolPortStatus. +func (in *KVCachePoolPortStatus) DeepCopy() *KVCachePoolPortStatus { + if in == nil { + return nil + } + out := new(KVCachePoolPortStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCachePoolRef) DeepCopyInto(out *KVCachePoolRef) { + *out = *in + if in.Kind != nil { + in, out := &in.Kind, &out.Kind + *out = new(string) + **out = **in + } + if in.APIGroup != nil { + in, out := &in.APIGroup, &out.APIGroup + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolRef. +func (in *KVCachePoolRef) DeepCopy() *KVCachePoolRef { + if in == nil { + return nil + } + out := new(KVCachePoolRef) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolSpec) DeepCopyInto(out *KVCachePoolSpec) { + *out = *in + in.Provider.DeepCopyInto(&out.Provider) + if in.Cache != nil { + in, out := &in.Cache, &out.Cache + *out = new(KVCachePolicySpec) + (*in).DeepCopyInto(*out) + } + if in.Workloads != nil { + in, out := &in.Workloads, &out.Workloads + *out = make([]KVCachePoolWorkloadSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolSpec. +func (in *KVCachePoolSpec) DeepCopy() *KVCachePoolSpec { + if in == nil { + return nil + } + out := new(KVCachePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCachePoolStatus) DeepCopyInto(out *KVCachePoolStatus) { + *out = *in + in.Status.DeepCopyInto(&out.Status) + if in.Connection != nil { + in, out := &in.Connection, &out.Connection + *out = new(KVCachePoolConnectionStatus) + (*in).DeepCopyInto(*out) + } + if in.Workloads != nil { + in, out := &in.Workloads, &out.Workloads + *out = make([]KVCachePoolWorkloadStatus, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolStatus. +func (in *KVCachePoolStatus) DeepCopy() *KVCachePoolStatus { + if in == nil { + return nil + } + out := new(KVCachePoolStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolWorkloadSpec) DeepCopyInto(out *KVCachePoolWorkloadSpec) { + *out = *in + in.PodSpec.DeepCopyInto(&out.PodSpec) + in.ComponentExtensionSpec.DeepCopyInto(&out.ComponentExtensionSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolWorkloadSpec. +func (in *KVCachePoolWorkloadSpec) DeepCopy() *KVCachePoolWorkloadSpec { + if in == nil { + return nil + } + out := new(KVCachePoolWorkloadSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KVCachePoolWorkloadStatus) DeepCopyInto(out *KVCachePoolWorkloadStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCachePoolWorkloadStatus. +func (in *KVCachePoolWorkloadStatus) DeepCopy() *KVCachePoolWorkloadStatus { + if in == nil { + return nil + } + out := new(KVCachePoolWorkloadStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KVCacheProviderSpec) DeepCopyInto(out *KVCacheProviderSpec) { + *out = *in + if in.Backends != nil { + in, out := &in.Backends, &out.Backends + *out = make([]KVCacheBackendSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVCacheProviderSpec. +func (in *KVCacheProviderSpec) DeepCopy() *KVCacheProviderSpec { + if in == nil { + return nil + } + out := new(KVCacheProviderSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KedaConfig) DeepCopyInto(out *KedaConfig) { *out = *in @@ -2430,6 +2831,13 @@ func (in *ServingRuntimeSpec) DeepCopyInto(out *ServingRuntimeSpec) { *out = new(AcceleratorRequirements) (*in).DeepCopyInto(*out) } + if in.KVCacheConnectors != nil { + in, out := &in.KVCacheConnectors, &out.KVCacheConnectors + *out = make([]KVCacheConnectorSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServingRuntimeSpec. 
diff --git a/pkg/apis/ome/v1beta1/zz_generated.defaults.go b/pkg/apis/ome/v1beta1/zz_generated.defaults.go index 6aa4ca7b..f9c8dbff 100644 --- a/pkg/apis/ome/v1beta1/zz_generated.defaults.go +++ b/pkg/apis/ome/v1beta1/zz_generated.defaults.go @@ -20,6 +20,8 @@ func RegisterDefaults(scheme *runtime.Scheme) error { scheme.AddTypeDefaultingFunc(&ClusterServingRuntimeList{}, func(obj interface{}) { SetObjectDefaults_ClusterServingRuntimeList(obj.(*ClusterServingRuntimeList)) }) scheme.AddTypeDefaultingFunc(&InferenceService{}, func(obj interface{}) { SetObjectDefaults_InferenceService(obj.(*InferenceService)) }) scheme.AddTypeDefaultingFunc(&InferenceServiceList{}, func(obj interface{}) { SetObjectDefaults_InferenceServiceList(obj.(*InferenceServiceList)) }) + scheme.AddTypeDefaultingFunc(&KVCachePool{}, func(obj interface{}) { SetObjectDefaults_KVCachePool(obj.(*KVCachePool)) }) + scheme.AddTypeDefaultingFunc(&KVCachePoolList{}, func(obj interface{}) { SetObjectDefaults_KVCachePoolList(obj.(*KVCachePoolList)) }) scheme.AddTypeDefaultingFunc(&ServingRuntime{}, func(obj interface{}) { SetObjectDefaults_ServingRuntime(obj.(*ServingRuntime)) }) scheme.AddTypeDefaultingFunc(&ServingRuntimeList{}, func(obj interface{}) { SetObjectDefaults_ServingRuntimeList(obj.(*ServingRuntimeList)) }) return nil @@ -3106,6 +3108,163 @@ func SetObjectDefaults_InferenceServiceList(in *InferenceServiceList) { } } +func SetObjectDefaults_KVCachePool(in *KVCachePool) { + for i := range in.Spec.Workloads { + a := &in.Spec.Workloads[i] + for j := range a.PodSpec.Volumes { + b := &a.PodSpec.Volumes[j] + if b.VolumeSource.ISCSI != nil { + if b.VolumeSource.ISCSI.ISCSIInterface == "" { + b.VolumeSource.ISCSI.ISCSIInterface = "default" + } + } + if b.VolumeSource.RBD != nil { + if b.VolumeSource.RBD.RBDPool == "" { + b.VolumeSource.RBD.RBDPool = "rbd" + } + if b.VolumeSource.RBD.RadosUser == "" { + b.VolumeSource.RBD.RadosUser = "admin" + } + if b.VolumeSource.RBD.Keyring == "" { + 
b.VolumeSource.RBD.Keyring = "/etc/ceph/keyring" + } + } + if b.VolumeSource.AzureDisk != nil { + if b.VolumeSource.AzureDisk.CachingMode == nil { + ptrVar1 := v1.AzureDataDiskCachingMode(v1.AzureDataDiskCachingReadWrite) + b.VolumeSource.AzureDisk.CachingMode = &ptrVar1 + } + if b.VolumeSource.AzureDisk.FSType == nil { + var ptrVar1 string = "ext4" + b.VolumeSource.AzureDisk.FSType = &ptrVar1 + } + if b.VolumeSource.AzureDisk.ReadOnly == nil { + var ptrVar1 bool = false + b.VolumeSource.AzureDisk.ReadOnly = &ptrVar1 + } + if b.VolumeSource.AzureDisk.Kind == nil { + ptrVar1 := v1.AzureDataDiskKind(v1.AzureSharedBlobDisk) + b.VolumeSource.AzureDisk.Kind = &ptrVar1 + } + } + if b.VolumeSource.ScaleIO != nil { + if b.VolumeSource.ScaleIO.StorageMode == "" { + b.VolumeSource.ScaleIO.StorageMode = "ThinProvisioned" + } + if b.VolumeSource.ScaleIO.FSType == "" { + b.VolumeSource.ScaleIO.FSType = "xfs" + } + } + } + for j := range a.PodSpec.InitContainers { + b := &a.PodSpec.InitContainers[j] + for k := range b.Ports { + c := &b.Ports[k] + if c.Protocol == "" { + c.Protocol = "TCP" + } + } + if b.LivenessProbe != nil { + if b.LivenessProbe.ProbeHandler.GRPC != nil { + if b.LivenessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.LivenessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.ReadinessProbe != nil { + if b.ReadinessProbe.ProbeHandler.GRPC != nil { + if b.ReadinessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.ReadinessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.StartupProbe != nil { + if b.StartupProbe.ProbeHandler.GRPC != nil { + if b.StartupProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.StartupProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + } + for j := range a.PodSpec.Containers { + b := &a.PodSpec.Containers[j] + for k := range b.Ports { + c := &b.Ports[k] + if c.Protocol == "" { + c.Protocol = "TCP" + } + } + if b.LivenessProbe != nil { + if 
b.LivenessProbe.ProbeHandler.GRPC != nil { + if b.LivenessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.LivenessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.ReadinessProbe != nil { + if b.ReadinessProbe.ProbeHandler.GRPC != nil { + if b.ReadinessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.ReadinessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.StartupProbe != nil { + if b.StartupProbe.ProbeHandler.GRPC != nil { + if b.StartupProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.StartupProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + } + for j := range a.PodSpec.EphemeralContainers { + b := &a.PodSpec.EphemeralContainers[j] + for k := range b.EphemeralContainerCommon.Ports { + c := &b.EphemeralContainerCommon.Ports[k] + if c.Protocol == "" { + c.Protocol = "TCP" + } + } + if b.EphemeralContainerCommon.LivenessProbe != nil { + if b.EphemeralContainerCommon.LivenessProbe.ProbeHandler.GRPC != nil { + if b.EphemeralContainerCommon.LivenessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.EphemeralContainerCommon.LivenessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.EphemeralContainerCommon.ReadinessProbe != nil { + if b.EphemeralContainerCommon.ReadinessProbe.ProbeHandler.GRPC != nil { + if b.EphemeralContainerCommon.ReadinessProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.EphemeralContainerCommon.ReadinessProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + if b.EphemeralContainerCommon.StartupProbe != nil { + if b.EphemeralContainerCommon.StartupProbe.ProbeHandler.GRPC != nil { + if b.EphemeralContainerCommon.StartupProbe.ProbeHandler.GRPC.Service == nil { + var ptrVar1 string = "" + b.EphemeralContainerCommon.StartupProbe.ProbeHandler.GRPC.Service = &ptrVar1 + } + } + } + } + } +} + +func SetObjectDefaults_KVCachePoolList(in *KVCachePoolList) { + for i := range in.Items { + a := &in.Items[i] + 
SetObjectDefaults_KVCachePool(a) + } +} + func SetObjectDefaults_ServingRuntime(in *ServingRuntime) { if in.Spec.RouterConfig != nil { for i := range in.Spec.RouterConfig.PodSpec.Volumes { diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_kvcachepool.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_kvcachepool.go new file mode 100644 index 00000000..4e737c62 --- /dev/null +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_kvcachepool.go @@ -0,0 +1,34 @@ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + omev1beta1 "github.com/sgl-project/ome/pkg/client/clientset/versioned/typed/ome/v1beta1" + gentype "k8s.io/client-go/gentype" +) + +// fakeKVCachePools implements KVCachePoolInterface +type fakeKVCachePools struct { + *gentype.FakeClientWithList[*v1beta1.KVCachePool, *v1beta1.KVCachePoolList] + Fake *FakeOmeV1beta1 +} + +func newFakeKVCachePools(fake *FakeOmeV1beta1, namespace string) omev1beta1.KVCachePoolInterface { + return &fakeKVCachePools{ + gentype.NewFakeClientWithList[*v1beta1.KVCachePool, *v1beta1.KVCachePoolList]( + fake.Fake, + namespace, + v1beta1.SchemeGroupVersion.WithResource("kvcachepools"), + v1beta1.SchemeGroupVersion.WithKind("KVCachePool"), + func() *v1beta1.KVCachePool { return &v1beta1.KVCachePool{} }, + func() *v1beta1.KVCachePoolList { return &v1beta1.KVCachePoolList{} }, + func(dst, src *v1beta1.KVCachePoolList) { dst.ListMeta = src.ListMeta }, + func(list *v1beta1.KVCachePoolList) []*v1beta1.KVCachePool { return gentype.ToPointerSlice(list.Items) }, + func(list *v1beta1.KVCachePoolList, items []*v1beta1.KVCachePool) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go index 4f90ccaa..2b494a52 100644 --- 
a/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/fake/fake_ome_client.go @@ -36,6 +36,10 @@ func (c *FakeOmeV1beta1) InferenceServices(namespace string) v1beta1.InferenceSe return newFakeInferenceServices(c, namespace) } +func (c *FakeOmeV1beta1) KVCachePools(namespace string) v1beta1.KVCachePoolInterface { + return newFakeKVCachePools(c, namespace) +} + func (c *FakeOmeV1beta1) ServingRuntimes(namespace string) v1beta1.ServingRuntimeInterface { return newFakeServingRuntimes(c, namespace) } diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go index 8d6fa046..d87e4ee6 100644 --- a/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/generated_expansion.go @@ -14,4 +14,6 @@ type FineTunedWeightExpansion interface{} type InferenceServiceExpansion interface{} +type KVCachePoolExpansion interface{} + type ServingRuntimeExpansion interface{} diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/kvcachepool.go b/pkg/client/clientset/versioned/typed/ome/v1beta1/kvcachepool.go new file mode 100644 index 00000000..b51919ad --- /dev/null +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/kvcachepool.go @@ -0,0 +1,54 @@ +// Code generated by client-gen. DO NOT EDIT. + +package v1beta1 + +import ( + context "context" + + omev1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + scheme "github.com/sgl-project/ome/pkg/client/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// KVCachePoolsGetter has a method to return a KVCachePoolInterface. +// A group's client should implement this interface. 
+type KVCachePoolsGetter interface { + KVCachePools(namespace string) KVCachePoolInterface +} + +// KVCachePoolInterface has methods to work with KVCachePool resources. +type KVCachePoolInterface interface { + Create(ctx context.Context, kVCachePool *omev1beta1.KVCachePool, opts v1.CreateOptions) (*omev1beta1.KVCachePool, error) + Update(ctx context.Context, kVCachePool *omev1beta1.KVCachePool, opts v1.UpdateOptions) (*omev1beta1.KVCachePool, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, kVCachePool *omev1beta1.KVCachePool, opts v1.UpdateOptions) (*omev1beta1.KVCachePool, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*omev1beta1.KVCachePool, error) + List(ctx context.Context, opts v1.ListOptions) (*omev1beta1.KVCachePoolList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *omev1beta1.KVCachePool, err error) + KVCachePoolExpansion +} + +// kVCachePools implements KVCachePoolInterface +type kVCachePools struct { + *gentype.ClientWithList[*omev1beta1.KVCachePool, *omev1beta1.KVCachePoolList] +} + +// newKVCachePools returns a KVCachePools +func newKVCachePools(c *OmeV1beta1Client, namespace string) *kVCachePools { + return &kVCachePools{ + gentype.NewClientWithList[*omev1beta1.KVCachePool, *omev1beta1.KVCachePoolList]( + "kvcachepools", + c.RESTClient(), + scheme.ParameterCodec, + namespace, + func() *omev1beta1.KVCachePool { return &omev1beta1.KVCachePool{} }, + func() *omev1beta1.KVCachePoolList { return &omev1beta1.KVCachePoolList{} }, + ), + } +} diff --git a/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go 
b/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go index 30fbe169..c0db29ab 100644 --- a/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go +++ b/pkg/client/clientset/versioned/typed/ome/v1beta1/ome_client.go @@ -18,6 +18,7 @@ type OmeV1beta1Interface interface { ClusterServingRuntimesGetter FineTunedWeightsGetter InferenceServicesGetter + KVCachePoolsGetter ServingRuntimesGetter } @@ -50,6 +51,10 @@ func (c *OmeV1beta1Client) InferenceServices(namespace string) InferenceServiceI return newInferenceServices(c, namespace) } +func (c *OmeV1beta1Client) KVCachePools(namespace string) KVCachePoolInterface { + return newKVCachePools(c, namespace) +} + func (c *OmeV1beta1Client) ServingRuntimes(namespace string) ServingRuntimeInterface { return newServingRuntimes(c, namespace) } diff --git a/pkg/client/informers/externalversions/generic.go b/pkg/client/informers/externalversions/generic.go index 4ca3f72c..5cda6325 100644 --- a/pkg/client/informers/externalversions/generic.go +++ b/pkg/client/informers/externalversions/generic.go @@ -49,6 +49,8 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().FineTunedWeights().Informer()}, nil case v1beta1.SchemeGroupVersion.WithResource("inferenceservices"): return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().InferenceServices().Informer()}, nil + case v1beta1.SchemeGroupVersion.WithResource("kvcachepools"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().KVCachePools().Informer()}, nil case v1beta1.SchemeGroupVersion.WithResource("servingruntimes"): return &genericInformer{resource: resource.GroupResource(), informer: f.Ome().V1beta1().ServingRuntimes().Informer()}, nil diff --git a/pkg/client/informers/externalversions/ome/v1beta1/interface.go b/pkg/client/informers/externalversions/ome/v1beta1/interface.go 
index 7463f41a..2d2990ed 100644 --- a/pkg/client/informers/externalversions/ome/v1beta1/interface.go +++ b/pkg/client/informers/externalversions/ome/v1beta1/interface.go @@ -20,6 +20,8 @@ type Interface interface { FineTunedWeights() FineTunedWeightInformer // InferenceServices returns a InferenceServiceInformer. InferenceServices() InferenceServiceInformer + // KVCachePools returns a KVCachePoolInformer. + KVCachePools() KVCachePoolInformer // ServingRuntimes returns a ServingRuntimeInformer. ServingRuntimes() ServingRuntimeInformer } @@ -65,6 +67,11 @@ func (v *version) InferenceServices() InferenceServiceInformer { return &inferenceServiceInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} } +// KVCachePools returns a KVCachePoolInformer. +func (v *version) KVCachePools() KVCachePoolInformer { + return &kVCachePoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} + // ServingRuntimes returns a ServingRuntimeInformer. func (v *version) ServingRuntimes() ServingRuntimeInformer { return &servingRuntimeInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} diff --git a/pkg/client/informers/externalversions/ome/v1beta1/kvcachepool.go b/pkg/client/informers/externalversions/ome/v1beta1/kvcachepool.go new file mode 100644 index 00000000..4934b5ad --- /dev/null +++ b/pkg/client/informers/externalversions/ome/v1beta1/kvcachepool.go @@ -0,0 +1,86 @@ +// Code generated by informer-gen. DO NOT EDIT. 
+ +package v1beta1 + +import ( + context "context" + time "time" + + apisomev1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + versioned "github.com/sgl-project/ome/pkg/client/clientset/versioned" + internalinterfaces "github.com/sgl-project/ome/pkg/client/informers/externalversions/internalinterfaces" + omev1beta1 "github.com/sgl-project/ome/pkg/client/listers/ome/v1beta1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// KVCachePoolInformer provides access to a shared informer and lister for +// KVCachePools. +type KVCachePoolInformer interface { + Informer() cache.SharedIndexInformer + Lister() omev1beta1.KVCachePoolLister +} + +type kVCachePoolInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewKVCachePoolInformer constructs a new informer for KVCachePool type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewKVCachePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredKVCachePoolInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredKVCachePoolInformer constructs a new informer for KVCachePool type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredKVCachePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).List(context.Background(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).Watch(context.Background(), options) + }, + ListWithContextFunc: func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).List(ctx, options) + }, + WatchFuncWithContext: func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.OmeV1beta1().KVCachePools(namespace).Watch(ctx, options) + }, + }, + &apisomev1beta1.KVCachePool{}, + resyncPeriod, + indexers, + ) +} + +func (f *kVCachePoolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredKVCachePoolInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *kVCachePoolInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&apisomev1beta1.KVCachePool{}, f.defaultInformer) +} + +func (f *kVCachePoolInformer) Lister() omev1beta1.KVCachePoolLister { + return omev1beta1.NewKVCachePoolLister(f.Informer().GetIndexer()) +} diff --git a/pkg/client/listers/ome/v1beta1/expansion_generated.go 
b/pkg/client/listers/ome/v1beta1/expansion_generated.go index aad67952..57480c01 100644 --- a/pkg/client/listers/ome/v1beta1/expansion_generated.go +++ b/pkg/client/listers/ome/v1beta1/expansion_generated.go @@ -38,6 +38,14 @@ type InferenceServiceListerExpansion interface{} // InferenceServiceNamespaceLister. type InferenceServiceNamespaceListerExpansion interface{} +// KVCachePoolListerExpansion allows custom methods to be added to +// KVCachePoolLister. +type KVCachePoolListerExpansion interface{} + +// KVCachePoolNamespaceListerExpansion allows custom methods to be added to +// KVCachePoolNamespaceLister. +type KVCachePoolNamespaceListerExpansion interface{} + // ServingRuntimeListerExpansion allows custom methods to be added to // ServingRuntimeLister. type ServingRuntimeListerExpansion interface{} diff --git a/pkg/client/listers/ome/v1beta1/kvcachepool.go b/pkg/client/listers/ome/v1beta1/kvcachepool.go new file mode 100644 index 00000000..ae2c423e --- /dev/null +++ b/pkg/client/listers/ome/v1beta1/kvcachepool.go @@ -0,0 +1,54 @@ +// Code generated by lister-gen. DO NOT EDIT. + +package v1beta1 + +import ( + omev1beta1 "github.com/sgl-project/ome/pkg/apis/ome/v1beta1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" +) + +// KVCachePoolLister helps list KVCachePools. +// All objects returned here must be treated as read-only. +type KVCachePoolLister interface { + // List lists all KVCachePools in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*omev1beta1.KVCachePool, err error) + // KVCachePools returns an object that can list and get KVCachePools. + KVCachePools(namespace string) KVCachePoolNamespaceLister + KVCachePoolListerExpansion +} + +// kVCachePoolLister implements the KVCachePoolLister interface. 
+type kVCachePoolLister struct { + listers.ResourceIndexer[*omev1beta1.KVCachePool] +} + +// NewKVCachePoolLister returns a new KVCachePoolLister. +func NewKVCachePoolLister(indexer cache.Indexer) KVCachePoolLister { + return &kVCachePoolLister{listers.New[*omev1beta1.KVCachePool](indexer, omev1beta1.Resource("kvcachepool"))} +} + +// KVCachePools returns an object that can list and get KVCachePools. +func (s *kVCachePoolLister) KVCachePools(namespace string) KVCachePoolNamespaceLister { + return kVCachePoolNamespaceLister{listers.NewNamespaced[*omev1beta1.KVCachePool](s.ResourceIndexer, namespace)} +} + +// KVCachePoolNamespaceLister helps list and get KVCachePools. +// All objects returned here must be treated as read-only. +type KVCachePoolNamespaceLister interface { + // List lists all KVCachePools in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*omev1beta1.KVCachePool, err error) + // Get retrieves the KVCachePool from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*omev1beta1.KVCachePool, error) + KVCachePoolNamespaceListerExpansion +} + +// kVCachePoolNamespaceLister implements the KVCachePoolNamespaceLister +// interface. +type kVCachePoolNamespaceLister struct { + listers.ResourceIndexer[*omev1beta1.KVCachePool] +}