Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
269 changes: 140 additions & 129 deletions oeps/0010-kv-cache-server-component/README.md

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions pkg/apis/ome/v1beta1/inference_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ type InferenceServiceSpec struct {
// AcceleratorSelector specifies accelerator selection preferences
// +optional
AcceleratorSelector *AcceleratorSelector `json:"acceleratorSelector,omitempty"`

// KVCachePool references a namespace-scoped KVCachePool that this service
// should attach to for distributed KV cache reuse. Connector behavior is
// derived from the referenced pool and from the selected ServingRuntime's
// kvCacheConnectors.
// +optional
KVCachePool *KVCachePoolRef `json:"kvCachePool,omitempty"`
}

// AcceleratorSelector defines how to select accelerators for the InferenceService
Expand Down
321 changes: 321 additions & 0 deletions pkg/apis/ome/v1beta1/kvcachepool_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
package v1beta1

import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"knative.dev/pkg/apis"
duckv1 "knative.dev/pkg/apis/duck/v1"
)

// KVCachePoolDeploymentMode describes how OME reconciles a KVCachePool.
// It is a string-backed enum; the accepted values are restricted by the Enum
// marker below and mirrored by the KVCachePool* constants.
// +kubebuilder:validation:Enum=RawDeployment;NodeLocal;DistributedStore;ProviderManaged
type KVCachePoolDeploymentMode string

// Supported KVCachePoolDeploymentMode values. Keep this list in sync with the
// Enum marker on the type: the marker, not this block, is what the generated
// CRD schema enforces.
const (
KVCachePoolRawDeployment KVCachePoolDeploymentMode = "RawDeployment"
KVCachePoolNodeLocal KVCachePoolDeploymentMode = "NodeLocal"
KVCachePoolDistributedStore KVCachePoolDeploymentMode = "DistributedStore"
KVCachePoolProviderManaged KVCachePoolDeploymentMode = "ProviderManaged"
)

// KVCacheProvider identifies the primary KV cache integration layer.
// It is a string-backed enum; the accepted values are restricted by the Enum
// marker below and mirrored by the KVCacheProvider* constants.
// +kubebuilder:validation:Enum=LMCache;Mooncake;NIXL
type KVCacheProvider string

// Supported KVCacheProvider values. Keep this list in sync with the Enum
// marker on the type.
const (
KVCacheProviderLMCache KVCacheProvider = "LMCache"
KVCacheProviderMooncake KVCacheProvider = "Mooncake"
KVCacheProviderNIXL KVCacheProvider = "NIXL"
)

// KVCacheBackendType identifies a storage or transfer backend used underneath a provider.
// Note that "Mooncake" and "NIXL" appear both here and in KVCacheProvider; the
// two enums are distinct Go types and are validated independently.
// +kubebuilder:validation:Enum=Local;CPURAM;Mooncake;NIXL;Redis
type KVCacheBackendType string

// Supported KVCacheBackendType values. Keep this list in sync with the Enum
// marker on the type.
const (
KVCacheBackendLocal KVCacheBackendType = "Local"
KVCacheBackendCPURAM KVCacheBackendType = "CPURAM"
KVCacheBackendMooncake KVCacheBackendType = "Mooncake"
KVCacheBackendNIXL KVCacheBackendType = "NIXL"
KVCacheBackendRedis KVCacheBackendType = "Redis"
)

// KVCacheEvictionPolicy is the desired cache eviction behavior. Leaving the
// field unset selects the provider's default policy.
// Enum values are matched case-sensitively; note that "noop" is lower-case
// while the other values are capitalized.
// +kubebuilder:validation:Enum=LRU;IsolatedLRU;noop
type KVCacheEvictionPolicy string

// Supported KVCacheEvictionPolicy values. Keep this list in sync with the
// Enum marker on the type.
const (
KVCacheEvictionLRU KVCacheEvictionPolicy = "LRU"
KVCacheEvictionIsolatedLRU KVCacheEvictionPolicy = "IsolatedLRU"
KVCacheEvictionNoop KVCacheEvictionPolicy = "noop"
)

// KVCachePoolSpec defines the desired state of a KVCachePool.
// Provider and DeploymentMode are required; Cache and Workloads are optional.
type KVCachePoolSpec struct {
// Provider identifies the primary KV cache integration layer.
// +required
Provider KVCacheProviderSpec `json:"provider"`

// DeploymentMode describes how OME reconciles this pool.
// +required
DeploymentMode KVCachePoolDeploymentMode `json:"deploymentMode"`

// Cache describes provider-neutral cache policy.
// +optional
Cache *KVCachePolicySpec `json:"cache,omitempty"`

// Workloads contains pod and container configuration for OME-managed pool
// roles. This is the only place where pool pod/container config appears.
// The list is a map keyed by each entry's name, so names must be unique
// within a pool.
// +optional
// +listType=map
// +listMapKey=name
Workloads []KVCachePoolWorkloadSpec `json:"workloads,omitempty"`
}

// KVCacheProviderSpec identifies the primary provider for a KVCachePool and any
// storage or transfer backends used underneath the provider.
type KVCacheProviderSpec struct {
// Name selects the primary provider; it must be one of the
// KVCacheProvider enum values.
// +required
Name KVCacheProvider `json:"name"`

// Backends identifies storage or transfer backends used by the provider.
// The list is a map keyed by each entry's name, so names must be unique.
// +optional
// +listType=map
// +listMapKey=name
Backends []KVCacheBackendSpec `json:"backends,omitempty"`

// Config contains provider-scoped settings that are not portable OME API.
// It is stored as raw JSON and unknown fields are preserved rather than
// pruned, so its contents are not validated by the CRD schema.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
Config *runtime.RawExtension `json:"config,omitempty"`
}

// KVCacheBackendSpec identifies a storage or transfer backend used by a provider.
type KVCacheBackendSpec struct {
// Name identifies this backend entry. It is a free-form string (not an
// enum) and serves as the list-map key of KVCacheProviderSpec.Backends.
// +required
Name string `json:"name"`

// Type identifies the backend implementation.
// +required
Type KVCacheBackendType `json:"type"`

// Config contains backend-scoped settings that are not portable OME API.
// It is stored as raw JSON and unknown fields are preserved rather than
// pruned, so its contents are not validated by the CRD schema.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
Config *runtime.RawExtension `json:"config,omitempty"`
}

// KVCachePolicySpec captures provider-neutral cache policy. Providers translate
// these settings to their native size, memory, segment, or storage knobs.
// Every field is an optional pointer so that "unset" stays distinguishable
// from any explicit value.
type KVCachePolicySpec struct {
// Capacity is the intended total pool capacity.
// +optional
Capacity *resource.Quantity `json:"capacity,omitempty"`

// EvictionPolicy is the desired eviction behavior. When unset, the
// provider's default policy applies. No API-level default is declared
// here on purpose: a schema default would be written into the object by
// the API server whenever the cache block is present, making it
// impossible to select the provider default.
// +optional
EvictionPolicy *KVCacheEvictionPolicy `json:"evictionPolicy,omitempty"`

// ChunkSize is a provider-neutral chunk/page/block size hint.
// +optional
ChunkSize *resource.Quantity `json:"chunkSize,omitempty"`
}

// KVCachePoolWorkloadSpec describes pod and container configuration for an
// OME-managed pool role such as server, master, or store. It composes the same
// PodSpec and ComponentExtensionSpec used by engine, decoder, and router
// components so pool workloads inherit consistent replica, autoscaling, label,
// and PDB semantics.
//
// Both embedded structs are serialized inline, so their fields appear at the
// same JSON level as name.
type KVCachePoolWorkloadSpec struct {
// Name identifies the provider or backend role, such as server, master, or
// store. It is the list-map key of KVCachePoolSpec.Workloads.
// +required
Name string `json:"name"`

// PodSpec provides pod-level customization for this pool workload.
// Container configuration is expressed through PodSpec.Containers.
// +optional
PodSpec `json:",inline"`

// ComponentExtensionSpec provides replicas, autoscaling, labels,
// annotations, and PodDisruptionBudget configuration for this workload.
ComponentExtensionSpec `json:",inline"`
}

// KVCachePoolRef is a reference from another resource to a KVCachePool.
// The reference carries no namespace field.
type KVCachePoolRef struct {
// Name of the KVCachePool being referenced.
// +required
Name string `json:"name"`

// Kind of the referenced resource. Defaults to KVCachePool.
// The default is applied by the CRD schema, not by Go code, so the
// pointer may still be nil on objects built in-process.
// +optional
// +kubebuilder:default="KVCachePool"
Kind *string `json:"kind,omitempty"`

// APIGroup of the referenced resource. Defaults to ome.io.
// The default is applied by the CRD schema, not by Go code, so the
// pointer may still be nil on objects built in-process.
// +optional
// +kubebuilder:default="ome.io"
APIGroup *string `json:"apiGroup,omitempty"`
}

// KVCacheConnectorSpec describes runtime-side support for attaching serving
// components to a KVCachePool with a specific provider.
type KVCacheConnectorSpec struct {
// Provider identifies the pool provider this connector supports.
// +required
Provider KVCacheProvider `json:"provider"`

// DeploymentModes lists supported pool deployment modes. An empty list
// means all modes supported by this provider adapter.
// +optional
// +listType=atomic
DeploymentModes []KVCachePoolDeploymentMode `json:"deploymentModes,omitempty"`

// Components provides component-specific connector configuration. Keys
// must be one of the InferenceService ComponentType values (engine,
// decoder, router, predictor); other keys are rejected at admission.
// The CEL rule below iterates over the map keys; its allow-list must stay
// identical to the list in this comment and in the rule's message.
// +optional
// +kubebuilder:validation:XValidation:rule="self.all(k, k in ['engine','decoder','router','predictor'])",message="components key must be one of engine, decoder, router, predictor"
Components map[ComponentType]KVCacheConnectorComponentSpec `json:"components,omitempty"`
}

// KVCacheConnectorComponentSpec configures runtime-side connector behavior for
// a single serving component. All fields are optional.
type KVCacheConnectorComponentSpec struct {
// ConnectorConfig is typed connector intent interpreted by the
// provider/runtime adapter.
// +optional
ConnectorConfig *KVCacheConnectorConfig `json:"connectorConfig,omitempty"`

// RuntimeArgsOverride provides connector-specific runtime args. Matching
// args replace existing values; missing args are appended.
// The list is atomic: updates replace it as a whole rather than merging
// individual entries.
// +optional
// +listType=atomic
RuntimeArgsOverride []string `json:"runtimeArgsOverride,omitempty"`

// EnvironmentOverride provides connector-specific environment variables
// as a name-to-value map.
// +optional
EnvironmentOverride map[string]string `json:"environmentOverride,omitempty"`
}

// KVCacheConnectorConfig mirrors the runtime --kv-transfer-config JSON
// payload. Either set the inline fields or reference a ConfigMap holding the
// full JSON.
//
// The schema does not enforce that the two styles are mutually exclusive;
// per the ConfigMapRef documentation, the ConfigMap wins when both are set.
type KVCacheConnectorConfig struct {
// ConnectorClass maps to "kv_connector".
// +optional
ConnectorClass *string `json:"connectorClass,omitempty"`

// Role maps to "kv_role".
// +optional
Role *string `json:"role,omitempty"`

// ExtraConfig maps to "kv_connector_extra_config".
// It is stored as raw JSON with unknown fields preserved, so its contents
// are not validated by the CRD schema.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
ExtraConfig *runtime.RawExtension `json:"extraConfig,omitempty"`

// ConfigMapRef sources the full --kv-transfer-config JSON from a
// ConfigMap; when set, the inline fields are ignored.
// The reference is name-only (LocalObjectReference), i.e. it carries no
// namespace.
// +optional
ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"`
}

// KVCachePoolStatus describes the observed state of a KVCachePool.
type KVCachePoolStatus struct {
// Conditions for the KVCachePool. The controller sets the standard `Ready`
// condition along with any provider- or workload-specific conditions.
// The embedded duck-typed status is serialized inline, so its fields
// appear directly under status.
duckv1.Status `json:",inline"`

// Connection contains normalized connection information consumed by
// ServingRuntime connector adapters.
// +optional
Connection *KVCachePoolConnectionStatus `json:"connection,omitempty"`

// Workloads reports provider workload status.
// The list is a map keyed by each entry's name.
// +optional
// +listType=map
// +listMapKey=name
Workloads []KVCachePoolWorkloadStatus `json:"workloads,omitempty"`
}

// KVCachePoolConnectionStatus normalizes connection information for
// runtime-side connector injection. All fields are optional.
type KVCachePoolConnectionStatus struct {
// Endpoint is the primary in-cluster endpoint when one exists.
// +optional
Endpoint *apis.URL `json:"endpoint,omitempty"`

// Ports lists named connection ports.
// The list is a map keyed by each entry's name.
// +optional
// +listType=map
// +listMapKey=name
Ports []KVCachePoolPortStatus `json:"ports,omitempty"`

// ConfigMapRef points to provider-generated connection config when needed.
// The reference is name-only (LocalObjectReference).
// +optional
ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"`

// SecretRef points to provider-generated credentials when needed.
// The reference is name-only (LocalObjectReference).
// +optional
SecretRef *corev1.LocalObjectReference `json:"secretRef,omitempty"`

// ProviderStatus contains provider-scoped observed state, not desired
// configuration.
// It is stored as raw JSON with unknown fields preserved.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
ProviderStatus *runtime.RawExtension `json:"providerStatus,omitempty"`
}

// KVCachePoolPortStatus is a named connection port published by a pool.
type KVCachePoolPortStatus struct {
// Name identifies the port. It is the list-map key of
// KVCachePoolConnectionStatus.Ports.
// +required
Name string `json:"name"`
// Port is the port number. No range validation is declared on this field.
// +required
Port int32 `json:"port"`
}

// KVCachePoolWorkloadStatus reports the observed state of a pool workload
// role.
type KVCachePoolWorkloadStatus struct {
// Name identifies the workload role. It is the list-map key of
// KVCachePoolStatus.Workloads.
// +required
Name string `json:"name"`
// ReadyReplicas is the ready replica count reported for this role. The
// JSON tag has no omitempty, so a zero value is still serialized.
ReadyReplicas int32 `json:"readyReplicas"`
// DesiredReplicas is the desired replica count reported for this role.
// The JSON tag has no omitempty, so a zero value is still serialized.
DesiredReplicas int32 `json:"desiredReplicas"`
}

// KVCachePool is the Schema for distributed KV cache pools.
// It is a root API object with a status subresource. The plural resource name
// is kvcachepools with short name kvcp, and the print columns show the
// provider name, deployment mode, Ready condition status, and age.
// +k8s:openapi-gen=true
// +genclient
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Provider",type="string",JSONPath=".spec.provider.name"
// +kubebuilder:printcolumn:name="Mode",type="string",JSONPath=".spec.deploymentMode"
// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
// +kubebuilder:resource:path=kvcachepools,shortName=kvcp
type KVCachePool struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec is the desired state of the pool.
Spec KVCachePoolSpec `json:"spec,omitempty"`
// Status is the observed state of the pool, served through the status
// subresource declared above.
Status KVCachePoolStatus `json:"status,omitempty"`
}

// KVCachePoolList contains a list of KVCachePool.
// +k8s:openapi-gen=true
// +kubebuilder:object:root=true
type KVCachePoolList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the KVCachePool objects in this list. The JSON tag has no
// omitempty, so the key is always emitted.
Items []KVCachePool `json:"items"`
}

// init registers KVCachePool and KVCachePoolList with the package-level
// SchemeBuilder at package load time.
func init() {
SchemeBuilder.Register(&KVCachePool{}, &KVCachePoolList{})
}
9 changes: 9 additions & 0 deletions pkg/apis/ome/v1beta1/servingruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,15 @@ type ServingRuntimeSpec struct {
// AcceleratorRequirements specifies the accelerator requirements for this runtime
// +optional
AcceleratorRequirements *AcceleratorRequirements `json:"acceleratorRequirements,omitempty"`

// KVCacheConnectors describes runtime-side support for attaching serving
// components to referenced KVCachePools. The InferenceService controller
// uses these entries to inject provider-specific connector args, env, and
// configuration when a service references a KVCachePool.
// +optional
// +listType=map
// +listMapKey=provider
KVCacheConnectors []KVCacheConnectorSpec `json:"kvCacheConnectors,omitempty"`
}

// AcceleratorRequirements specifies the accelerator requirements for this runtime
Expand Down
Loading
Loading