dgenio · dgenio · May 14, 2026 · May 13, 2026 · May 14, 2026 · May 14, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -52,7 +52,7 @@ Use these terms consistently. Never substitute synonyms:
 - Error messages are part of the contract — tests must assert both exception type and message.
 - Keep modules ≤ 300 lines. Split if needed.
 - No randomness in matching, routing, or summarization. Deterministic outputs always.
-- No new dependencies without justification. The dep list is intentionally minimal (`httpx` only).
+- No new dependencies without justification. The dep list is intentionally minimal (`httpx`, `pydantic`).
 
 ## Security rules
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
+- LLM tool-format adapters and middleware (`agent_kernel.adapters`): `OpenAIMiddleware` (OpenAI
+  Responses API + Chat Completions, auto-detected on input) and `AnthropicMiddleware` (Anthropic
+  Messages with `cache_control` support). Both translate `Capability` objects to vendor tool
+  schemas, route tool calls through the full kernel pipeline (grant → invoke → firewall → trace),
+  and surface kernel errors (`PolicyDenied`, `CapabilityNotFound`, `DriverError`) as tool-result
+  errors so the LLM can react. Pre/post hooks (`intercept_tool_call`, `intercept_tool_result`,
+  sync or async) support logging, metrics, approval gates, and per-call justification injection.
+  Zero runtime dependency on the `openai` / `anthropic` SDK packages. (#55, #50, #40)
+- New `Capability` fields for LLM adapters: `parameters_model: type[pydantic.BaseModel] | None`
+  (input schema source + validation), `parameters_schema: dict | None` (raw JSON Schema escape
+  hatch), and `tool_hints: ToolHints | None` (vendor hints — Anthropic `cache_control`, OpenAI
+  `strict` mode). All default to `None`; existing capabilities and tests are unaffected.
+- New `ToolHints` dataclass and `OpenAIMiddleware` / `AnthropicMiddleware` top-level exports.
+- New `AdapterParseError(AgentKernelError)` exception raised by adapter parse / validation
+  helpers (`tool_call_to_request`, `tool_use_to_request`, `make_namespace_safe_name`) instead
+  of bare `ValueError`. Satisfies `AGENTS.md`'s "no bare ValueError to callers" rule and
+  gives consumers a stable adapter-specific exception type. Also raised for capability IDs that
+  contain the reserved OpenAI namespace separator `__` (which would otherwise produce
+  colliding tool names).
+- `Kernel.list_capabilities()` convenience accessor returning every registered capability in
+  registration order. Used by the new adapters but generally useful for tooling that needs to
+  enumerate the registry without keyword search.
 - Declarative policy engine (`DeclarativePolicyEngine`) that loads rules from YAML or TOML files.
   Rules are evaluated top-down with first-match-wins semantics; supports `safety_class`, `sensitivity`,
   `roles`, `attributes`, and `min_justification` match conditions. (#42)
@@ -28,6 +50,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Example policy files in `examples/policies/` (YAML and TOML formats).
 
 ### Changed
+- Runtime dependencies now include `pydantic>=2` in addition to `httpx`. Pydantic is used by the new
+  `agent_kernel.adapters` package for JSON-Schema generation and argument validation when a
+  `Capability` declares a `parameters_model`. Existing kernel behavior is unchanged; pydantic is not
+  imported at module load by anything outside the adapters (which `import agent_kernel` itself loads).
 - `PolicyEngine` protocol no longer requires `explain()`. Engines that need to support
   `Kernel.explain_denial()` should implement the new `ExplainingPolicyEngine` protocol. Built-in
   engines satisfy both. This avoids a breaking typing change for downstream implementers.

diff --git a/docs/architecture.md b/docs/architecture.md
@@ -87,3 +87,24 @@ Stores full results by opaque handle ID with TTL. `expand()` supports pagination
 
 ### TraceStore
 Records every `ActionTrace`. `explain(action_id)` returns the full audit record.
+
+### Adapters (`agent_kernel.adapters`)
+Vendor-specific tool-format adapters that translate between `Capability` objects
+and the tool shapes used by LLM provider APIs:
+
+- **`OpenAIMiddleware`** — emits OpenAI tool definitions (Responses API or Chat
+  Completions shape), parses `response.output` / `message.tool_calls`, and
+  returns `function_call_output` / tool-result messages. Dotted capability IDs
+  map to `namespace__function` (OpenAI tool names cannot contain `.`).
+- **`AnthropicMiddleware`** — emits Anthropic tool definitions with optional
+  `cache_control` blocks, parses `tool_use` content blocks, and returns
+  `tool_result` content blocks. Dotted capability IDs are preserved as-is.
+
+Both classes share `BaseToolMiddleware`, which owns hook registration
+(`intercept_tool_call`, `intercept_tool_result`), pre/post dispatch (sync or
+async), and conversion of kernel exceptions (`PolicyDenied`,
+`CapabilityNotFound`, `DriverError`) into tool-result errors the LLM can react
+to. Input arguments are validated against `Capability.parameters_model`
+(pydantic) when present. **Zero runtime dependency** on the `openai` /
+`anthropic` SDK packages. See [`docs/integrations.md`](integrations.md) for
+usage examples.
diff --git a/docs/integrations.md b/docs/integrations.md
@@ -128,3 +128,202 @@ When mapping MCP tools to capabilities, prefer task-shaped names:
 | `write_file` | `fs.write_file` | WRITE |
 | `delete_file` | `fs.delete_file` | DESTRUCTIVE |
 | `execute_code` | `sandbox.run_code` | DESTRUCTIVE |
+
+## LLM tool-format adapters
+
+`agent_kernel.adapters` converts `Capability` objects into the tool shapes
+expected by OpenAI and Anthropic, and routes the matching tool-call objects
+back through the kernel pipeline (grant → invoke → firewall → trace). The
+adapters are pure dict transforms — there is **no runtime dependency** on the
+`openai` or `anthropic` SDK packages.
+
+### Input schemas with pydantic
+
+Capabilities advertise their input schema via two optional fields on
+`Capability`:
+
+- `parameters_model: type[pydantic.BaseModel] | None` — pydantic model. The
+  adapter calls `.model_json_schema()` and validates tool-call arguments
+  against the model before invocation.
+- `parameters_schema: dict | None` — raw JSON Schema, used verbatim. No
+  argument validation is performed (use `parameters_model` for that).
+
+`Capability.allowed_fields` is an **output redaction** control consumed by the
+firewall — it is *not* used as an input schema source.
+
+```python
+from pydantic import BaseModel, Field
+
+from agent_kernel import Capability, SafetyClass
+
+
+class ListInvoicesArgs(BaseModel):
+    customer_id: str
+    limit: int = Field(default=10, ge=1, le=100)
+
+
+list_invoices = Capability(
+    capability_id="billing.list_invoices",
+    name="List Invoices",
+    description="List invoices for a customer",
+    safety_class=SafetyClass.READ,
+    parameters_model=ListInvoicesArgs,
+)
+```
+
+### OpenAI middleware
+
+```python
+import asyncio
+
+from agent_kernel import Kernel, OpenAIMiddleware, Principal
+
+
+async def main() -> None:
+    kernel = Kernel(registry=registry)  # add any other Kernel arguments your setup needs
+    principal = Principal(principal_id="agent-1", roles=["reader"])
+    mw = OpenAIMiddleware(kernel, principal)
+
+    tools = mw.get_tools()                       # → list[dict] for OpenAI SDK
+    # response = await openai_client.responses.create(model=..., tools=tools, ...)
+    # outputs = await mw.handle_tool_calls(response.output)
+    # → list of {"type": "function_call_output", "call_id", "output"} dicts.
+
+
+asyncio.run(main())
+```
+
+The default output shape is **OpenAI Responses API**
+(`function_call_output`). Use `format="chat_completions"` to emit nested
+`{"type": "function", "function": {...}}` tool definitions and
+`{"role": "tool", ...}` result messages instead:
+
+```python
+mw = OpenAIMiddleware(kernel, principal, format="chat_completions")
+```
+
+`handle_tool_calls` auto-detects the input shape per call regardless of the
+configured output format, so you can pass either Responses-API
+`response.output` items or Chat-Completions `message.tool_calls` items.
+
+#### Namespace mapping
+
+OpenAI tool names cannot contain `.`, so dotted capability IDs are mapped to
+double-underscore form on the way out and restored on the way back:
+
+| Capability ID | OpenAI tool name |
+|---|---|
+| `billing.list_invoices` | `billing__list_invoices` |
+| `billing.invoices.list` | `billing__invoices__list` |
+
+Capability IDs that already contain `__` cannot be round-tripped unambiguously
+(`a__b` and `a.b` would both produce the OpenAI tool name `a__b`). The adapter
+rejects them at tool-emit time with an `AdapterParseError` rather than
+silently emitting colliding tools.
+
+#### Strict mode
+
+Set `Capability.tool_hints = ToolHints(strict=True)` to emit the tool
+definition with OpenAI's `strict: true` flag. The adapter normalises the
+schema so every property is required and `additionalProperties` is `false`
+at every level. If normalisation fails (e.g. a schema feature OpenAI strict
+mode does not accept) the adapter falls back to non-strict and emits a
+warning.
+
+**Strict mode caveats**
+
+OpenAI strict mode requires every property be listed in `required`. The
+adapter's normaliser enforces this unconditionally. That means pydantic
+fields with non-`None` defaults — which pydantic itself emits as
+*not* required — will be forced into `required` after normalisation. The
+LLM is then expected to always include the field even though pydantic would
+fall back to the default if it were omitted.
+
+To express a truly-optional field under strict mode, use the `Optional[T]`
+pattern (with `None` as the default):
+
+```python
+class ListInvoicesArgs(BaseModel):
+    customer_id: str           # required, str
+    limit: int = 10            # forced into required by strict mode
+    cursor: str | None = None  # required + nullable (LLM can pass null)
+```
+
+Pydantic emits `Optional[str] = None` (or `str | None = None`) as
+`{"anyOf": [{"type": "string"}, {"type": "null"}]}`. OpenAI strict mode
+accepts `null` as a valid value for such fields, so the LLM can effectively
+"omit" them by passing `null`.
+
+### Anthropic middleware
+
+```python
+import asyncio
+
+from agent_kernel import AnthropicMiddleware, Kernel, Principal
+
+
+async def main() -> None:
+    kernel = Kernel(registry=registry)  # add any other Kernel arguments your setup needs
+    principal = Principal(principal_id="agent-1", roles=["reader"])
+    mw = AnthropicMiddleware(kernel, principal)
+
+    tools = mw.get_tools()                       # → list[dict] for Anthropic SDK
+    # message = await anthropic_client.messages.create(model=..., tools=tools, ...)
+    # tool_results = await mw.handle_tool_uses(message.content)
+    # → list of {"type": "tool_result", "tool_use_id", "content"} blocks.
+
+
+asyncio.run(main())
+```
+
+#### Prompt cache control
+
+Set `Capability.tool_hints = ToolHints(cache_control={"type": "ephemeral"})`
+to attach Anthropic's prompt-cache control block to that capability's tool
+definition. To apply a default to every tool that does not specify its own,
+pass `default_cache_control` to the middleware:
+
+```python
+mw = AnthropicMiddleware(
+    kernel,
+    principal,
+    default_cache_control={"type": "ephemeral"},
+)
+```
+
+### Hooks (pre/post invocation)
+
+Both middlewares accept synchronous or asynchronous callbacks via
+`intercept_tool_call(callback)` and `intercept_tool_result(callback)`. Hooks
+fire in registration order. Pre-hooks receive a mutable `ToolCallEvent`
+(useful for logging, metrics, approval gates, injecting `justification` for
+WRITE/DESTRUCTIVE calls); post-hooks receive a `ToolResultEvent` carrying
+either the kernel `Frame` or an error string.
+
+```python
+async def audit(event):
+    log.info("tool_call", capability=event.capability_id, principal=event.principal_id)
+
+def gate(event):
+    if event.capability_id.startswith("billing.delete"):
+        event.aborted = True
+        event.abort_reason = "deletions require manual approval"
+
+mw.intercept_tool_call(audit)
+mw.intercept_tool_call(gate)
+```
+
+Setting `event.aborted = True` skips kernel invocation and produces a
+tool-result error block containing `event.abort_reason`. Setting
+`event.justification = "..."` lets a hook supply the per-call justification
+the policy engine requires for WRITE/DESTRUCTIVE capabilities. Per-call
+overrides can also be threaded through arguments as `_justification` (the
+adapter pops it before passing args to the driver).
+
+### Errors are tool results, not exceptions
+
+`PolicyDenied`, `CapabilityNotFound`, `DriverError`, argument-validation
+failures, and hook abort signals are all returned to the LLM as a tool result
+with `error: true` (Anthropic also sets `is_error: true`). Raised exceptions
+would crash the surrounding agent loop; the LLM cannot react to an
+exception.
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,10 @@ classifiers = [
     "Topic :: Security",
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
-dependencies = ["httpx>=0.27"]
+dependencies = [
+    "httpx>=0.27",
+    "pydantic>=2",
+]
 
 [project.urls]
 Homepage = "https://github.com/dgenio/agent-kernel"

diff --git a/src/agent_kernel/__init__.py b/src/agent_kernel/__init__.py
@@ -25,6 +25,10 @@
 
     from agent_kernel import HandleStore, TraceStore
 
+LLM tool-format adapters::
+
+    from agent_kernel import OpenAIMiddleware, AnthropicMiddleware
+
 Errors::
 
     from agent_kernel import (
@@ -35,12 +39,14 @@
     )
 """
 
+from .adapters import AnthropicMiddleware, OpenAIMiddleware
 from .drivers.base import Driver, ExecutionContext
 from .drivers.http import HTTPDriver
 from .drivers.mcp import MCPDriver
 from .drivers.memory import InMemoryDriver, make_billing_driver
 from .enums import SafetyClass, SensitivityTag
 from .errors import (
+    AdapterParseError,
     AgentKernelError,
     CapabilityAlreadyRegistered,
     CapabilityNotFound,
@@ -76,6 +82,7 @@
     RawResult,
     ResponseMode,
     RoutePlan,
+    ToolHints,
 )
 from .policy import DefaultPolicyEngine, ExplainingPolicyEngine, PolicyEngine
 from .policy_dsl import DeclarativePolicyEngine, PolicyMatch, PolicyRule
@@ -111,10 +118,12 @@
     "ResponseMode",
     "RoutePlan",
     "ActionTrace",
+    "ToolHints",
     # enums
     "SafetyClass",
     "SensitivityTag",
     # errors
+    "AdapterParseError",
     "AgentKernelError",
     "CapabilityAlreadyRegistered",
     "CapabilityNotFound",
@@ -152,4 +161,7 @@
     # stores
     "HandleStore",
     "TraceStore",
+    # adapters
+    "AnthropicMiddleware",
+    "OpenAIMiddleware",
 ]
diff --git a/src/agent_kernel/adapters/__init__.py b/src/agent_kernel/adapters/__init__.py
@@ -0,0 +1,35 @@
+"""LLM tool-format adapters and middleware.
+
+The adapter layer translates between :class:`~agent_kernel.Capability` objects
+and vendor-specific tool shapes (OpenAI Responses / Chat Completions,
+Anthropic Messages) without depending on the vendor SDKs at runtime. The
+middleware classes also route a vendor's tool-call objects through the full
+kernel pipeline (grant → invoke → firewall → trace), returning vendor-shaped
+tool-result objects.
+
+The two middleware classes share a common base (:class:`BaseToolMiddleware`),
+which owns hook registration, dispatch, and error-as-result conversion.
+"""
+
+from __future__ import annotations
+
+from ._base import (
+    BaseToolMiddleware,
+    ToolCallEvent,
+    ToolCallHook,
+    ToolResultEvent,
+    ToolResultHook,
+)
+from .anthropic import AnthropicMiddleware
+from .openai import OpenAIMiddleware, OpenAIToolFormat
+
+__all__ = [
+    "AnthropicMiddleware",
+    "BaseToolMiddleware",
+    "OpenAIMiddleware",
+    "OpenAIToolFormat",
+    "ToolCallEvent",
+    "ToolCallHook",
+    "ToolResultEvent",
+    "ToolResultHook",
+]