From 41a3a5773bcfa82205066877955600887bc0e95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Mon, 11 May 2026 09:44:22 +0200 Subject: [PATCH 1/8] =?UTF-8?q?feat(sdk):=20canonical-object=20pipeline=20?= =?UTF-8?q?=E2=80=94=20SchemaHandler=20table,=20BufferAnchor,=20push=5Fmes?= =?UTF-8?q?sage=5Fv2=20ABI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A coherent set of changes across pj_base and pj_plugins that establishes the canonical-object pipeline. Canonical-object SDK (pj_base/sdk/canonical_object.hpp): - BufferAnchor + PayloadView for zero-copy payload sharing (Span view + shared_ptr anchor). - sdk::Image, CompressedImage, PointCloud canonical types built around the view+anchor pattern so parsers can return them without copying the source bytes. - CanonicalObjectKind enum + SchemaClassification descriptor. - PixelFormat with kRGB888/kRGBA8888/kBGR888/kBGRA8888/kMono8/kMono16. MessageParser plugin base: - SchemaHandler table: per-schema registration with parse_scalars and parse_object callables. The base's classifySchema / parseScalars / parseObject methods are now table lookups. Plugins call registerSchemaHandler() in their constructor (or in bindSchema) to declare what they know about each type name. - parse() is no longer pure virtual: default implementation routes through parseScalars + writeHost.appendRecord, so plugins that register all their schemas via the table inherit parse() for free. C ABI: - PJ_payload_t / PJ_payload_anchor_t / PJ_payload_fetcher_t cross-ABI types: idempotent byte-fetcher with a release callback. - New push_message_v2 tail slot on PJ_data_source_runtime_host_vtable_t: the DataSource hands the host an idempotent fetcher; the host applies the active ObjectIngestPolicy (kPureLazy / kLazyObjectsEagerScalars / kEager) to decide when (and whether) to invoke it. - vtable size sentinel updated deliberately: 80 -> 104 bytes (MIN_VTABLE_SIZE pinned at the v4.0 baseline of 80). SDK C++ helpers: - DataSourceRuntimeHostView::pushMessage template: wraps a C++ closure (returning either PayloadView or vector) into a PJ_payload_fetcher_t and delegates to push_message_v2. Returns an explicit error when the host doesn't expose the slot — no silent fallback to the legacy raw-message path. - ObjectIngestPolicy + ObjectIngestPolicyResolver with hierarchical override cascade: topic > data_source > kind > default. - MessageParserHandle::classifySchema wrapper for the tail-slot call. Canonical-object blob serialization: - Flat byte layout for Image / CompressedImage / PointCloud crossing the C ABI. Writer/reader pair under sdk/detail/. Tests: - object_ingest_policy_test: cascade rules at all four levels + last-write-wins. - push_message_v2_test: mock host exercising the template's fetcher wrap (vector and PayloadView shapes), idempotency under repeated fetch, ctx lifetime via shared_ptr canary, anchor propagation past fetcher release, and the explicit error when the host predates the slot. Status: design sketch posted as a draft. Compiles cleanly with the companion parser/runtime work; not yet exercised end-to-end against real data sources. --- pj_base/CMakeLists.txt | 2 + .../include/pj_base/canonical_object_abi.h | 136 +++++++ .../include/pj_base/data_source_protocol.h | 97 +++++ .../include/pj_base/message_parser_protocol.h | 48 ++- .../include/pj_base/sdk/canonical_object.hpp | 283 ++++++++++++++ .../pj_base/sdk/data_source_host_views.hpp | 110 ++++++ .../detail/canonical_object_serialization.hpp | 345 ++++++++++++++++++ .../sdk/detail/message_parser_trampolines.hpp | 103 ++++++ .../sdk/message_parser_plugin_base.hpp | 223 ++++++++++- .../pj_base/sdk/object_ingest_policy.hpp | 119 ++++++ pj_base/tests/abi_layout_sentinels_test.cpp | 3 +- pj_base/tests/object_ingest_policy_test.cpp | 92 +++++ pj_base/tests/push_message_v2_test.cpp | 226 ++++++++++++ .../pj_plugins/host/message_parser_handle.hpp | 21 ++ pj_plugins/tests/data_source_library_test.cpp | 2 + .../tests/file_source_integration_test.cpp | 1 + 16 files changed, 1805 insertions(+), 6 deletions(-) create mode 100644 pj_base/include/pj_base/canonical_object_abi.h create mode 100644 pj_base/include/pj_base/sdk/canonical_object.hpp create mode 100644 pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp create mode 100644 pj_base/include/pj_base/sdk/object_ingest_policy.hpp create mode 100644 pj_base/tests/object_ingest_policy_test.cpp create mode 100644 pj_base/tests/push_message_v2_test.cpp diff --git a/pj_base/CMakeLists.txt b/pj_base/CMakeLists.txt index 12876b9..b3dece2 100644 --- a/pj_base/CMakeLists.txt +++ b/pj_base/CMakeLists.txt @@ -56,6 +56,8 @@ if(PJ_BUILD_TESTS) tests/platform_test.cpp tests/arrow_holders_test.cpp tests/media_metadata_test.cpp + tests/object_ingest_policy_test.cpp + tests/push_message_v2_test.cpp ) foreach(test_src ${PJ_BASE_TESTS}) diff --git a/pj_base/include/pj_base/canonical_object_abi.h b/pj_base/include/pj_base/canonical_object_abi.h new file mode 100644 index 0000000..b370951 --- /dev/null +++ b/pj_base/include/pj_base/canonical_object_abi.h @@ -0,0 +1,136 @@ +/** + * @file canonical_object_abi.h + * @brief C ABI representation of canonical objects produced by parsers. + * + * The C++ vocabulary lives in pj_base/sdk/canonical_object.hpp + * (sdk::CanonicalObject = std::variant). + * This file defines the wire format used to cross the plugin C ABI boundary + * for that variant: parser plugins produce a flat byte blob with a small + * header describing the kind, and the host deserializes it back to the + * C++ type. + * + * The blob layout is little-endian, packed, with no implementation-defined + * padding. Trampolines and host loader use it directly. + */ +#ifndef PJ_CANONICAL_OBJECT_ABI_H +#define PJ_CANONICAL_OBJECT_ABI_H + +#include +#include +#include + +#include "pj_base/plugin_data_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Owned buffer of named field values produced by the parse_scalars slot. + * The plugin owns the @p fields array; the host calls @p release(alloc_handle) + * when done. release MAY be NULL if the plugin manages the buffer in a way + * that does not require explicit release between calls. + */ +typedef struct PJ_named_field_value_buffer_t { + const PJ_named_field_value_t* fields; + size_t count; + void* alloc_handle; + void (*release)(void* alloc_handle); +} PJ_named_field_value_buffer_t; + +/** + * Canonical object kinds. Numeric values are stable across releases — never + * renumber. Mirror of PJ::sdk::CanonicalObjectKind for use across the C ABI. + */ +typedef enum PJ_canonical_object_kind_t { + PJ_CANONICAL_OBJECT_KIND_NONE = 0, + PJ_CANONICAL_OBJECT_KIND_IMAGE = 1, + PJ_CANONICAL_OBJECT_KIND_COMPRESSED_IMAGE = 2, + PJ_CANONICAL_OBJECT_KIND_POINTCLOUD = 3, + /* Reserve future kinds; appended at the tail. */ + /* PJ_CANONICAL_OBJECT_KIND_MARKERS = 4, */ + /* PJ_CANONICAL_OBJECT_KIND_OCCUPANCY_GRID = 5, */ +} PJ_canonical_object_kind_t; + +/** + * Schema classification — what kind a parser declares for a given schema. + * Returned a priori (without parsing payload) by the classify_schema slot. + * + * Currently a single field plus reserved padding to keep the struct size + * stable across future minor extensions (declarative metadata can attach + * via additional structs returned by other slots, not by growing this one). + */ +typedef struct PJ_schema_classification_t { + uint16_t object_kind; /**< PJ_canonical_object_kind_t. */ + uint16_t reserved; /**< Must be zero. */ +} PJ_schema_classification_t; + +/** + * Canonical object as a flat byte blob produced by the parse_object slot. + * + * Layout of @p data: + * + * header (12 bytes, little-endian): + * uint16_t kind // PJ_canonical_object_kind_t + * uint16_t reserved + * int64_t timestamp_ns + * + * body (varies by kind, immediately follows the header): + * + * KIND_IMAGE: + * uint32_t width + * uint32_t height + * uint16_t pixel_format + * uint16_t reserved + * uint32_t pixels_size + * uint8_t pixels[pixels_size] // tightly packed, no row stride + * + * KIND_COMPRESSED_IMAGE: + * uint8_t format // 0=unknown, 1=JPEG, 2=PNG, 3=QOI + * uint8_t has_depth_min + * uint8_t has_depth_max + * uint8_t reserved + * float depth_min // valid iff has_depth_min + * float depth_max // valid iff has_depth_max + * uint32_t bytes_size + * uint8_t bytes[bytes_size] + * + * KIND_POINTCLOUD: + * uint32_t width + * uint32_t height + * uint32_t point_step + * uint32_t row_step + * uint8_t is_bigendian + * uint8_t is_dense + * uint16_t fields_count + * fields[fields_count]: + * uint32_t name_size + * char name[name_size] + * uint32_t offset + * uint8_t datatype // 0=unknown,1=i8,2=u8,3=i16,4=u16, + * // 5=i32,6=u32,7=f32,8=f64 + * uint8_t reserved[3] + * uint32_t count + * uint32_t data_size + * uint8_t data[data_size] + * + * Memory ownership: + * The blob's @p data is owned by the parser plugin. The plugin allocates + * it during parse_object and the host calls @p release(ctx, data) when it + * is done with the bytes. release MAY be NULL if data points into a + * plugin-internal buffer that the plugin manages itself across calls. + */ +typedef struct PJ_canonical_object_blob_t { + const uint8_t* data; + uint64_t size; + /** Opaque handle the plugin uses to identify the allocation. */ + void* alloc_handle; + /** Release callback invoked by the host. NULL means no release needed. */ + void (*release)(void* alloc_handle); +} PJ_canonical_object_blob_t; + +#ifdef __cplusplus +} +#endif + +#endif /* PJ_CANONICAL_OBJECT_ABI_H */ diff --git a/pj_base/include/pj_base/data_source_protocol.h b/pj_base/include/pj_base/data_source_protocol.h index 584ede1..03dd463 100644 --- a/pj_base/include/pj_base/data_source_protocol.h +++ b/pj_base/include/pj_base/data_source_protocol.h @@ -128,6 +128,58 @@ typedef struct { uint32_t id; } PJ_parser_binding_handle_t; +/** + * Ownership token kept alive while a non-owning byte buffer is in use. + * `ctx` is opaque to the host; `release(ctx)` is invoked once when the host + * no longer needs the bytes referenced by the buffer. `ctx` MAY be NULL — + * meaning the buffer was static / borrowed from an external lifetime — in + * which case `release` is also expected to be NULL. + * + * Mirrors the pattern of PJ_canonical_object_blob_t but applies to raw + * payload bytes, not to serialized canonical objects. + */ +typedef struct PJ_payload_anchor_t { + void* ctx; + void (*release)(void* ctx); +} PJ_payload_anchor_t; + +/** + * Payload bytes plus an ownership anchor. The host treats `data` as a + * non-owning view, valid until `anchor.release(anchor.ctx)` is invoked. + * + * For zero-copy ingest, the producer (DataSource plugin) returns a payload + * whose anchor keeps the source buffer (mcap chunk, mmap, …) alive. The + * host hands the same payload to a parser (which can build canonical + * objects holding spans into the buffer) and only releases the anchor when + * everyone done with the bytes. + */ +typedef struct PJ_payload_t { + const uint8_t* data; + size_t size; + PJ_payload_anchor_t anchor; +} PJ_payload_t; + +/** + * Idempotent fetcher of payload bytes. The host invokes `fetch(ctx, &out, + * &err)` zero, one, or many times depending on the active + * ObjectIngestPolicy and on consumer pulls. Returns true and populates + * `*out` on success; returns false and (optionally) populates `*err` on + * failure (file read error, source torn down, etc.). + * + * The host ALWAYS calls `release(ctx)` exactly once when it no longer + * needs the fetcher — at the end of ingest for kEager, when the + * corresponding ObjectStore entry is dropped for lazy modes. `release` + * MAY be NULL if the plugin manages the ctx via some external mechanism. + * + * `fetch` MUST be thread-safe: the host may invoke it from the ingest + * thread (kEager) or from consumer threads (lazy pull). + */ +typedef struct PJ_payload_fetcher_t { + void* ctx; + bool (*fetch)(void* ctx, PJ_payload_t* out_payload, PJ_error_t* out_error) PJ_NOEXCEPT; + void (*release)(void* ctx); +} PJ_payload_fetcher_t; + /** * Request to bind (or look up) a parser for a given topic. * All string views must remain valid for the duration of the call. @@ -232,6 +284,51 @@ typedef struct PJ_data_source_runtime_host_vtable_t { * are loaded. */ const char* (*list_available_encodings)(void* ctx)PJ_NOEXCEPT; + + /* --------------------------------------------------------------------- + * Tail slots — appended after v4.0. Readers MUST gate access on + * `vtable->struct_size > offsetof(slot)` before calling. + * --------------------------------------------------------------------- */ + + /** + * [stream-thread] Push a message via a deferred byte fetcher. The plugin + * hands the host a callable that produces the payload bytes when + * invoked; the host applies the active ObjectIngestPolicy (resolved via + * the application-configured ObjectIngestPolicyResolver against + * source_id, topic, and the parser's classifySchema kind) to decide: + * + * - kEager: invoke fetcher now, parser.parseScalars + * writes columns, parser.parseObject + * materializes the canonical object into + * the ObjectStore via pushOwned. Fetcher + * released after. + * - kLazyObjectsEagerScalars: invoke fetcher now, parser.parseScalars + * writes columns. ObjectStore.pushLazy + * retains the fetcher closure for pull-time + * re-invocation; bytes dropped after + * parseScalars. + * - kPureLazy: do not invoke fetcher at ingest. Register + * ObjectStore entry that defers fetcher + * invocation until consumer pull. No + * scalar columns produced. + * + * The plugin is policy-agnostic: it does not query the policy nor + * track which mode is active. Just constructs the fetcher and hands + * it off via this slot. + * + * Lifetime: the fetcher's `ctx` is allocated by the plugin. The host + * is responsible for calling `fetcher.release(fetcher.ctx)` exactly + * once when the fetcher is no longer needed (kEager: after the + * single fetch; lazy modes: when the ObjectStore entry it backs is + * dropped). `fetcher.fetch` must be thread-safe. + * + * Returns false + error on failure (binding handle invalid, + * ObjectStore push failed, etc.). On failure the host still calls + * `fetcher.release` so the plugin's ctx leaks no resources. + */ + bool (*push_message_v2)( + void* ctx, PJ_parser_binding_handle_t handle, int64_t host_timestamp_ns, + PJ_payload_fetcher_t fetcher, PJ_error_t* out_error) PJ_NOEXCEPT; } PJ_data_source_runtime_host_vtable_t; /** Fat pointer pairing a runtime host context with its vtable. */ diff --git a/pj_base/include/pj_base/message_parser_protocol.h b/pj_base/include/pj_base/message_parser_protocol.h index e3c06ae..bf8d290 100644 --- a/pj_base/include/pj_base/message_parser_protocol.h +++ b/pj_base/include/pj_base/message_parser_protocol.h @@ -8,9 +8,16 @@ * append_arrow_ipc — see plugin_data_api.h. Parsers stay per-record; * the host coalesces into Arrow batches internally. * + * v4 appendable tail (no version bump — protocol stays at 4): + * - classify_schema, parse_scalars, parse_object: pure-functional API + * that returns typed values instead of writing to host views. Enables + * lazy materialization and removes the parser's coupling to push policy. + * See pj_base/canonical_object_abi.h for the wire format. + * * The host obtains the plugin's vtable via `PJ_get_message_parser_vtable()` * and drives the plugin through: create -> bind(registry) -> - * (bind_schema) -> parse* -> destroy. + * (bind_schema) -> (classify_schema) -> parse* / parseScalars / parseObject + * -> destroy. */ #ifndef PJ_MESSAGE_PARSER_PROTOCOL_H #define PJ_MESSAGE_PARSER_PROTOCOL_H @@ -19,6 +26,7 @@ #include #include +#include "pj_base/canonical_object_abi.h" #include "pj_base/plugin_data_api.h" #ifdef __cplusplus @@ -110,6 +118,44 @@ typedef struct PJ_message_parser_vtable_t { * Tail slots beyond here are OPTIONAL. Host reads MUST check both * struct_size and slot-nullability via PJ_HAS_TAIL_SLOT. * ==================================================================== */ + + /** + * [thread-safe] A priori classification of the bound schema. Cheap; no + * payload required. Host invokes this after bind_schema(). Returns + * @p out_classification by value (POD). + * + * NULL or absent (struct_size too small) → host treats as + * PJ_CANONICAL_OBJECT_KIND_NONE. + * + * Pure-functional contract: no host side-effects. + */ + bool (*classify_schema)(void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, + PJ_schema_classification_t* out_classification, PJ_error_t* out_error) PJ_NOEXCEPT; + + /** + * [stream-thread] Pure-functional alternative to parse(): returns the + * scalar fields by value (out parameter) instead of writing them to the + * parser write host. The host invokes this in preference to parse() when + * available; legacy plugins keep using parse(). + * + * The plugin owns @p out_fields.fields buffer; @p out_fields.release is + * called by the host when done. release MAY be NULL. + */ + bool (*parse_scalars)(void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, + PJ_named_field_value_buffer_t* out_fields, PJ_error_t* out_error) PJ_NOEXCEPT; + + /** + * [stream-thread] Pure-functional production of a canonical object from + * the payload. Fills @p out_blob with the serialized object (see layout + * in canonical_object_abi.h). Only meaningful when classify_schema() + * returned a non-zero kind. + * + * Pure-functional contract: no writes to the object write host. The + * caller (DataSource / app) decides whether to push the blob eagerly, + * capture it inside a lazy lambda, or hand it directly to a consumer. + */ + bool (*parse_object)(void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, + PJ_canonical_object_blob_t* out_blob, PJ_error_t* out_error) PJ_NOEXCEPT; } PJ_message_parser_vtable_t; /* The vtable above is ABI-APPENDABLE: new slots may be added at the tail; * host reads guard with PJ_HAS_TAIL_SLOT. See PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE. */ diff --git a/pj_base/include/pj_base/sdk/canonical_object.hpp b/pj_base/include/pj_base/sdk/canonical_object.hpp new file mode 100644 index 0000000..b55e84e --- /dev/null +++ b/pj_base/include/pj_base/sdk/canonical_object.hpp @@ -0,0 +1,283 @@ +/** + * @file canonical_object.hpp + * @brief Canonical object types produced by MessageParser plugins and consumed + * by widgets and toolboxes. + * + * This header defines the vocabulary that bridges parser plugins (which + * understand wire formats: ROS, Foxglove, Protobuf, etc.) and consumer code + * (widgets, toolboxes) that renders or processes the result. The ObjectStore + * itself remains agnostic to these types — it stores opaque bytes; the + * decoding into a CanonicalObject happens in the consumer at pull time, by + * invoking the parser's parseObject() against the bytes. + * + * Reference report: docs/claude_reports/2026.05.07-arquitectura-objectstore-pipeline-misalignment.md + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "pj_base/span.hpp" +#include "pj_base/types.hpp" + +namespace PJ { +namespace sdk { + +// ----------------------------------------------------------------------------- +// Schema classification +// ----------------------------------------------------------------------------- + +// ----------------------------------------------------------------------------- +// Buffer anchor — type-erased ownership token shared between a payload buffer +// and any non-owning views derived from it. Carries no data, only keeps the +// underlying allocation alive while at least one anchor copy exists. Concrete +// typical type erased here is std::shared_ptr>; consumers +// never need to know. +// ----------------------------------------------------------------------------- + +using BufferAnchor = std::shared_ptr; + +/// Non-owning view + ownership anchor of a payload buffer. Used by the host +/// to hand a parser a message payload without committing to a copy: the parser +/// reads `bytes` and, in the canonical object it returns, may keep a Span into +/// the same memory plus a copy of `anchor` so the bytes outlive the parse call. +/// +/// `anchor` may be empty when the caller does not share ownership — in that +/// case the parser must materialize any bytes it wants to retain (the C ABI +/// trampoline path is the typical case; in-process direct calls are expected +/// to provide a non-empty anchor). +struct PayloadView { + Span bytes; + BufferAnchor anchor; +}; + +/// What kind of canonical object a parser produces for a given schema. +/// Returned a priori (without parsing payload) by classifySchema(). kNone means +/// the parser only produces scalars for the Datastore — no ObjectTopic to +/// register. +enum class CanonicalObjectKind : uint16_t { + kNone = 0, + kImage = 1, ///< sdk::Image — pixels already in canonical PixelFormat. + kCompressedImage = 2, ///< sdk::CompressedImage — JPEG/PNG/QOI bytes, undecoded. + kPointCloud = 3, ///< sdk::PointCloud — packed points + per-channel field layout. + // Reserved for future kinds; keep numeric values stable across releases. + // kMarkers = 4, + // kOccupancyGrid = 5, +}; + +/// A priori classification of a schema, returned by MessageParser::classifySchema(). +/// Currently a single field; struct (vs raw enum) leaves room to attach +/// declarative metadata later (preferred cache size, expected rate, etc.) without +/// breaking the API. What deliberately does NOT belong here: parse cost hints +/// (the DataSource knows the payload size), retention policy, eager/lazy choice. +struct SchemaClassification { + CanonicalObjectKind object_kind = CanonicalObjectKind::kNone; +}; + +// ----------------------------------------------------------------------------- +// Pixel formats — canonical for sdk::Image +// ----------------------------------------------------------------------------- + +/// Canonical pixel format for sdk::Image. The buffer may include row padding +/// (sdk::Image::row_step >= width * bytesPerPixel(format)); consumers must +/// honor row_step rather than assuming tightly-packed. +/// +/// Both R-G-B and B-G-R orderings are first-class citizens. ROS bgr8/bgra8 +/// (and many machine-vision sources) deliver bytes in B-G-R order natively; +/// keeping the byte order in the format tag (instead of swizzling at parse +/// time) lets the consumer hand bytes straight to a renderer that supports +/// GL_BGR / GL_BGRA texture uploads — zero-copy all the way. +/// +/// Note: pj_scene2D (and other consumers) currently define their own pixel +/// format. Harmonizing on this canonical enum is part of consumer-side +/// migration; this header defines the SDK-level vocabulary. +enum class PixelFormat : uint16_t { + kUnknown = 0, + kRGB888 = 1, ///< 3 bytes/pixel, R-G-B order. + kRGBA8888 = 2, ///< 4 bytes/pixel, R-G-B-A order. + kMono8 = 3, ///< 1 byte/pixel, grayscale. + kMono16 = 4, ///< 2 bytes/pixel, grayscale (depth, etc.); see is_bigendian. + kBGR888 = 5, ///< 3 bytes/pixel, B-G-R order (ROS bgr8, OpenCV native). + kBGRA8888 = 6, ///< 4 bytes/pixel, B-G-R-A order (ROS bgra8). +}; + +/// Bytes per pixel for a given format. Returns 0 for kUnknown. +[[nodiscard]] constexpr uint32_t bytesPerPixel(PixelFormat format) noexcept { + switch (format) { + case PixelFormat::kRGB888: + case PixelFormat::kBGR888: + return 3; + case PixelFormat::kRGBA8888: + case PixelFormat::kBGRA8888: + return 4; + case PixelFormat::kMono8: + return 1; + case PixelFormat::kMono16: + return 2; + case PixelFormat::kUnknown: + return 0; + } + return 0; +} + +// ----------------------------------------------------------------------------- +// sdk::Image — already-decoded image +// ----------------------------------------------------------------------------- + +/// Image already decoded into a canonical pixel format. If the producer +/// (parser) returns this, the consumer can upload the pixels directly to a +/// renderer (QRhi or otherwise) without going through any codec. +/// +/// Layout: `pixels` is a non-owning view of size at least `row_step * height`. +/// `row_step` may exceed `width * bytesPerPixel(pixel_format)` when the wire +/// format included per-row padding; consumers must honor it. `anchor` keeps +/// the underlying buffer alive — the parser may have made `pixels` a view +/// into the source payload (zero-copy) or into a freshly-allocated vector +/// (when the wire format required conversion); consumers don't need to know +/// which. +/// +/// For mono16 buffers `is_bigendian` indicates the byte order of each sample; +/// otherwise it is unused. RGB/BGR ordering is encoded in `pixel_format`. +struct Image { + uint32_t width = 0; + uint32_t height = 0; + PixelFormat pixel_format = PixelFormat::kUnknown; + uint32_t row_step = 0; + bool is_bigendian = false; + Span pixels; + BufferAnchor anchor; + Timestamp timestamp_ns = 0; +}; + +// ----------------------------------------------------------------------------- +// sdk::CompressedImage — undecoded compressed image bytes +// ----------------------------------------------------------------------------- + +/// Image still in compressed wire format (JPEG/PNG/QOI). The consumer is +/// expected to run it through the appropriate codec (pj_scene2D::JpegCodec, +/// PngCodec, etc.) to obtain an sdk::Image. +/// +/// The parser does NOT decompress: it only extracts the compressed payload +/// from whatever wrapper the wire format used (CDR for ROS2, etc.) and tags it +/// with the format. +struct CompressedImage { + enum class Format : uint8_t { + kUnknown = 0, + kJPEG = 1, + kPNG = 2, + kQOI = 3, + }; + + /// Auxiliary metadata that some wrappers attach to the compressed bytes + /// and that the consumer needs to decode correctly. The parser fills the + /// fields it can; consumers ignore those they don't care about. + struct Extras { + /// For ROS compressedDepth: the depth-quantization range to use after + /// PNG decoding. Both nullopt for non-depth compressed images. + std::optional compressed_depth_min; + std::optional compressed_depth_max; + }; + + Format format = Format::kUnknown; + Span bytes; + BufferAnchor anchor; + Timestamp timestamp_ns = 0; + Extras extras; +}; + +// ----------------------------------------------------------------------------- +// sdk::PointCloud — packed point cloud +// ----------------------------------------------------------------------------- + +/// Description of one channel inside a packed point cloud (x, y, z, intensity, +/// rgb, ring, time, …). Mirrors the shape of sensor_msgs/PointField but the +/// type is canonical PJ vocabulary, not a ROS-specific enum. +struct PointField { + enum class Datatype : uint8_t { + kUnknown = 0, + kInt8 = 1, + kUint8 = 2, + kInt16 = 3, + kUint16 = 4, + kInt32 = 5, + kUint32 = 6, + kFloat32 = 7, + kFloat64 = 8, + }; + + std::string name; + uint32_t offset = 0; ///< Byte offset of this field within a single point. + Datatype datatype = Datatype::kUnknown; + uint32_t count = 1; ///< Number of elements of `datatype` (typically 1). +}; + +/// Bytes per element for a given PointField datatype. Returns 0 for kUnknown. +[[nodiscard]] constexpr uint32_t bytesPerElement(PointField::Datatype dt) noexcept { + switch (dt) { + case PointField::Datatype::kInt8: + case PointField::Datatype::kUint8: + return 1; + case PointField::Datatype::kInt16: + case PointField::Datatype::kUint16: + return 2; + case PointField::Datatype::kInt32: + case PointField::Datatype::kUint32: + case PointField::Datatype::kFloat32: + return 4; + case PointField::Datatype::kFloat64: + return 8; + case PointField::Datatype::kUnknown: + return 0; + } + return 0; +} + +/// Packed point cloud. The `data` buffer holds `width * height` points, each +/// occupying `point_step` bytes laid out per `fields`. `is_dense=false` means +/// some points may be invalid (typically NaN-filled). +struct PointCloud { + uint32_t width = 0; + uint32_t height = 1; + uint32_t point_step = 0; ///< Bytes per point. + uint32_t row_step = 0; ///< Bytes per row (= point_step * width when no padding). + bool is_bigendian = false; + bool is_dense = true; + std::vector fields; + Span data; + BufferAnchor anchor; + Timestamp timestamp_ns = 0; +}; + +// ----------------------------------------------------------------------------- +// CanonicalObject — variant carried by parser->parseObject() +// ----------------------------------------------------------------------------- + +/// Sum type of all canonical objects a parser may produce. Closed for now; +/// extending it (kMarkers, kOccupancyGrid, …) requires bumping +/// PJ_MESSAGE_PARSER_PROTOCOL_VERSION (compatible append at the end). +using CanonicalObject = std::variant; + +/// Helper: get the kind tag for a CanonicalObject without unpacking it. +[[nodiscard]] inline CanonicalObjectKind kindOf(const CanonicalObject& obj) noexcept { + return std::visit( + [](const auto& concrete) -> CanonicalObjectKind { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return CanonicalObjectKind::kImage; + } else if constexpr (std::is_same_v) { + return CanonicalObjectKind::kCompressedImage; + } else if constexpr (std::is_same_v) { + return CanonicalObjectKind::kPointCloud; + } else { + return CanonicalObjectKind::kNone; + } + }, + obj); +} + +} // namespace sdk +} // namespace PJ diff --git a/pj_base/include/pj_base/sdk/data_source_host_views.hpp b/pj_base/include/pj_base/sdk/data_source_host_views.hpp index 7dc191d..0710833 100644 --- a/pj_base/include/pj_base/sdk/data_source_host_views.hpp +++ b/pj_base/include/pj_base/sdk/data_source_host_views.hpp @@ -21,6 +21,7 @@ #include "pj_base/data_source_protocol.h" #include "pj_base/expected.hpp" +#include "pj_base/sdk/object_ingest_policy.hpp" #include "pj_base/sdk/plugin_data_api.hpp" namespace PJ { @@ -217,6 +218,108 @@ class DataSourceRuntimeHostView { return okStatus(); } + /// Push a message via a deferred byte fetcher. The DataSource hands the + /// host a callable that produces the payload bytes when invoked. The host + /// applies the active ObjectIngestPolicy (resolved via the + /// ObjectIngestPolicyResolver below for source_id, topic, kind) to decide + /// whether to invoke the fetcher at ingest, only on consumer pull, or + /// never. The DataSource is policy-agnostic — it neither queries the + /// policy nor tracks which mode is active. + /// + /// The fetcher MUST be idempotent — the host may invoke it zero, one, or + /// many times depending on policy and consumer pulls. It MUST be + /// thread-safe: invocations may come from the ingest thread (kEager) or + /// from consumer threads (lazy pulls). Capture by shared_ptr (file + /// readers, mcap chunks) so the source buffer outlives every pending + /// pull. + /// + /// Fetcher return type: + /// - sdk::PayloadView { bytes, anchor } — preferred, zero-copy. The + /// anchor is propagated through the C ABI as a heap-held shared_ptr + /// copy that the host releases when no longer needed. + /// - std::vector — legacy form. The vector is + /// heap-relocated and used as its own anchor; bytes survive across + /// the C ABI boundary at the cost of one alloc-and-move. + /// + /// The host MUST advertise the push_message_v2 tail slot. We wrap the + /// closure into a PJ_payload_fetcher_t and hand it over verbatim; the + /// host applies ObjectIngestPolicy and decides when (and whether) to + /// invoke it. There is no legacy fallback: a host that doesn't expose + /// the slot returns an explicit error here rather than silently + /// degrading to a kEager push_raw_message. + template + [[nodiscard]] Status pushMessage( + ParserBindingHandle handle, Timestamp host_timestamp_ns, Fetcher&& fetcher) const { + if (!valid()) { + return unexpected(std::string("runtime host is not bound")); + } + if (!PJ_HAS_TAIL_SLOT(PJ_data_source_runtime_host_vtable_t, host_.vtable, push_message_v2)) { + return unexpected(std::string("runtime host does not expose push_message_v2")); + } + + using FetcherT = std::decay_t; + auto* ctx = new FetcherT(std::forward(fetcher)); + + PJ_payload_fetcher_t abi_fetcher{ + .ctx = ctx, + .fetch = +[](void* c, PJ_payload_t* out, PJ_error_t* err) noexcept -> bool { + try { + auto& fn = *static_cast(c); + using Result = std::decay_t; + if constexpr (std::is_same_v) { + // Zero-copy path: hold a heap copy of the BufferAnchor so it + // survives across the C ABI; release_fn deletes the holder + // (and decrements the underlying shared_ptr ref count). + auto pv = fn(); + auto* held = new sdk::BufferAnchor(std::move(pv.anchor)); + out->data = pv.bytes.data(); + out->size = pv.bytes.size(); + out->anchor.ctx = held; + out->anchor.release = +[](void* h) noexcept { + delete static_cast(h); + }; + } else { + // Closure returns std::vector: heap-hold the vector; + // it owns its bytes. + auto* held = new std::vector(fn()); + out->data = held->data(); + out->size = held->size(); + out->anchor.ctx = held; + out->anchor.release = +[](void* h) noexcept { + delete static_cast*>(h); + }; + } + return true; + } catch (const std::exception& e) { + sdk::fillError(err, 1, "plugin", e.what()); + return false; + } catch (...) { + sdk::fillError(err, 1, "plugin", "unknown exception in payload fetcher"); + return false; + } + }, + .release = +[](void* c) noexcept { delete static_cast(c); }, + }; + + PJ_error_t err{}; + if (!host_.vtable->push_message_v2(host_.ctx, handle, host_timestamp_ns, abi_fetcher, &err)) { + return unexpected(errorToString(err)); + } + return okStatus(); + } + + /// Access (mutable) the resolver of ObjectIngestPolicy for this runtime. + /// The application configures it during setup; the host (when the + /// push_message_v2 dispatch lands) consults it per message. + /// + /// Implementation status (RFC): + /// The resolver is a per-DataSourceRuntimeHostView local instance for + /// now. In production it will be host-owned and shared across views; + /// the SDK surface stays the same. + [[nodiscard]] sdk::ObjectIngestPolicyResolver& objectIngestPolicy() const { + return policy_resolver_; + } + /** * Display a modal message box and wait for user response. * @return The button clicked, or kOk if the host does not support dialogs. @@ -277,6 +380,13 @@ class DataSourceRuntimeHostView { private: PJ_data_source_runtime_host_t host_{}; + + // RFC-only: local-to-view policy resolver. Production wiring will move + // this to a host-side singleton accessed through the service registry; + // the public surface (objectIngestPolicy()) stays the same. mutable + // because configuring the policy is conceptually a side concern, not + // a mutation of the view. + mutable sdk::ObjectIngestPolicyResolver policy_resolver_{}; }; } // namespace PJ diff --git a/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp b/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp new file mode 100644 index 0000000..e57f1a4 --- /dev/null +++ b/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp @@ -0,0 +1,345 @@ +/** + * @file detail/canonical_object_serialization.hpp + * @brief (De)serialization of PJ::sdk::CanonicalObject to/from the byte + * layout defined in pj_base/canonical_object_abi.h. + * + * The blob crosses the C ABI as raw bytes; this header turns it into the + * C++ variant on the host side and back into bytes on the plugin side. + * + * Endianness: writes/reads multi-byte integers using std::memcpy under the + * assumption that the host architecture is little-endian (the ABI mandates + * little-endian). Big-endian targets would need an explicit byte-swap layer + * here; documented as a known limitation in this iteration. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "pj_base/canonical_object_abi.h" +#include "pj_base/expected.hpp" +#include "pj_base/sdk/canonical_object.hpp" + +namespace PJ { +namespace sdk { +namespace detail { + +// ----------------------------------------------------------------------------- +// Low-level write helpers (host-endian = little-endian assumed) +// ----------------------------------------------------------------------------- + +inline void appendBytes(std::vector& out, const void* src, size_t n) { + if (n == 0) return; + const auto* p = static_cast(src); + out.insert(out.end(), p, p + n); +} + +template +inline void appendPod(std::vector& out, T value) { + static_assert(std::is_trivially_copyable_v, "appendPod requires trivially copyable"); + appendBytes(out, &value, sizeof(T)); +} + +// ----------------------------------------------------------------------------- +// Low-level read helpers +// ----------------------------------------------------------------------------- + +class BlobReader { + public: + BlobReader(const uint8_t* data, size_t size) : ptr_(data), end_(data + size) {} + + [[nodiscard]] bool remaining(size_t n) const noexcept { + return static_cast(end_ - ptr_) >= n; + } + + template + Expected readPod() { + static_assert(std::is_trivially_copyable_v, "readPod requires trivially copyable"); + if (!remaining(sizeof(T))) { + return unexpected(std::string("blob truncated")); + } + T value; + std::memcpy(&value, ptr_, sizeof(T)); + ptr_ += sizeof(T); + return value; + } + + Expected> readBytes(size_t n) { + if (!remaining(n)) { + return unexpected(std::string("blob truncated reading bytes")); + } + std::vector out(ptr_, ptr_ + n); + ptr_ += n; + return out; + } + + Expected readString(size_t n) { + if (!remaining(n)) { + return unexpected(std::string("blob truncated reading string")); + } + std::string out(reinterpret_cast(ptr_), n); + ptr_ += n; + return out; + } + + private: + const uint8_t* ptr_; + const uint8_t* end_; +}; + +// ----------------------------------------------------------------------------- +// Serialization (C++ → bytes) +// ----------------------------------------------------------------------------- + +inline void writeImageBody(std::vector& out, const Image& img) { + appendPod(out, img.width); + appendPod(out, img.height); + appendPod(out, static_cast(img.pixel_format)); + appendPod(out, img.is_bigendian ? 1 : 0); + appendPod(out, 0); // reserved + appendPod(out, img.row_step); + const uint32_t pixels_size = static_cast(img.pixels.size()); + appendPod(out, pixels_size); + if (pixels_size > 0) { + appendBytes(out, img.pixels.data(), pixels_size); + } +} + +inline void writeCompressedImageBody(std::vector& out, const CompressedImage& ci) { + appendPod(out, static_cast(ci.format)); + appendPod(out, ci.extras.compressed_depth_min.has_value() ? 1 : 0); + appendPod(out, ci.extras.compressed_depth_max.has_value() ? 1 : 0); + appendPod(out, 0); // reserved + appendPod(out, ci.extras.compressed_depth_min.value_or(0.0f)); + appendPod(out, ci.extras.compressed_depth_max.value_or(0.0f)); + const uint32_t bytes_size = static_cast(ci.bytes.size()); + appendPod(out, bytes_size); + if (bytes_size > 0) { + appendBytes(out, ci.bytes.data(), bytes_size); + } +} + +inline void writePointCloudBody(std::vector& out, const PointCloud& pc) { + appendPod(out, pc.width); + appendPod(out, pc.height); + appendPod(out, pc.point_step); + appendPod(out, pc.row_step); + appendPod(out, pc.is_bigendian ? 1 : 0); + appendPod(out, pc.is_dense ? 1 : 0); + appendPod(out, static_cast(pc.fields.size())); + for (const auto& f : pc.fields) { + const uint32_t name_size = static_cast(f.name.size()); + appendPod(out, name_size); + appendBytes(out, f.name.data(), name_size); + appendPod(out, f.offset); + appendPod(out, static_cast(f.datatype)); + appendPod(out, 0); // reserved + appendPod(out, 0); // reserved + appendPod(out, 0); // reserved + appendPod(out, f.count); + } + const uint32_t data_size = static_cast(pc.data.size()); + appendPod(out, data_size); + if (data_size > 0) { + appendBytes(out, pc.data.data(), data_size); + } +} + +/// Serialize a CanonicalObject into a flat byte buffer matching the layout +/// in canonical_object_abi.h. Caller owns the returned vector. +inline std::vector serializeCanonicalObject(const CanonicalObject& obj) { + std::vector out; + out.reserve(64); // header + small body; body grows for image/pointcloud + + // Header: kind (u16), reserved (u16), timestamp (i64). + const auto kind = kindOf(obj); + appendPod(out, static_cast(kind)); + appendPod(out, 0); // reserved + std::visit( + [&](const auto& concrete) { + appendPod(out, concrete.timestamp_ns); + using T = std::decay_t; + if constexpr (std::is_same_v) { + writeImageBody(out, concrete); + } else if constexpr (std::is_same_v) { + writeCompressedImageBody(out, concrete); + } else if constexpr (std::is_same_v) { + writePointCloudBody(out, concrete); + } + }, + obj); + + return out; +} + +// ----------------------------------------------------------------------------- +// Deserialization (bytes → C++) +// ----------------------------------------------------------------------------- + +// On the deserialize side we don't have a foreign anchor — the bytes come +// from the blob buffer. Wrap them in a shared_ptr and use that as +// the anchor; the Span points into the wrapped vector. Net cost: one alloc +// per object, same as before the iter-3 SDK change. +inline Expected readImageBody(BlobReader& r, Timestamp ts) { + auto width = r.readPod(); + if (!width) return unexpected(width.error()); + auto height = r.readPod(); + if (!height) return unexpected(height.error()); + auto pixel_format_raw = r.readPod(); + if (!pixel_format_raw) return unexpected(pixel_format_raw.error()); + auto is_be = r.readPod(); + if (!is_be) return unexpected(is_be.error()); + /*reserved*/ if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + auto row_step = r.readPod(); + if (!row_step) return unexpected(row_step.error()); + auto pixels_size = r.readPod(); + if (!pixels_size) return unexpected(pixels_size.error()); + auto pixels = r.readBytes(*pixels_size); + if (!pixels) return unexpected(pixels.error()); + + auto owned = std::make_shared>(std::move(*pixels)); + Span view(owned->data(), owned->size()); + return Image{ + .width = *width, + .height = *height, + .pixel_format = static_cast(*pixel_format_raw), + .row_step = *row_step, + .is_bigendian = (*is_be != 0), + .pixels = view, + .anchor = owned, + .timestamp_ns = ts, + }; +} + +inline Expected readCompressedImageBody(BlobReader& r, Timestamp ts) { + auto format_raw = r.readPod(); + if (!format_raw) return unexpected(format_raw.error()); + auto has_min = r.readPod(); + if (!has_min) return unexpected(has_min.error()); + auto has_max = r.readPod(); + if (!has_max) return unexpected(has_max.error()); + /*reserved*/ if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + auto depth_min = r.readPod(); + if (!depth_min) return unexpected(depth_min.error()); + auto depth_max = r.readPod(); + if (!depth_max) return unexpected(depth_max.error()); + auto bytes_size = r.readPod(); + if (!bytes_size) return unexpected(bytes_size.error()); + auto bytes = r.readBytes(*bytes_size); + if (!bytes) return unexpected(bytes.error()); + + auto owned = std::make_shared>(std::move(*bytes)); + CompressedImage ci{}; + ci.format = static_cast(*format_raw); + ci.bytes = Span(owned->data(), owned->size()); + ci.anchor = owned; + ci.timestamp_ns = ts; + if (*has_min != 0) ci.extras.compressed_depth_min = *depth_min; + if (*has_max != 0) ci.extras.compressed_depth_max = *depth_max; + return ci; +} + +inline Expected readPointCloudBody(BlobReader& r, Timestamp ts) { + auto width = r.readPod(); + if (!width) return unexpected(width.error()); + auto height = r.readPod(); + if (!height) return unexpected(height.error()); + auto point_step = r.readPod(); + if (!point_step) return unexpected(point_step.error()); + auto row_step = r.readPod(); + if (!row_step) return unexpected(row_step.error()); + auto is_be = r.readPod(); + if (!is_be) return unexpected(is_be.error()); + auto is_dense = r.readPod(); + if (!is_dense) return unexpected(is_dense.error()); + auto fields_count = r.readPod(); + if (!fields_count) return unexpected(fields_count.error()); + + std::vector fields; + fields.reserve(*fields_count); + for (uint16_t i = 0; i < *fields_count; ++i) { + auto name_size = r.readPod(); + if (!name_size) return unexpected(name_size.error()); + auto name = r.readString(*name_size); + if (!name) return unexpected(name.error()); + auto offset = r.readPod(); + if (!offset) return unexpected(offset.error()); + auto datatype_raw = r.readPod(); + if (!datatype_raw) return unexpected(datatype_raw.error()); + /*reserved×3*/ for (int j = 0; j < 3; ++j) { + if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + } + auto count = r.readPod(); + if (!count) return unexpected(count.error()); + fields.push_back(PointField{ + .name = std::move(*name), + .offset = *offset, + .datatype = static_cast(*datatype_raw), + .count = *count, + }); + } + + auto data_size = r.readPod(); + if (!data_size) return unexpected(data_size.error()); + auto data = r.readBytes(*data_size); + if (!data) return unexpected(data.error()); + + auto owned = std::make_shared>(std::move(*data)); + Span view(owned->data(), owned->size()); + return PointCloud{ + .width = *width, + .height = *height, + .point_step = *point_step, + .row_step = *row_step, + .is_bigendian = (*is_be != 0), + .is_dense = (*is_dense != 0), + .fields = std::move(fields), + .data = view, + .anchor = owned, + .timestamp_ns = ts, + }; +} + +/// Deserialize a flat byte buffer into a CanonicalObject. Returns unexpected +/// on truncation, unknown kind, or any inconsistency. +inline Expected deserializeCanonicalObject(const uint8_t* data, size_t size) { + if (data == nullptr) { + return unexpected(std::string("null blob")); + } + BlobReader r(data, size); + + auto kind_raw = r.readPod(); + if (!kind_raw) return unexpected(kind_raw.error()); + /*reserved*/ if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + auto ts = r.readPod(); + if (!ts) return unexpected(ts.error()); + + switch (static_cast(*kind_raw)) { + case CanonicalObjectKind::kImage: { + auto img = readImageBody(r, *ts); + if (!img) return unexpected(img.error()); + return CanonicalObject{std::move(*img)}; + } + case CanonicalObjectKind::kCompressedImage: { + auto ci = readCompressedImageBody(r, *ts); + if (!ci) return unexpected(ci.error()); + return CanonicalObject{std::move(*ci)}; + } + case CanonicalObjectKind::kPointCloud: { + auto pc = readPointCloudBody(r, *ts); + if (!pc) return unexpected(pc.error()); + return CanonicalObject{std::move(*pc)}; + } + case CanonicalObjectKind::kNone: + default: + return unexpected(std::string("unknown or unsupported canonical object kind")); + } +} + +} // namespace detail +} // namespace sdk +} // namespace PJ diff --git a/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp b/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp index caa56b6..8264cc1 100644 --- a/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp +++ b/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp @@ -7,6 +7,8 @@ */ #pragma once +#include "pj_base/sdk/detail/canonical_object_serialization.hpp" + namespace PJ { inline void MessageParserPluginBase::trampoline_destroy(void* ctx) noexcept { @@ -127,4 +129,105 @@ inline const void* MessageParserPluginBase::trampoline_get_plugin_extension(void } } +// ----------------------------------------------------------------------------- +// Pure-functional API trampolines (canonical-object tail of the vtable) +// ----------------------------------------------------------------------------- + +inline bool MessageParserPluginBase::trampoline_classify_schema( + void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, + PJ_schema_classification_t* out_classification, PJ_error_t* out_error) noexcept { + auto* self = static_cast(ctx); + if (out_classification == nullptr) { + self->storeError(out_error, 2, "plugin", "classify_schema called with null out_classification"); + return false; + } + try { + auto name_sv = type_name.data == nullptr ? std::string_view{} : std::string_view(type_name.data, type_name.size); + Span schema_span(schema.data, schema.size); + const auto cls = self->classifySchema(name_sv, schema_span); + out_classification->object_kind = static_cast(cls.object_kind); + out_classification->reserved = 0; + return true; + } catch (const std::exception& e) { + self->storeError(out_error, 1, "plugin", std::string("classify_schema threw: ") + e.what()); + return false; + } catch (...) { + self->storeError(out_error, 1, "plugin", "unknown exception in classify_schema"); + return false; + } +} + +inline bool MessageParserPluginBase::trampoline_parse_scalars( + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, + PJ_named_field_value_buffer_t* out_fields, PJ_error_t* out_error) noexcept { + auto* self = static_cast(ctx); + if (out_fields == nullptr) { + self->storeError(out_error, 2, "plugin", "parse_scalars called with null out_fields"); + return false; + } + try { + Span payload_span(payload.data, payload.size); + auto result = self->parseScalars(timestamp_ns, payload_span); + if (!result) { + self->storeError(out_error, 1, "plugin", std::move(result).error()); + return false; + } + // Hand the C++ vector to the plugin-owned buffer so PJ_string_view_t + // entries inside the ABI structs remain valid until the next call. + self->scalars_owned_buf_ = std::move(*result); + self->scalars_abi_buf_ = sdk::toAbiNamed( + Span(self->scalars_owned_buf_.data(), self->scalars_owned_buf_.size())); + + out_fields->fields = self->scalars_abi_buf_.data(); + out_fields->count = self->scalars_abi_buf_.size(); + out_fields->alloc_handle = nullptr; // buffer kept alive by the plugin instance + out_fields->release = nullptr; + return true; + } catch (const std::exception& e) { + self->storeError(out_error, 1, "plugin", std::string("parse_scalars threw: ") + e.what()); + return false; + } catch (...) { + self->storeError(out_error, 1, "plugin", "unknown exception in parse_scalars"); + return false; + } +} + +inline bool MessageParserPluginBase::trampoline_parse_object( + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, + PJ_canonical_object_blob_t* out_blob, PJ_error_t* out_error) noexcept { + auto* self = static_cast(ctx); + if (out_blob == nullptr) { + self->storeError(out_error, 2, "plugin", "parse_object called with null out_blob"); + return false; + } + try { + // C ABI path: caller does not share ownership of the payload buffer. + // Pass an empty anchor; the plugin must materialize anything it wants + // to retain past this call. The serialized blob written to out_blob is + // copied into self->object_blob_buf_ before we return, so a span-into- + // payload that the plugin keeps inside its CanonicalObject is fine for + // the duration of the serialize call below. + Span payload_span(payload.data, payload.size); + sdk::PayloadView payload_view{payload_span, sdk::BufferAnchor{}}; + auto result = self->parseObject(timestamp_ns, payload_view); + if (!result) { + self->storeError(out_error, 1, "plugin", std::move(result).error()); + return false; + } + self->object_blob_buf_ = sdk::detail::serializeCanonicalObject(*result); + + out_blob->data = self->object_blob_buf_.data(); + out_blob->size = self->object_blob_buf_.size(); + out_blob->alloc_handle = nullptr; // buffer kept alive by the plugin instance + out_blob->release = nullptr; + return true; + } catch (const std::exception& e) { + self->storeError(out_error, 1, "plugin", std::string("parse_object threw: ") + e.what()); + return false; + } catch (...) { + self->storeError(out_error, 1, "plugin", "unknown exception in parse_object"); + return false; + } +} + } // namespace PJ diff --git a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp index f2b88a9..bf68690 100644 --- a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp +++ b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp @@ -13,18 +13,51 @@ #include #include +#include #include #include +#include #include +#include #include "pj_base/expected.hpp" #include "pj_base/message_parser_protocol.h" #include "pj_base/plugin_abi_export.h" +#include "pj_base/sdk/canonical_object.hpp" #include "pj_base/sdk/plugin_data_api.hpp" #include "pj_base/sdk/service_registry.hpp" #include "pj_base/sdk/service_traits.hpp" namespace PJ { +namespace sdk { + +/// Per-schema handler bundle: classification + the two parse routes for one +/// schema type. Plugins build a table of these in their constructor; the +/// MessageParserPluginBase base class then implements classifySchema / +/// parseScalars / parseObject as final lookups into the table. +/// +/// Either parse_scalars or parse_object may be null (or both), reflecting +/// schemas that produce only scalars, only objects, or that the plugin +/// recognizes but routes through the legacy parse() path. +struct SchemaHandler { + CanonicalObjectKind object_kind = CanonicalObjectKind::kNone; + + /// Scalar route: returns owned column data — no anchor needed because the + /// returned vector and any string_views inside it are materialized by the + /// parser, independent of the caller's payload buffer. + std::function>(Timestamp, Span)> + parse_scalars; + + /// Canonical-object route: takes a PayloadView so the parser can return a + /// CanonicalObject whose internal Span(s) reference the same underlying + /// buffer (zero-copy). The parser propagates `payload.anchor` into the + /// returned object so its bytes outlive this call. When the caller passes + /// an empty anchor, the parser must materialize whatever it wants to retain. + std::function(Timestamp, PayloadView)> + parse_object; +}; + +} // namespace sdk /** * Base class for MessageParser plugins (protocol v4). @@ -59,10 +92,26 @@ class MessageParserPluginBase { return okStatus(); } - /// Bind a message schema. Default is no-op (for parsers that don't need schema). + /// Bind a message schema. The base implementation records the type name + /// verbatim so subsequent parseScalars / parseObject calls can dispatch + /// against the registered handler table without needing it as a parameter. + /// + /// The base does NO domain-specific normalization on the type name — + /// the SDK has no idea whether a name like \"pkg/msg/Type\" is valid or + /// equivalent to \"pkg/Type\" in some plugin's domain (that\'s a ROS-2 + /// convention, not a general one). Plugins that have their own naming + /// convention should apply it here, in their override, before delegating + /// to MessageParserPluginBase::bindSchema with the canonical form. They + /// must also use that same canonical form when calling + /// registerSchemaHandler. + /// + /// Subclasses that override this MUST call MessageParserPluginBase::bindSchema() + /// first (or set bound_type_name_ themselves) before any plugin-specific + /// schema setup, otherwise the table-based dispatch will fail to find the + /// schema's handler. virtual Status bindSchema(std::string_view type_name, Span schema) { - (void)type_name; (void)schema; + bound_type_name_.assign(type_name); return okStatus(); } @@ -75,8 +124,140 @@ class MessageParserPluginBase { return okStatus(); } - /// Parse one raw message and write decoded fields via writeHost(). PURE VIRTUAL. - virtual Status parse(Timestamp timestamp_ns, Span payload) = 0; + /// Parse one raw message and write decoded fields via writeHost(). + /// + /// The default implementation dispatches through the SchemaHandler table: + /// it invokes parseScalars() (which looks up the registered handler for + /// bound_type_name_) and shovels the returned vector to + /// writeHost().appendRecord(). Plugins that register all their schemas + /// via registerSchemaHandler() therefore inherit a working parse() for + /// free — no override needed. + /// + /// Subclasses MAY override to (a) add a fallback for type names not in + /// the registered table (e.g. a ROS-style generic flattener that handles + /// any message whose schema definition is known to the plugin), or + /// (b) retain a fully imperative implementation during migration to the + /// table-based dispatch. Plugins that have already migrated do not need + /// to override. + /// + /// This entry point exists for compatibility with the legacy v4 ingest + /// path (host calls parser.parse() directly to push fields to writeHost). + /// New host code should prefer pushing through parseScalars() / parseObject() + /// — the pure-functional pair enables lazy materialization, because the + /// caller (DataSource / app) needs the result returned, not pushed. Once + /// every host migrates to that path, parse() will be deprecated. + virtual Status parse(Timestamp timestamp_ns, Span payload) { + if (!writeHostBound()) { + return unexpected(std::string("write host not bound")); + } + auto fields = parseScalars(timestamp_ns, payload); + if (!fields) { + return unexpected(std::move(fields).error()); + } + if (fields->empty()) { + return okStatus(); + } + return writeHost().appendRecord( + timestamp_ns, + Span(fields->data(), fields->size())); + } + + // --------------------------------------------------------------------------- + // Pure-functional API (added in protocol v5, ABI-appendable) + // --------------------------------------------------------------------------- + // + // Design principle: the parser does NOT decide push policy (eager vs lazy) + // and does NOT decide where the result goes (Datastore, ObjectStore, none). + // Both decisions belong to the caller (DataSource / app). The parser is + // strictly a translator: bytes in, typed values out. Always eager when + // invoked — there is no internal deferral. Lazyness is modeled by callers + // wrapping these methods inside a lambda that fires on pull. + // + // Plugins extend the parser by populating a per-schema handler table in + // the constructor (registerSchemaHandler). The base class implements + // classifySchema / parseScalars / parseObject as final lookups into that + // table. Plugins do NOT override the three methods. + + /// Register a handler for one schema type name. Typically called once per + /// supported schema in the plugin's constructor. + /// + /// The type_name is stored verbatim — the base class does no domain- + /// specific normalization. Plugins that have their own naming convention + /// (e.g. ROS-2 \"pkg/msg/Type\" vs ROS-1 \"pkg/Type\") must register and + /// look up using a single canonical form they pick. The base class will + /// look up handlers using the bound_type_name_ value the plugin set in + /// bindSchema, so the two must agree on the convention. + /// + /// Either `handler.parse_scalars` or `handler.parse_object` may be null — + /// the base class returns the appropriate unexpected when an absent route + /// is invoked for that schema. + void registerSchemaHandler(std::string_view type_name, sdk::SchemaHandler handler) { + handlers_.insert_or_assign(std::string(type_name), std::move(handler)); + } + + /// Strict lookup — returns nullptr if no handler is registered for this + /// exact type name. Caller must not retain the pointer past the next + /// mutation of the handler table. There is no fallback / default + /// mechanism in the SDK: a plugin that wants behaviour for unknown + /// types is expected to register a handler under the bound name itself + /// (typically inside its bindSchema override). + [[nodiscard]] const sdk::SchemaHandler* findSchemaHandler(std::string_view type_name) const { + auto it = handlers_.find(std::string(type_name)); + if (it == handlers_.end()) { + return nullptr; + } + return &it->second; + } + + /// Lookup against the registered handler table. Non-virtual: plugins + /// populate the table via registerSchemaHandler() rather than overriding; + /// the C ABI trampolines invoke this directly on MessageParserPluginBase*. + /// Returns kNone when no handler is registered for this type name. + sdk::SchemaClassification classifySchema( + std::string_view type_name, Span schema) const { + (void)schema; + if (const auto* h = findSchemaHandler(type_name)) { + return {h->object_kind}; + } + return {}; + } + + /// Invoke the registered scalar handler for the currently-bound schema. + /// Returns unexpected if no handler is registered, or if the registered + /// handler did not provide a parse_scalars callable. Non-virtual — see + /// classifySchema above for the rationale. + Expected> parseScalars( + Timestamp timestamp_ns, Span payload) { + const auto* h = findSchemaHandler(bound_type_name_); + if (h == nullptr) { + return unexpected(std::string("parser does not register schema: ") + bound_type_name_); + } + if (!h->parse_scalars) { + return unexpected(std::string("registered handler has no parse_scalars: ") + bound_type_name_); + } + return h->parse_scalars(timestamp_ns, payload); + } + + /// Invoke the registered object handler for the currently-bound schema. + /// Returns unexpected if no handler is registered, or if the registered + /// handler did not provide a parse_object callable (i.e. this schema + /// produces only scalars). Non-virtual — see classifySchema above. + /// + /// `payload.anchor` may be empty; in that case the parser is expected to + /// materialize anything it wants to outlive this call. In-process callers + /// that already own the payload buffer should pass a non-empty anchor so + /// the parser can return a zero-copy CanonicalObject. + Expected parseObject( + Timestamp timestamp_ns, sdk::PayloadView payload) { + const auto* h = findSchemaHandler(bound_type_name_); + if (h == nullptr) { + return unexpected(std::string("parser does not register schema: ") + bound_type_name_); + } + if (!h->parse_object) { + return unexpected(std::string("registered handler has no parse_object: ") + bound_type_name_); + } + return h->parse_object(timestamp_ns, payload); + } /// Return a pointer to a static plugin-exposed extension for @p id, or /// nullptr if unknown. Default returns nullptr. @@ -104,6 +285,10 @@ class MessageParserPluginBase { trampoline_load_config, trampoline_parse, trampoline_get_plugin_extension, + // Tail slots: pure-functional API (canonical-object). + trampoline_classify_schema, + trampoline_parse_scalars, + trampoline_parse_object, }; return &vt; } @@ -130,12 +315,33 @@ class MessageParserPluginBase { return write_host_view_.valid(); } + protected: + /// Last type name received by bindSchema, stored verbatim. Used by the + /// table-based dispatch in classifySchema / parseScalars / parseObject: + /// the base looks up the handler for this string in the registered table. + /// + /// Subclasses that override bindSchema must either call the base class + /// implementation or set this member themselves. If the plugin has its + /// own naming convention, the canonical form it picks must be the same + /// here and at registerSchemaHandler — the base does not normalize. + std::string bound_type_name_; + private: sdk::ServiceRegistry service_registry_{}; sdk::ParserWriteHostView write_host_view_{PJ_parser_write_host_t{}}; sdk::ParserObjectWriteHostView object_write_host_view_{}; std::string config_buf_; + // Schema handler table populated by the plugin via registerSchemaHandler(). + std::unordered_map handlers_; + + // Buffers kept alive between parse_scalars / parse_object calls so the host + // can read the returned slices safely. release callbacks in the ABI structs + // are NULL — the plugin owns the buffers and overwrites them on each call. + std::vector scalars_owned_buf_; + std::vector scalars_abi_buf_; + std::vector object_blob_buf_; + static void storeError(PJ_error_t* out_error, int32_t code, std::string_view domain, std::string_view message) { sdk::fillError(out_error, code, domain, message); } @@ -149,6 +355,15 @@ class MessageParserPluginBase { static bool trampoline_parse( void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_error_t* out_error) noexcept; static const void* trampoline_get_plugin_extension(void* ctx, PJ_string_view_t id) noexcept; + static bool trampoline_classify_schema( + void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, + PJ_schema_classification_t* out_classification, PJ_error_t* out_error) noexcept; + static bool trampoline_parse_scalars( + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, + PJ_named_field_value_buffer_t* out_fields, PJ_error_t* out_error) noexcept; + static bool trampoline_parse_object( + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, + PJ_canonical_object_blob_t* out_blob, PJ_error_t* out_error) noexcept; }; } // namespace PJ diff --git a/pj_base/include/pj_base/sdk/object_ingest_policy.hpp b/pj_base/include/pj_base/sdk/object_ingest_policy.hpp new file mode 100644 index 0000000..bce03c7 --- /dev/null +++ b/pj_base/include/pj_base/sdk/object_ingest_policy.hpp @@ -0,0 +1,119 @@ +/** + * @file object_ingest_policy.hpp + * @brief Configurable policy that the host applies when a DataSource hands + * it a deferred byte fetcher via DataSourceRuntimeHostView::pushMessage. + * + * The DataSource is policy-agnostic: it only fabricates a callable that + * produces the raw payload bytes when invoked. The host decides — based on + * the policy resolved for (source_id, topic, kind) — whether to invoke the + * fetcher immediately (parse and store now), invoke it once for scalars + * and again on each pull, or never invoke it during ingest and only on + * consumer pulls. + * + * Reference design: docs/claude_reports/2026.05.07-arquitectura-objectstore-pipeline-misalignment.md + */ +#pragma once + +#include +#include +#include + +#include "pj_base/sdk/canonical_object.hpp" + +namespace PJ { +namespace sdk { + +enum class ObjectIngestPolicy : uint8_t { + /// Host never invokes the fetcher during ingest. The (timestamp, fetcher) + /// pair is registered in the ObjectStore and the fetcher fires only when a + /// consumer pulls. No scalar timeseries are produced for this topic — its + /// scalar fields (header.stamp, width, height, …) do not appear in the + /// Datastore. The topic shows up as an ObjectTopic without children in the + /// unified curve tree. Best for very large blobs (point clouds, 4K video) + /// when scalar timeseries are not interesting. + kPureLazy, + + /// Host invokes the fetcher once during ingest to obtain bytes; parser's + /// parseScalars runs and writes scalar fields to the Datastore; bytes are + /// then dropped from RAM. The ObjectStore retains only the fetcher closure + /// for re-invocation on pull (which means the file/source is read again). + /// Best for the common case: scalar timeseries appear in the tree, the + /// blob does not stay in RAM, and pulls re-read on demand. + kLazyObjectsEagerScalars, + + /// Host invokes the fetcher once during ingest, parser's parseScalars and + /// parseObject both run, the canonical object is serialized into the + /// ObjectStore via pushOwned. Pull is trivial — bytes are already there. + /// Highest memory cost; the only viable mode for streaming sources that + /// have no persistent reader to re-read from. Streaming-only fallback. + kEager, +}; + +/// Resolver with hierarchical overrides: +/// +/// topic > data_source > kind > default +/// +/// The application sets the levels it cares about during setup; the host +/// queries resolve(source_id, topic, kind) for each message. The resolver +/// is intentionally an opaque carrier — its policy decisions are the +/// host's concern, not the DataSource plugin's. +/// +/// Typical setup: +/// +/// resolver.setDefault(kLazyObjectsEagerScalars); +/// resolver.setForKind(CanonicalObjectKind::kCompressedImage, kPureLazy); +/// resolver.setForKind(CanonicalObjectKind::kPointCloud, kPureLazy); +/// // kImage stays at kLazyObjectsEagerScalars: width/height/encoding columns are useful +/// +class ObjectIngestPolicyResolver { + public: + /// Default policy applied when no more specific override matches. + void setDefault(ObjectIngestPolicy policy) { + default_ = policy; + } + + /// Override the default for a specific canonical object kind. Useful when + /// (e.g.) all PointCloud2 topics should be lazy regardless of source. + void setForKind(CanonicalObjectKind kind, ObjectIngestPolicy policy) { + by_kind_[kind] = policy; + } + + /// Override the default for all topics of a specific DataSource, keyed by + /// the plugin manifest "id". + void setForDataSource(std::string_view source_id, ObjectIngestPolicy policy) { + by_source_[std::string(source_id)] = policy; + } + + /// Override the default for a specific topic name. Highest precedence. + void setForTopic(std::string_view topic_name, ObjectIngestPolicy policy) { + by_topic_[std::string(topic_name)] = policy; + } + + /// Resolve the policy for a given (source_id, topic_name, object_kind). + /// Precedence: topic > source > kind > default. The first match wins — + /// no merging or composition between levels. + [[nodiscard]] ObjectIngestPolicy resolve( + std::string_view source_id, + std::string_view topic_name, + CanonicalObjectKind object_kind) const { + if (auto it = by_topic_.find(std::string(topic_name)); it != by_topic_.end()) { + return it->second; + } + if (auto it = by_source_.find(std::string(source_id)); it != by_source_.end()) { + return it->second; + } + if (auto it = by_kind_.find(object_kind); it != by_kind_.end()) { + return it->second; + } + return default_; + } + + private: + ObjectIngestPolicy default_ = ObjectIngestPolicy::kLazyObjectsEagerScalars; + std::unordered_map by_kind_; + std::unordered_map by_source_; + std::unordered_map by_topic_; +}; + +} // namespace sdk +} // namespace PJ diff --git a/pj_base/tests/abi_layout_sentinels_test.cpp b/pj_base/tests/abi_layout_sentinels_test.cpp index ccbf2fe..2be7936 100644 --- a/pj_base/tests/abi_layout_sentinels_test.cpp +++ b/pj_base/tests/abi_layout_sentinels_test.cpp @@ -79,7 +79,8 @@ static_assert(offsetof(PJ_message_parser_vtable_t, struct_size) == 4, "v4 prefix static_assert(offsetof(PJ_message_parser_vtable_t, bind) == 32, "v4 bind slot pinned"); static_assert(offsetof(PJ_message_parser_vtable_t, parse) == 64, "v4 parse slot pinned"); static_assert(offsetof(PJ_message_parser_vtable_t, get_plugin_extension) == 72, "v4 last baseline slot pinned"); -static_assert(sizeof(PJ_message_parser_vtable_t) == 80, "MessageParser vtable size (update deliberately on append)"); +// 80 baseline (v4.0) + 3 canonical-object tail slots × 8 bytes each = 104. +static_assert(sizeof(PJ_message_parser_vtable_t) == 104, "MessageParser vtable size (update deliberately on append)"); static_assert(PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE == 80, "MIN vtable size is pinned at v4.0 — NEVER INCREASE"); static_assert(PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE <= sizeof(PJ_message_parser_vtable_t), "MIN must never exceed current"); diff --git a/pj_base/tests/object_ingest_policy_test.cpp b/pj_base/tests/object_ingest_policy_test.cpp new file mode 100644 index 0000000..184f114 --- /dev/null +++ b/pj_base/tests/object_ingest_policy_test.cpp @@ -0,0 +1,92 @@ +#include "pj_base/sdk/object_ingest_policy.hpp" + +#include + +using PJ::sdk::CanonicalObjectKind; +using PJ::sdk::ObjectIngestPolicy; +using PJ::sdk::ObjectIngestPolicyResolver; + +TEST(ObjectIngestPolicyResolverTest, DefaultPolicyIsLazyScalars) { + ObjectIngestPolicyResolver r; + EXPECT_EQ(r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), + ObjectIngestPolicy::kLazyObjectsEagerScalars); +} + +TEST(ObjectIngestPolicyResolverTest, SetDefaultIsRespected) { + ObjectIngestPolicyResolver r; + r.setDefault(ObjectIngestPolicy::kEager); + EXPECT_EQ(r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), + ObjectIngestPolicy::kEager); +} + +TEST(ObjectIngestPolicyResolverTest, KindOverrideFiresOnMatch) { + ObjectIngestPolicyResolver r; + r.setDefault(ObjectIngestPolicy::kLazyObjectsEagerScalars); + r.setForKind(CanonicalObjectKind::kPointCloud, ObjectIngestPolicy::kPureLazy); + + EXPECT_EQ(r.resolve("src", "/lidar/points", CanonicalObjectKind::kPointCloud), + ObjectIngestPolicy::kPureLazy); + // Different kind falls through to default. + EXPECT_EQ(r.resolve("src", "/cam/image", CanonicalObjectKind::kImage), + ObjectIngestPolicy::kLazyObjectsEagerScalars); +} + +TEST(ObjectIngestPolicyResolverTest, SourceOverridesKind) { + ObjectIngestPolicyResolver r; + r.setDefault(ObjectIngestPolicy::kLazyObjectsEagerScalars); + r.setForKind(CanonicalObjectKind::kPointCloud, ObjectIngestPolicy::kPureLazy); + r.setForDataSource("mcap_source", ObjectIngestPolicy::kEager); + + // Source matches → kEager beats the kPointCloud kind override. + EXPECT_EQ(r.resolve("mcap_source", "/lidar/points", CanonicalObjectKind::kPointCloud), + ObjectIngestPolicy::kEager); + // Different source → kind override fires. + EXPECT_EQ(r.resolve("ros2_stream", "/lidar/points", CanonicalObjectKind::kPointCloud), + ObjectIngestPolicy::kPureLazy); +} + +TEST(ObjectIngestPolicyResolverTest, TopicOverridesEverything) { + ObjectIngestPolicyResolver r; + r.setDefault(ObjectIngestPolicy::kLazyObjectsEagerScalars); + r.setForKind(CanonicalObjectKind::kPointCloud, ObjectIngestPolicy::kPureLazy); + r.setForDataSource("mcap_source", ObjectIngestPolicy::kEager); + r.setForTopic("/diagnostics/lidar", ObjectIngestPolicy::kPureLazy); + + // Topic match wins over source and kind. + EXPECT_EQ(r.resolve("mcap_source", "/diagnostics/lidar", CanonicalObjectKind::kPointCloud), + ObjectIngestPolicy::kPureLazy); + // Different topic → source override fires. + EXPECT_EQ(r.resolve("mcap_source", "/other/lidar", CanonicalObjectKind::kPointCloud), + ObjectIngestPolicy::kEager); +} + +TEST(ObjectIngestPolicyResolverTest, TypicalApplicationSetup) { + // Mirror the recommended setup: large blobs lazy by default, raw images keep + // their metadata as columns. + ObjectIngestPolicyResolver r; + r.setDefault(ObjectIngestPolicy::kLazyObjectsEagerScalars); + r.setForKind(CanonicalObjectKind::kCompressedImage, ObjectIngestPolicy::kPureLazy); + r.setForKind(CanonicalObjectKind::kPointCloud, ObjectIngestPolicy::kPureLazy); + + EXPECT_EQ(r.resolve("mcap", "/cam/raw", CanonicalObjectKind::kImage), + ObjectIngestPolicy::kLazyObjectsEagerScalars); + EXPECT_EQ(r.resolve("mcap", "/cam/jpeg", CanonicalObjectKind::kCompressedImage), + ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("mcap", "/lidar", CanonicalObjectKind::kPointCloud), + ObjectIngestPolicy::kPureLazy); + // Scalar-only topic (no canonical) takes the default. + EXPECT_EQ(r.resolve("mcap", "/diagnostics", CanonicalObjectKind::kNone), + ObjectIngestPolicy::kLazyObjectsEagerScalars); +} + +TEST(ObjectIngestPolicyResolverTest, LastWriteWinsForSameKey) { + ObjectIngestPolicyResolver r; + r.setForKind(CanonicalObjectKind::kImage, ObjectIngestPolicy::kEager); + r.setForKind(CanonicalObjectKind::kImage, ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("src", "/topic", CanonicalObjectKind::kImage), + ObjectIngestPolicy::kPureLazy); + + r.setForTopic("/x", ObjectIngestPolicy::kLazyObjectsEagerScalars); + r.setForTopic("/x", ObjectIngestPolicy::kEager); + EXPECT_EQ(r.resolve("src", "/x", CanonicalObjectKind::kImage), ObjectIngestPolicy::kEager); +} diff --git a/pj_base/tests/push_message_v2_test.cpp b/pj_base/tests/push_message_v2_test.cpp new file mode 100644 index 0000000..9a56c59 --- /dev/null +++ b/pj_base/tests/push_message_v2_test.cpp @@ -0,0 +1,226 @@ +// Tests for the SDK template `DataSourceRuntimeHostView::pushMessage` and +// its delegation to the C ABI slot `push_message_v2`. We exercise: +// +// 1. Vector closure → captured fetcher in the host yields the same bytes. +// 2. PayloadView closure → ditto, with the producer-supplied anchor +// flowing through the C ABI. +// 3. Multiple fetcher invocations are idempotent (same bytes each time). +// 4. The heap-held closure context is destroyed exactly once when the +// host calls fetcher.release. +// 5. When the host predates the slot (struct_size short OR field NULL), +// the SDK template falls back to push_raw_message — both for vector +// and for PayloadView closures. + +#include "pj_base/data_source_protocol.h" +#include "pj_base/sdk/canonical_object.hpp" +#include "pj_base/sdk/data_source_host_views.hpp" + +#include + +#include +#include +#include +#include + +namespace { + +// Captured state from a push_message_v2 invocation. +struct CapturedPush { + PJ_parser_binding_handle_t handle{}; + int64_t timestamp_ns = 0; + PJ_payload_fetcher_t fetcher{}; + bool received = false; +}; + +// Mock runtime host — exposes a vtable that captures push_message_v2 calls +// and, alternatively, push_raw_message calls (for the legacy fallback). +class MockHost { + public: + MockHost() { + vtable_.protocol_version = PJ_DATA_SOURCE_PROTOCOL_VERSION; + vtable_.struct_size = sizeof(PJ_data_source_runtime_host_vtable_t); + vtable_.push_raw_message = &MockHost::pushRawMessageThunk; + vtable_.push_message_v2 = &MockHost::pushMessageV2Thunk; + host_.ctx = this; + host_.vtable = &vtable_; + } + + // Drop the v2 slot — both clearing the field and shrinking struct_size, + // matching the runtime scenario where the host predates the addition. + void disablePushMessageV2() { + vtable_.push_message_v2 = nullptr; + vtable_.struct_size = offsetof(PJ_data_source_runtime_host_vtable_t, push_message_v2); + } + + PJ::DataSourceRuntimeHostView view() const { + return PJ::DataSourceRuntimeHostView(host_); + } + + CapturedPush& captured() { return captured_; } + std::vector& receivedRawBytes() { return raw_bytes_; } + + private: + static bool pushRawMessageThunk( + void* ctx, PJ_parser_binding_handle_t /*handle*/, int64_t /*ts*/, + PJ_bytes_view_t payload, PJ_error_t* /*err*/) noexcept { + auto* self = static_cast(ctx); + self->raw_bytes_.assign(payload.data, payload.data + payload.size); + return true; + } + + static bool pushMessageV2Thunk( + void* ctx, PJ_parser_binding_handle_t handle, int64_t ts, + PJ_payload_fetcher_t fetcher, PJ_error_t* /*err*/) noexcept { + auto* self = static_cast(ctx); + self->captured_.handle = handle; + self->captured_.timestamp_ns = ts; + self->captured_.fetcher = fetcher; + self->captured_.received = true; + return true; + } + + PJ_data_source_runtime_host_vtable_t vtable_{}; + PJ_data_source_runtime_host_t host_{}; + CapturedPush captured_; + std::vector raw_bytes_; +}; + +// Helper: invoke a captured fetcher and assert the produced bytes match +// the expected content. Releases the payload anchor. +void invokeFetcherAndExpect(PJ_payload_fetcher_t& fetcher, const std::vector& expected) { + PJ_payload_t payload{}; + PJ_error_t err{}; + ASSERT_NE(fetcher.fetch, nullptr); + ASSERT_TRUE(fetcher.fetch(fetcher.ctx, &payload, &err)); + ASSERT_EQ(payload.size, expected.size()); + EXPECT_EQ(0, std::memcmp(payload.data, expected.data(), expected.size())); + if (payload.anchor.release) { + payload.anchor.release(payload.anchor.ctx); + } +} + +// ---------- Tests against the new push_message_v2 path ---------- + +TEST(PushMessageV2Test, VectorClosureFlowsThroughSlot) { + MockHost host; + std::vector expected{1, 2, 3, 4, 5}; + + auto status = host.view().pushMessage( + PJ::ParserBindingHandle{42}, 1000, + [bytes = expected]() { return bytes; }); + + ASSERT_TRUE(status); + ASSERT_TRUE(host.captured().received); + EXPECT_EQ(host.captured().handle.id, 42U); + EXPECT_EQ(host.captured().timestamp_ns, 1000); + invokeFetcherAndExpect(host.captured().fetcher, expected); + host.captured().fetcher.release(host.captured().fetcher.ctx); +} + +TEST(PushMessageV2Test, PayloadViewClosureFlowsThroughSlot) { + MockHost host; + std::vector expected{10, 20, 30}; + auto owned = std::make_shared>(expected); + + auto status = host.view().pushMessage( + PJ::ParserBindingHandle{7}, 2000, + [owned]() -> PJ::sdk::PayloadView { + return {PJ::Span(owned->data(), owned->size()), owned}; + }); + + ASSERT_TRUE(status); + invokeFetcherAndExpect(host.captured().fetcher, expected); + host.captured().fetcher.release(host.captured().fetcher.ctx); +} + +TEST(PushMessageV2Test, FetchIsIdempotent) { + MockHost host; + std::vector expected{0x42, 0x43}; + + ASSERT_TRUE(host.view().pushMessage( + PJ::ParserBindingHandle{1}, 0, + [bytes = expected]() { return bytes; })); + + // Multiple invocations must yield the same bytes each time. + for (int i = 0; i < 3; ++i) { + invokeFetcherAndExpect(host.captured().fetcher, expected); + } + host.captured().fetcher.release(host.captured().fetcher.ctx); +} + +TEST(PushMessageV2Test, FetcherCtxReleasedAfterHostCalls) { + MockHost host; + auto canary = std::make_shared(42); + std::weak_ptr witness = canary; + + ASSERT_TRUE(host.view().pushMessage( + PJ::ParserBindingHandle{1}, 0, + [canary]() { return std::vector{}; })); + + // Drop our local reference; the heap-held closure copy keeps the canary + // alive while the fetcher is owned by the host. + canary.reset(); + EXPECT_FALSE(witness.expired()) + << "closure should still keep the canary alive (held in heap fetcher ctx)"; + + // Host releases the fetcher → closure destroyed → captured shared_ptr + // destroyed → canary's last reference drops. + host.captured().fetcher.release(host.captured().fetcher.ctx); + EXPECT_TRUE(witness.expired()) + << "after release, the captured shared_ptr should have been the last reference"; +} + +TEST(PushMessageV2Test, PayloadAnchorPropagates) { + MockHost host; + auto owned = std::make_shared>(std::vector{0x99, 0x9A}); + std::weak_ptr> witness = owned; + + ASSERT_TRUE(host.view().pushMessage( + PJ::ParserBindingHandle{1}, 0, + [owned]() -> PJ::sdk::PayloadView { + return {PJ::Span(owned->data(), owned->size()), owned}; + })); + + // The closure holds the owned vector via its shared_ptr capture. + // After releasing our local owned, the closure's copy keeps it alive. + owned.reset(); + EXPECT_FALSE(witness.expired()); + + // Invoke the fetcher: it builds a PayloadView into the same buffer; the + // anchor returned to the host is yet another shared_ptr copy, so the + // buffer survives even past the closure's release. + PJ_payload_t payload{}; + PJ_error_t err{}; + ASSERT_TRUE(host.captured().fetcher.fetch( + host.captured().fetcher.ctx, &payload, &err)); + EXPECT_EQ(payload.size, 2U); + + // Releasing the fetcher (closure dies) does NOT kill the buffer because + // the active payload anchor still holds a reference. + host.captured().fetcher.release(host.captured().fetcher.ctx); + EXPECT_FALSE(witness.expired()) + << "active payload anchor should still keep the buffer alive"; + + // Releasing the payload anchor drops the last reference. + if (payload.anchor.release) { + payload.anchor.release(payload.anchor.ctx); + } + EXPECT_TRUE(witness.expired()); +} + +// ---------- Host without push_message_v2 returns explicit error ---------- + +TEST(PushMessageV2Test, ReturnsErrorWhenSlotMissing) { + MockHost host; + host.disablePushMessageV2(); + + std::vector expected{0xA, 0xB, 0xC}; + auto status = host.view().pushMessage( + PJ::ParserBindingHandle{1}, 100, + [bytes = expected]() { return bytes; }); + EXPECT_FALSE(status); // explicit failure — no silent fallback to push_raw_message + EXPECT_FALSE(host.captured().received); + EXPECT_TRUE(host.receivedRawBytes().empty()); +} + +} // namespace diff --git a/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp b/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp index 4d7e50d..d4c4ffd 100644 --- a/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp +++ b/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp @@ -4,10 +4,12 @@ */ #pragma once +#include #include #include #include +#include #include #include #include @@ -103,6 +105,25 @@ class MessageParserHandle { return okStatus(); } + /// A priori classification of the bound schema. Tail-slot gated; when + /// the plugin doesn't expose classify_schema (older protocol header) + /// returns kNone, matching the host contract documented in + /// message_parser_protocol.h. + [[nodiscard]] sdk::CanonicalObjectKind classifySchema( + std::string_view type_name, Span schema) const { + if (!PJ_HAS_TAIL_SLOT(PJ_message_parser_vtable_t, vt_, classify_schema)) { + return sdk::CanonicalObjectKind::kNone; + } + PJ_string_view_t tn{type_name.data(), type_name.size()}; + PJ_bytes_view_t sc{schema.data(), schema.size()}; + PJ_schema_classification_t out{}; + PJ_error_t err{}; + if (!vt_->classify_schema(ctx_, tn, sc, &out, &err)) { + return sdk::CanonicalObjectKind::kNone; + } + return static_cast(out.object_kind); + } + /// Query a plugin-exposed extension by reverse-DNS id. Tail-slot gated. [[nodiscard]] const void* getPluginExtension(std::string_view id) const { if (!PJ_HAS_TAIL_SLOT(PJ_message_parser_vtable_t, vt_, get_plugin_extension)) { diff --git a/pj_plugins/tests/data_source_library_test.cpp b/pj_plugins/tests/data_source_library_test.cpp index 2f6854d..b2cd9c8 100644 --- a/pj_plugins/tests/data_source_library_test.cpp +++ b/pj_plugins/tests/data_source_library_test.cpp @@ -100,6 +100,7 @@ PJ_data_source_runtime_host_t makeRuntimeHost(bool with_encodings) { .push_raw_message = rhPushRawMessage, .show_message_box = rhShowMessageBox, .list_available_encodings = rhListEncodings, + .push_message_v2 = nullptr, }; static const PJ_data_source_runtime_host_vtable_t no_enc_vt = { .protocol_version = 1, @@ -115,6 +116,7 @@ PJ_data_source_runtime_host_t makeRuntimeHost(bool with_encodings) { .push_raw_message = rhPushRawMessage, .show_message_box = rhShowMessageBox, .list_available_encodings = nullptr, + .push_message_v2 = nullptr, }; return PJ_data_source_runtime_host_t{ .ctx = reinterpret_cast(0x2), diff --git a/pj_plugins/tests/file_source_integration_test.cpp b/pj_plugins/tests/file_source_integration_test.cpp index d1fbef5..99b1954 100644 --- a/pj_plugins/tests/file_source_integration_test.cpp +++ b/pj_plugins/tests/file_source_integration_test.cpp @@ -154,6 +154,7 @@ PJ_data_source_runtime_host_t makeRuntimeHost(RuntimeHostState* state) { .push_raw_message = rhPushRawMessage, .show_message_box = rhShowMessageBox, .list_available_encodings = nullptr, + .push_message_v2 = nullptr, }; return PJ_data_source_runtime_host_t{.ctx = state, .vtable = &vtable}; } From 7c5bca10d1c5e0e4360c34f31a09ca659c7b2c2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Tue, 12 May 2026 11:18:44 +0200 Subject: [PATCH 2/8] chore: apply clang-format to canonical-object-pipeline sources No behavior change. Reformats SDK headers, ABI headers, message parser plumbing, and the new ingest policy / push_message_v2 tests per .clang-format (Google style, 120-col, InsertBraces: true). Output of running pre-commit's clang-format hook over the files touched in this branch. --- .../include/pj_base/data_source_protocol.h | 4 +- .../include/pj_base/message_parser_protocol.h | 15 +- .../include/pj_base/sdk/canonical_object.hpp | 8 +- .../pj_base/sdk/data_source_host_views.hpp | 11 +- .../detail/canonical_object_serialization.hpp | 167 +++++++++++++----- .../sdk/detail/message_parser_trampolines.hpp | 12 +- .../sdk/message_parser_plugin_base.hpp | 31 ++-- .../pj_base/sdk/object_ingest_policy.hpp | 4 +- pj_base/tests/object_ingest_policy_test.cpp | 42 ++--- pj_base/tests/push_message_v2_test.cpp | 68 +++---- .../pj_plugins/host/message_parser_handle.hpp | 3 +- 11 files changed, 203 insertions(+), 162 deletions(-) diff --git a/pj_base/include/pj_base/data_source_protocol.h b/pj_base/include/pj_base/data_source_protocol.h index 03dd463..04d57e0 100644 --- a/pj_base/include/pj_base/data_source_protocol.h +++ b/pj_base/include/pj_base/data_source_protocol.h @@ -327,8 +327,8 @@ typedef struct PJ_data_source_runtime_host_vtable_t { * `fetcher.release` so the plugin's ctx leaks no resources. */ bool (*push_message_v2)( - void* ctx, PJ_parser_binding_handle_t handle, int64_t host_timestamp_ns, - PJ_payload_fetcher_t fetcher, PJ_error_t* out_error) PJ_NOEXCEPT; + void* ctx, PJ_parser_binding_handle_t handle, int64_t host_timestamp_ns, PJ_payload_fetcher_t fetcher, + PJ_error_t* out_error) PJ_NOEXCEPT; } PJ_data_source_runtime_host_vtable_t; /** Fat pointer pairing a runtime host context with its vtable. */ diff --git a/pj_base/include/pj_base/message_parser_protocol.h b/pj_base/include/pj_base/message_parser_protocol.h index bf8d290..897dc30 100644 --- a/pj_base/include/pj_base/message_parser_protocol.h +++ b/pj_base/include/pj_base/message_parser_protocol.h @@ -129,8 +129,9 @@ typedef struct PJ_message_parser_vtable_t { * * Pure-functional contract: no host side-effects. */ - bool (*classify_schema)(void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, - PJ_schema_classification_t* out_classification, PJ_error_t* out_error) PJ_NOEXCEPT; + bool (*classify_schema)( + void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, PJ_schema_classification_t* out_classification, + PJ_error_t* out_error) PJ_NOEXCEPT; /** * [stream-thread] Pure-functional alternative to parse(): returns the @@ -141,8 +142,9 @@ typedef struct PJ_message_parser_vtable_t { * The plugin owns @p out_fields.fields buffer; @p out_fields.release is * called by the host when done. release MAY be NULL. */ - bool (*parse_scalars)(void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, - PJ_named_field_value_buffer_t* out_fields, PJ_error_t* out_error) PJ_NOEXCEPT; + bool (*parse_scalars)( + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_named_field_value_buffer_t* out_fields, + PJ_error_t* out_error) PJ_NOEXCEPT; /** * [stream-thread] Pure-functional production of a canonical object from @@ -154,8 +156,9 @@ typedef struct PJ_message_parser_vtable_t { * caller (DataSource / app) decides whether to push the blob eagerly, * capture it inside a lazy lambda, or hand it directly to a consumer. */ - bool (*parse_object)(void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, - PJ_canonical_object_blob_t* out_blob, PJ_error_t* out_error) PJ_NOEXCEPT; + bool (*parse_object)( + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_canonical_object_blob_t* out_blob, + PJ_error_t* out_error) PJ_NOEXCEPT; } PJ_message_parser_vtable_t; /* The vtable above is ABI-APPENDABLE: new slots may be added at the tail; * host reads guard with PJ_HAS_TAIL_SLOT. See PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE. */ diff --git a/pj_base/include/pj_base/sdk/canonical_object.hpp b/pj_base/include/pj_base/sdk/canonical_object.hpp index b55e84e..7b41d68 100644 --- a/pj_base/include/pj_base/sdk/canonical_object.hpp +++ b/pj_base/include/pj_base/sdk/canonical_object.hpp @@ -210,9 +210,9 @@ struct PointField { }; std::string name; - uint32_t offset = 0; ///< Byte offset of this field within a single point. + uint32_t offset = 0; ///< Byte offset of this field within a single point. Datatype datatype = Datatype::kUnknown; - uint32_t count = 1; ///< Number of elements of `datatype` (typically 1). + uint32_t count = 1; ///< Number of elements of `datatype` (typically 1). }; /// Bytes per element for a given PointField datatype. Returns 0 for kUnknown. @@ -242,8 +242,8 @@ struct PointField { struct PointCloud { uint32_t width = 0; uint32_t height = 1; - uint32_t point_step = 0; ///< Bytes per point. - uint32_t row_step = 0; ///< Bytes per row (= point_step * width when no padding). + uint32_t point_step = 0; ///< Bytes per point. + uint32_t row_step = 0; ///< Bytes per row (= point_step * width when no padding). bool is_bigendian = false; bool is_dense = true; std::vector fields; diff --git a/pj_base/include/pj_base/sdk/data_source_host_views.hpp b/pj_base/include/pj_base/sdk/data_source_host_views.hpp index 0710833..f6c4bbf 100644 --- a/pj_base/include/pj_base/sdk/data_source_host_views.hpp +++ b/pj_base/include/pj_base/sdk/data_source_host_views.hpp @@ -248,8 +248,7 @@ class DataSourceRuntimeHostView { /// the slot returns an explicit error here rather than silently /// degrading to a kEager push_raw_message. template - [[nodiscard]] Status pushMessage( - ParserBindingHandle handle, Timestamp host_timestamp_ns, Fetcher&& fetcher) const { + [[nodiscard]] Status pushMessage(ParserBindingHandle handle, Timestamp host_timestamp_ns, Fetcher&& fetcher) const { if (!valid()) { return unexpected(std::string("runtime host is not bound")); } @@ -275,9 +274,7 @@ class DataSourceRuntimeHostView { out->data = pv.bytes.data(); out->size = pv.bytes.size(); out->anchor.ctx = held; - out->anchor.release = +[](void* h) noexcept { - delete static_cast(h); - }; + out->anchor.release = +[](void* h) noexcept { delete static_cast(h); }; } else { // Closure returns std::vector: heap-hold the vector; // it owns its bytes. @@ -285,9 +282,7 @@ class DataSourceRuntimeHostView { out->data = held->data(); out->size = held->size(); out->anchor.ctx = held; - out->anchor.release = +[](void* h) noexcept { - delete static_cast*>(h); - }; + out->anchor.release = +[](void* h) noexcept { delete static_cast*>(h); }; } return true; } catch (const std::exception& e) { diff --git a/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp b/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp index e57f1a4..dd56d72 100644 --- a/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp +++ b/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp @@ -33,7 +33,9 @@ namespace detail { // ----------------------------------------------------------------------------- inline void appendBytes(std::vector& out, const void* src, size_t n) { - if (n == 0) return; + if (n == 0) { + return; + } const auto* p = static_cast(src); out.insert(out.end(), p, p + n); } @@ -186,20 +188,34 @@ inline std::vector serializeCanonicalObject(const CanonicalObject& obj) // per object, same as before the iter-3 SDK change. inline Expected readImageBody(BlobReader& r, Timestamp ts) { auto width = r.readPod(); - if (!width) return unexpected(width.error()); + if (!width) { + return unexpected(width.error()); + } auto height = r.readPod(); - if (!height) return unexpected(height.error()); + if (!height) { + return unexpected(height.error()); + } auto pixel_format_raw = r.readPod(); - if (!pixel_format_raw) return unexpected(pixel_format_raw.error()); + if (!pixel_format_raw) { + return unexpected(pixel_format_raw.error()); + } auto is_be = r.readPod(); - if (!is_be) return unexpected(is_be.error()); - /*reserved*/ if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + if (!is_be) { + return unexpected(is_be.error()); + } + /*reserved*/ if (auto rsv = r.readPod(); !rsv) { return unexpected(rsv.error()); } auto row_step = r.readPod(); - if (!row_step) return unexpected(row_step.error()); + if (!row_step) { + return unexpected(row_step.error()); + } auto pixels_size = r.readPod(); - if (!pixels_size) return unexpected(pixels_size.error()); + if (!pixels_size) { + return unexpected(pixels_size.error()); + } auto pixels = r.readBytes(*pixels_size); - if (!pixels) return unexpected(pixels.error()); + if (!pixels) { + return unexpected(pixels.error()); + } auto owned = std::make_shared>(std::move(*pixels)); Span view(owned->data(), owned->size()); @@ -217,20 +233,34 @@ inline Expected readImageBody(BlobReader& r, Timestamp ts) { inline Expected readCompressedImageBody(BlobReader& r, Timestamp ts) { auto format_raw = r.readPod(); - if (!format_raw) return unexpected(format_raw.error()); + if (!format_raw) { + return unexpected(format_raw.error()); + } auto has_min = r.readPod(); - if (!has_min) return unexpected(has_min.error()); + if (!has_min) { + return unexpected(has_min.error()); + } auto has_max = r.readPod(); - if (!has_max) return unexpected(has_max.error()); - /*reserved*/ if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + if (!has_max) { + return unexpected(has_max.error()); + } + /*reserved*/ if (auto rsv = r.readPod(); !rsv) { return unexpected(rsv.error()); } auto depth_min = r.readPod(); - if (!depth_min) return unexpected(depth_min.error()); + if (!depth_min) { + return unexpected(depth_min.error()); + } auto depth_max = r.readPod(); - if (!depth_max) return unexpected(depth_max.error()); + if (!depth_max) { + return unexpected(depth_max.error()); + } auto bytes_size = r.readPod(); - if (!bytes_size) return unexpected(bytes_size.error()); + if (!bytes_size) { + return unexpected(bytes_size.error()); + } auto bytes = r.readBytes(*bytes_size); - if (!bytes) return unexpected(bytes.error()); + if (!bytes) { + return unexpected(bytes.error()); + } auto owned = std::make_shared>(std::move(*bytes)); CompressedImage ci{}; @@ -238,55 +268,90 @@ inline Expected readCompressedImageBody(BlobReader& r, Timestam ci.bytes = Span(owned->data(), owned->size()); ci.anchor = owned; ci.timestamp_ns = ts; - if (*has_min != 0) ci.extras.compressed_depth_min = *depth_min; - if (*has_max != 0) ci.extras.compressed_depth_max = *depth_max; + if (*has_min != 0) { + ci.extras.compressed_depth_min = *depth_min; + } + if (*has_max != 0) { + ci.extras.compressed_depth_max = *depth_max; + } return ci; } inline Expected readPointCloudBody(BlobReader& r, Timestamp ts) { auto width = r.readPod(); - if (!width) return unexpected(width.error()); + if (!width) { + return unexpected(width.error()); + } auto height = r.readPod(); - if (!height) return unexpected(height.error()); + if (!height) { + return unexpected(height.error()); + } auto point_step = r.readPod(); - if (!point_step) return unexpected(point_step.error()); + if (!point_step) { + return unexpected(point_step.error()); + } auto row_step = r.readPod(); - if (!row_step) return unexpected(row_step.error()); + if (!row_step) { + return unexpected(row_step.error()); + } auto is_be = r.readPod(); - if (!is_be) return unexpected(is_be.error()); + if (!is_be) { + return unexpected(is_be.error()); + } auto is_dense = r.readPod(); - if (!is_dense) return unexpected(is_dense.error()); + if (!is_dense) { + return unexpected(is_dense.error()); + } auto fields_count = r.readPod(); - if (!fields_count) return unexpected(fields_count.error()); + if (!fields_count) { + return unexpected(fields_count.error()); + } std::vector fields; fields.reserve(*fields_count); for (uint16_t i = 0; i < *fields_count; ++i) { auto name_size = r.readPod(); - if (!name_size) return unexpected(name_size.error()); + if (!name_size) { + return unexpected(name_size.error()); + } auto name = r.readString(*name_size); - if (!name) return unexpected(name.error()); + if (!name) { + return unexpected(name.error()); + } auto offset = r.readPod(); - if (!offset) return unexpected(offset.error()); + if (!offset) { + return unexpected(offset.error()); + } auto datatype_raw = r.readPod(); - if (!datatype_raw) return unexpected(datatype_raw.error()); + if (!datatype_raw) { + return unexpected(datatype_raw.error()); + } /*reserved×3*/ for (int j = 0; j < 3; ++j) { - if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + if (auto rsv = r.readPod(); !rsv) { + return unexpected(rsv.error()); + } } auto count = r.readPod(); - if (!count) return unexpected(count.error()); - fields.push_back(PointField{ - .name = std::move(*name), - .offset = *offset, - .datatype = static_cast(*datatype_raw), - .count = *count, - }); + if (!count) { + return unexpected(count.error()); + } + fields.push_back( + PointField{ + .name = std::move(*name), + .offset = *offset, + .datatype = static_cast(*datatype_raw), + .count = *count, + }); } auto data_size = r.readPod(); - if (!data_size) return unexpected(data_size.error()); + if (!data_size) { + return unexpected(data_size.error()); + } auto data = r.readBytes(*data_size); - if (!data) return unexpected(data.error()); + if (!data) { + return unexpected(data.error()); + } auto owned = std::make_shared>(std::move(*data)); Span view(owned->data(), owned->size()); @@ -313,25 +378,35 @@ inline Expected deserializeCanonicalObject(const uint8_t* data, BlobReader r(data, size); auto kind_raw = r.readPod(); - if (!kind_raw) return unexpected(kind_raw.error()); - /*reserved*/ if (auto rsv = r.readPod(); !rsv) return unexpected(rsv.error()); + if (!kind_raw) { + return unexpected(kind_raw.error()); + } + /*reserved*/ if (auto rsv = r.readPod(); !rsv) { return unexpected(rsv.error()); } auto ts = r.readPod(); - if (!ts) return unexpected(ts.error()); + if (!ts) { + return unexpected(ts.error()); + } switch (static_cast(*kind_raw)) { case CanonicalObjectKind::kImage: { auto img = readImageBody(r, *ts); - if (!img) return unexpected(img.error()); + if (!img) { + return unexpected(img.error()); + } return CanonicalObject{std::move(*img)}; } case CanonicalObjectKind::kCompressedImage: { auto ci = readCompressedImageBody(r, *ts); - if (!ci) return unexpected(ci.error()); + if (!ci) { + return unexpected(ci.error()); + } return CanonicalObject{std::move(*ci)}; } case CanonicalObjectKind::kPointCloud: { auto pc = readPointCloudBody(r, *ts); - if (!pc) return unexpected(pc.error()); + if (!pc) { + return unexpected(pc.error()); + } return CanonicalObject{std::move(*pc)}; } case CanonicalObjectKind::kNone: diff --git a/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp b/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp index 8264cc1..01a4422 100644 --- a/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp +++ b/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp @@ -134,8 +134,8 @@ inline const void* MessageParserPluginBase::trampoline_get_plugin_extension(void // ----------------------------------------------------------------------------- inline bool MessageParserPluginBase::trampoline_classify_schema( - void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, - PJ_schema_classification_t* out_classification, PJ_error_t* out_error) noexcept { + void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, PJ_schema_classification_t* out_classification, + PJ_error_t* out_error) noexcept { auto* self = static_cast(ctx); if (out_classification == nullptr) { self->storeError(out_error, 2, "plugin", "classify_schema called with null out_classification"); @@ -158,8 +158,8 @@ inline bool MessageParserPluginBase::trampoline_classify_schema( } inline bool MessageParserPluginBase::trampoline_parse_scalars( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, - PJ_named_field_value_buffer_t* out_fields, PJ_error_t* out_error) noexcept { + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_named_field_value_buffer_t* out_fields, + PJ_error_t* out_error) noexcept { auto* self = static_cast(ctx); if (out_fields == nullptr) { self->storeError(out_error, 2, "plugin", "parse_scalars called with null out_fields"); @@ -193,8 +193,8 @@ inline bool MessageParserPluginBase::trampoline_parse_scalars( } inline bool MessageParserPluginBase::trampoline_parse_object( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, - PJ_canonical_object_blob_t* out_blob, PJ_error_t* out_error) noexcept { + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_canonical_object_blob_t* out_blob, + PJ_error_t* out_error) noexcept { auto* self = static_cast(ctx); if (out_blob == nullptr) { self->storeError(out_error, 2, "plugin", "parse_object called with null out_blob"); diff --git a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp index bf68690..93672cd 100644 --- a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp +++ b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp @@ -45,16 +45,14 @@ struct SchemaHandler { /// Scalar route: returns owned column data — no anchor needed because the /// returned vector and any string_views inside it are materialized by the /// parser, independent of the caller's payload buffer. - std::function>(Timestamp, Span)> - parse_scalars; + std::function>(Timestamp, Span)> parse_scalars; /// Canonical-object route: takes a PayloadView so the parser can return a /// CanonicalObject whose internal Span(s) reference the same underlying /// buffer (zero-copy). The parser propagates `payload.anchor` into the /// returned object so its bytes outlive this call. When the caller passes /// an empty anchor, the parser must materialize whatever it wants to retain. - std::function(Timestamp, PayloadView)> - parse_object; + std::function(Timestamp, PayloadView)> parse_object; }; } // namespace sdk @@ -157,9 +155,7 @@ class MessageParserPluginBase { if (fields->empty()) { return okStatus(); } - return writeHost().appendRecord( - timestamp_ns, - Span(fields->data(), fields->size())); + return writeHost().appendRecord(timestamp_ns, Span(fields->data(), fields->size())); } // --------------------------------------------------------------------------- @@ -213,8 +209,7 @@ class MessageParserPluginBase { /// populate the table via registerSchemaHandler() rather than overriding; /// the C ABI trampolines invoke this directly on MessageParserPluginBase*. /// Returns kNone when no handler is registered for this type name. - sdk::SchemaClassification classifySchema( - std::string_view type_name, Span schema) const { + sdk::SchemaClassification classifySchema(std::string_view type_name, Span schema) const { (void)schema; if (const auto* h = findSchemaHandler(type_name)) { return {h->object_kind}; @@ -226,8 +221,7 @@ class MessageParserPluginBase { /// Returns unexpected if no handler is registered, or if the registered /// handler did not provide a parse_scalars callable. Non-virtual — see /// classifySchema above for the rationale. - Expected> parseScalars( - Timestamp timestamp_ns, Span payload) { + Expected> parseScalars(Timestamp timestamp_ns, Span payload) { const auto* h = findSchemaHandler(bound_type_name_); if (h == nullptr) { return unexpected(std::string("parser does not register schema: ") + bound_type_name_); @@ -247,8 +241,7 @@ class MessageParserPluginBase { /// materialize anything it wants to outlive this call. In-process callers /// that already own the payload buffer should pass a non-empty anchor so /// the parser can return a zero-copy CanonicalObject. - Expected parseObject( - Timestamp timestamp_ns, sdk::PayloadView payload) { + Expected parseObject(Timestamp timestamp_ns, sdk::PayloadView payload) { const auto* h = findSchemaHandler(bound_type_name_); if (h == nullptr) { return unexpected(std::string("parser does not register schema: ") + bound_type_name_); @@ -356,14 +349,14 @@ class MessageParserPluginBase { void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_error_t* out_error) noexcept; static const void* trampoline_get_plugin_extension(void* ctx, PJ_string_view_t id) noexcept; static bool trampoline_classify_schema( - void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, - PJ_schema_classification_t* out_classification, PJ_error_t* out_error) noexcept; + void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, PJ_schema_classification_t* out_classification, + PJ_error_t* out_error) noexcept; static bool trampoline_parse_scalars( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, - PJ_named_field_value_buffer_t* out_fields, PJ_error_t* out_error) noexcept; + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_named_field_value_buffer_t* out_fields, + PJ_error_t* out_error) noexcept; static bool trampoline_parse_object( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, - PJ_canonical_object_blob_t* out_blob, PJ_error_t* out_error) noexcept; + void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_canonical_object_blob_t* out_blob, + PJ_error_t* out_error) noexcept; }; } // namespace PJ diff --git a/pj_base/include/pj_base/sdk/object_ingest_policy.hpp b/pj_base/include/pj_base/sdk/object_ingest_policy.hpp index bce03c7..f7821cd 100644 --- a/pj_base/include/pj_base/sdk/object_ingest_policy.hpp +++ b/pj_base/include/pj_base/sdk/object_ingest_policy.hpp @@ -93,9 +93,7 @@ class ObjectIngestPolicyResolver { /// Precedence: topic > source > kind > default. The first match wins — /// no merging or composition between levels. [[nodiscard]] ObjectIngestPolicy resolve( - std::string_view source_id, - std::string_view topic_name, - CanonicalObjectKind object_kind) const { + std::string_view source_id, std::string_view topic_name, CanonicalObjectKind object_kind) const { if (auto it = by_topic_.find(std::string(topic_name)); it != by_topic_.end()) { return it->second; } diff --git a/pj_base/tests/object_ingest_policy_test.cpp b/pj_base/tests/object_ingest_policy_test.cpp index 184f114..3f7a10c 100644 --- a/pj_base/tests/object_ingest_policy_test.cpp +++ b/pj_base/tests/object_ingest_policy_test.cpp @@ -8,15 +8,14 @@ using PJ::sdk::ObjectIngestPolicyResolver; TEST(ObjectIngestPolicyResolverTest, DefaultPolicyIsLazyScalars) { ObjectIngestPolicyResolver r; - EXPECT_EQ(r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), - ObjectIngestPolicy::kLazyObjectsEagerScalars); + EXPECT_EQ( + r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), ObjectIngestPolicy::kLazyObjectsEagerScalars); } TEST(ObjectIngestPolicyResolverTest, SetDefaultIsRespected) { ObjectIngestPolicyResolver r; r.setDefault(ObjectIngestPolicy::kEager); - EXPECT_EQ(r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), - ObjectIngestPolicy::kEager); + EXPECT_EQ(r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), ObjectIngestPolicy::kEager); } TEST(ObjectIngestPolicyResolverTest, KindOverrideFiresOnMatch) { @@ -24,11 +23,9 @@ TEST(ObjectIngestPolicyResolverTest, KindOverrideFiresOnMatch) { r.setDefault(ObjectIngestPolicy::kLazyObjectsEagerScalars); r.setForKind(CanonicalObjectKind::kPointCloud, ObjectIngestPolicy::kPureLazy); - EXPECT_EQ(r.resolve("src", "/lidar/points", CanonicalObjectKind::kPointCloud), - ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("src", "/lidar/points", CanonicalObjectKind::kPointCloud), ObjectIngestPolicy::kPureLazy); // Different kind falls through to default. - EXPECT_EQ(r.resolve("src", "/cam/image", CanonicalObjectKind::kImage), - ObjectIngestPolicy::kLazyObjectsEagerScalars); + EXPECT_EQ(r.resolve("src", "/cam/image", CanonicalObjectKind::kImage), ObjectIngestPolicy::kLazyObjectsEagerScalars); } TEST(ObjectIngestPolicyResolverTest, SourceOverridesKind) { @@ -38,11 +35,9 @@ TEST(ObjectIngestPolicyResolverTest, SourceOverridesKind) { r.setForDataSource("mcap_source", ObjectIngestPolicy::kEager); // Source matches → kEager beats the kPointCloud kind override. - EXPECT_EQ(r.resolve("mcap_source", "/lidar/points", CanonicalObjectKind::kPointCloud), - ObjectIngestPolicy::kEager); + EXPECT_EQ(r.resolve("mcap_source", "/lidar/points", CanonicalObjectKind::kPointCloud), ObjectIngestPolicy::kEager); // Different source → kind override fires. - EXPECT_EQ(r.resolve("ros2_stream", "/lidar/points", CanonicalObjectKind::kPointCloud), - ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("ros2_stream", "/lidar/points", CanonicalObjectKind::kPointCloud), ObjectIngestPolicy::kPureLazy); } TEST(ObjectIngestPolicyResolverTest, TopicOverridesEverything) { @@ -53,11 +48,10 @@ TEST(ObjectIngestPolicyResolverTest, TopicOverridesEverything) { r.setForTopic("/diagnostics/lidar", ObjectIngestPolicy::kPureLazy); // Topic match wins over source and kind. - EXPECT_EQ(r.resolve("mcap_source", "/diagnostics/lidar", CanonicalObjectKind::kPointCloud), - ObjectIngestPolicy::kPureLazy); + EXPECT_EQ( + r.resolve("mcap_source", "/diagnostics/lidar", CanonicalObjectKind::kPointCloud), ObjectIngestPolicy::kPureLazy); // Different topic → source override fires. - EXPECT_EQ(r.resolve("mcap_source", "/other/lidar", CanonicalObjectKind::kPointCloud), - ObjectIngestPolicy::kEager); + EXPECT_EQ(r.resolve("mcap_source", "/other/lidar", CanonicalObjectKind::kPointCloud), ObjectIngestPolicy::kEager); } TEST(ObjectIngestPolicyResolverTest, TypicalApplicationSetup) { @@ -68,23 +62,19 @@ TEST(ObjectIngestPolicyResolverTest, TypicalApplicationSetup) { r.setForKind(CanonicalObjectKind::kCompressedImage, ObjectIngestPolicy::kPureLazy); r.setForKind(CanonicalObjectKind::kPointCloud, ObjectIngestPolicy::kPureLazy); - EXPECT_EQ(r.resolve("mcap", "/cam/raw", CanonicalObjectKind::kImage), - ObjectIngestPolicy::kLazyObjectsEagerScalars); - EXPECT_EQ(r.resolve("mcap", "/cam/jpeg", CanonicalObjectKind::kCompressedImage), - ObjectIngestPolicy::kPureLazy); - EXPECT_EQ(r.resolve("mcap", "/lidar", CanonicalObjectKind::kPointCloud), - ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("mcap", "/cam/raw", CanonicalObjectKind::kImage), ObjectIngestPolicy::kLazyObjectsEagerScalars); + EXPECT_EQ(r.resolve("mcap", "/cam/jpeg", CanonicalObjectKind::kCompressedImage), ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("mcap", "/lidar", CanonicalObjectKind::kPointCloud), ObjectIngestPolicy::kPureLazy); // Scalar-only topic (no canonical) takes the default. - EXPECT_EQ(r.resolve("mcap", "/diagnostics", CanonicalObjectKind::kNone), - ObjectIngestPolicy::kLazyObjectsEagerScalars); + EXPECT_EQ( + r.resolve("mcap", "/diagnostics", CanonicalObjectKind::kNone), ObjectIngestPolicy::kLazyObjectsEagerScalars); } TEST(ObjectIngestPolicyResolverTest, LastWriteWinsForSameKey) { ObjectIngestPolicyResolver r; r.setForKind(CanonicalObjectKind::kImage, ObjectIngestPolicy::kEager); r.setForKind(CanonicalObjectKind::kImage, ObjectIngestPolicy::kPureLazy); - EXPECT_EQ(r.resolve("src", "/topic", CanonicalObjectKind::kImage), - ObjectIngestPolicy::kPureLazy); + EXPECT_EQ(r.resolve("src", "/topic", CanonicalObjectKind::kImage), ObjectIngestPolicy::kPureLazy); r.setForTopic("/x", ObjectIngestPolicy::kLazyObjectsEagerScalars); r.setForTopic("/x", ObjectIngestPolicy::kEager); diff --git a/pj_base/tests/push_message_v2_test.cpp b/pj_base/tests/push_message_v2_test.cpp index 9a56c59..b0b6722 100644 --- a/pj_base/tests/push_message_v2_test.cpp +++ b/pj_base/tests/push_message_v2_test.cpp @@ -11,10 +11,6 @@ // the SDK template falls back to push_raw_message — both for vector // and for PayloadView closures. -#include "pj_base/data_source_protocol.h" -#include "pj_base/sdk/canonical_object.hpp" -#include "pj_base/sdk/data_source_host_views.hpp" - #include #include @@ -22,6 +18,10 @@ #include #include +#include "pj_base/data_source_protocol.h" +#include "pj_base/sdk/canonical_object.hpp" +#include "pj_base/sdk/data_source_host_views.hpp" + namespace { // Captured state from a push_message_v2 invocation. @@ -56,21 +56,25 @@ class MockHost { return PJ::DataSourceRuntimeHostView(host_); } - CapturedPush& captured() { return captured_; } - std::vector& receivedRawBytes() { return raw_bytes_; } + CapturedPush& captured() { + return captured_; + } + std::vector& receivedRawBytes() { + return raw_bytes_; + } private: static bool pushRawMessageThunk( - void* ctx, PJ_parser_binding_handle_t /*handle*/, int64_t /*ts*/, - PJ_bytes_view_t payload, PJ_error_t* /*err*/) noexcept { + void* ctx, PJ_parser_binding_handle_t /*handle*/, int64_t /*ts*/, PJ_bytes_view_t payload, + PJ_error_t* /*err*/) noexcept { auto* self = static_cast(ctx); self->raw_bytes_.assign(payload.data, payload.data + payload.size); return true; } static bool pushMessageV2Thunk( - void* ctx, PJ_parser_binding_handle_t handle, int64_t ts, - PJ_payload_fetcher_t fetcher, PJ_error_t* /*err*/) noexcept { + void* ctx, PJ_parser_binding_handle_t handle, int64_t ts, PJ_payload_fetcher_t fetcher, + PJ_error_t* /*err*/) noexcept { auto* self = static_cast(ctx); self->captured_.handle = handle; self->captured_.timestamp_ns = ts; @@ -105,9 +109,7 @@ TEST(PushMessageV2Test, VectorClosureFlowsThroughSlot) { MockHost host; std::vector expected{1, 2, 3, 4, 5}; - auto status = host.view().pushMessage( - PJ::ParserBindingHandle{42}, 1000, - [bytes = expected]() { return bytes; }); + auto status = host.view().pushMessage(PJ::ParserBindingHandle{42}, 1000, [bytes = expected]() { return bytes; }); ASSERT_TRUE(status); ASSERT_TRUE(host.captured().received); @@ -122,11 +124,9 @@ TEST(PushMessageV2Test, PayloadViewClosureFlowsThroughSlot) { std::vector expected{10, 20, 30}; auto owned = std::make_shared>(expected); - auto status = host.view().pushMessage( - PJ::ParserBindingHandle{7}, 2000, - [owned]() -> PJ::sdk::PayloadView { - return {PJ::Span(owned->data(), owned->size()), owned}; - }); + auto status = host.view().pushMessage(PJ::ParserBindingHandle{7}, 2000, [owned]() -> PJ::sdk::PayloadView { + return {PJ::Span(owned->data(), owned->size()), owned}; + }); ASSERT_TRUE(status); invokeFetcherAndExpect(host.captured().fetcher, expected); @@ -137,9 +137,7 @@ TEST(PushMessageV2Test, FetchIsIdempotent) { MockHost host; std::vector expected{0x42, 0x43}; - ASSERT_TRUE(host.view().pushMessage( - PJ::ParserBindingHandle{1}, 0, - [bytes = expected]() { return bytes; })); + ASSERT_TRUE(host.view().pushMessage(PJ::ParserBindingHandle{1}, 0, [bytes = expected]() { return bytes; })); // Multiple invocations must yield the same bytes each time. for (int i = 0; i < 3; ++i) { @@ -153,21 +151,17 @@ TEST(PushMessageV2Test, FetcherCtxReleasedAfterHostCalls) { auto canary = std::make_shared(42); std::weak_ptr witness = canary; - ASSERT_TRUE(host.view().pushMessage( - PJ::ParserBindingHandle{1}, 0, - [canary]() { return std::vector{}; })); + ASSERT_TRUE(host.view().pushMessage(PJ::ParserBindingHandle{1}, 0, [canary]() { return std::vector{}; })); // Drop our local reference; the heap-held closure copy keeps the canary // alive while the fetcher is owned by the host. canary.reset(); - EXPECT_FALSE(witness.expired()) - << "closure should still keep the canary alive (held in heap fetcher ctx)"; + EXPECT_FALSE(witness.expired()) << "closure should still keep the canary alive (held in heap fetcher ctx)"; // Host releases the fetcher → closure destroyed → captured shared_ptr // destroyed → canary's last reference drops. host.captured().fetcher.release(host.captured().fetcher.ctx); - EXPECT_TRUE(witness.expired()) - << "after release, the captured shared_ptr should have been the last reference"; + EXPECT_TRUE(witness.expired()) << "after release, the captured shared_ptr should have been the last reference"; } TEST(PushMessageV2Test, PayloadAnchorPropagates) { @@ -175,11 +169,9 @@ TEST(PushMessageV2Test, PayloadAnchorPropagates) { auto owned = std::make_shared>(std::vector{0x99, 0x9A}); std::weak_ptr> witness = owned; - ASSERT_TRUE(host.view().pushMessage( - PJ::ParserBindingHandle{1}, 0, - [owned]() -> PJ::sdk::PayloadView { - return {PJ::Span(owned->data(), owned->size()), owned}; - })); + ASSERT_TRUE(host.view().pushMessage(PJ::ParserBindingHandle{1}, 0, [owned]() -> PJ::sdk::PayloadView { + return {PJ::Span(owned->data(), owned->size()), owned}; + })); // The closure holds the owned vector via its shared_ptr capture. // After releasing our local owned, the closure's copy keeps it alive. @@ -191,15 +183,13 @@ TEST(PushMessageV2Test, PayloadAnchorPropagates) { // buffer survives even past the closure's release. PJ_payload_t payload{}; PJ_error_t err{}; - ASSERT_TRUE(host.captured().fetcher.fetch( - host.captured().fetcher.ctx, &payload, &err)); + ASSERT_TRUE(host.captured().fetcher.fetch(host.captured().fetcher.ctx, &payload, &err)); EXPECT_EQ(payload.size, 2U); // Releasing the fetcher (closure dies) does NOT kill the buffer because // the active payload anchor still holds a reference. host.captured().fetcher.release(host.captured().fetcher.ctx); - EXPECT_FALSE(witness.expired()) - << "active payload anchor should still keep the buffer alive"; + EXPECT_FALSE(witness.expired()) << "active payload anchor should still keep the buffer alive"; // Releasing the payload anchor drops the last reference. if (payload.anchor.release) { @@ -215,9 +205,7 @@ TEST(PushMessageV2Test, ReturnsErrorWhenSlotMissing) { host.disablePushMessageV2(); std::vector expected{0xA, 0xB, 0xC}; - auto status = host.view().pushMessage( - PJ::ParserBindingHandle{1}, 100, - [bytes = expected]() { return bytes; }); + auto status = host.view().pushMessage(PJ::ParserBindingHandle{1}, 100, [bytes = expected]() { return bytes; }); EXPECT_FALSE(status); // explicit failure — no silent fallback to push_raw_message EXPECT_FALSE(host.captured().received); EXPECT_TRUE(host.receivedRawBytes().empty()); diff --git a/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp b/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp index d4c4ffd..f299e6f 100644 --- a/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp +++ b/pj_plugins/include/pj_plugins/host/message_parser_handle.hpp @@ -109,8 +109,7 @@ class MessageParserHandle { /// the plugin doesn't expose classify_schema (older protocol header) /// returns kNone, matching the host contract documented in /// message_parser_protocol.h. - [[nodiscard]] sdk::CanonicalObjectKind classifySchema( - std::string_view type_name, Span schema) const { + [[nodiscard]] sdk::CanonicalObjectKind classifySchema(std::string_view type_name, Span schema) const { if (!PJ_HAS_TAIL_SLOT(PJ_message_parser_vtable_t, vt_, classify_schema)) { return sdk::CanonicalObjectKind::kNone; } From a7e2d3458ceaeb995daa2a09f9f41cf466335322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Tue, 12 May 2026 11:21:17 +0200 Subject: [PATCH 3/8] chore(sdk): drop dead doc references from SDK header comments The file-level docstrings of canonical_object.hpp and object_ingest_policy.hpp pointed at a private report path that does not exist in this repository. Remove the dangling references; the surrounding prose already explains the design. --- pj_base/include/pj_base/sdk/canonical_object.hpp | 2 -- pj_base/include/pj_base/sdk/object_ingest_policy.hpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/pj_base/include/pj_base/sdk/canonical_object.hpp b/pj_base/include/pj_base/sdk/canonical_object.hpp index 7b41d68..348bfe9 100644 --- a/pj_base/include/pj_base/sdk/canonical_object.hpp +++ b/pj_base/include/pj_base/sdk/canonical_object.hpp @@ -9,8 +9,6 @@ * itself remains agnostic to these types — it stores opaque bytes; the * decoding into a CanonicalObject happens in the consumer at pull time, by * invoking the parser's parseObject() against the bytes. - * - * Reference report: docs/claude_reports/2026.05.07-arquitectura-objectstore-pipeline-misalignment.md */ #pragma once diff --git a/pj_base/include/pj_base/sdk/object_ingest_policy.hpp b/pj_base/include/pj_base/sdk/object_ingest_policy.hpp index f7821cd..7ab5bda 100644 --- a/pj_base/include/pj_base/sdk/object_ingest_policy.hpp +++ b/pj_base/include/pj_base/sdk/object_ingest_policy.hpp @@ -9,8 +9,6 @@ * fetcher immediately (parse and store now), invoke it once for scalars * and again on each pull, or never invoke it during ingest and only on * consumer pulls. - * - * Reference design: docs/claude_reports/2026.05.07-arquitectura-objectstore-pipeline-misalignment.md */ #pragma once From e982135b7530e6e1a129913eeffc2b7d0e77678b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Wed, 13 May 2026 13:55:45 +0200 Subject: [PATCH 4/8] refactor(sdk): drop C ABI wire format for canonical objects and pure-functional scalars The canonical-object pipeline and the pure-functional scalar path are C++ SDK contracts: MessageParserPluginBase::parseObject() and parseScalars() are called directly on the C++ pointer by the in-process runtime host, preserving zero-copy via BufferAnchor. The C ABI slots (parse_object, parse_scalars) and their wire-format support are removed; pure-C plugins emit scalars via the parse() slot writing to writeHost. Removed: - detail/canonical_object_serialization.hpp (full file). - PJ_canonical_object_blob_t and PJ_named_field_value_buffer_t from canonical_object_abi.h. - parse_object and parse_scalars slots from PJ_message_parser_vtable_t. - trampoline_parse_object and trampoline_parse_scalars. - Per-instance buffers (scalars_owned_buf_, scalars_abi_buf_, object_blob_buf_) that only the trampolines used. Kept: - SchemaHandler::parse_scalars / parse_object (C++ callables registered by plugins) and MessageParserPluginBase::parseScalars / parseObject (C++ methods invoked by the host). - classify_schema C ABI slot (used by MessageParserHandle::classifySchema). - The parse() slot for pure-C plugins that emit scalars to writeHost. Vtable size: PJ_message_parser_vtable_t shrinks from 104 to 88 bytes (80 v4.0 baseline + 1 tail slot for classify_schema). PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE stays pinned at 80. --- .../include/pj_base/canonical_object_abi.h | 111 +---- .../include/pj_base/data_source_protocol.h | 3 - .../include/pj_base/message_parser_protocol.h | 41 +- .../detail/canonical_object_serialization.hpp | 420 ------------------ .../sdk/detail/message_parser_trampolines.hpp | 75 ---- .../sdk/message_parser_plugin_base.hpp | 16 - pj_base/tests/abi_layout_sentinels_test.cpp | 4 +- 7 files changed, 29 insertions(+), 641 deletions(-) delete mode 100644 pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp diff --git a/pj_base/include/pj_base/canonical_object_abi.h b/pj_base/include/pj_base/canonical_object_abi.h index b370951..f42b2e9 100644 --- a/pj_base/include/pj_base/canonical_object_abi.h +++ b/pj_base/include/pj_base/canonical_object_abi.h @@ -1,16 +1,20 @@ /** * @file canonical_object_abi.h - * @brief C ABI representation of canonical objects produced by parsers. + * @brief C ABI vocabulary for schema classification. * - * The C++ vocabulary lives in pj_base/sdk/canonical_object.hpp - * (sdk::CanonicalObject = std::variant). - * This file defines the wire format used to cross the plugin C ABI boundary - * for that variant: parser plugins produce a flat byte blob with a small - * header describing the kind, and the host deserializes it back to the - * C++ type. + * The host invokes classify_schema (a slot in PJ_message_parser_vtable_t) + * after bind_schema to learn what kind of canonical object the parser will + * produce for that schema. The parser returns a PJ_schema_classification_t + * carrying a PJ_canonical_object_kind_t. * - * The blob layout is little-endian, packed, with no implementation-defined - * padding. Trampolines and host loader use it directly. + * Canonical-object production (sdk::Image / sdk::CompressedImage / + * sdk::PointCloud) and the pure-functional scalar production + * (Expected>) are C++ SDK contracts: plugins + * inheriting from MessageParserPluginBase register handlers in + * SchemaHandler, and the in-process host consumes them via + * MessageParserPluginBase::parseObject() and parseScalars() called + * directly on the C++ pointer. Pure-C plugins emit scalars via the + * parse() slot (writing to writeHost). */ #ifndef PJ_CANONICAL_OBJECT_ABI_H #define PJ_CANONICAL_OBJECT_ABI_H @@ -25,22 +29,11 @@ extern "C" { #endif -/** - * Owned buffer of named field values produced by the parse_scalars slot. - * The plugin owns the @p fields array; the host calls @p release(alloc_handle) - * when done. release MAY be NULL if the plugin manages the buffer in a way - * that does not require explicit release between calls. - */ -typedef struct PJ_named_field_value_buffer_t { - const PJ_named_field_value_t* fields; - size_t count; - void* alloc_handle; - void (*release)(void* alloc_handle); -} PJ_named_field_value_buffer_t; - /** * Canonical object kinds. Numeric values are stable across releases — never - * renumber. Mirror of PJ::sdk::CanonicalObjectKind for use across the C ABI. + * renumber. Returned by the classify_schema slot to advertise what kind of + * canonical object the parser will produce for this schema (or kNone if + * the parser only produces scalars). */ typedef enum PJ_canonical_object_kind_t { PJ_CANONICAL_OBJECT_KIND_NONE = 0, @@ -56,79 +49,15 @@ typedef enum PJ_canonical_object_kind_t { * Schema classification — what kind a parser declares for a given schema. * Returned a priori (without parsing payload) by the classify_schema slot. * - * Currently a single field plus reserved padding to keep the struct size - * stable across future minor extensions (declarative metadata can attach - * via additional structs returned by other slots, not by growing this one). + * Single field plus reserved padding to keep the struct size stable across + * future minor extensions. The reserved byte must be zero today; readers + * accept any value (forward compat). */ typedef struct PJ_schema_classification_t { uint16_t object_kind; /**< PJ_canonical_object_kind_t. */ - uint16_t reserved; /**< Must be zero. */ + uint16_t reserved; } PJ_schema_classification_t; -/** - * Canonical object as a flat byte blob produced by the parse_object slot. - * - * Layout of @p data: - * - * header (12 bytes, little-endian): - * uint16_t kind // PJ_canonical_object_kind_t - * uint16_t reserved - * int64_t timestamp_ns - * - * body (varies by kind, immediately follows the header): - * - * KIND_IMAGE: - * uint32_t width - * uint32_t height - * uint16_t pixel_format - * uint16_t reserved - * uint32_t pixels_size - * uint8_t pixels[pixels_size] // tightly packed, no row stride - * - * KIND_COMPRESSED_IMAGE: - * uint8_t format // 0=unknown, 1=JPEG, 2=PNG, 3=QOI - * uint8_t has_depth_min - * uint8_t has_depth_max - * uint8_t reserved - * float depth_min // valid iff has_depth_min - * float depth_max // valid iff has_depth_max - * uint32_t bytes_size - * uint8_t bytes[bytes_size] - * - * KIND_POINTCLOUD: - * uint32_t width - * uint32_t height - * uint32_t point_step - * uint32_t row_step - * uint8_t is_bigendian - * uint8_t is_dense - * uint16_t fields_count - * fields[fields_count]: - * uint32_t name_size - * char name[name_size] - * uint32_t offset - * uint8_t datatype // 0=unknown,1=i8,2=u8,3=i16,4=u16, - * // 5=i32,6=u32,7=f32,8=f64 - * uint8_t reserved[3] - * uint32_t count - * uint32_t data_size - * uint8_t data[data_size] - * - * Memory ownership: - * The blob's @p data is owned by the parser plugin. The plugin allocates - * it during parse_object and the host calls @p release(ctx, data) when it - * is done with the bytes. release MAY be NULL if data points into a - * plugin-internal buffer that the plugin manages itself across calls. - */ -typedef struct PJ_canonical_object_blob_t { - const uint8_t* data; - uint64_t size; - /** Opaque handle the plugin uses to identify the allocation. */ - void* alloc_handle; - /** Release callback invoked by the host. NULL means no release needed. */ - void (*release)(void* alloc_handle); -} PJ_canonical_object_blob_t; - #ifdef __cplusplus } #endif diff --git a/pj_base/include/pj_base/data_source_protocol.h b/pj_base/include/pj_base/data_source_protocol.h index 04d57e0..07eaa67 100644 --- a/pj_base/include/pj_base/data_source_protocol.h +++ b/pj_base/include/pj_base/data_source_protocol.h @@ -134,9 +134,6 @@ typedef struct { * no longer needs the bytes referenced by the buffer. `ctx` MAY be NULL — * meaning the buffer was static / borrowed from an external lifetime — in * which case `release` is also expected to be NULL. - * - * Mirrors the pattern of PJ_canonical_object_blob_t but applies to raw - * payload bytes, not to serialized canonical objects. */ typedef struct PJ_payload_anchor_t { void* ctx; diff --git a/pj_base/include/pj_base/message_parser_protocol.h b/pj_base/include/pj_base/message_parser_protocol.h index 897dc30..fc34507 100644 --- a/pj_base/include/pj_base/message_parser_protocol.h +++ b/pj_base/include/pj_base/message_parser_protocol.h @@ -8,16 +8,16 @@ * append_arrow_ipc — see plugin_data_api.h. Parsers stay per-record; * the host coalesces into Arrow batches internally. * - * v4 appendable tail (no version bump — protocol stays at 4): - * - classify_schema, parse_scalars, parse_object: pure-functional API - * that returns typed values instead of writing to host views. Enables - * lazy materialization and removes the parser's coupling to push policy. - * See pj_base/canonical_object_abi.h for the wire format. + * Pure-functional production (scalars by value, canonical objects by + * value with BufferAnchor) is a C++ SDK contract: parsers inheriting from + * MessageParserPluginBase register handlers in SchemaHandler and the + * in-process host calls parseScalars() / parseObject() directly on the + * C++ pointer. Pure-C plugins use the parse() slot to write scalars to + * writeHost. * * The host obtains the plugin's vtable via `PJ_get_message_parser_vtable()` * and drives the plugin through: create -> bind(registry) -> - * (bind_schema) -> (classify_schema) -> parse* / parseScalars / parseObject - * -> destroy. + * (bind_schema) -> (classify_schema) -> parse -> destroy. */ #ifndef PJ_MESSAGE_PARSER_PROTOCOL_H #define PJ_MESSAGE_PARSER_PROTOCOL_H @@ -132,33 +132,6 @@ typedef struct PJ_message_parser_vtable_t { bool (*classify_schema)( void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, PJ_schema_classification_t* out_classification, PJ_error_t* out_error) PJ_NOEXCEPT; - - /** - * [stream-thread] Pure-functional alternative to parse(): returns the - * scalar fields by value (out parameter) instead of writing them to the - * parser write host. The host invokes this in preference to parse() when - * available; legacy plugins keep using parse(). - * - * The plugin owns @p out_fields.fields buffer; @p out_fields.release is - * called by the host when done. release MAY be NULL. - */ - bool (*parse_scalars)( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_named_field_value_buffer_t* out_fields, - PJ_error_t* out_error) PJ_NOEXCEPT; - - /** - * [stream-thread] Pure-functional production of a canonical object from - * the payload. Fills @p out_blob with the serialized object (see layout - * in canonical_object_abi.h). Only meaningful when classify_schema() - * returned a non-zero kind. - * - * Pure-functional contract: no writes to the object write host. The - * caller (DataSource / app) decides whether to push the blob eagerly, - * capture it inside a lazy lambda, or hand it directly to a consumer. - */ - bool (*parse_object)( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_canonical_object_blob_t* out_blob, - PJ_error_t* out_error) PJ_NOEXCEPT; } PJ_message_parser_vtable_t; /* The vtable above is ABI-APPENDABLE: new slots may be added at the tail; * host reads guard with PJ_HAS_TAIL_SLOT. See PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE. */ diff --git a/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp b/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp deleted file mode 100644 index dd56d72..0000000 --- a/pj_base/include/pj_base/sdk/detail/canonical_object_serialization.hpp +++ /dev/null @@ -1,420 +0,0 @@ -/** - * @file detail/canonical_object_serialization.hpp - * @brief (De)serialization of PJ::sdk::CanonicalObject to/from the byte - * layout defined in pj_base/canonical_object_abi.h. - * - * The blob crosses the C ABI as raw bytes; this header turns it into the - * C++ variant on the host side and back into bytes on the plugin side. - * - * Endianness: writes/reads multi-byte integers using std::memcpy under the - * assumption that the host architecture is little-endian (the ABI mandates - * little-endian). Big-endian targets would need an explicit byte-swap layer - * here; documented as a known limitation in this iteration. - */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "pj_base/canonical_object_abi.h" -#include "pj_base/expected.hpp" -#include "pj_base/sdk/canonical_object.hpp" - -namespace PJ { -namespace sdk { -namespace detail { - -// ----------------------------------------------------------------------------- -// Low-level write helpers (host-endian = little-endian assumed) -// ----------------------------------------------------------------------------- - -inline void appendBytes(std::vector& out, const void* src, size_t n) { - if (n == 0) { - return; - } - const auto* p = static_cast(src); - out.insert(out.end(), p, p + n); -} - -template -inline void appendPod(std::vector& out, T value) { - static_assert(std::is_trivially_copyable_v, "appendPod requires trivially copyable"); - appendBytes(out, &value, sizeof(T)); -} - -// ----------------------------------------------------------------------------- -// Low-level read helpers -// ----------------------------------------------------------------------------- - -class BlobReader { - public: - BlobReader(const uint8_t* data, size_t size) : ptr_(data), end_(data + size) {} - - [[nodiscard]] bool remaining(size_t n) const noexcept { - return static_cast(end_ - ptr_) >= n; - } - - template - Expected readPod() { - static_assert(std::is_trivially_copyable_v, "readPod requires trivially copyable"); - if (!remaining(sizeof(T))) { - return unexpected(std::string("blob truncated")); - } - T value; - std::memcpy(&value, ptr_, sizeof(T)); - ptr_ += sizeof(T); - return value; - } - - Expected> readBytes(size_t n) { - if (!remaining(n)) { - return unexpected(std::string("blob truncated reading bytes")); - } - std::vector out(ptr_, ptr_ + n); - ptr_ += n; - return out; - } - - Expected readString(size_t n) { - if (!remaining(n)) { - return unexpected(std::string("blob truncated reading string")); - } - std::string out(reinterpret_cast(ptr_), n); - ptr_ += n; - return out; - } - - private: - const uint8_t* ptr_; - const uint8_t* end_; -}; - -// ----------------------------------------------------------------------------- -// Serialization (C++ → bytes) -// ----------------------------------------------------------------------------- - -inline void writeImageBody(std::vector& out, const Image& img) { - appendPod(out, img.width); - appendPod(out, img.height); - appendPod(out, static_cast(img.pixel_format)); - appendPod(out, img.is_bigendian ? 1 : 0); - appendPod(out, 0); // reserved - appendPod(out, img.row_step); - const uint32_t pixels_size = static_cast(img.pixels.size()); - appendPod(out, pixels_size); - if (pixels_size > 0) { - appendBytes(out, img.pixels.data(), pixels_size); - } -} - -inline void writeCompressedImageBody(std::vector& out, const CompressedImage& ci) { - appendPod(out, static_cast(ci.format)); - appendPod(out, ci.extras.compressed_depth_min.has_value() ? 1 : 0); - appendPod(out, ci.extras.compressed_depth_max.has_value() ? 1 : 0); - appendPod(out, 0); // reserved - appendPod(out, ci.extras.compressed_depth_min.value_or(0.0f)); - appendPod(out, ci.extras.compressed_depth_max.value_or(0.0f)); - const uint32_t bytes_size = static_cast(ci.bytes.size()); - appendPod(out, bytes_size); - if (bytes_size > 0) { - appendBytes(out, ci.bytes.data(), bytes_size); - } -} - -inline void writePointCloudBody(std::vector& out, const PointCloud& pc) { - appendPod(out, pc.width); - appendPod(out, pc.height); - appendPod(out, pc.point_step); - appendPod(out, pc.row_step); - appendPod(out, pc.is_bigendian ? 1 : 0); - appendPod(out, pc.is_dense ? 1 : 0); - appendPod(out, static_cast(pc.fields.size())); - for (const auto& f : pc.fields) { - const uint32_t name_size = static_cast(f.name.size()); - appendPod(out, name_size); - appendBytes(out, f.name.data(), name_size); - appendPod(out, f.offset); - appendPod(out, static_cast(f.datatype)); - appendPod(out, 0); // reserved - appendPod(out, 0); // reserved - appendPod(out, 0); // reserved - appendPod(out, f.count); - } - const uint32_t data_size = static_cast(pc.data.size()); - appendPod(out, data_size); - if (data_size > 0) { - appendBytes(out, pc.data.data(), data_size); - } -} - -/// Serialize a CanonicalObject into a flat byte buffer matching the layout -/// in canonical_object_abi.h. Caller owns the returned vector. -inline std::vector serializeCanonicalObject(const CanonicalObject& obj) { - std::vector out; - out.reserve(64); // header + small body; body grows for image/pointcloud - - // Header: kind (u16), reserved (u16), timestamp (i64). - const auto kind = kindOf(obj); - appendPod(out, static_cast(kind)); - appendPod(out, 0); // reserved - std::visit( - [&](const auto& concrete) { - appendPod(out, concrete.timestamp_ns); - using T = std::decay_t; - if constexpr (std::is_same_v) { - writeImageBody(out, concrete); - } else if constexpr (std::is_same_v) { - writeCompressedImageBody(out, concrete); - } else if constexpr (std::is_same_v) { - writePointCloudBody(out, concrete); - } - }, - obj); - - return out; -} - -// ----------------------------------------------------------------------------- -// Deserialization (bytes → C++) -// ----------------------------------------------------------------------------- - -// On the deserialize side we don't have a foreign anchor — the bytes come -// from the blob buffer. Wrap them in a shared_ptr and use that as -// the anchor; the Span points into the wrapped vector. Net cost: one alloc -// per object, same as before the iter-3 SDK change. -inline Expected readImageBody(BlobReader& r, Timestamp ts) { - auto width = r.readPod(); - if (!width) { - return unexpected(width.error()); - } - auto height = r.readPod(); - if (!height) { - return unexpected(height.error()); - } - auto pixel_format_raw = r.readPod(); - if (!pixel_format_raw) { - return unexpected(pixel_format_raw.error()); - } - auto is_be = r.readPod(); - if (!is_be) { - return unexpected(is_be.error()); - } - /*reserved*/ if (auto rsv = r.readPod(); !rsv) { return unexpected(rsv.error()); } - auto row_step = r.readPod(); - if (!row_step) { - return unexpected(row_step.error()); - } - auto pixels_size = r.readPod(); - if (!pixels_size) { - return unexpected(pixels_size.error()); - } - auto pixels = r.readBytes(*pixels_size); - if (!pixels) { - return unexpected(pixels.error()); - } - - auto owned = std::make_shared>(std::move(*pixels)); - Span view(owned->data(), owned->size()); - return Image{ - .width = *width, - .height = *height, - .pixel_format = static_cast(*pixel_format_raw), - .row_step = *row_step, - .is_bigendian = (*is_be != 0), - .pixels = view, - .anchor = owned, - .timestamp_ns = ts, - }; -} - -inline Expected readCompressedImageBody(BlobReader& r, Timestamp ts) { - auto format_raw = r.readPod(); - if (!format_raw) { - return unexpected(format_raw.error()); - } - auto has_min = r.readPod(); - if (!has_min) { - return unexpected(has_min.error()); - } - auto has_max = r.readPod(); - if (!has_max) { - return unexpected(has_max.error()); - } - /*reserved*/ if (auto rsv = r.readPod(); !rsv) { return unexpected(rsv.error()); } - auto depth_min = r.readPod(); - if (!depth_min) { - return unexpected(depth_min.error()); - } - auto depth_max = r.readPod(); - if (!depth_max) { - return unexpected(depth_max.error()); - } - auto bytes_size = r.readPod(); - if (!bytes_size) { - return unexpected(bytes_size.error()); - } - auto bytes = r.readBytes(*bytes_size); - if (!bytes) { - return unexpected(bytes.error()); - } - - auto owned = std::make_shared>(std::move(*bytes)); - CompressedImage ci{}; - ci.format = static_cast(*format_raw); - ci.bytes = Span(owned->data(), owned->size()); - ci.anchor = owned; - ci.timestamp_ns = ts; - if (*has_min != 0) { - ci.extras.compressed_depth_min = *depth_min; - } - if (*has_max != 0) { - ci.extras.compressed_depth_max = *depth_max; - } - return ci; -} - -inline Expected readPointCloudBody(BlobReader& r, Timestamp ts) { - auto width = r.readPod(); - if (!width) { - return unexpected(width.error()); - } - auto height = r.readPod(); - if (!height) { - return unexpected(height.error()); - } - auto point_step = r.readPod(); - if (!point_step) { - return unexpected(point_step.error()); - } - auto row_step = r.readPod(); - if (!row_step) { - return unexpected(row_step.error()); - } - auto is_be = r.readPod(); - if (!is_be) { - return unexpected(is_be.error()); - } - auto is_dense = r.readPod(); - if (!is_dense) { - return unexpected(is_dense.error()); - } - auto fields_count = r.readPod(); - if (!fields_count) { - return unexpected(fields_count.error()); - } - - std::vector fields; - fields.reserve(*fields_count); - for (uint16_t i = 0; i < *fields_count; ++i) { - auto name_size = r.readPod(); - if (!name_size) { - return unexpected(name_size.error()); - } - auto name = r.readString(*name_size); - if (!name) { - return unexpected(name.error()); - } - auto offset = r.readPod(); - if (!offset) { - return unexpected(offset.error()); - } - auto datatype_raw = r.readPod(); - if (!datatype_raw) { - return unexpected(datatype_raw.error()); - } - /*reserved×3*/ for (int j = 0; j < 3; ++j) { - if (auto rsv = r.readPod(); !rsv) { - return unexpected(rsv.error()); - } - } - auto count = r.readPod(); - if (!count) { - return unexpected(count.error()); - } - fields.push_back( - PointField{ - .name = std::move(*name), - .offset = *offset, - .datatype = static_cast(*datatype_raw), - .count = *count, - }); - } - - auto data_size = r.readPod(); - if (!data_size) { - return unexpected(data_size.error()); - } - auto data = r.readBytes(*data_size); - if (!data) { - return unexpected(data.error()); - } - - auto owned = std::make_shared>(std::move(*data)); - Span view(owned->data(), owned->size()); - return PointCloud{ - .width = *width, - .height = *height, - .point_step = *point_step, - .row_step = *row_step, - .is_bigendian = (*is_be != 0), - .is_dense = (*is_dense != 0), - .fields = std::move(fields), - .data = view, - .anchor = owned, - .timestamp_ns = ts, - }; -} - -/// Deserialize a flat byte buffer into a CanonicalObject. Returns unexpected -/// on truncation, unknown kind, or any inconsistency. -inline Expected deserializeCanonicalObject(const uint8_t* data, size_t size) { - if (data == nullptr) { - return unexpected(std::string("null blob")); - } - BlobReader r(data, size); - - auto kind_raw = r.readPod(); - if (!kind_raw) { - return unexpected(kind_raw.error()); - } - /*reserved*/ if (auto rsv = r.readPod(); !rsv) { return unexpected(rsv.error()); } - auto ts = r.readPod(); - if (!ts) { - return unexpected(ts.error()); - } - - switch (static_cast(*kind_raw)) { - case CanonicalObjectKind::kImage: { - auto img = readImageBody(r, *ts); - if (!img) { - return unexpected(img.error()); - } - return CanonicalObject{std::move(*img)}; - } - case CanonicalObjectKind::kCompressedImage: { - auto ci = readCompressedImageBody(r, *ts); - if (!ci) { - return unexpected(ci.error()); - } - return CanonicalObject{std::move(*ci)}; - } - case CanonicalObjectKind::kPointCloud: { - auto pc = readPointCloudBody(r, *ts); - if (!pc) { - return unexpected(pc.error()); - } - return CanonicalObject{std::move(*pc)}; - } - case CanonicalObjectKind::kNone: - default: - return unexpected(std::string("unknown or unsupported canonical object kind")); - } -} - -} // namespace detail -} // namespace sdk -} // namespace PJ diff --git a/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp b/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp index 01a4422..90cd721 100644 --- a/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp +++ b/pj_base/include/pj_base/sdk/detail/message_parser_trampolines.hpp @@ -7,8 +7,6 @@ */ #pragma once -#include "pj_base/sdk/detail/canonical_object_serialization.hpp" - namespace PJ { inline void MessageParserPluginBase::trampoline_destroy(void* ctx) noexcept { @@ -157,77 +155,4 @@ inline bool MessageParserPluginBase::trampoline_classify_schema( } } -inline bool MessageParserPluginBase::trampoline_parse_scalars( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_named_field_value_buffer_t* out_fields, - PJ_error_t* out_error) noexcept { - auto* self = static_cast(ctx); - if (out_fields == nullptr) { - self->storeError(out_error, 2, "plugin", "parse_scalars called with null out_fields"); - return false; - } - try { - Span payload_span(payload.data, payload.size); - auto result = self->parseScalars(timestamp_ns, payload_span); - if (!result) { - self->storeError(out_error, 1, "plugin", std::move(result).error()); - return false; - } - // Hand the C++ vector to the plugin-owned buffer so PJ_string_view_t - // entries inside the ABI structs remain valid until the next call. - self->scalars_owned_buf_ = std::move(*result); - self->scalars_abi_buf_ = sdk::toAbiNamed( - Span(self->scalars_owned_buf_.data(), self->scalars_owned_buf_.size())); - - out_fields->fields = self->scalars_abi_buf_.data(); - out_fields->count = self->scalars_abi_buf_.size(); - out_fields->alloc_handle = nullptr; // buffer kept alive by the plugin instance - out_fields->release = nullptr; - return true; - } catch (const std::exception& e) { - self->storeError(out_error, 1, "plugin", std::string("parse_scalars threw: ") + e.what()); - return false; - } catch (...) { - self->storeError(out_error, 1, "plugin", "unknown exception in parse_scalars"); - return false; - } -} - -inline bool MessageParserPluginBase::trampoline_parse_object( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_canonical_object_blob_t* out_blob, - PJ_error_t* out_error) noexcept { - auto* self = static_cast(ctx); - if (out_blob == nullptr) { - self->storeError(out_error, 2, "plugin", "parse_object called with null out_blob"); - return false; - } - try { - // C ABI path: caller does not share ownership of the payload buffer. - // Pass an empty anchor; the plugin must materialize anything it wants - // to retain past this call. The serialized blob written to out_blob is - // copied into self->object_blob_buf_ before we return, so a span-into- - // payload that the plugin keeps inside its CanonicalObject is fine for - // the duration of the serialize call below. - Span payload_span(payload.data, payload.size); - sdk::PayloadView payload_view{payload_span, sdk::BufferAnchor{}}; - auto result = self->parseObject(timestamp_ns, payload_view); - if (!result) { - self->storeError(out_error, 1, "plugin", std::move(result).error()); - return false; - } - self->object_blob_buf_ = sdk::detail::serializeCanonicalObject(*result); - - out_blob->data = self->object_blob_buf_.data(); - out_blob->size = self->object_blob_buf_.size(); - out_blob->alloc_handle = nullptr; // buffer kept alive by the plugin instance - out_blob->release = nullptr; - return true; - } catch (const std::exception& e) { - self->storeError(out_error, 1, "plugin", std::string("parse_object threw: ") + e.what()); - return false; - } catch (...) { - self->storeError(out_error, 1, "plugin", "unknown exception in parse_object"); - return false; - } -} - } // namespace PJ diff --git a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp index 93672cd..9b82a74 100644 --- a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp +++ b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp @@ -278,10 +278,7 @@ class MessageParserPluginBase { trampoline_load_config, trampoline_parse, trampoline_get_plugin_extension, - // Tail slots: pure-functional API (canonical-object). trampoline_classify_schema, - trampoline_parse_scalars, - trampoline_parse_object, }; return &vt; } @@ -328,13 +325,6 @@ class MessageParserPluginBase { // Schema handler table populated by the plugin via registerSchemaHandler(). std::unordered_map handlers_; - // Buffers kept alive between parse_scalars / parse_object calls so the host - // can read the returned slices safely. release callbacks in the ABI structs - // are NULL — the plugin owns the buffers and overwrites them on each call. - std::vector scalars_owned_buf_; - std::vector scalars_abi_buf_; - std::vector object_blob_buf_; - static void storeError(PJ_error_t* out_error, int32_t code, std::string_view domain, std::string_view message) { sdk::fillError(out_error, code, domain, message); } @@ -351,12 +341,6 @@ class MessageParserPluginBase { static bool trampoline_classify_schema( void* ctx, PJ_string_view_t type_name, PJ_bytes_view_t schema, PJ_schema_classification_t* out_classification, PJ_error_t* out_error) noexcept; - static bool trampoline_parse_scalars( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_named_field_value_buffer_t* out_fields, - PJ_error_t* out_error) noexcept; - static bool trampoline_parse_object( - void* ctx, int64_t timestamp_ns, PJ_bytes_view_t payload, PJ_canonical_object_blob_t* out_blob, - PJ_error_t* out_error) noexcept; }; } // namespace PJ diff --git a/pj_base/tests/abi_layout_sentinels_test.cpp b/pj_base/tests/abi_layout_sentinels_test.cpp index 2be7936..8018af5 100644 --- a/pj_base/tests/abi_layout_sentinels_test.cpp +++ b/pj_base/tests/abi_layout_sentinels_test.cpp @@ -79,8 +79,8 @@ static_assert(offsetof(PJ_message_parser_vtable_t, struct_size) == 4, "v4 prefix static_assert(offsetof(PJ_message_parser_vtable_t, bind) == 32, "v4 bind slot pinned"); static_assert(offsetof(PJ_message_parser_vtable_t, parse) == 64, "v4 parse slot pinned"); static_assert(offsetof(PJ_message_parser_vtable_t, get_plugin_extension) == 72, "v4 last baseline slot pinned"); -// 80 baseline (v4.0) + 3 canonical-object tail slots × 8 bytes each = 104. -static_assert(sizeof(PJ_message_parser_vtable_t) == 104, "MessageParser vtable size (update deliberately on append)"); +// 80 baseline (v4.0) + 1 tail slot × 8 bytes = 88. +static_assert(sizeof(PJ_message_parser_vtable_t) == 88, "MessageParser vtable size (update deliberately on append)"); static_assert(PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE == 80, "MIN vtable size is pinned at v4.0 — NEVER INCREASE"); static_assert(PJ_MESSAGE_PARSER_MIN_VTABLE_SIZE <= sizeof(PJ_message_parser_vtable_t), "MIN must never exceed current"); From 05ca0a41e03c8c64185e090a07635f03edbcc977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Wed, 13 May 2026 14:21:33 +0200 Subject: [PATCH 5/8] refactor(sdk): drop objectIngestPolicy from DataSourceRuntimeHostView MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ObjectIngestPolicy is a host-owned concern: the host instantiates its own ObjectIngestPolicyResolver during file-load setup and consults it on each push_message_v2 dispatch. DataSource plugins are policy-agnostic — they fabricate a payload fetcher via runtimeHost().pushMessage() and hand it off without inspecting or configuring policy. Exposing a per-view resolver in DataSourceRuntimeHostView contradicted that contract: mutations went to a local instance the host never read. The accessor is removed; the type sdk::ObjectIngestPolicyResolver stays in pj_base/sdk/object_ingest_policy.hpp as host-side vocabulary. Changes: - Drop the objectIngestPolicy() accessor. - Drop the mutable policy_resolver_ member. - Replace transitive include of object_ingest_policy.hpp with an explicit include of canonical_object.hpp (needed for PayloadView in pushMessage). --- .../pj_base/sdk/data_source_host_views.hpp | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/pj_base/include/pj_base/sdk/data_source_host_views.hpp b/pj_base/include/pj_base/sdk/data_source_host_views.hpp index f6c4bbf..ceb63e3 100644 --- a/pj_base/include/pj_base/sdk/data_source_host_views.hpp +++ b/pj_base/include/pj_base/sdk/data_source_host_views.hpp @@ -21,7 +21,7 @@ #include "pj_base/data_source_protocol.h" #include "pj_base/expected.hpp" -#include "pj_base/sdk/object_ingest_policy.hpp" +#include "pj_base/sdk/canonical_object.hpp" #include "pj_base/sdk/plugin_data_api.hpp" namespace PJ { @@ -303,18 +303,6 @@ class DataSourceRuntimeHostView { return okStatus(); } - /// Access (mutable) the resolver of ObjectIngestPolicy for this runtime. - /// The application configures it during setup; the host (when the - /// push_message_v2 dispatch lands) consults it per message. - /// - /// Implementation status (RFC): - /// The resolver is a per-DataSourceRuntimeHostView local instance for - /// now. In production it will be host-owned and shared across views; - /// the SDK surface stays the same. - [[nodiscard]] sdk::ObjectIngestPolicyResolver& objectIngestPolicy() const { - return policy_resolver_; - } - /** * Display a modal message box and wait for user response. * @return The button clicked, or kOk if the host does not support dialogs. @@ -375,13 +363,6 @@ class DataSourceRuntimeHostView { private: PJ_data_source_runtime_host_t host_{}; - - // RFC-only: local-to-view policy resolver. Production wiring will move - // this to a host-side singleton accessed through the service registry; - // the public surface (objectIngestPolicy()) stays the same. mutable - // because configuring the policy is conceptually a side concern, not - // a mutation of the view. - mutable sdk::ObjectIngestPolicyResolver policy_resolver_{}; }; } // namespace PJ From 5aadc6dde548ef1527eb9dd507c004c93adee9ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Wed, 13 May 2026 14:53:49 +0200 Subject: [PATCH 6/8] chore(sdk): enforce final on pure-functional FINAL methods + sync stale comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Mark classifySchema / parseScalars / parseObject as virtual...final so derived plugins get a compile error if they try to override (the methods are invoked on MessageParserPluginBase*, so an override would silently not run). Matches the FINAL contract from the RFC. - Adapt the "Pure-functional API" section header to reflect the current state (no "added in protocol v5" — those slots are not at the C ABI level; the contract is C++ SDK direct-call only). - Rename test DefaultPolicyIsLazyScalars to DefaultPolicyIsLazyObjectsEagerScalars to match the enum value. - Rewrite the push_message_v2_test.cpp header summary so case 5 matches the assertion (explicit error when the host doesn't expose the slot; no silent fallback). --- .../sdk/message_parser_plugin_base.hpp | 29 ++++++++++++------- pj_base/tests/object_ingest_policy_test.cpp | 2 +- pj_base/tests/push_message_v2_test.cpp | 6 ++-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp index 9b82a74..e5ca74e 100644 --- a/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp +++ b/pj_base/include/pj_base/sdk/message_parser_plugin_base.hpp @@ -159,7 +159,7 @@ class MessageParserPluginBase { } // --------------------------------------------------------------------------- - // Pure-functional API (added in protocol v5, ABI-appendable) + // Pure-functional API // --------------------------------------------------------------------------- // // Design principle: the parser does NOT decide push policy (eager vs lazy) @@ -171,8 +171,9 @@ class MessageParserPluginBase { // // Plugins extend the parser by populating a per-schema handler table in // the constructor (registerSchemaHandler). The base class implements - // classifySchema / parseScalars / parseObject as final lookups into that - // table. Plugins do NOT override the three methods. + // classifySchema / parseScalars / parseObject as `final` lookups into that + // table, invoked by the host directly on a MessageParserPluginBase* pointer + // (no vtable indirection, no cross-ABI copy). /// Register a handler for one schema type name. Typically called once per /// supported schema in the plugin's constructor. @@ -205,11 +206,16 @@ class MessageParserPluginBase { return &it->second; } - /// Lookup against the registered handler table. Non-virtual: plugins - /// populate the table via registerSchemaHandler() rather than overriding; - /// the C ABI trampolines invoke this directly on MessageParserPluginBase*. + /// Lookup against the registered handler table. Marked `final`: plugins + /// populate the table via registerSchemaHandler() rather than overriding. + /// The C ABI trampolines call this on MessageParserPluginBase*; a derived + /// override would never be invoked, so the compiler rejects it explicitly. /// Returns kNone when no handler is registered for this type name. - sdk::SchemaClassification classifySchema(std::string_view type_name, Span schema) const { + /// + /// `type_name` is passed as a parameter (rather than using bound_type_name_) + /// because classification may be queried for any schema this parser handles, + /// including before bindSchema has fixed the instance to one. + virtual sdk::SchemaClassification classifySchema(std::string_view type_name, Span schema) const final { (void)schema; if (const auto* h = findSchemaHandler(type_name)) { return {h->object_kind}; @@ -219,9 +225,10 @@ class MessageParserPluginBase { /// Invoke the registered scalar handler for the currently-bound schema. /// Returns unexpected if no handler is registered, or if the registered - /// handler did not provide a parse_scalars callable. Non-virtual — see + /// handler did not provide a parse_scalars callable. Marked `final` — see /// classifySchema above for the rationale. - Expected> parseScalars(Timestamp timestamp_ns, Span payload) { + virtual Expected> parseScalars( + Timestamp timestamp_ns, Span payload) final { const auto* h = findSchemaHandler(bound_type_name_); if (h == nullptr) { return unexpected(std::string("parser does not register schema: ") + bound_type_name_); @@ -235,13 +242,13 @@ class MessageParserPluginBase { /// Invoke the registered object handler for the currently-bound schema. /// Returns unexpected if no handler is registered, or if the registered /// handler did not provide a parse_object callable (i.e. this schema - /// produces only scalars). Non-virtual — see classifySchema above. + /// produces only scalars). Marked `final` — see classifySchema above. /// /// `payload.anchor` may be empty; in that case the parser is expected to /// materialize anything it wants to outlive this call. In-process callers /// that already own the payload buffer should pass a non-empty anchor so /// the parser can return a zero-copy CanonicalObject. - Expected parseObject(Timestamp timestamp_ns, sdk::PayloadView payload) { + virtual Expected parseObject(Timestamp timestamp_ns, sdk::PayloadView payload) final { const auto* h = findSchemaHandler(bound_type_name_); if (h == nullptr) { return unexpected(std::string("parser does not register schema: ") + bound_type_name_); diff --git a/pj_base/tests/object_ingest_policy_test.cpp b/pj_base/tests/object_ingest_policy_test.cpp index 3f7a10c..a6c9f68 100644 --- a/pj_base/tests/object_ingest_policy_test.cpp +++ b/pj_base/tests/object_ingest_policy_test.cpp @@ -6,7 +6,7 @@ using PJ::sdk::CanonicalObjectKind; using PJ::sdk::ObjectIngestPolicy; using PJ::sdk::ObjectIngestPolicyResolver; -TEST(ObjectIngestPolicyResolverTest, DefaultPolicyIsLazyScalars) { +TEST(ObjectIngestPolicyResolverTest, DefaultPolicyIsLazyObjectsEagerScalars) { ObjectIngestPolicyResolver r; EXPECT_EQ( r.resolve("any_source", "/any/topic", CanonicalObjectKind::kImage), ObjectIngestPolicy::kLazyObjectsEagerScalars); diff --git a/pj_base/tests/push_message_v2_test.cpp b/pj_base/tests/push_message_v2_test.cpp index b0b6722..509650a 100644 --- a/pj_base/tests/push_message_v2_test.cpp +++ b/pj_base/tests/push_message_v2_test.cpp @@ -7,9 +7,9 @@ // 3. Multiple fetcher invocations are idempotent (same bytes each time). // 4. The heap-held closure context is destroyed exactly once when the // host calls fetcher.release. -// 5. When the host predates the slot (struct_size short OR field NULL), -// the SDK template falls back to push_raw_message — both for vector -// and for PayloadView closures. +// 5. When the host does not expose push_message_v2 (struct_size short +// or field NULL), pushMessage returns an explicit error rather than +// degrading silently. #include From 3f834a9f96bfedd18b12b14ec270eec86380bff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Wed, 13 May 2026 14:54:17 +0200 Subject: [PATCH 7/8] docs(sdk): tighten doc + safety around push_raw_message, payload size, variant extension, fetcher type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - push_raw_message: clarify it's the eager-only path; plugins needing lazy materialization or ObjectIngestPolicy dispatch use push_message_v2. - PJ_payload_t::size: size_t -> uint64_t for ABI-explicit 64-bit width (the protocol pins everything else to 64-bit; staying with size_t was a portability hazard on cross-compiles). - CanonicalObject variant: rewrite the extension-policy comment. Appending alternatives is forward-compatible — older hosts receiving an unknown kind reject the message, no protocol bump required. - pushMessage template: static_assert that the Fetcher returns either sdk::PayloadView or std::vector. Catches misuse at compile time instead of falling through to UB in the else branch. --- pj_base/include/pj_base/data_source_protocol.h | 8 +++++++- pj_base/include/pj_base/sdk/canonical_object.hpp | 9 ++++++--- pj_base/include/pj_base/sdk/data_source_host_views.hpp | 8 +++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pj_base/include/pj_base/data_source_protocol.h b/pj_base/include/pj_base/data_source_protocol.h index 07eaa67..0161c91 100644 --- a/pj_base/include/pj_base/data_source_protocol.h +++ b/pj_base/include/pj_base/data_source_protocol.h @@ -152,7 +152,7 @@ typedef struct PJ_payload_anchor_t { */ typedef struct PJ_payload_t { const uint8_t* data; - size_t size; + uint64_t size; PJ_payload_anchor_t anchor; } PJ_payload_t; @@ -254,6 +254,12 @@ typedef struct PJ_data_source_runtime_host_vtable_t { * @p handle must have been obtained from ensure_parser_binding. * @p host_timestamp_ns is nanoseconds since the Unix epoch * (1970-01-01T00:00:00Z). Returns false + error on failure. + * + * Eager-only push: the host parses immediately and the bytes are not + * retained for later replay. Plugins that need lazy materialization or + * ObjectIngestPolicy dispatch should use push_message_v2 instead. This + * slot remains for sources that fan-out raw bytes without an associated + * fetcher (streaming or eager-only consumers). */ bool (*push_raw_message)( void* ctx, PJ_parser_binding_handle_t handle, int64_t host_timestamp_ns, PJ_bytes_view_t payload, diff --git a/pj_base/include/pj_base/sdk/canonical_object.hpp b/pj_base/include/pj_base/sdk/canonical_object.hpp index 348bfe9..5e5aae0 100644 --- a/pj_base/include/pj_base/sdk/canonical_object.hpp +++ b/pj_base/include/pj_base/sdk/canonical_object.hpp @@ -254,9 +254,12 @@ struct PointCloud { // CanonicalObject — variant carried by parser->parseObject() // ----------------------------------------------------------------------------- -/// Sum type of all canonical objects a parser may produce. Closed for now; -/// extending it (kMarkers, kOccupancyGrid, …) requires bumping -/// PJ_MESSAGE_PARSER_PROTOCOL_VERSION (compatible append at the end). +/// Sum type of all canonical objects a parser may produce. New alternatives +/// (kMarkers, kOccupancyGrid, …) are appended at the tail and announced via +/// CanonicalObjectKind. Plugins built against an older SDK keep producing +/// the alternatives they know; hosts built against an older SDK that receive +/// an unknown kind reject the message rather than crashing. Forward-compatible +/// — no protocol bump required. using CanonicalObject = std::variant; /// Helper: get the kind tag for a CanonicalObject without unpacking it. diff --git a/pj_base/include/pj_base/sdk/data_source_host_views.hpp b/pj_base/include/pj_base/sdk/data_source_host_views.hpp index ceb63e3..bce4836 100644 --- a/pj_base/include/pj_base/sdk/data_source_host_views.hpp +++ b/pj_base/include/pj_base/sdk/data_source_host_views.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "pj_base/data_source_protocol.h" #include "pj_base/expected.hpp" @@ -249,6 +250,12 @@ class DataSourceRuntimeHostView { /// degrading to a kEager push_raw_message. template [[nodiscard]] Status pushMessage(ParserBindingHandle handle, Timestamp host_timestamp_ns, Fetcher&& fetcher) const { + using FetcherT = std::decay_t; + using FetcherResult = std::decay_t>; + static_assert( + std::is_same_v || std::is_same_v>, + "Fetcher must return sdk::PayloadView (zero-copy) or std::vector"); + if (!valid()) { return unexpected(std::string("runtime host is not bound")); } @@ -256,7 +263,6 @@ class DataSourceRuntimeHostView { return unexpected(std::string("runtime host does not expose push_message_v2")); } - using FetcherT = std::decay_t; auto* ctx = new FetcherT(std::forward(fetcher)); PJ_payload_fetcher_t abi_fetcher{ From 4d0333b30ff527509eda223451720592a43364ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20I=C3=B1igo=20Blasco?= Date: Wed, 13 May 2026 14:56:24 +0200 Subject: [PATCH 8/8] test(abi): pin layout of canonical-object pipeline structs and runtime host vtable Compile-time sentinels for the public ABI types added in the canonical-object pipeline series: - PJ_canonical_object_kind_t: enum size pinned at 4 bytes. - PJ_schema_classification_t: 4 bytes; field offsets pinned. - PJ_payload_anchor_t: 16 bytes (ctx + release fn ptr). - PJ_payload_t: 32 bytes (data + size + anchor); field offsets pinned. - PJ_payload_fetcher_t: 24 bytes (ctx + fetch + release fn ptr). - PJ_data_source_runtime_host_vtable_t: 104 bytes (12 fn ptrs + prefix). Offsets of report_message, push_raw_message, list_available_encodings, and the push_message_v2 tail slot are pinned. struct size grows deliberately as future tail slots append. Any unintended reorder or size change of these public ABI types now fails the build instead of silently breaking binary compatibility with existing plugins. --- pj_base/tests/abi_layout_sentinels_test.cpp | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pj_base/tests/abi_layout_sentinels_test.cpp b/pj_base/tests/abi_layout_sentinels_test.cpp index 8018af5..59777ef 100644 --- a/pj_base/tests/abi_layout_sentinels_test.cpp +++ b/pj_base/tests/abi_layout_sentinels_test.cpp @@ -94,6 +94,44 @@ static_assert(sizeof(PJ_toolbox_vtable_t) == 88, "Toolbox vtable size (update de static_assert(PJ_TOOLBOX_MIN_VTABLE_SIZE == 88, "MIN vtable size is pinned at v4.0 — NEVER INCREASE"); static_assert(PJ_TOOLBOX_MIN_VTABLE_SIZE <= sizeof(PJ_toolbox_vtable_t), "MIN must never exceed current"); +// --- Canonical-object pipeline structs --------------------------------------- +// Public ABI types crossing the boundary for the v4 canonical-object pipeline. +// Sizes and offsets are pinned; any change is a deliberate ABI revision. +static_assert(sizeof(PJ_canonical_object_kind_t) == 4, "enum layout pinned"); +static_assert(sizeof(PJ_schema_classification_t) == 4, "PJ_schema_classification_t layout pinned"); +static_assert(offsetof(PJ_schema_classification_t, object_kind) == 0, "object_kind at offset 0"); +static_assert(offsetof(PJ_schema_classification_t, reserved) == 2, "reserved at offset 2"); + +static_assert(sizeof(PJ_payload_anchor_t) == 16, "PJ_payload_anchor_t pinned (ctx + release fn ptr)"); +static_assert(offsetof(PJ_payload_anchor_t, ctx) == 0, "ctx at offset 0"); +static_assert(offsetof(PJ_payload_anchor_t, release) == 8, "release at offset 8"); + +static_assert(sizeof(PJ_payload_t) == 32, "PJ_payload_t pinned (data + size + anchor)"); +static_assert(offsetof(PJ_payload_t, data) == 0, "data at offset 0"); +static_assert(offsetof(PJ_payload_t, size) == 8, "size at offset 8"); +static_assert(offsetof(PJ_payload_t, anchor) == 16, "anchor at offset 16"); + +static_assert(sizeof(PJ_payload_fetcher_t) == 24, "PJ_payload_fetcher_t pinned (ctx + fetch + release)"); +static_assert(offsetof(PJ_payload_fetcher_t, ctx) == 0, "ctx at offset 0"); +static_assert(offsetof(PJ_payload_fetcher_t, fetch) == 8, "fetch at offset 8"); +static_assert(offsetof(PJ_payload_fetcher_t, release) == 16, "release at offset 16"); + +// --- DataSource runtime host vtable (ABI-APPENDABLE within v4) --------------- +// The vtable the host exposes to plugins under "pj.runtime.v1". Offsets of +// existing slots are pinned; size grows deliberately as tail slots append. +static_assert(offsetof(PJ_data_source_runtime_host_vtable_t, protocol_version) == 0, "v1 prefix pinned"); +static_assert(offsetof(PJ_data_source_runtime_host_vtable_t, struct_size) == 4, "v1 prefix pinned"); +static_assert(offsetof(PJ_data_source_runtime_host_vtable_t, report_message) == 8, "v1 first slot pinned"); +static_assert( + offsetof(PJ_data_source_runtime_host_vtable_t, push_raw_message) == 72, "v1 push_raw_message slot pinned"); +static_assert( + offsetof(PJ_data_source_runtime_host_vtable_t, list_available_encodings) == 88, + "v1 list_available_encodings slot pinned"); +static_assert( + offsetof(PJ_data_source_runtime_host_vtable_t, push_message_v2) == 96, "v1 push_message_v2 tail slot pinned"); +static_assert( + sizeof(PJ_data_source_runtime_host_vtable_t) == 104, "Runtime host vtable size (update deliberately on append)"); + // --- ABI version symbol ------------------------------------------------------ static_assert(PJ_ABI_VERSION == 4, "v4 ABI version");