From 0d1d9b8a0e80e1522ffa1764ded1be4989658c5b Mon Sep 17 00:00:00 2001 From: Zhuo Wang Date: Wed, 11 Feb 2026 17:59:58 +0800 Subject: [PATCH] refactor: Optimize SnapshotUtil ancestor methods with early return - Optimize IsAncestorOf to return early instead of collecting all ancestors - Optimize IsParentAncestorOf to return early when matching parent is found - Add TableMetadata overloads for AncestorsOf, AncestorsBetween, OldestAncestorOf - Add IsParentAncestorOf overload with custom lookup function --- src/iceberg/util/snapshot_util.cc | 144 ++++++++++++++-------- src/iceberg/util/snapshot_util_internal.h | 95 +++++++++++--- 2 files changed, 171 insertions(+), 68 deletions(-) diff --git a/src/iceberg/util/snapshot_util.cc b/src/iceberg/util/snapshot_util.cc index 84e7a10bc..202224516 100644 --- a/src/iceberg/util/snapshot_util.cc +++ b/src/iceberg/util/snapshot_util.cc @@ -41,9 +41,13 @@ namespace iceberg { Result>> SnapshotUtil::AncestorsOf( const Table& table, int64_t snapshot_id) { - return table.SnapshotById(snapshot_id).and_then([&table](const auto& snapshot) { - return AncestorsOf(table, snapshot); - }); + return AncestorsOf(*table.metadata(), snapshot_id); +} + +Result>> SnapshotUtil::AncestorsOf( + const TableMetadata& metadata, int64_t snapshot_id) { + return AncestorsOf(snapshot_id, + [&metadata](int64_t id) { return metadata.SnapshotById(id); }); } Result>> SnapshotUtil::AncestorsOf( @@ -54,72 +58,96 @@ Result>> SnapshotUtil::AncestorsOf( }); } -Result SnapshotUtil::IsAncestorOf(const Table& table, int64_t snapshot_id, +Result SnapshotUtil::IsAncestorOf(const Table& table, int64_t ancestor_snapshot_id) { - ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id)); - return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto& snapshot) { - return snapshot != nullptr && snapshot->snapshot_id == ancestor_snapshot_id; - }); + return IsAncestorOf(*table.metadata(), ancestor_snapshot_id); } -Result SnapshotUtil::IsAncestorOf( - int64_t snapshot_id, int64_t ancestor_snapshot_id, - const std::function>(int64_t)>& lookup) { - ICEBERG_ASSIGN_OR_RAISE(auto snapshot, lookup(snapshot_id)); - ICEBERG_CHECK(snapshot != nullptr, "Cannot find snapshot: {}", snapshot_id); - ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(snapshot, lookup)); - return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto& ancestor) { - return ancestor != nullptr && ancestor->snapshot_id == ancestor_snapshot_id; - }); +Result SnapshotUtil::IsAncestorOf(const TableMetadata& metadata, + int64_t ancestor_snapshot_id) { + ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot()); + ICEBERG_CHECK(current != nullptr, "Current snapshot is null"); + return IsAncestorOf(metadata, current->snapshot_id, ancestor_snapshot_id); } -Result SnapshotUtil::IsAncestorOf(const Table& table, +Result SnapshotUtil::IsAncestorOf(const Table& table, int64_t snapshot_id, int64_t ancestor_snapshot_id) { - ICEBERG_ASSIGN_OR_RAISE(auto current, table.current_snapshot()); - ICEBERG_CHECK(current != nullptr, "Current snapshot is null"); - return IsAncestorOf(table, current->snapshot_id, ancestor_snapshot_id); + return IsAncestorOf(*table.metadata(), snapshot_id, ancestor_snapshot_id); } Result SnapshotUtil::IsAncestorOf(const TableMetadata& metadata, + int64_t snapshot_id, int64_t ancestor_snapshot_id) { - ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot()); - ICEBERG_CHECK(current != nullptr, "Current snapshot is null"); + return IsAncestorOf(snapshot_id, ancestor_snapshot_id, + [&metadata](int64_t id) { return metadata.SnapshotById(id); }); +} - // Create a lookup function that uses the metadata - auto lookup = [&metadata](int64_t id) -> Result> { - return metadata.SnapshotById(id); - }; +Result SnapshotUtil::IsAncestorOf( + int64_t snapshot_id, int64_t ancestor_snapshot_id, + const std::function>(int64_t)>& lookup) { + if (snapshot_id == ancestor_snapshot_id) { + return true; + } - ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(current->snapshot_id, lookup)); - return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto& snapshot) { - return snapshot != nullptr && snapshot->snapshot_id == ancestor_snapshot_id; - }); + ICEBERG_ASSIGN_OR_RAISE(auto current, lookup(snapshot_id)); + ICEBERG_CHECK(current != nullptr, "Cannot find snapshot: {}", snapshot_id); + + while (current != nullptr && current->parent_snapshot_id.has_value()) { + int64_t parent_id = current->parent_snapshot_id.value(); + auto parent_result = lookup(parent_id); + ICEBERG_ACTION_FOR_NOT_FOUND(parent_result, { break; }); + if (parent_id == ancestor_snapshot_id) { + return true; + } + current = std::move(parent_result.value()); + } + + return false; } Result SnapshotUtil::IsParentAncestorOf(const Table& table, int64_t snapshot_id, int64_t ancestor_parent_snapshot_id) { - ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id)); - return std::ranges::any_of( - ancestors, [ancestor_parent_snapshot_id](const auto& snapshot) { - return snapshot != nullptr && snapshot->parent_snapshot_id.has_value() && - snapshot->parent_snapshot_id.value() == ancestor_parent_snapshot_id; - }); + return IsParentAncestorOf(*table.metadata(), snapshot_id, ancestor_parent_snapshot_id); +} + +Result SnapshotUtil::IsParentAncestorOf(const TableMetadata& metadata, + int64_t snapshot_id, + int64_t ancestor_parent_snapshot_id) { + return IsParentAncestorOf( + snapshot_id, ancestor_parent_snapshot_id, + [&metadata](int64_t id) { return metadata.SnapshotById(id); }); +} + +Result SnapshotUtil::IsParentAncestorOf( + int64_t snapshot_id, int64_t ancestor_parent_snapshot_id, + const std::function>(int64_t)>& lookup) { + ICEBERG_ASSIGN_OR_RAISE(auto current, lookup(snapshot_id)); + ICEBERG_CHECK(current != nullptr, "Cannot find snapshot: {}", snapshot_id); + + while (current != nullptr) { + if (!current->parent_snapshot_id.has_value()) { + break; + } + if (current->parent_snapshot_id.value() == ancestor_parent_snapshot_id) { + return true; + } + auto parent_result = lookup(current->parent_snapshot_id.value()); + ICEBERG_ACTION_FOR_NOT_FOUND(parent_result, { break; }); + current = std::move(parent_result.value()); + } + + return false; } Result>> SnapshotUtil::CurrentAncestors( const Table& table) { - ICEBERG_ASSIGN_OR_RAISE(auto current, table.current_snapshot()); - return AncestorsOf(table, current); + return CurrentAncestors(*table.metadata()); } Result>> SnapshotUtil::CurrentAncestors( const TableMetadata& metadata) { ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot()); - auto lookup = [&metadata](int64_t id) -> Result> { - return metadata.SnapshotById(id); - }; - - return AncestorsOf(current, lookup); + return AncestorsOf(metadata, current); } Result> SnapshotUtil::CurrentAncestorIds(const Table& table) { @@ -137,7 +165,12 @@ Result>> SnapshotUtil::OldestAncestor( Result>> SnapshotUtil::OldestAncestorOf( const Table& table, int64_t snapshot_id) { - ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id)); + return OldestAncestorOf(*table.metadata(), snapshot_id); +} + +Result>> SnapshotUtil::OldestAncestorOf( + const TableMetadata& metadata, int64_t snapshot_id) { + ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(metadata, snapshot_id)); if (ancestors.empty()) { return std::nullopt; } @@ -151,7 +184,7 @@ Result>> SnapshotUtil::OldestAncestorAft auto current = std::move(current_result.value()); std::optional> last_snapshot = std::nullopt; - ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, current)); + ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(*table.metadata(), current)); for (const auto& snapshot : ancestors) { auto snapshot_timestamp_ms = snapshot->timestamp_ms; if (snapshot_timestamp_ms < timestamp_ms) { @@ -200,7 +233,13 @@ Result> SnapshotUtil::AncestorIdsBetween( Result>> SnapshotUtil::AncestorsBetween( const Table& table, int64_t latest_snapshot_id, std::optional oldest_snapshot_id) { - ICEBERG_ASSIGN_OR_RAISE(auto start, table.SnapshotById(latest_snapshot_id)); + return AncestorsBetween(*table.metadata(), latest_snapshot_id, oldest_snapshot_id); +} + +Result>> SnapshotUtil::AncestorsBetween( + const TableMetadata& metadata, int64_t latest_snapshot_id, + std::optional oldest_snapshot_id) { + ICEBERG_ASSIGN_OR_RAISE(auto start, metadata.SnapshotById(latest_snapshot_id)); if (oldest_snapshot_id.has_value()) { if (latest_snapshot_id == oldest_snapshot_id.value()) { @@ -208,21 +247,22 @@ Result>> SnapshotUtil::AncestorsBetween( } return AncestorsOf(start, - [&table, oldest_snapshot_id = oldest_snapshot_id.value()]( + [&metadata, oldest_snapshot_id = oldest_snapshot_id.value()]( int64_t id) -> Result> { if (id == oldest_snapshot_id) { return nullptr; } - return table.SnapshotById(id); + return metadata.SnapshotById(id); }); } else { - return AncestorsOf(table, start); + return AncestorsOf(metadata, start); } } Result>> SnapshotUtil::AncestorsOf( - const Table& table, const std::shared_ptr& snapshot) { - return AncestorsOf(snapshot, [&table](int64_t id) { return table.SnapshotById(id); }); + const TableMetadata& metadata, const std::shared_ptr& snapshot) { + return AncestorsOf(snapshot, + [&metadata](int64_t id) { return metadata.SnapshotById(id); }); } Result>> SnapshotUtil::AncestorsOf( diff --git a/src/iceberg/util/snapshot_util_internal.h b/src/iceberg/util/snapshot_util_internal.h index ce5e02782..8a3158185 100644 --- a/src/iceberg/util/snapshot_util_internal.h +++ b/src/iceberg/util/snapshot_util_internal.h @@ -44,6 +44,14 @@ class ICEBERG_EXPORT SnapshotUtil { static Result>> AncestorsOf(const Table& table, int64_t snapshot_id); + /// \brief Returns a vector of ancestors of the given snapshot. + /// + /// \param metadata The table metadata + /// \param snapshot_id The snapshot ID to start from + /// \return A vector of ancestor snapshots + static Result>> AncestorsOf( + const TableMetadata& metadata, int64_t snapshot_id); + /// \brief Returns a vector of ancestors of the given snapshot. /// /// \param snapshot_id The snapshot ID to start from @@ -53,6 +61,23 @@ class ICEBERG_EXPORT SnapshotUtil { int64_t snapshot_id, const std::function>(int64_t)>& lookup); + /// \brief Returns whether ancestor_snapshot_id is an ancestor of the table's current + /// state. + /// + /// \param table The table to check + /// \param ancestor_snapshot_id The ancestor snapshot ID to check for + /// \return true if ancestor_snapshot_id is an ancestor of the current snapshot + static Result IsAncestorOf(const Table& table, int64_t ancestor_snapshot_id); + + /// \brief Returns whether ancestor_snapshot_id is an ancestor of the metadata's current + /// state. + /// + /// \param metadata The table metadata to check + /// \param ancestor_snapshot_id The ancestor snapshot ID to check for + /// \return true if ancestor_snapshot_id is an ancestor of the current snapshot + static Result IsAncestorOf(const TableMetadata& metadata, + int64_t ancestor_snapshot_id); + /// \brief Returns whether ancestor_snapshot_id is an ancestor of snapshot_id. /// /// \param table The table to check @@ -62,6 +87,15 @@ class ICEBERG_EXPORT SnapshotUtil { static Result IsAncestorOf(const Table& table, int64_t snapshot_id, int64_t ancestor_snapshot_id); + /// \brief Returns whether ancestor_snapshot_id is an ancestor of snapshot_id. + /// + /// \param metadata The table metadata to check + /// \param snapshot_id The snapshot ID to check + /// \param ancestor_snapshot_id The ancestor snapshot ID to check for + /// \return true if ancestor_snapshot_id is an ancestor of snapshot_id + static Result IsAncestorOf(const TableMetadata& metadata, int64_t snapshot_id, + int64_t ancestor_snapshot_id); + /// \brief Returns whether ancestor_snapshot_id is an ancestor of snapshot_id using the /// given lookup function. /// @@ -73,32 +107,37 @@ class ICEBERG_EXPORT SnapshotUtil { int64_t snapshot_id, int64_t ancestor_snapshot_id, const std::function>(int64_t)>& lookup); - /// \brief Returns whether ancestor_snapshot_id is an ancestor of the table's current - /// state. + /// \brief Returns whether some ancestor of snapshot_id has parentId matches + /// ancestor_parent_snapshot_id. /// /// \param table The table to check - /// \param ancestor_snapshot_id The ancestor snapshot ID to check for - /// \return true if ancestor_snapshot_id is an ancestor of the current snapshot - static Result IsAncestorOf(const Table& table, int64_t ancestor_snapshot_id); + /// \param snapshot_id The snapshot ID to check + /// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to check for + /// \return true if any ancestor has the given parent ID + static Result IsParentAncestorOf(const Table& table, int64_t snapshot_id, + int64_t ancestor_parent_snapshot_id); - /// \brief Returns whether ancestor_snapshot_id is an ancestor of the metadata's current - /// state. + /// \brief Returns whether some ancestor of snapshot_id has parentId matches + /// ancestor_parent_snapshot_id. /// /// \param metadata The table metadata to check - /// \param ancestor_snapshot_id The ancestor snapshot ID to check for - /// \return true if ancestor_snapshot_id is an ancestor of the current snapshot - static Result IsAncestorOf(const TableMetadata& metadata, - int64_t ancestor_snapshot_id); + /// \param snapshot_id The snapshot ID to check + /// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to check for + /// \return true if any ancestor has the given parent ID + static Result IsParentAncestorOf(const TableMetadata& metadata, + int64_t snapshot_id, + int64_t ancestor_parent_snapshot_id); /// \brief Returns whether some ancestor of snapshot_id has parentId matches /// ancestor_parent_snapshot_id. /// - /// \param table The table to check /// \param snapshot_id The snapshot ID to check /// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to check for + /// \param lookup Function to lookup snapshots by ID /// \return true if any ancestor has the given parent ID - static Result IsParentAncestorOf(const Table& table, int64_t snapshot_id, - int64_t ancestor_parent_snapshot_id); + static Result IsParentAncestorOf( + int64_t snapshot_id, int64_t ancestor_parent_snapshot_id, + const std::function>(int64_t)>& lookup); /// \brief Returns a vector that traverses the table's snapshots from the current to the /// last known ancestor. @@ -147,6 +186,19 @@ class ICEBERG_EXPORT SnapshotUtil { static Result>> OldestAncestorOf( const Table& table, int64_t snapshot_id); + /// \brief Traverses the history and finds the oldest ancestor of the specified + /// snapshot. + /// + /// Oldest ancestor is defined as the ancestor snapshot whose parent is null or has been + /// expired. If the specified snapshot has no parent or parent has been expired, the + /// specified snapshot itself is returned. + /// + /// \param metadata The table metadata + /// \param snapshot_id The ID of the snapshot to find the oldest ancestor + /// \return The oldest snapshot, or nullopt if not found + static Result>> OldestAncestorOf( + const TableMetadata& metadata, int64_t snapshot_id); + /// \brief Traverses the history of the table's current snapshot, finds the oldest /// snapshot that was committed either at or after a given time. /// @@ -192,6 +244,17 @@ class ICEBERG_EXPORT SnapshotUtil { const Table& table, int64_t latest_snapshot_id, std::optional oldest_snapshot_id); + /// \brief Returns a vector of ancestors between two snapshots. + /// + /// \param metadata The table metadata + /// \param latest_snapshot_id The latest snapshot ID + /// \param oldest_snapshot_id The oldest snapshot ID (optional, nullopt means all + /// ancestors) + /// \return A vector of ancestor snapshots between the two snapshots + static Result>> AncestorsBetween( + const TableMetadata& metadata, int64_t latest_snapshot_id, + std::optional oldest_snapshot_id); + /// \brief Traverses the history of the table's current snapshot and finds the snapshot /// with the given snapshot id as its parent. /// @@ -317,11 +380,11 @@ class ICEBERG_EXPORT SnapshotUtil { private: /// \brief Helper function to traverse ancestors of a snapshot. /// - /// \param table The table + /// \param metadata The table metadata /// \param snapshot The snapshot to start from /// \return A vector of ancestor snapshots static Result>> AncestorsOf( - const Table& table, const std::shared_ptr& snapshot); + const TableMetadata& metadata, const std::shared_ptr& snapshot); /// \brief Helper function to traverse ancestors of a snapshot using a lookup function. ///