txgraph: Avoid representative lookup for each dependency (optimization)

The m_deps_to_add vector is sorted by child Cluster*, which matches the order of an_clusters. This means we can walk through m_deps_to_add while doing the representative lookups for an_clusters, and reuse them.
2025-04-29 14:59:39 -04:00 · 2025-01-22 14:53:32 -05:00 · 2025-01-22 14:53:32 -05:00 · 1171953ac6
commit 1171953ac6
parent 64f69ec8c3
1 changed files with 25 additions and 12 deletions
--- a/src/txgraph.cpp
+++ b/src/txgraph.cpp
@ -734,6 +734,15 @@ void TxGraphImpl::GroupClusters() noexcept
    std::sort(an_clusters.begin(), an_clusters.end());
    an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end());
    // Sort the dependencies by child Cluster.
    std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept {
        auto [_a_par, a_chl] = a;
        auto [_b_par, b_chl] = b;
        auto a_chl_cluster = m_entries[a_chl].m_locator.cluster;
        auto b_chl_cluster = m_entries[b_chl].m_locator.cluster;
        return std::less{}(a_chl_cluster, b_chl_cluster);
    });
    // Run the union-find algorithm to find partitions of the input Clusters which need to be
    // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
    {
@ -813,6 +822,8 @@ void TxGraphImpl::GroupClusters() noexcept
        // Populate the an_clusters and an_deps data structures with the list of input Clusters,
        // and the input dependencies, annotated with the representative of the Cluster partition
        // it applies to.
        an_deps.reserve(m_deps_to_add.size());
        auto deps_it = m_deps_to_add.begin();
        for (size_t i = 0; i < partition_data.size(); ++i) {
            auto& data = partition_data[i];
            // Find the representative of the partition Cluster i is in, and store it with the
@ -820,18 +831,20 @@ void TxGraphImpl::GroupClusters() noexcept
            auto rep = find_root_fn(&data)->cluster;
            Assume(an_clusters[i].second == nullptr);
            an_clusters[i].second = rep;
-        }
+            // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep.
-        an_deps.reserve(m_deps_to_add.size());
+            while (deps_it != m_deps_to_add.end()) {
-        for (auto [par, chl] : m_deps_to_add) {
+                auto [par, chl] = *deps_it;
-            auto chl_cluster = m_entries[chl].m_locator.cluster;
+                auto chl_cluster = m_entries[chl].m_locator.cluster;
-            auto par_cluster = m_entries[par].m_locator.cluster;
+                if (std::greater{}(chl_cluster, data.cluster)) break;
-            // Nothing to do if either parent or child transaction is removed already.
+                // Skip dependencies that apply to earlier Clusters (those necessarily are for
-            if (par_cluster == nullptr || chl_cluster == nullptr) continue;
+                // deleted transactions, as otherwise we'd have processed them already).
-            // Find the representative of the partition which this dependency's child is in (which
+                if (chl_cluster == data.cluster) {
-            // should be the same as the one for the parent).
+                    auto par_cluster = m_entries[par].m_locator.cluster;
-            auto rep = find_root_fn(locate_fn(chl_cluster))->cluster;
+                    // Also filter out dependencies applying to a removed parent.
-            // Create an_deps entry.
+                    if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep);
-            an_deps.emplace_back(std::pair{par, chl}, rep);
+                }
                ++deps_it;
            }
        }
    }