diff --git a/.github/workflows/master_ci.yml b/.github/workflows/master_ci.yml
index e9c1edd59..847d995ef 100644
--- a/.github/workflows/master_ci.yml
+++ b/.github/workflows/master_ci.yml
@@ -78,6 +78,31 @@ jobs:
             exit 1
           fi
 
+  mt_kahypar_64bit_test:
+    name: Ubuntu 64 bit Build
+    runs-on: ubuntu-24.04
+    env:
+      CI_ACTIVE : 1
+
+    steps:
+      - name: Checkout HEAD
+        uses: actions/checkout@v4
+
+      - name: Install Dependencies
+        run: |
+          sudo apt-get install libtbb-dev libboost-program-options-dev libhwloc-dev gcc-14 g++-14
+
+      - name: Install Mt-KaHyPar
+        env:
+          CC: gcc-14
+          CXX: g++-14
+        run: |
+          rm -rf build
+          mkdir build
+          cd build
+          cmake .. --preset=default -DKAHYPAR_CI_BUILD=ON -DKAHYPAR_USE_64_BIT_IDS=ON
+          make -j2 MtKaHyPar
+
   mt_kahypar_test_suite:
     name: Test Suite
     runs-on: ubuntu-22.04 # note: stay on 22.04 since lcov behaves weird on 24.04
diff --git a/mt-kahypar/datastructures/graph.cpp b/mt-kahypar/datastructures/graph.cpp
index 5b609e67c..91ba7c2d0 100644
--- a/mt-kahypar/datastructures/graph.cpp
+++ b/mt-kahypar/datastructures/graph.cpp
@@ -161,7 +161,7 @@ namespace mt_kahypar::ds {
     tbb::enumerable_thread_specific<size_t> local_max_degree(0);
 
     // first pass generating unique coarse arcs to determine coarse node degrees
-    tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) {
+    tbb::parallel_for(ID(0), num_coarse_nodes, [&](NodeID cu) {
       auto& clear_list = clear_lists.local();
       ArcWeight volume_cu = 0.0;
       for (auto i = cluster_bounds[cu]; i < cluster_bounds[cu + 1]; ++i) {
@@ -192,7 +192,7 @@ namespace mt_kahypar::ds {
     coarse_graph._max_degree = local_max_degree.combine([](size_t lhs, size_t rhs) { return std::max(lhs, rhs); });
 
     // second pass generating unique coarse arcs
-    tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) {
+    tbb::parallel_for(ID(0), num_coarse_nodes, [&](NodeID cu) {
       auto& clear_list = clear_lists.local();
       for (auto i = cluster_bounds[cu]; i < cluster_bounds[cu+1]; ++i) {
         for (const Arc& arc : arcsOf(nodes_sorted_by_cluster[i])) {
@@ -244,7 +244,7 @@ namespace mt_kahypar::ds {
     ds::Array<parallel::IntegralAtomicWrapper<size_t>>& tmp_pos = _tmp_graph_buffer->tmp_pos;
     ds::Array<parallel::IntegralAtomicWrapper<size_t>>& tmp_indices = _tmp_graph_buffer->tmp_indices;
     ds::Array<parallel::AtomicWrapper<ArcWeight>>& coarse_node_volumes = _tmp_graph_buffer->tmp_node_volumes;
-    tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+    tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      ASSERT(static_cast<size_t>(communities[u]) < _num_nodes);
      mapping[communities[u]] = UL(1);
      tmp_pos[u] = 0;
@@ -258,7 +258,7 @@ namespace mt_kahypar::ds {
 
    // Remap community ids
    coarse_graph._num_nodes = mapping_prefix_sum.total_sum();
-   tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      communities[u] = mapping_prefix_sum[communities[u]];
    });
 
@@ -269,7 +269,7 @@ namespace mt_kahypar::ds {
    // the tmp adjacence array.
    // Compute number of arcs in tmp adjacence array with parallel prefix sum
    ASSERT(coarse_graph._num_nodes <= coarse_node_volumes.size());
-   tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      const NodeID coarse_u = communities[u];
      ASSERT(static_cast<size_t>(coarse_u) < coarse_graph._num_nodes);
      coarse_node_volumes[coarse_u] += nodeVolume(u); // not deterministic!
@@ -287,7 +287,7 @@ namespace mt_kahypar::ds {
    // Write all arcs into corresponding tmp adjacence array blocks
    ds::Array<Arc>& tmp_arcs = _tmp_graph_buffer->tmp_arcs;
    ds::Array<size_t>& valid_arcs = _tmp_graph_buffer->valid_arcs;
-   tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      const NodeID coarse_u = communities[u];
      ASSERT(static_cast<size_t>(coarse_u) < coarse_graph._num_nodes);
      for ( const Arc& arc : arcsOf(u) ) {
@@ -306,7 +306,7 @@ namespace mt_kahypar::ds {
    // Therefore, we sort the arcs according to their endpoints
    // and aggregate weight of arcs with equal endpoints.
    tbb::enumerable_thread_specific<size_t> local_max_degree(0);
-   tbb::parallel_for(0U, static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
      const size_t tmp_arc_start = tmp_indices_prefix_sum[u];
      const size_t tmp_arc_end = tmp_indices_prefix_sum[u + 1];
      // commented out comparison is needed for deterministic arc weights
@@ -353,7 +353,7 @@ namespace mt_kahypar::ds {
        }
      });
    }, [&] {
-     tbb::parallel_for(0U, static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
+     tbb::parallel_for(ID(0), static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
        const size_t start_index_pos = valid_arcs_prefix_sum[tmp_indices_prefix_sum[u]];
        ASSERT(start_index_pos <= coarse_graph._num_arcs);
        coarse_graph._indices[u] = start_index_pos;
@@ -400,7 +400,7 @@ namespace mt_kahypar::ds {
    // deterministic reduce of node volumes since double addition is not commutative or associative
    // node volumes are computed in for loop because deterministic reduce does not have dynamic load balancing
    // whereas for loop does. this important since each node incurs O(degree) time
-   tbb::parallel_for(0U, NodeID(numNodes()), [&](NodeID u) { computeNodeVolume(u); });
+   tbb::parallel_for(ID(0), static_cast<NodeID>(numNodes()), [&](NodeID u) { computeNodeVolume(u); });
 
    auto aggregate_volume = [&](const tbb::blocked_range<NodeID>& r, ArcWeight partial_volume) -> ArcWeight {
      for (NodeID u = r.begin(); u < r.end(); ++u) {
@@ -408,7 +408,7 @@ namespace mt_kahypar::ds {
      }
      return partial_volume;
    };
-   auto r = tbb::blocked_range<NodeID>(0U, numNodes(), 1000);
+   auto r = tbb::blocked_range<NodeID>(ID(0), numNodes(), 1000);
    _total_volume = tbb::parallel_deterministic_reduce(r, 0.0, aggregate_volume, std::plus<>());
  }
 
diff --git a/mt-kahypar/datastructures/hypergraph_common.h b/mt-kahypar/datastructures/hypergraph_common.h
index c9bd5d84b..7e148c7aa 100644
--- a/mt-kahypar/datastructures/hypergraph_common.h
+++ b/mt-kahypar/datastructures/hypergraph_common.h
@@ -58,18 +58,20 @@ using RatingType = double;
 #define ID(X) static_cast<uint64_t>(X)
 using HypernodeID = uint64_t;
 using HyperedgeID = uint64_t;
+// louvain graph
+using NodeID = uint64_t;
 #else
 #define ID(X) static_cast<uint32_t>(X)
 using HypernodeID = uint32_t;
 using HyperedgeID = uint32_t;
+// louvain graph
+using NodeID = uint32_t;
 #endif
 
 using HypernodeWeight = int32_t;
 using HyperedgeWeight = int32_t;
 using PartitionID = int32_t;
 using Gain = HyperedgeWeight;
-// Graph Types
-using NodeID = uint32_t;
 using ArcWeight = double;
 
 struct Arc {
diff --git a/mt-kahypar/io/command_line_options.cpp b/mt-kahypar/io/command_line_options.cpp
index 123730748..2542f1394 100644
--- a/mt-kahypar/io/command_line_options.cpp
+++ b/mt-kahypar/io/command_line_options.cpp
@@ -131,7 +131,7 @@ namespace mt_kahypar {
              po::value<bool>(&context.partition.perform_parallel_recursion_in_deep_multilevel)->value_name("<bool>")->default_value(true),
              "If true, then we perform parallel recursion within the deep multilevel scheme.")
             ("smallest-maxnet-threshold",
-             po::value<uint32_t>(&context.partition.smallest_large_he_size_threshold)->value_name("<uint32_t>"),
+             po::value<HypernodeID>(&context.partition.smallest_large_he_size_threshold)->value_name("<uint64_t>"),
             "No hyperedge whose size is smaller than this threshold is removed in the large hyperedge removal step (see maxnet-removal-factor)")
             ("maxnet-removal-factor",
             po::value<double>(&context.partition.large_hyperedge_size_threshold_factor)->value_name(
@@ -572,8 +572,8 @@ namespace mt_kahypar {
                      &context.refinement.flows.alpha))->value_name("<double>"),
              "Size constraint for flow problem: (1 + alpha * epsilon) * c(V) / k - c(V_1) (alpha = r-flow-scaling)")
             ((initial_partitioning ? "i-r-flow-max-num-pins" : "r-flow-max-num-pins"),
-             po::value<uint32_t>((initial_partitioning ? &context.initial_partitioning.refinement.flows.max_num_pins :
-                                  &context.refinement.flows.max_num_pins))->value_name("<uint32_t>"),
+             po::value<HypernodeID>((initial_partitioning ? &context.initial_partitioning.refinement.flows.max_num_pins :
+                                     &context.refinement.flows.max_num_pins))->value_name("<uint64_t>"),
             "Maximum number of pins a flow problem is allowed to contain")
             ((initial_partitioning ? "i-r-flow-find-most-balanced-cut" : "r-flow-find-most-balanced-cut"),
             po::value<bool>((initial_partitioning ? &context.initial_partitioning.refinement.flows.find_most_balanced_cut :
diff --git a/mt-kahypar/partition/coarsening/multilevel_coarsener.h b/mt-kahypar/partition/coarsening/multilevel_coarsener.h
index 2873c5b00..0dadc45fc 100644
--- a/mt-kahypar/partition/coarsening/multilevel_coarsener.h
+++ b/mt-kahypar/partition/coarsening/multilevel_coarsener.h
@@ -242,7 +242,7 @@ class MultilevelCoarsener : public ICoarsener,
     tbb::enumerable_thread_specific<HypernodeID> num_nodes_update_threshold(0);
     ds::FixedVertexSupport<Hypergraph> fixed_vertices = current_hg.copyOfFixedVertexSupport();
     fixed_vertices.setMaxBlockWeight(_context.partition.max_part_weights);
-    tbb::parallel_for(0U, current_hg.initialNumNodes(), [&](const HypernodeID id) {
+    tbb::parallel_for(ID(0), current_hg.initialNumNodes(), [&](const HypernodeID id) {
       ASSERT(id < _current_vertices.size());
       const HypernodeID hn = _current_vertices[id];
       if (current_hg.nodeIsEnabled(hn)) {
diff --git a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp
index 42c67b6d5..452cad623 100644
--- a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp
+++ b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp
@@ -104,7 +104,7 @@ bool ParallelLocalMovingModularity<Hypergraph>::localMoving(Graph<Hypergraph>& g
   } else {
     auto& nodes = permutation.permutation;
     nodes.resize(graph.numNodes());
-    tbb::parallel_for(0U, static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
+    tbb::parallel_for(ID(0), static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
      nodes[u] = u;
      communities[u] = u;
      _cluster_volumes[u].store(graph.nodeVolume(u), std::memory_order_relaxed);
@@ -346,7 +346,7 @@ template<typename Hypergraph>
 void ParallelLocalMovingModularity<Hypergraph>::initializeClusterVolumes(const Graph<Hypergraph>& graph, ds::Clustering& communities) {
   _reciprocal_total_volume = 1.0 / graph.totalVolume();
   _vol_multiplier_div_by_node_vol = _reciprocal_total_volume;
-  tbb::parallel_for(0U, static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
+  tbb::parallel_for(ID(0), static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
    const PartitionID community_id = communities[u];
    _cluster_volumes[community_id] += graph.nodeVolume(u);
  });
diff --git a/mt-kahypar/partition/refinement/fm/global_rollback.cpp b/mt-kahypar/partition/refinement/fm/global_rollback.cpp
index 6931d491a..43f934dd7 100644
--- a/mt-kahypar/partition/refinement/fm/global_rollback.cpp
+++ b/mt-kahypar/partition/refinement/fm/global_rollback.cpp
@@ -411,7 +411,7 @@ namespace mt_kahypar {
         last_recalc_round.assign(phg.initialNumEdges(), CAtomic<MoveID>(0));
       }
     } else{
-      tbb::parallel_for(0U, phg.initialNumEdges(), recalculate_and_distribute_for_hyperedge);
+      tbb::parallel_for(ID(0), phg.initialNumEdges(), recalculate_and_distribute_for_hyperedge);
     }
   }
 
diff --git a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp
index ae0b94de7..324726aa1 100644
--- a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp
+++ b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp
@@ -371,7 +371,7 @@ namespace mt_kahypar {
     std::swap(move_tracker.moveOrder, tmp_move_order);
     move_tracker.runningMoveID.store(first_move_id + next_move_index);
 
-    tbb::parallel_for(ID(0), next_move_index, [&](const MoveID move_id) {
+    tbb::parallel_for(static_cast<MoveID>(0), next_move_index, [&](const MoveID move_id) {
      const Move& m = move_tracker.moveOrder[move_id];
      if (m.isValid()) {
        move_tracker.moveOfNode[m.node] = first_move_id + move_id;
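Why the `0U` literals had to go: `tbb::parallel_for(Index first, Index last, Body)` deduces `Index` from both bounds, so they must have the same type. Once `HypernodeID`, `HyperedgeID`, and `NodeID` can be `uint64_t`, a call like `tbb::parallel_for(0U, numNodes(), ...)` mixes `unsigned int` with `uint64_t` and no longer compiles, which is what the `ID(0)` replacements above address. Below is a minimal standalone sketch of that failure mode, not taken from the patch; it assumes oneTBB, assumes the `KAHYPAR_USE_64_BIT_IDS` CMake option is exposed as a preprocessor define of the same name, and mirrors the `ID()` definition from the `hypergraph_common.h` hunk.

```cpp
#include <cstdint>
#include <tbb/parallel_for.h>

// Mirrors the #ifdef block touched in hypergraph_common.h above.
#ifdef KAHYPAR_USE_64_BIT_IDS
#define ID(X) static_cast<uint64_t>(X)
using HypernodeID = uint64_t;
#else
#define ID(X) static_cast<uint32_t>(X)
using HypernodeID = uint32_t;
#endif

int main() {
  const HypernodeID num_nodes = 1000;  // hypothetical node count

  // Fails to compile with 64-bit IDs: the two bounds deduce the Index
  // template parameter as 'unsigned int' and 'uint64_t' at the same time.
  // tbb::parallel_for(0U, num_nodes, [](const HypernodeID u) { /* ... */ });

  // Compiles in both configurations: ID(0) has the same type as num_nodes.
  tbb::parallel_for(ID(0), num_nodes, [](const HypernodeID u) {
    (void)u;  // per-node work would go here
  });
  return 0;
}
```

The `multitry_kway_fm.cpp` hunk goes in the opposite direction for the same reason: `next_move_index` is a `MoveID`, which the `KAHYPAR_USE_64_BIT_IDS` switch does not widen, so the now potentially 64-bit `ID(0)` is replaced by `static_cast<MoveID>(0)` to keep both bounds at the same type.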