diff --git a/.github/workflows/master_ci.yml b/.github/workflows/master_ci.yml
index e9c1edd59..847d995ef 100644
--- a/.github/workflows/master_ci.yml
+++ b/.github/workflows/master_ci.yml
@@ -78,6 +78,31 @@ jobs:
             exit 1
           fi
 
+  mt_kahypar_64bit_test:
+    name: Ubuntu 64 bit Build
+    runs-on: ubuntu-24.04
+    env:
+      CI_ACTIVE : 1
+
+    steps:
+      - name: Checkout HEAD
+        uses: actions/checkout@v4
+
+      - name: Install Dependencies
+        run: |
+          sudo apt-get install libtbb-dev libboost-program-options-dev libhwloc-dev gcc-14 g++-14
+
+      - name: Install Mt-KaHyPar
+        env:
+          CC: gcc-14
+          CXX: g++-14
+        run: |
+          rm -rf build
+          mkdir build
+          cd build
+          cmake .. --preset=default -DKAHYPAR_CI_BUILD=ON -DKAHYPAR_USE_64_BIT_IDS=ON
+          make -j2 MtKaHyPar
+
   mt_kahypar_test_suite:
     name: Test Suite
     runs-on: ubuntu-22.04 # note: stay on 22.04 since lcov behaves weird on 24.04
diff --git a/mt-kahypar/datastructures/graph.cpp b/mt-kahypar/datastructures/graph.cpp
index 5b609e67c..91ba7c2d0 100644
--- a/mt-kahypar/datastructures/graph.cpp
+++ b/mt-kahypar/datastructures/graph.cpp
@@ -161,7 +161,7 @@ namespace mt_kahypar::ds {
     tbb::enumerable_thread_specific<size_t> local_max_degree(0);
 
     // first pass generating unique coarse arcs to determine coarse node degrees
-    tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) {
+    tbb::parallel_for(ID(0), num_coarse_nodes, [&](NodeID cu) {
       auto& clear_list = clear_lists.local();
       ArcWeight volume_cu = 0.0;
       for (auto i = cluster_bounds[cu]; i < cluster_bounds[cu + 1]; ++i) {
@@ -192,7 +192,7 @@ namespace mt_kahypar::ds {
     coarse_graph._max_degree = local_max_degree.combine([](size_t lhs, size_t rhs) { return std::max(lhs, rhs); });
 
     // second pass generating unique coarse arcs
-    tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) {
+    tbb::parallel_for(ID(0), num_coarse_nodes, [&](NodeID cu) {
       auto& clear_list = clear_lists.local();
       for (auto i = cluster_bounds[cu]; i < cluster_bounds[cu+1]; ++i) {
         for (const Arc& arc : arcsOf(nodes_sorted_by_cluster[i])) {
@@ -244,7 +244,7 @@ namespace mt_kahypar::ds {
     ds::Array<parallel::IntegralAtomicWrapper<size_t>>& tmp_pos = _tmp_graph_buffer->tmp_pos;
     ds::Array<parallel::IntegralAtomicWrapper<size_t>>& tmp_indices = _tmp_graph_buffer->tmp_indices;
     ds::Array<parallel::AtomicWrapper<ArcWeight>>& coarse_node_volumes = _tmp_graph_buffer->tmp_node_volumes;
-    tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+    tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      ASSERT(static_cast<size_t>(communities[u]) < _num_nodes);
      mapping[communities[u]] = UL(1);
      tmp_pos[u] = 0;
@@ -258,7 +258,7 @@ namespace mt_kahypar::ds {
 
    // Remap community ids
    coarse_graph._num_nodes = mapping_prefix_sum.total_sum();
-   tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      communities[u] = mapping_prefix_sum[communities[u]];
    });
 
@@ -269,7 +269,7 @@ namespace mt_kahypar::ds {
    // the tmp adjacence array.
    // Compute number of arcs in tmp adjacence array with parallel prefix sum
    ASSERT(coarse_graph._num_nodes <= coarse_node_volumes.size());
-   tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      const NodeID coarse_u = communities[u];
      ASSERT(static_cast<size_t>(coarse_u) < coarse_graph._num_nodes);
      coarse_node_volumes[coarse_u] += nodeVolume(u); // not deterministic!
@@ -287,7 +287,7 @@ namespace mt_kahypar::ds {
    // Write all arcs into corresponding tmp adjacence array blocks
    ds::Array<Arc>& tmp_arcs = _tmp_graph_buffer->tmp_arcs;
    ds::Array<size_t>& valid_arcs = _tmp_graph_buffer->valid_arcs;
-   tbb::parallel_for(0U, static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(_num_nodes), [&](const NodeID u) {
      const NodeID coarse_u = communities[u];
      ASSERT(static_cast<size_t>(coarse_u) < coarse_graph._num_nodes);
      for ( const Arc& arc : arcsOf(u) ) {
@@ -306,7 +306,7 @@ namespace mt_kahypar::ds {
    // Therefore, we sort the arcs according to their endpoints
    // and aggregate weight of arcs with equal endpoints.
    tbb::enumerable_thread_specific<size_t> local_max_degree(0);
-   tbb::parallel_for(0U, static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
+   tbb::parallel_for(ID(0), static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
      const size_t tmp_arc_start = tmp_indices_prefix_sum[u];
      const size_t tmp_arc_end = tmp_indices_prefix_sum[u + 1];
      // commented out comparison is needed for deterministic arc weights
@@ -353,7 +353,7 @@ namespace mt_kahypar::ds {
        }
      });
    }, [&] {
-     tbb::parallel_for(0U, static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
+     tbb::parallel_for(ID(0), static_cast<NodeID>(coarse_graph._num_nodes), [&](const NodeID u) {
        const size_t start_index_pos = valid_arcs_prefix_sum[tmp_indices_prefix_sum[u]];
        ASSERT(start_index_pos <= coarse_graph._num_arcs);
        coarse_graph._indices[u] = start_index_pos;
@@ -400,7 +400,7 @@ namespace mt_kahypar::ds {
    // deterministic reduce of node volumes since double addition is not commutative or associative
    // node volumes are computed in for loop because deterministic reduce does not have dynamic load balancing
    // whereas for loop does. this important since each node incurs O(degree) time
-   tbb::parallel_for(0U, NodeID(numNodes()), [&](NodeID u) { computeNodeVolume(u); });
+   tbb::parallel_for(ID(0), static_cast<NodeID>(numNodes()), [&](NodeID u) { computeNodeVolume(u); });
 
    auto aggregate_volume = [&](const tbb::blocked_range<NodeID>& r, ArcWeight partial_volume) -> ArcWeight {
      for (NodeID u = r.begin(); u < r.end(); ++u) {
@@ -408,7 +408,7 @@ namespace mt_kahypar::ds {
      }
      return partial_volume;
    };
-   auto r = tbb::blocked_range<NodeID>(0U, numNodes(), 1000);
+   auto r = tbb::blocked_range<NodeID>(ID(0), numNodes(), 1000);
    _total_volume = tbb::parallel_deterministic_reduce(r, 0.0, aggregate_volume, std::plus<>());
  }
 
diff --git a/mt-kahypar/datastructures/hypergraph_common.h b/mt-kahypar/datastructures/hypergraph_common.h
index c9bd5d84b..7e148c7aa 100644
--- a/mt-kahypar/datastructures/hypergraph_common.h
+++ b/mt-kahypar/datastructures/hypergraph_common.h
@@ -58,18 +58,20 @@ using RatingType = double;
 #define ID(X) static_cast<uint64_t>(X)
 using HypernodeID = uint64_t;
 using HyperedgeID = uint64_t;
+// louvain graph
+using NodeID = uint64_t;
 #else
 #define ID(X) static_cast<uint32_t>(X)
 using HypernodeID = uint32_t;
 using HyperedgeID = uint32_t;
+// louvain graph
+using NodeID = uint32_t;
 #endif
 
 using HypernodeWeight = int32_t;
 using HyperedgeWeight = int32_t;
 using PartitionID = int32_t;
 using Gain = HyperedgeWeight;
-// Graph Types
-using NodeID = uint32_t;
 using ArcWeight = double;
 
 struct Arc {
diff --git a/mt-kahypar/io/command_line_options.cpp b/mt-kahypar/io/command_line_options.cpp
index 123730748..2542f1394 100644
--- a/mt-kahypar/io/command_line_options.cpp
+++ b/mt-kahypar/io/command_line_options.cpp
@@ -131,7 +131,7 @@ namespace mt_kahypar {
              po::value<bool>(&context.partition.perform_parallel_recursion_in_deep_multilevel)->value_name("<bool>")->default_value(true),
              "If true, then we perform parallel recursion within the deep multilevel scheme.")
             ("smallest-maxnet-threshold",
-             po::value<uint32_t>(&context.partition.smallest_large_he_size_threshold)->value_name("<uint32_t>"),
+             po::value<HypernodeID>(&context.partition.smallest_large_he_size_threshold)->value_name("<uint64_t>"),
             "No hyperedge whose size is smaller than this threshold is removed in the large hyperedge removal step (see maxnet-removal-factor)")
             ("maxnet-removal-factor",
             po::value<double>(&context.partition.large_hyperedge_size_threshold_factor)->value_name(
@@ -572,8 +572,8 @@ namespace mt_kahypar {
                      &context.refinement.flows.alpha))->value_name("<double>"),
              "Size constraint for flow problem: (1 + alpha * epsilon) * c(V) / k - c(V_1) (alpha = r-flow-scaling)")
             ((initial_partitioning ? "i-r-flow-max-num-pins" : "r-flow-max-num-pins"),
-             po::value<uint32_t>((initial_partitioning ? &context.initial_partitioning.refinement.flows.max_num_pins :
-                                  &context.refinement.flows.max_num_pins))->value_name("<uint32_t>"),
+             po::value<HypernodeID>((initial_partitioning ? &context.initial_partitioning.refinement.flows.max_num_pins :
+                                     &context.refinement.flows.max_num_pins))->value_name("<uint64_t>"),
             "Maximum number of pins a flow problem is allowed to contain")
             ((initial_partitioning ? "i-r-flow-find-most-balanced-cut" : "r-flow-find-most-balanced-cut"),
             po::value<bool>((initial_partitioning ? &context.initial_partitioning.refinement.flows.find_most_balanced_cut :
diff --git a/mt-kahypar/partition/coarsening/multilevel_coarsener.h b/mt-kahypar/partition/coarsening/multilevel_coarsener.h
index 2873c5b00..0dadc45fc 100644
--- a/mt-kahypar/partition/coarsening/multilevel_coarsener.h
+++ b/mt-kahypar/partition/coarsening/multilevel_coarsener.h
@@ -242,7 +242,7 @@ class MultilevelCoarsener : public ICoarsener,
     tbb::enumerable_thread_specific<HypernodeID> num_nodes_update_threshold(0);
     ds::FixedVertexSupport<Hypergraph> fixed_vertices = current_hg.copyOfFixedVertexSupport();
     fixed_vertices.setMaxBlockWeight(_context.partition.max_part_weights);
-    tbb::parallel_for(0U, current_hg.initialNumNodes(), [&](const HypernodeID id) {
+    tbb::parallel_for(ID(0), current_hg.initialNumNodes(), [&](const HypernodeID id) {
       ASSERT(id < _current_vertices.size());
       const HypernodeID hn = _current_vertices[id];
       if (current_hg.nodeIsEnabled(hn)) {
diff --git a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp
index 42c67b6d5..452cad623 100644
--- a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp
+++ b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp
@@ -104,7 +104,7 @@ bool ParallelLocalMovingModularity<Hypergraph>::localMoving(Graph<Hypergraph>& g
   } else {
     auto& nodes = permutation.permutation;
     nodes.resize(graph.numNodes());
-    tbb::parallel_for(0U, static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
+    tbb::parallel_for(ID(0), static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
      nodes[u] = u;
      communities[u] = u;
      _cluster_volumes[u].store(graph.nodeVolume(u), std::memory_order_relaxed);
@@ -346,7 +346,7 @@ template<typename Hypergraph>
 void ParallelLocalMovingModularity<Hypergraph>::initializeClusterVolumes(const Graph<Hypergraph>& graph, ds::Clustering& communities) {
   _reciprocal_total_volume = 1.0 / graph.totalVolume();
   _vol_multiplier_div_by_node_vol = _reciprocal_total_volume;
-  tbb::parallel_for(0U, static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
+  tbb::parallel_for(ID(0), static_cast<NodeID>(graph.numNodes()), [&](const NodeID u) {
    const PartitionID community_id = communities[u];
    _cluster_volumes[community_id] += graph.nodeVolume(u);
  });
diff --git a/mt-kahypar/partition/refinement/fm/global_rollback.cpp b/mt-kahypar/partition/refinement/fm/global_rollback.cpp
index 6931d491a..43f934dd7 100644
--- a/mt-kahypar/partition/refinement/fm/global_rollback.cpp
+++ b/mt-kahypar/partition/refinement/fm/global_rollback.cpp
@@ -411,7 +411,7 @@ namespace mt_kahypar {
         last_recalc_round.assign(phg.initialNumEdges(), CAtomic<MoveID>(0));
       }
     } else{
-      tbb::parallel_for(0U, phg.initialNumEdges(), recalculate_and_distribute_for_hyperedge);
+      tbb::parallel_for(ID(0), phg.initialNumEdges(), recalculate_and_distribute_for_hyperedge);
     }
   }
 
diff --git a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp
index ae0b94de7..324726aa1 100644
--- a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp
+++ b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp
@@ -371,7 +371,7 @@ namespace mt_kahypar {
     std::swap(move_tracker.moveOrder, tmp_move_order);
     move_tracker.runningMoveID.store(first_move_id + next_move_index);
 
-    tbb::parallel_for(ID(0), next_move_index, [&](const MoveID move_id) {
+    tbb::parallel_for(static_cast<MoveID>(0), next_move_index, [&](const MoveID move_id) {
      const Move& m = move_tracker.moveOrder[move_id];
      if (m.isValid()) {
        move_tracker.moveOfNode[m.node] = first_move_id + move_id;
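Why the `0U` literals had to go: `tbb::parallel_for(Index first, Index last, Body)` deduces `Index` from both bounds, so they must have the same type. Once `HypernodeID`, `HyperedgeID`, and `NodeID` can be `uint64_t`, a call like `tbb::parallel_for(0U, numNodes(), ...)` mixes `unsigned int` with `uint64_t` and no longer compiles, which is what the `ID(0)` replacements above address. Below is a minimal standalone sketch of that failure mode, not taken from the patch; it assumes oneTBB, assumes the `KAHYPAR_USE_64_BIT_IDS` CMake option is exposed as a preprocessor define of the same name, and mirrors the `ID()` definition from the `hypergraph_common.h` hunk.

```cpp
#include <cstdint>
#include <tbb/parallel_for.h>

// Mirrors the #ifdef block touched in hypergraph_common.h above.
#ifdef KAHYPAR_USE_64_BIT_IDS
#define ID(X) static_cast<uint64_t>(X)
using HypernodeID = uint64_t;
#else
#define ID(X) static_cast<uint32_t>(X)
using HypernodeID = uint32_t;
#endif

int main() {
  const HypernodeID num_nodes = 1000;  // hypothetical node count

  // Fails to compile with 64-bit IDs: the two bounds deduce the Index
  // template parameter as 'unsigned int' and 'uint64_t' at the same time.
  // tbb::parallel_for(0U, num_nodes, [](const HypernodeID u) { /* ... */ });

  // Compiles in both configurations: ID(0) has the same type as num_nodes.
  tbb::parallel_for(ID(0), num_nodes, [](const HypernodeID u) {
    (void)u;  // per-node work would go here
  });
  return 0;
}
```

The `multitry_kway_fm.cpp` hunk goes in the opposite direction for the same reason: `next_move_index` is a `MoveID`, which the `KAHYPAR_USE_64_BIT_IDS` switch does not widen, so the now potentially 64-bit `ID(0)` is replaced by `static_cast<MoveID>(0)` to keep both bounds at the same type.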