[tet.py] Use MPI IO to speed up configuration dump and load.

The previous code allgathered all configurations at every step of gradient descent, which becomes very slow as the MPI size grows. This update lets every process store only its own configuration, without any of the others, and write/load that configuration to/from its corresponding position in the configuration file using MPI IO.
USTC-TNS · Oct 29, 2023 · 7c76797 · 7c76797
1 parent ab1ea0a
commit 7c76797
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 25 deletions.
diff --git a/CHANGELOG.org b/CHANGELOG.org
@@ -6,6 +6,10 @@
 + *tetragono*: Add =gm_conf_eq= to equilibrium the configuration of the sampling lattice.
 *** Changed
 + *tetragono*: The physics edge index should be non-negative now.
++ *tetragono*: Use MPI IO to speed up dumping and loading of configurations. Previously, every process read all
+  configurations and chose a single one when loading, and all configurations were allgathered and written to file
+  when dumping. Now, the program uses MPI IO to write the file in parallel, and each process reads its corresponding
+  configuration directly without reading all configurations.
 *** Deprecated
 *** Removed
 + *tetragono*: Remove ansatz product state support, which may be added again in the future, but it require many updates.

diff --git a/tetragono/tetragono/sampling_lattice/gradient.py b/tetragono/tetragono/sampling_lattice/gradient.py
@@ -22,7 +22,7 @@
 import TAT
 from ..sampling_lattice import SamplingLattice, Observer, SweepSampling, ErgodicSampling, DirectSampling
 from ..utility import (show, showln, mpi_rank, mpi_size, SignalHandler, seed_differ, lattice_randomize, write_to_file,
-                       get_imported_function, restrict_wrapper, allgather_array, bcast_number)
+                       get_imported_function, restrict_wrapper, bcast_number, write_configurations)
 
 
 def check_difference(state, observer, grad, energy_observer, configuration_pool, check_difference_delta):
@@ -217,11 +217,7 @@ def gradient_descent(
                     sampling = SweepSampling(state, configuration_cut_dimension, restrict, hopping_hamiltonians)
                     sampling_total_step = sampling_total_step
                     # Initial sweep configuration
-                    if len(sampling_configurations) < mpi_size:
-                        choose = TAT.random.uniform_int(0, len(sampling_configurations) - 1)()
-                    else:
-                        choose = mpi_rank
-                    sampling.configuration.import_configuration(sampling_configurations[choose])
+                    sampling.configuration.import_configuration(sampling_configurations)
                 elif sampling_method == "ergodic":
                     sampling = ErgodicSampling(state, configuration_cut_dimension, restrict)
                     sampling_total_step = sampling.total_step
@@ -240,9 +236,9 @@ def gradient_descent(
                             configuration_pool.append((possibility, configuration))
                         show(f"sampling {sampling_step}/{sampling_total_step}, energy={observer.energy}")
                 # Save configuration
-                gathered_configurations = allgather_array(configuration.export_configuration())
-                sampling_configurations.resize(gathered_configurations.shape, refcheck=False)
-                np.copyto(sampling_configurations, gathered_configurations)
+                new_configurations = configuration.export_configuration()
+                sampling_configurations.resize(new_configurations.shape, refcheck=False)
+                np.copyto(sampling_configurations, new_configurations)
             showln(f"sampling done, total_step={sampling_total_step}, energy={observer.energy}")
             if sampling_method == "direct":
                 showln(f"direct sampling stability is {observer.stability}")
@@ -320,7 +316,8 @@ def gradient_descent(
             if save_state_file:
                 write_to_file(state, save_state_file.replace("%s", str(grad_step)).replace("%t", time_str))
             if save_configuration_file:
-                write_to_file(sampling_configurations, save_configuration_file)
+                write_configurations(sampling_configurations,
+                                     save_configuration_file.replace("%s", str(grad_step)).replace("%t", time_str))
             # Yield the measurement result
             yield (measurement_whole_result, measurement_result)
 

diff --git a/tetragono/tetragono/shell.py b/tetragono/tetragono/shell.py
@@ -23,7 +23,8 @@
 import numpy as np
 import TAT
 from .utility import (mpi_rank, mpi_size, mpi_comm, write_to_file, read_from_file, show, showln, seed_differ,
-                      get_imported_function, allgather_array, restrict_wrapper)
+                      get_imported_function, allgather_array, restrict_wrapper, write_configurations,
+                      read_configurations)
 from . import conversion
 from .exact_state import ExactState
 from .simple_update_lattice import SimpleUpdateLattice
@@ -466,10 +467,7 @@ def gm_conf_dump(self, name):
         name : str
             The file name.
         """
-        if self.gm_conf is None:
-            showln("gm_conf is None")
-        else:
-            write_to_file(self.gm_conf, name)
+        write_configurations(self.gm_conf, name)
 
     @AutoCmd.decorator
     def gm_load(self, name):
@@ -483,6 +481,25 @@ def gm_load(self, name):
         """
         self.gm = read_from_file(name)
 
+    @AutoCmd.decorator
+    def gm_conf_load_compat(self, name):
+        """
+        Load the sampling lattice configuration from a file readable by `read_from_file`.
+
+        The whole file content is loaded by every process, and a single entry of it is kept as the configuration
+        of the current process: the entry indexed by `mpi_rank` when the file holds at least `mpi_size` entries,
+        otherwise an entry drawn uniformly at random inside the `seed_differ` context.
+
+        Parameters
+        ----------
+        name : str
+            The file name.
+        """
+        stored = read_from_file(name)
+        count = len(stored)
+        if count >= mpi_size:
+            # Enough entries for every process, so each process takes the one matching its rank.
+            index = mpi_rank
+        else:
+            # Fewer entries than processes, so pick one at random.
+            with seed_differ:
+                index = TAT.random.uniform_int(0, count - 1)()
+        self.gm_conf = stored[index]
+
     @AutoCmd.decorator
     def gm_conf_load(self, name):
         """
@@ -493,7 +510,7 @@ def gm_conf_load(self, name):
         name : str
             The file name.
         """
-        self.gm_conf = read_from_file(name).copy()
+        self.gm_conf = read_configurations(name)
 
     @AutoCmd.decorator
     def gm_conf_create(self, module_name, *args, **kwargs):
@@ -512,7 +529,7 @@ def gm_conf_create(self, module_name, *args, **kwargs):
             configuration = gm_Configuration(self.gm, -1)
             initial_configuration = get_imported_function(module_name, "initial_configuration")
             configuration = initial_configuration(configuration, *args, **kwargs)
-            self.gm_conf = allgather_array(configuration.export_configuration())
+            self.gm_conf = configuration.export_configuration()
 
     @AutoCmd.decorator
     def gm_clear_symmetry(self):
@@ -589,19 +606,15 @@ def gm_conf_eq(self, step, configuration_cut_dimension, sweep_hopping_hamiltonia
                 hopping_hamiltonians = None
             sampling = SweepSampling(state, configuration_cut_dimension, restrict, hopping_hamiltonians)
             # Initial sweep configuration
-            if len(sampling_configurations) < mpi_size:
-                choose = TAT.random.uniform_int(0, len(sampling_configurations) - 1)()
-            else:
-                choose = mpi_rank
-            sampling.configuration.import_configuration(sampling_configurations[choose])
+            sampling.configuration.import_configuration(sampling_configurations)
             # Equilibium
             for sampling_step in range(step):
                 possibility, configuration = sampling()
                 show(f"equilibium {sampling_step}/{step}")
             # Save configuration
-            gathered_configurations = allgather_array(configuration.export_configuration())
-            sampling_configurations.resize(gathered_configurations.shape, refcheck=False)
-            np.copyto(sampling_configurations, gathered_configurations)
+            new_configurations = configuration.export_configuration()
+            sampling_configurations.resize(new_configurations.shape, refcheck=False)
+            np.copyto(sampling_configurations, new_configurations)
             showln(f"equilibium done, total_step={step}")
 
 
@@ -678,6 +691,7 @@ def precmd(self, line):
     gm_to_ex = app.gm_to_ex
     gm_conf_dump = app.gm_conf_dump
     gm_conf_load = app.gm_conf_load
+    gm_conf_load_compat = app.gm_conf_load_compat
     gm_conf_create = app.gm_conf_create
     gm_conf_eq = app.gm_conf_eq
     gm_hamiltonian = app.gm_hamiltonian

diff --git a/tetragono/tetragono/utility.py b/tetragono/tetragono/utility.py
@@ -319,3 +319,34 @@ def restrict(configuration, replacement=None):
     else:
         restrict = origin_restrict
     return restrict
+
+
+def write_configurations(config, file_name):
+    """
+    Write the configuration of the current process into a shared configuration file by MPI IO.
+
+    The file starts with an int64 header `[mpi_size, ndim, *shape]`, written by rank 0 only, followed by one int64
+    data block per process stored at `header_bytes + mpi_rank * config_bytes`. The header and the offsets are
+    computed from the local shape, so every process is expected to pass a configuration of the same shape.
+
+    Parameters
+    ----------
+    config : array_like
+        The configuration owned by the current process, it is converted to a C-contiguous int64 array.
+    file_name : str
+        The file name.
+    """
+    # MPI IO consumes the raw buffer, so force C order, which a plain np.asarray does not guarantee.
+    config = np.asarray(config, dtype=np.int64, order="C")
+    shape = config.shape
+    head = np.array([mpi_size, len(shape), *shape], dtype=np.int64)
+    file = MPI.File.Open(mpi_comm, file_name, MPI.MODE_WRONLY | MPI.MODE_CREATE)
+    try:
+        if mpi_rank == 0:
+            file.Write_at(0, head)
+        offset = head.nbytes + mpi_rank * config.nbytes
+        file.Write_at(offset, config)
+    finally:
+        # Close the handle even when a write fails, otherwise the file handle leaks.
+        file.Close()
+
+
+def read_configurations(file_name):
+    """
+    Read the configuration for the current process from a configuration file by MPI IO.
+
+    The file is expected to start with an int64 header `[size, ndim, *shape]`, followed by `size` int64 data blocks of
+    the given shape, which is the layout produced by `write_configurations`. If the file stores at least `mpi_size`
+    configurations, the process reads the block indexed by `mpi_rank`, otherwise it reads a block chosen uniformly at
+    random inside the `seed_differ` context.
+
+    Parameters
+    ----------
+    file_name : str
+        The file name.
+
+    Returns
+    -------
+    np.ndarray
+        The int64 configuration read for the current process.
+    """
+    file = MPI.File.Open(mpi_comm, file_name, MPI.MODE_RDONLY)
+    try:
+        # The first part of the header has a fixed length: the number of stored configurations and their rank.
+        head1 = np.zeros(2, dtype=np.int64)
+        file.Read_at(0, head1)
+        size, config_rank = head1
+        # The second part of the header is the shape, its length is only known after reading the first part.
+        head2 = np.zeros(config_rank, dtype=np.int64)
+        file.Read_at(head1.nbytes, head2)
+        config = np.zeros(head2, dtype=np.int64)
+        if size < mpi_size:
+            with seed_differ:
+                choose = TAT.random.uniform_int(0, size - 1)()
+        else:
+            choose = mpi_rank
+        offset = head1.nbytes + head2.nbytes + choose * config.nbytes
+        file.Read_at(offset, config)
+    finally:
+        # Close the handle even when reading fails, otherwise the file handle leaks.
+        file.Close()
+    return config