Tweak the performance of redox, gather more data (#115)
* Improve the parallelism settings

* Expand cell more aggressively

* Switch from QN to BFGS

* Account for cell changes

Our script now re-adjusts the cell size, which leads to multiple records for the same geometry. This is OK because I would like to store multiple measurements for the same geometry so that we can establish error bounds on the forces/energies with respect to cell size (see the sketch after this commit message).

* Update runs with new data

* Flake8 fix
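
To make the point about repeated measurements concrete, here is a minimal sketch — not part of this commit, with made-up record fields and values — of how several measurements of the same geometry at different cell sizes could be reduced to an energy estimate with an error bound:

import statistics

# Hypothetical records: repeated energy evaluations of one geometry at different cell sizes
records = [
    {'geometry': 'mol-001', 'cell_size': 12.0, 'energy': -231.402},
    {'geometry': 'mol-001', 'cell_size': 15.0, 'energy': -231.398},
    {'geometry': 'mol-001', 'cell_size': 18.0, 'energy': -231.397},
]

energies = [r['energy'] for r in records]
estimate = statistics.mean(energies)
half_spread = (max(energies) - min(energies)) / 2  # crude bound on the cell-size sensitivity
print(f'E = {estimate:.3f} +/- {half_spread:.3f} eV over {len(records)} cell sizes')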
WardLT authored Oct 11, 2023
1 parent 3845708 commit 141f6bd
Showing 7 changed files with 307 additions and 112 deletions.
20 changes: 12 additions & 8 deletions examol/simulate/ase/__init__.py
@@ -12,7 +12,7 @@
 from ase import units
 from ase.db import connect
 from ase.io import Trajectory, read
-from ase.optimize import QuasiNewton, FIRE
+from ase.optimize import BFGS, FIRE
 from ase.io.ulm import InvalidULMFileError
 from ase.calculators.gaussian import Gaussian, GaussianOptimizer

@@ -270,12 +270,7 @@ def optimize_structure(self, mol_key: str, xyz: str, config_name: str, charge: i
         try:
             os.chdir(run_path)
             with utils.make_ephemeral_calculator(calc_cfg) as calc:
-                # Prepare the structure for a specific code
-                if 'cp2k' in config_name:
-                    calc_cfg['buffer_size'] *= 1.5  # In case the molecule expands
-                self._prepare_atoms(atoms, charge, calc_cfg)
-
-                # Recover the history from a previous run
+                # Get the last atoms from a previous run
                 traj_path = Path('opt.traj')
                 if traj_path.is_file():
                     try:
@@ -286,6 +281,11 @@ def optimize_structure(self, mol_key: str, xyz: str, config_name: str, charge: i
                         traj_path.unlink()
                         pass

+                # Prepare the structure for a specific code
+                if 'cp2k' in config_name:
+                    calc_cfg['buffer_size'] *= 2  # In case the molecule expands
+                self._prepare_atoms(atoms, charge, calc_cfg)
+
                 # Special case: use Gaussian's optimizer
                 if isinstance(calc, Gaussian) and calc_cfg['use_gaussian_opt']:
                     # Start the optimization
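
The reordering in the two hunks above makes the code recover any previous optimization history before preparing the structure for the target code. A minimal, self-contained sketch (not part of this commit) of that restart pattern, using only the ASE pieces imported in this file:

from pathlib import Path

from ase.build import molecule
from ase.io import read
from ase.io.ulm import InvalidULMFileError

atoms = molecule('H2O')  # placeholder starting geometry

traj_path = Path('opt.traj')
if traj_path.is_file():
    try:
        # Resume from the last frame of an earlier (possibly interrupted) run
        atoms = read(traj_path, index=-1)
    except InvalidULMFileError:
        # A truncated or corrupted trajectory cannot be reused; discard it and start fresh
        traj_path.unlink()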
@@ -312,8 +312,12 @@ def optimize_structure(self, mol_key: str, xyz: str, config_name: str, charge: i
                 dyn = FIRE(atoms, logfile='opt.log', trajectory=traj)
                 dyn.run(fmax=0.7, steps=self.optimization_steps)  # TODO (wardlt) make the fmax configurable

+                # If CP2K, re-expand the simulation cell in case the molecule has expanded
+                if 'cp2k' in config_name:
+                    self._prepare_atoms(atoms, charge, calc_cfg)
+
                 # Make the optimizer
-                dyn = QuasiNewton(atoms, logfile='opt.log', trajectory=traj)
+                dyn = BFGS(atoms, logfile='opt.log', trajectory=traj)

                 # Run an optimization
                 dyn.run(fmax=fmax_conv, steps=self.optimization_steps)
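
For reference, ASE's QuasiNewton is an alias for the BFGSLineSearch optimizer, so the change above swaps the line-search variant for plain BFGS. A standalone sketch (not part of this commit) of the overall two-stage pattern — coarse FIRE pre-relaxation, re-expansion of the vacuum buffer, then BFGS to a tighter force threshold — using a Lennard-Jones calculator as a stand-in for the production setup:

from ase.build import molecule
from ase.calculators.lj import LennardJones  # stand-in for the production CP2K/Gaussian calculator
from ase.optimize import BFGS, FIRE

atoms = molecule('H2O')
atoms.calc = LennardJones()

# Stage 1: coarse FIRE relaxation to clean up a badly distorted starting geometry
FIRE(atoms, logfile='opt.log').run(fmax=0.7, steps=250)

# Re-expand the vacuum buffer in case the molecule grew during the coarse relaxation
# (12.0 A is a made-up value standing in for the doubled buffer_size)
atoms.center(vacuum=12.0)

# Stage 2: BFGS to a tighter force threshold (0.02 eV/A is illustrative, not the script's fmax_conv)
BFGS(atoms, logfile='opt.log').run(fmax=0.02, steps=250)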
16 changes: 8 additions & 8 deletions scripts/redoxmers/2_initial-data/0_gather-initial-dataset.py
@@ -58,7 +58,7 @@ def __init__(self,
         self.args = args

         # Determine where to store the task records
-        run_name = self.database_path.name[:-5]
+        run_name = self.database_path.name[:-8]
         self.record_path = self.database_path.parent / f'{run_name}-results.json.gz'

         # Output handles
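
The new slice length matches the eight characters of a '.json.gz' suffix (the old value of 5 matched '.json'). A quick illustration with a hypothetical path:

from pathlib import Path

database_path = Path('runs/initial-data.json.gz')   # hypothetical file name
run_name = database_path.name[:-8]                  # drops '.json.gz' -> 'initial-data'
# A less position-dependent spelling (Python 3.9+):
run_name = database_path.name.removesuffix('.json.gz')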
@@ -92,8 +92,8 @@ def submit_task(self):
             my_record = self.database[key]

             # See if there is any new work to do
-            try:
-                for recipe in self.recipes:
+            for recipe in self.recipes:
+                try:
                     next_calculations = recipe.suggest_computations(my_record)
                     if len(next_calculations) > 0:
                         self.logger.debug(f'Submitting tasks for {my_record.key} recipe {recipe.name}@{recipe.level}')
@@ -114,10 +114,10 @@ def submit_task(self):

                     # Compute the property
                     recipe.update_record(my_record)
-            except ValueError as e:
-                self.logger.warning(f'{my_record.key} failed for {recipe.name}@{recipe.level}. Error: {e}')
-                if self.args.halt_on_error:
-                    raise ValueError(f'Failed to submit new tasks for {my_record.key}')
+                except ValueError as e:
+                    self.logger.warning(f'{my_record.key} failed for {recipe.name}@{recipe.level}. Error: {e}')
+                    if self.args.halt_on_error:
+                        raise ValueError(f'Failed to submit new tasks for {my_record.key}')

         # If there are neither molecules nor ongoing tasks, then we are done
         if len(self.ongoing_tasks) == 0:
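
Moving the try/except inside the loop over recipes means a failure on one recipe no longer stops the remaining recipes from being evaluated for the same record. A generic, self-contained sketch of the pattern (recipe names and the failing case are made up):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
halt_on_error = False  # would come from the command-line arguments in the real script


def evaluate(recipe: str) -> None:
    """Stand-in for suggest_computations/update_record."""
    if recipe == 'solvation-energy':
        raise ValueError('missing upstream calculation')


for recipe in ['redox-potential', 'solvation-energy', 'adiabatic-ea']:
    try:
        evaluate(recipe)
        logger.info('Recipe %s handled', recipe)
    except ValueError as exc:
        # One failing recipe no longer prevents the others from being processed
        logger.warning('Recipe %s failed: %s', recipe, exc)
        if halt_on_error:
            raise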
@@ -214,7 +214,7 @@ def write_database(self):
    if dataset_path.is_file():
        # Load the existing data
        my_logger.info(f'Loading initial data from {dataset_path}')
-        with dataset_path.open() as fp:
+        with gzip.open(dataset_path, 'rt') as fp:
            for line in fp:
                record = MoleculeRecord.from_json(line)
                dataset[record.key] = record
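
Since the dataset is now stored as gzip-compressed JSON lines, it has to be opened with gzip in text mode rather than with Path.open. A small sketch (hypothetical file name and fields) of writing and reading such a file:

import gzip
import json

path = 'dataset.json.gz'  # hypothetical file name

# Write a couple of records as gzip-compressed JSON lines
with gzip.open(path, 'wt') as fp:
    for record in [{'key': 'mol-001', 'energy': -231.4}, {'key': 'mol-002', 'energy': -198.7}]:
        fp.write(json.dumps(record) + '\n')

# Read them back: gzip.open(..., 'rt') yields decoded text lines, like Path.open did before compression
dataset = {}
with gzip.open(path, 'rt') as fp:
    for line in fp:
        record = json.loads(line)
        dataset[record['key']] = record

print(sorted(dataset))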
