From 7b7b04f97903266100feaf5524347653c1bcc32a Mon Sep 17 00:00:00 2001
From: Sander Vandenhaute <svdenhau@gmail.com>
Date: Wed, 24 Jul 2024 16:30:10 -0400
Subject: [PATCH] fix bug in env_vars parsing and forwarding; add new examples

---
 configs/hortense.yaml            |  5 +++
 configs/lumi.yaml                |  1 +
 examples/data/acetaldehyde.xyz   |  9 ++++
 examples/data/vinyl_alcohol.xyz  |  9 ++++
 examples/online_learning_pimd.py | 74 ++++++++++++++++++++++++++++++++
 examples/proton_jump_plumed.py   | 67 +++++++++++++++++++++++++++++
 psiflow/execution.py             | 14 +++---
 psiflow/sampling/client.py       | 10 +++--
 psiflow/sampling/sampling.py     |  4 +-
 9 files changed, 182 insertions(+), 11 deletions(-)
 create mode 100644 examples/data/acetaldehyde.xyz
 create mode 100644 examples/data/vinyl_alcohol.xyz
 create mode 100644 examples/online_learning_pimd.py
 create mode 100644 examples/proton_jump_plumed.py

diff --git a/configs/hortense.yaml b/configs/hortense.yaml
index 0141e6d..6fe6c65 100644
--- a/configs/hortense.yaml
+++ b/configs/hortense.yaml
@@ -7,6 +7,8 @@ ModelEvaluation:
   cores_per_worker: 12
   gpu: True
   max_simulation_time: 20
+  env_vars:
+    KMP_BLOCKTIME: "1"
   slurm:
     partition: "gpu_rome_a100"
     account: "2023_070"
@@ -19,6 +21,8 @@ ModelTraining:
   cores_per_worker: 12
   gpu: true
   max_training_time: 40
+  env_vars:
+    OMP_PROC_BIND: "spread"
   slurm:
     partition: "gpu_rome_a100"
     account: "2023_070"
@@ -30,6 +34,7 @@ ModelTraining:
 CP2K:
   cores_per_worker: 64
   max_evaluation_time: 30
+  memory_limit: 2GB
   launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp'
   slurm:
     partition: "cpu_rome"
diff --git a/configs/lumi.yaml b/configs/lumi.yaml
index 69e300e..4dc1083 100644
--- a/configs/lumi.yaml
+++ b/configs/lumi.yaml
@@ -6,6 +6,7 @@ default_threads: 8
 CP2K:
   cores_per_worker: 32
   max_evaluation_time: 20
+  memory_limit: 2GB
   launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp'
   slurm:
     partition: "standard"
diff --git a/examples/data/acetaldehyde.xyz b/examples/data/acetaldehyde.xyz
new file mode 100644
index 0000000..111a97c
--- /dev/null
+++ b/examples/data/acetaldehyde.xyz
@@ -0,0 +1,9 @@
+7
+Properties=species:S:1:pos:R:3
+O	0.694151672	0.776743934	-0.455455855
+C	0.195993254	-0.270095005	-0.307053207
+C	-0.846060202	-0.538006022	0.669585079 
+H	0.515801613	-1.097661033	-0.987914453
+H	-0.589257101	-0.505600908	1.733123281 
+H	-1.553309062	0.309375207	0.558315778 
+H	-1.411674563	-1.440354174	0.5617281699
diff --git a/examples/data/vinyl_alcohol.xyz b/examples/data/vinyl_alcohol.xyz
new file mode 100644
index 0000000..8833174
--- /dev/null
+++ b/examples/data/vinyl_alcohol.xyz
@@ -0,0 +1,9 @@
+7
+Properties=species:S:1:pos:R:3
+O	1.041371715	-0.216863172	0.001603252
+C	-0.098316254	0.512294574	-0.01021628
+C	-1.225162144	-0.248210652	0.020868361
+H	-0.087363805	1.596485281	-0.07557041
+H	0.61765221	-1.094559605	-0.02702971
+H	-2.216985293	0.211688229	-0.00469380
+H	-1.115257687	-1.357478425	-0.04507284
diff --git a/examples/online_learning_pimd.py b/examples/online_learning_pimd.py
new file mode 100644
index 0000000..caeae80
--- /dev/null
+++ b/examples/online_learning_pimd.py
@@ -0,0 +1,74 @@
+from pathlib import Path
+
+import psiflow
+from psiflow.reference import CP2K
+from psiflow.data import Dataset
+from psiflow.sampling import Walker
+from psiflow.models import MACE
+from psiflow.hamiltonians import MACEHamiltonian
+from psiflow.learning import Learning
+
+
+def main():
+    path_output = Path.cwd() / 'output'
+
+    with open('data/cp2k_input.txt', 'r') as f: cp2k_input = f.read()
+    cp2k = CP2K(cp2k_input)
+
+    model = MACE(
+        batch_size=4,
+        lr=0.02,
+        max_ell=3,
+        r_max=6.5,
+        energy_weight=100,
+        correlation=3,
+        max_L=1,
+        num_channels=24,
+        patience=8,
+        scheduler_patience=4,
+        max_num_epochs=200,
+    )
+    model.add_atomic_energy('H', cp2k.compute_atomic_energy('H', box_size=9))
+    model.add_atomic_energy('O', cp2k.compute_atomic_energy('O', box_size=9))
+
+    state = Dataset.load('data/water_train.xyz')[0]
+    walkers = (
+        Walker(state, temperature=300, pressure=0.1).multiply(40) +
+        Walker(state, temperature=450, pressure=0.1).multiply(40) +
+        Walker(state, temperature=600, pressure=0.1).multiply(40)
+    )
+    learning = Learning(
+        cp2k,
+        path_output,
+        wandb_project='psiflow_examples',
+        wandb_group='water_learning_pimd',
+    )
+
+    model, walkers = learning.passive_learning(
+        model,
+        walkers,
+        hamiltonian=MACEHamiltonian.mace_mp0(),
+        steps=10000,
+        step=2000,
+    )
+
+    for i in range(3):
+        model, walkers = learning.active_learning(
+            model,
+            walkers,
+            steps=2000,
+        )
+
+    # PIMD phase for low-temperature walkers
+    for j, walker in enumerate(walkers[:40]):
+        walker.nbeads = 32
+    model, walkers = learning.active_learning(
+        model,
+        walkers,
+        steps=500,
+    )
+
+
+if __name__ == '__main__':
+    with psiflow.load():
+        main()
diff --git a/examples/proton_jump_plumed.py b/examples/proton_jump_plumed.py
new file mode 100644
index 0000000..6b3d8be
--- /dev/null
+++ b/examples/proton_jump_plumed.py
@@ -0,0 +1,67 @@
+from ase.units import kJ, mol
+import numpy as np
+
+import psiflow
+from psiflow.data import Dataset
+from psiflow.geometry import Geometry
+from psiflow.hamiltonians import PlumedHamiltonian, MACEHamiltonian
+from psiflow.sampling import Walker, sample, quench, Metadynamics, replica_exchange
+
+
+PLUMED_INPUT = """UNITS LENGTH=A ENERGY=kj/mol
+d_C: DISTANCE ATOMS=3,5
+d_O: DISTANCE ATOMS=1,5
+CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO
+
+"""
+
+
+def get_bias(kappa: float, center: float):
+    plumed_str = PLUMED_INPUT
+    plumed_str += '\n'
+    plumed_str += 'RESTRAINT ARG=CV KAPPA={} AT={}\n'.format(kappa, center)
+    return PlumedHamiltonian(plumed_str)
+
+
+def main():
+    aldehyd = Geometry.load('data/acetaldehyde.xyz')
+    alcohol = Geometry.load('data/vinyl_alcohol.xyz')
+
+    mace = MACEHamiltonian.mace_cc()
+    energy = mace.compute([aldehyd, alcohol], 'energy').result()
+    energy = (energy - np.min(energy)) / (kJ / mol)
+    print('E_vinyl - E_aldehyde = {:7.3f} kJ/mol'.format(energy[1] - energy[0]))
+
+    # generate initial structures using metadynamics
+    plumed_str = PLUMED_INPUT
+    plumed_str += 'METAD ARG=CV PACE=5 SIGMA=0.25 HEIGHT=5\n'
+    metadynamics = Metadynamics(plumed_str)
+
+    # create 4 identical walkers
+    walkers = Walker(
+        aldehyd,
+        hamiltonian=mace,
+        temperature=300,
+        metadynamics=metadynamics,
+    ).multiply(4)
+
+    # run metadynamics (MTD) and combine all trajectories into one large dataset
+    outputs = sample(walkers, steps=2000, step=20, start=1000)
+    data_mtd = sum([o.trajectory for o in outputs], start=Dataset([]))
+
+    # initialize walkers for umbrella sampling
+    walkers = []
+    for i, center in enumerate(np.linspace(1, 3, num=16)):
+        bias = get_bias(kappa=1500, center=center)
+        hamiltonian = mace + bias
+        walker = Walker(alcohol, hamiltonian=hamiltonian, temperature=300)
+        walkers.append(walker)
+    quench(walkers, data_mtd)  # make sure initial structure is reasonable
+    replica_exchange(walkers, trial_frequency=100)  # use REX for improved sampling
+
+    outputs = sample(walkers, steps=1000, step=10)
+
+
+if __name__ == '__main__':
+    with psiflow.load() as f:
+        main()
diff --git a/psiflow/execution.py b/psiflow/execution.py
index 689e2d3..4cdc1c2 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -170,7 +170,7 @@ def __init__(
         self,
         max_simulation_time: Optional[float] = None,
         timeout: float = (10 / 60),  # 5 seconds
-        env_vars: Optional[dict] = None,
+        env_vars: Optional[dict[str, str]] = None,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -186,9 +186,10 @@ def __init__(
             'PYTHONUNBUFFERED': 'TRUE',
         }
         if env_vars is None:
-            env_vars = dict(default_env_vars)
+            env_vars = default_env_vars
         else:
-            env_vars.update(default_env_vars)
+            default_env_vars.update(env_vars)
+            env_vars = default_env_vars
         self.env_vars = env_vars
 
     def server_command(self):
@@ -245,7 +246,7 @@ def __init__(
         self,
         gpu=True,
         max_training_time: Optional[float] = None,
-        env_vars: Optional[dict] = None,
+        env_vars: Optional[dict[str, str]] = None,
         **kwargs,
     ) -> None:
         super().__init__(gpu=gpu, **kwargs)
@@ -266,9 +267,10 @@ def __init__(
             'PYTHONUNBUFFERED': 'TRUE',
         }
         if env_vars is None:
-            env_vars = dict(default_env_vars)
+            env_vars = default_env_vars
         else:
-            env_vars.update(default_env_vars)
+            default_env_vars.update(env_vars)
+            env_vars = default_env_vars
         self.env_vars = env_vars
 
     def train_command(self, initialize: bool = False):
diff --git a/psiflow/sampling/client.py b/psiflow/sampling/client.py
index 2833309..92cc75b 100644
--- a/psiflow/sampling/client.py
+++ b/psiflow/sampling/client.py
@@ -7,7 +7,6 @@ def main():
     import time
     from pathlib import Path
 
-    import torch
     from ase.io import read
     from ipi._driver.driver import run_driver
 
@@ -54,6 +53,10 @@ def main():
     assert args.address is not None
     assert args.start is not None
 
+    print("pid: {}".format(os.getpid()))
+    affinity = os.sched_getaffinity(os.getpid())
+    print("CPU affinity before function init: {}".format(affinity))
+
     template = Geometry.from_atoms(read(args.start))
     function = function_from_json(
         args.path_hamiltonian,
@@ -68,9 +71,8 @@ def main():
         verbose=True,
     )
 
-    print("pid: {}".format(os.getpid()))
-    print("CPU affinity: {}".format(os.sched_getaffinity(os.getpid())))
-    print("torch num threads: ", torch.get_num_threads())
+    affinity = os.sched_getaffinity(os.getpid())
+    print("CPU affinity after function init: {}".format(affinity))
 
     try:
         t0 = time.time()
diff --git a/psiflow/sampling/sampling.py b/psiflow/sampling/sampling.py
index 643e873..640d7b6 100644
--- a/psiflow/sampling/sampling.py
+++ b/psiflow/sampling/sampling.py
@@ -533,6 +533,8 @@ def _sample(
     command_server = definition.server_command()
     command_client = definition.client_command()
     resources = definition.wq_resources(max_nclients)
+    print('ENV VARS')
+    print(definition.env_vars)
     result = execute_ipi(
         len(walkers),
         hamiltonian_names,
@@ -543,7 +545,7 @@ def _sample(
         command_server,
         command_client,
         *plumed_list,
-        env_vars=definition.env_vars,
+        env_vars=dict(definition.env_vars),
         stdout=parsl.AUTO_LOGNAME,
         stderr=parsl.AUTO_LOGNAME,
         inputs=inputs,