Checkpoint should overwrite the warmstart policy so that training can resume from the checkpoint after preemption and restart.
google · Apr 30, 2021 · 9bf0460 · 9bf0460
1 parent a097947
commit 9bf0460
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 10 deletions.
diff --git a/compiler_opt/rl/train_locally.py b/compiler_opt/rl/train_locally.py
@@ -22,7 +22,6 @@
 from absl import flags
 from absl import logging
 import gin
-from tf_agents.policies import policy_loader
 from tf_agents.system import system_multiprocessing as multiprocessing
 
 from compiler_opt.rl import agent_creators
@@ -70,21 +69,16 @@ def train_eval(get_signature_spec_fn=None,
   time_step_spec, action_spec = get_signature_spec_fn()
   tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
                                          action_spec)
-  llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=tf_agent)
+  llvm_trainer = trainer.Trainer(
+      root_dir=root_dir,
+      agent=tf_agent,
+      warmstart_policy_dir=warmstart_policy_dir)
   policy_dict = {
       'saved_policy': tf_agent.policy,
       'saved_collect_policy': tf_agent.collect_policy,
   }
   saver = policy_saver.PolicySaver(policy_dict=policy_dict)
 
-  if warmstart_policy_dir:
-    warmstart_policy = policy_loader.load(warmstart_policy_dir)
-    tf_agent.policy.update(
-        policy=warmstart_policy,
-        tau=1.0,
-        tau_non_trainable=None,
-        sort_variables_by_name=False)
-
   with open(os.path.join(FLAGS.data_path, 'module_paths'), 'r') as f:
     module_paths = [
         os.path.join(FLAGS.data_path, name.rstrip('\n')) for name in f

diff --git a/compiler_opt/rl/trainer.py b/compiler_opt/rl/trainer.py
@@ -21,6 +21,7 @@
 
 import gin
 import tensorflow as tf
+from tf_agents.policies import policy_loader
 
 from tf_agents.utils import common as common_utils
 
@@ -45,6 +46,7 @@ def __init__(
       self,
       root_dir,
       agent,
+      warmstart_policy_dir=None,
       # Params for summaries and logging
       checkpoint_interval=10000,
       log_interval=100,
@@ -55,6 +57,7 @@ def __init__(
     Args:
       root_dir: str, the root directory to host all required sub-directories.
       agent: a tf_agents.agents.TFAgent object.
+      warmstart_policy_dir: the directory to warmstart the policy if given.
       checkpoint_interval: int, the training step interval for saving
         checkpoint.
       log_interval: int, the training step interval for logging.
@@ -82,6 +85,15 @@ def __init__(
 
     self._initialize_metrics()
 
+    # Load warmstart policy before restoring from checkpoint.
+    if warmstart_policy_dir:
+      warmstart_policy = policy_loader.load(warmstart_policy_dir)
+      self._agent.policy.update(
+          policy=warmstart_policy,
+          tau=1.0,
+          tau_non_trainable=None,
+          sort_variables_by_name=False)
+
     self._checkpointer = common_utils.Checkpointer(
         ckpt_dir=self._root_dir,
         agent=self._agent,