DLR-RM · araffin · Apr 22, 2024 · Apr 18, 2024 · Apr 19, 2024 · Apr 21, 2024
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -48,6 +48,7 @@ New Features:
 Bug Fixes:
 ^^^^^^^^^^
 - Fixed ``monitor_wrapper`` argument that was not passed to the parent class, and dones argument that wasn't passed to ``_update_into_buffer`` (@corentinlger)
+- Fixed loading with ``weights_only=True`` failing when ``learning_rate`` was a schedule function returning a non-float type (e.g. ``learning_rate=lambda _: np.sin(1.0)``); schedule outputs are now cast to ``float`` (@markscsmith)
 
 `SB3-Contrib`_
 ^^^^^^^^^^^^^^
@@ -1593,4 +1594,4 @@ And all the contributors:
 @anand-bala @hughperkins @sidney-tio @AlexPasqua @dominicgkerr @Akhilez @Rocamonde @tobirohrer @ZikangXiong @ReHoss
 @DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto
 @lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
-@marekm4 @stagoverflow @rushitnshah
+@marekm4 @stagoverflow @rushitnshah @markscsmith
diff --git a/stable_baselines3/common/utils.py b/stable_baselines3/common/utils.py
@@ -92,7 +92,7 @@ def get_schedule_fn(value_schedule: Union[Schedule, float]) -> Schedule:
         value_schedule = constant_fn(float(value_schedule))
     else:
         assert callable(value_schedule)
-    return value_schedule
+    return lambda _: float(value_schedule(_))
 
 
 def get_linear_fn(start: float, end: float, end_fraction: float) -> Schedule:

diff --git a/tests/test_save_load.py b/tests/test_save_load.py
@@ -739,6 +739,14 @@ def test_load_invalid_object(tmp_path):
     assert len(record) == 0
 
 
+def test_learning_rate_float_for_unpickle(tmp_path):
+    path = str(tmp_path / "ppo_pendulum.zip")
+    PPO("MlpPolicy", "Pendulum-v1", learning_rate=lambda _: np.sin(1.0)).save(path)  # schedule yields a NumPy scalar
+    with warnings.catch_warnings(record=True) as record:  # collect warnings raised while loading
+        PPO.load(path)
+    assert len(record) == 0  # reloading the saved model must not have recorded any warning
+
+
 def test_dqn_target_update_interval(tmp_path):
     # `target_update_interval` should not change when reloading the model. See GH Issue #1373.
     env = make_vec_env(env_id="CartPole-v1", n_envs=2)