From b1b247b8eeda373ff9a6d36f9ac48e05e2c430af Mon Sep 17 00:00:00 2001 From: kplers Date: Mon, 2 Dec 2024 19:42:53 +0900 Subject: [PATCH] docs: Add policy documentation links to policy_kwargs parameter --- stable_baselines3/a2c/a2c.py | 2 +- stable_baselines3/ddpg/ddpg.py | 2 +- stable_baselines3/dqn/dqn.py | 2 +- stable_baselines3/ppo/ppo.py | 2 +- stable_baselines3/sac/sac.py | 2 +- stable_baselines3/td3/td3.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/stable_baselines3/a2c/a2c.py b/stable_baselines3/a2c/a2c.py index a125aaef6..c3388bf8b 100644 --- a/stable_baselines3/a2c/a2c.py +++ b/stable_baselines3/a2c/a2c.py @@ -48,7 +48,7 @@ class A2C(OnPolicyAlgorithm): :param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average the reported success rate, mean episode length, and mean reward over :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param policy_kwargs: additional arguments to be passed to the policy on creation + :param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`a2c_policies` :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages :param seed: Seed for the pseudo random generators diff --git a/stable_baselines3/ddpg/ddpg.py b/stable_baselines3/ddpg/ddpg.py index d94fa1812..4af502469 100644 --- a/stable_baselines3/ddpg/ddpg.py +++ b/stable_baselines3/ddpg/ddpg.py @@ -44,7 +44,7 @@ class DDPG(TD3): :param optimize_memory_usage: Enable a memory efficient variant of the replay buffer at a cost of more complexity. See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195 - :param policy_kwargs: additional arguments to be passed to the policy on creation + :param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`ddpg_policies` :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages :param seed: Seed for the pseudo random generators diff --git a/stable_baselines3/dqn/dqn.py b/stable_baselines3/dqn/dqn.py index a3f200e59..eb85fd65b 100644 --- a/stable_baselines3/dqn/dqn.py +++ b/stable_baselines3/dqn/dqn.py @@ -53,7 +53,7 @@ class DQN(OffPolicyAlgorithm): :param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average the reported success rate, mean episode length, and mean reward over :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param policy_kwargs: additional arguments to be passed to the policy on creation + :param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`dqn_policies` :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages :param seed: Seed for the pseudo random generators diff --git a/stable_baselines3/ppo/ppo.py b/stable_baselines3/ppo/ppo.py index 03cbc2464..7ed1b4bbc 100644 --- a/stable_baselines3/ppo/ppo.py +++ b/stable_baselines3/ppo/ppo.py @@ -62,7 +62,7 @@ class PPO(OnPolicyAlgorithm): :param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average the reported success rate, mean episode length, and mean reward over :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param policy_kwargs: additional arguments to be passed to the policy on creation + :param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`ppo_policies` :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages :param seed: Seed for the pseudo random generators diff --git a/stable_baselines3/sac/sac.py b/stable_baselines3/sac/sac.py index 8cb2ae53d..14a948fbd 100644 --- a/stable_baselines3/sac/sac.py +++ b/stable_baselines3/sac/sac.py @@ -68,7 +68,7 @@ class SAC(OffPolicyAlgorithm): :param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average the reported success rate, mean episode length, and mean reward over :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param policy_kwargs: additional arguments to be passed to the policy on creation + :param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`sac_policies` :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages :param seed: Seed for the pseudo random generators diff --git a/stable_baselines3/td3/td3.py b/stable_baselines3/td3/td3.py index affb9c9f8..2dc40b4ba 100644 --- a/stable_baselines3/td3/td3.py +++ b/stable_baselines3/td3/td3.py @@ -56,7 +56,7 @@ class TD3(OffPolicyAlgorithm): :param stats_window_size: Window size for the rollout logging, specifying the number of episodes to average the reported success rate, mean episode length, and mean reward over :param tensorboard_log: the log location for tensorboard (if None, no logging) - :param policy_kwargs: additional arguments to be passed to the policy on creation + :param policy_kwargs: additional arguments to be passed to the policy on creation. See :ref:`td3_policies` :param verbose: Verbosity level: 0 for no output, 1 for info messages (such as device or wrappers used), 2 for debug messages :param seed: Seed for the pseudo random generators