Skip to content

Commit

Permalink
Release v2.0.0 (#1571)
Browse files Browse the repository at this point in the history
* RUF012: Explicit ClassVar

* Prepare v2.0.0

* Update docs/misc/changelog.rst

---------

Co-authored-by: Quentin Gallouédec <[email protected]>
  • Loading branch information
araffin and qgallouedec authored Jun 23, 2023
1 parent 4fdb65e commit 1036c05
Show file tree
Hide file tree
Showing 11 changed files with 39 additions and 37 deletions.
19 changes: 1 addition & 18 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
#
import os
import sys
from typing import Dict, List
from unittest.mock import MagicMock
from typing import Dict

# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
# PyEnchant.
Expand All @@ -36,22 +35,6 @@
# source code directory, relative to this file, for sphinx-autobuild
sys.path.insert(0, os.path.abspath(".."))


class Mock(MagicMock):
__subclasses__ = [] # type: ignore

@classmethod
def __getattr__(cls, name):
return MagicMock()


# Mock modules that require C modules
# Note: because of that we cannot test examples using CI
# 'torch', 'torch.nn', 'torch.nn.functional',
# Do not mock modules for now; we will need to do that for Read the Docs later
MOCK_MODULES: List[str] = []
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), "../stable_baselines3", "version.txt")
with open(version_file) as file_handler:
Expand Down
11 changes: 9 additions & 2 deletions docs/misc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Changelog
==========

Release 2.0.0a14 (WIP)
Release 2.0.0 (2023-06-22)
--------------------------

**Gymnasium support**
Expand All @@ -26,13 +26,20 @@ Breaking Changes:

New Features:
^^^^^^^^^^^^^

- Added Gymnasium support (Gym 0.21 and 0.26 are supported via the ``shimmy`` package)

`SB3-Contrib`_
^^^^^^^^^^^^^^
- Fixed QRDQN update interval for multi envs


`RL Zoo`_
^^^^^^^^^
- Gym 0.26+ patches to continue working with pybullet and TimeLimit wrapper
- Renamed ``CarRacing-v1`` to ``CarRacing-v2`` in hyperparameters
- Huggingface push to hub now accepts a ``--n-timesteps`` argument to adjust the length of the video
- Fixed ``record_video`` steps (before it was stepping in a closed env)
- Dropped Gym 0.21 support

Bug Fixes:
^^^^^^^^^^
Expand Down
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ line-length = 127
target-version = "py37"
# See https://beta.ruff.rs/docs/rules/
select = ["E", "F", "B", "UP", "C90", "RUF"]
# Ignore explicit stacklevel
ignore = ["B028"]
# B028: Ignore explicit stacklevel
# RUF013: Too many false positives (implicit optional)
ignore = ["B028", "RUF013"]

[tool.ruff.per-file-ignores]
# Default implementation in abstract methods
"./stable_baselines3/common/callbacks.py"= ["B027"]
"./stable_baselines3/common/noise.py"= ["B027"]
# RUF012 (explicit ClassVar) and RUF013 (implicit optional) checks are not needed for tests
"./tests/*.py"= ["RUF012", "RUF013"]


[tool.ruff.mccabe]
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines3/a2c/a2c.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, Optional, Type, TypeVar, Union
from typing import Any, ClassVar, Dict, Optional, Type, TypeVar, Union

import torch as th
from gymnasium import spaces
Expand Down Expand Up @@ -54,7 +54,7 @@ class A2C(OnPolicyAlgorithm):
:param _init_setup_model: Whether or not to build the network at the creation of the instance
"""

policy_aliases: Dict[str, Type[BasePolicy]] = {
policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
"MlpPolicy": ActorCriticPolicy,
"CnnPolicy": ActorCriticCnnPolicy,
"MultiInputPolicy": MultiInputActorCriticPolicy,
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines3/common/base_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import warnings
from abc import ABC, abstractmethod
from collections import deque
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union
from typing import Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union

import gymnasium as gym
import numpy as np
Expand Down Expand Up @@ -94,7 +94,7 @@ class BaseAlgorithm(ABC):
"""

# Policy aliases (see _get_policy_from_name())
policy_aliases: Dict[str, Type[BasePolicy]] = {}
policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {}
policy: BasePolicy
observation_space: spaces.Space
action_space: spaces.Space
Expand Down
13 changes: 11 additions & 2 deletions stable_baselines3/common/vec_env/base_vec_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ class VecEnv(ABC):
:param action_space: Action space
"""

metadata = {"render_modes": ["human", "rgb_array"]}

def __init__(
self,
num_envs: int,
Expand All @@ -69,6 +67,7 @@ def __init__(
self.reset_infos: List[Dict[str, Any]] = [{} for _ in range(num_envs)]
# seeds to be used in the next call to env.reset()
self._seeds: List[Optional[int]] = [None for _ in range(num_envs)]

try:
render_modes = self.get_attr("render_mode")
except AttributeError:
Expand All @@ -80,6 +79,16 @@ def __init__(
), "render_mode mode should be the same for all environments"
self.render_mode = render_modes[0]

render_modes = []
if self.render_mode is not None:
if self.render_mode == "rgb_array":
# SB3 uses OpenCV for the "human" mode
render_modes = ["human", "rgb_array"]
else:
render_modes = [self.render_mode]

self.metadata = {"render_modes": render_modes}

def _reset_seeds(self) -> None:
"""
Reset the seeds that are going to be used at the next reset.
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines3/dqn/dqn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import warnings
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, TypeVar, Union

import numpy as np
import torch as th
Expand Down Expand Up @@ -62,7 +62,7 @@ class DQN(OffPolicyAlgorithm):
:param _init_setup_model: Whether or not to build the network at the creation of the instance
"""

policy_aliases: Dict[str, Type[BasePolicy]] = {
policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
"MlpPolicy": MlpPolicy,
"CnnPolicy": CnnPolicy,
"MultiInputPolicy": MultiInputPolicy,
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines3/ppo/ppo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import warnings
from typing import Any, Dict, Optional, Type, TypeVar, Union
from typing import Any, ClassVar, Dict, Optional, Type, TypeVar, Union

import numpy as np
import torch as th
Expand Down Expand Up @@ -68,7 +68,7 @@ class PPO(OnPolicyAlgorithm):
:param _init_setup_model: Whether or not to build the network at the creation of the instance
"""

policy_aliases: Dict[str, Type[BasePolicy]] = {
policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
"MlpPolicy": ActorCriticPolicy,
"CnnPolicy": ActorCriticCnnPolicy,
"MultiInputPolicy": MultiInputActorCriticPolicy,
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines3/sac/sac.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, TypeVar, Union

import numpy as np
import torch as th
Expand Down Expand Up @@ -77,7 +77,7 @@ class SAC(OffPolicyAlgorithm):
:param _init_setup_model: Whether or not to build the network at the creation of the instance
"""

policy_aliases: Dict[str, Type[BasePolicy]] = {
policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
"MlpPolicy": MlpPolicy,
"CnnPolicy": CnnPolicy,
"MultiInputPolicy": MultiInputPolicy,
Expand Down
4 changes: 2 additions & 2 deletions stable_baselines3/td3/td3.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, TypeVar, Union

import numpy as np
import torch as th
Expand Down Expand Up @@ -65,7 +65,7 @@ class TD3(OffPolicyAlgorithm):
:param _init_setup_model: Whether or not to build the network at the creation of the instance
"""

policy_aliases: Dict[str, Type[BasePolicy]] = {
policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
"MlpPolicy": MlpPolicy,
"CnnPolicy": CnnPolicy,
"MultiInputPolicy": MultiInputPolicy,
Expand Down
2 changes: 1 addition & 1 deletion stable_baselines3/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.0a14
2.0.0

0 comments on commit 1036c05

Please sign in to comment.