From 790a90ed97e57f7ca975d3e3819ccd1315890305 Mon Sep 17 00:00:00 2001
From: Eugene Teoh <50418168+eugeneteoh@users.noreply.github.com>
Date: Wed, 10 Apr 2024 16:40:41 +0100
Subject: [PATCH] Fix missing joint_position_action and add gripper action
 (#221)

* Fix missing joint_position_action and add gripper action

* Remove requirements.txt as it is not needed anymore

* Change pyrep commit hash temporarily

* Update pyrep hash

* Remove unwanted comment

* Clean up environment for each task test

* Use pytest-xdist to parallelise tests

* Add verbose flag for pytest

* Fix test being flaky due to non-determinism
---
 .github/workflows/task_tests.yml |  3 ++-
 .github/workflows/unit_tests.yml |  3 ++-
 requirements.txt                 |  6 ------
 rlbench/backend/scene.py         | 36 ++++++++++++++++++--------------
 setup.py                         |  4 ++--
 tests/unit/test_environment.py   | 32 ++++++++++++++++++++++++++++
 6 files changed, 58 insertions(+), 26 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/.github/workflows/task_tests.yml b/.github/workflows/task_tests.yml
index 56ebfee21..82b03a135 100644
--- a/.github/workflows/task_tests.yml
+++ b/.github/workflows/task_tests.yml
@@ -38,4 +38,5 @@ jobs:
           export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
 
           pip install ".[dev]"
-          python3 -m unittest discover tests/demos
+          pip install "pytest-xdist[psutil]"
+          pytest -v -n auto tests/demos  # task tests live in tests/demos; tests/unit is covered by unit_tests.yml
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 9d1e1e691..9142f4574 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -39,4 +39,5 @@ jobs:
           export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
 
           pip install ".[dev]"
-          python3 -m unittest discover tests/unit
+          pip install "pytest-xdist[psutil]"
+          pytest -v -n auto tests/unit
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 2848bc939..000000000
--- a/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-numpy
-Pillow
-pyquaternion
-html-testRunner
-setuptools
-natsort
diff --git a/rlbench/backend/scene.py b/rlbench/backend/scene.py
index 5ab3db866..b182657ed 100644
--- a/rlbench/backend/scene.py
+++ b/rlbench/backend/scene.py
@@ -77,7 +77,7 @@ def __init__(self,
 
         self._robot_shapes = self.robot.arm.get_objects_in_tree(
             object_type=ObjectType.SHAPE)
-        self._execute_demo_joint_position_action = None
+        self._joint_position_action = None
 
     def load(self, task: Task) -> None:
         """Loads the task and positions at the centre of the workspace.
@@ -337,6 +337,8 @@ def get_demo(self, record: bool = True,
         demo = []
+        gripper_open = 1.0 if self.robot.gripper.get_open_amount()[0] > 0.9 else 0.0  # read below even when record=False
         if record:
             self.pyrep.step()  # Need this here or get_force doesn't work...
+            self._joint_position_action = None
             demo.append(self.get_observation())
         while True:
             success = False
@@ -366,7 +368,7 @@ def get_demo(self, record: bool = True,
                 while not done:
                     done = path.step()
                     self.step()
-                    self._execute_demo_joint_position_action = path.get_executed_joint_position_action()
+                    self._joint_position_action = np.append(path.get_executed_joint_position_action(), gripper_open)
                     self._demo_record_step(demo, record, callable_each_step)
                     success, term = self.task.success()
 
@@ -385,9 +387,10 @@ def get_demo(self, record: bool = True,
                         if not contains_param:
                             done = False
                             while not done:
-                                done = gripper.actuate(1.0, 0.04)
-                                self.pyrep.step()
-                                self.task.step()
+                                gripper_open = 1.0
+                                done = gripper.actuate(gripper_open, 0.04)
+                                self.step()
+                                self._joint_position_action = np.append(path.get_executed_joint_position_action(), gripper_open)
                                 if self._obs_config.record_gripper_closing:
                                     self._demo_record_step(
                                         demo, record, callable_each_step)
@@ -397,9 +400,10 @@ def get_demo(self, record: bool = True,
                         if not contains_param:
                             done = False
                             while not done:
-                                done = gripper.actuate(0.0, 0.04)
-                                self.pyrep.step()
-                                self.task.step()
+                                gripper_open = 0.0
+                                done = gripper.actuate(gripper_open, 0.04)
+                                self.step()
+                                self._joint_position_action = np.append(path.get_executed_joint_position_action(), gripper_open)
                                 if self._obs_config.record_gripper_closing:
                                     self._demo_record_step(
                                         demo, record, callable_each_step)
@@ -409,9 +413,10 @@ def get_demo(self, record: bool = True,
                         num = float(rest[:rest.index(')')])
                         done = False
                         while not done:
-                            done = gripper.actuate(num, 0.04)
-                            self.pyrep.step()
-                            self.task.step()
+                            gripper_open = num
+                            done = gripper.actuate(gripper_open, 0.04)
+                            self.step()
+                            self._joint_position_action = np.append(path.get_executed_joint_position_action(), gripper_open)
                             if self._obs_config.record_gripper_closing:
                                 self._demo_record_step(
                                     demo, record, callable_each_step)
@@ -429,8 +434,8 @@ def get_demo(self, record: bool = True,
         # (e.g. ball rowling to goal)
         if not success:
             for _ in range(10):
-                self.pyrep.step()
-                self.task.step()
+                self.step()
+                self._joint_position_action = np.append(path.get_executed_joint_position_action(), gripper_open)
                 self._demo_record_step(demo, record, callable_each_step)
                 success, term = self.task.success()
                 if success:
@@ -545,8 +550,7 @@ def _get_cam_data(cam: VisionSensor, name: str):
         misc.update(_get_cam_data(self._cam_front, 'front_camera'))
         misc.update(_get_cam_data(self._cam_wrist, 'wrist_camera'))
         misc.update({"variation_index": self._variation_index})
-        if self._execute_demo_joint_position_action is not None:
+        if self._joint_position_action is not None:
             # Store the actual requested joint positions during demo collection
-            misc.update({"executed_demo_joint_position_action": self._execute_demo_joint_position_action})
-            self._execute_demo_joint_position_action = None
+            misc.update({"joint_position_action": self._joint_position_action})
         return misc
diff --git a/setup.py b/setup.py
index d6253a6b9..97425d920 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@ def get_version(rel_path):
         raise RuntimeError("Unable to find version string.")
 
 core_requirements = [
-    "pyrep @ git+https://github.com/stepjam/PyRep.git@076ca15c57f2495a4194da03565891ab1aaa317e",
+    "pyrep @ git+https://github.com/stepjam/PyRep.git@cd9830b58ef09538562b785fc0c257f528f1762b",
     "numpy",
     "Pillow",
     "pyquaternion",
@@ -60,7 +60,7 @@ def get_version(rel_path):
             'rlbench.gym'
       ],
       extras_require={
-          "dev": ["html-testRunner", "gym"]
+          "dev": ["pytest", "html-testRunner", "gym"]
       },
       package_data={'': ['*.ttm', '*.obj', '**/**/*.ttm', '**/**/*.obj'],
                     'rlbench': ['task_design.ttt']},
diff --git a/tests/unit/test_environment.py b/tests/unit/test_environment.py
index eef574996..83feb08ee 100644
--- a/tests/unit/test_environment.py
+++ b/tests/unit/test_environment.py
@@ -263,3 +263,35 @@ def test_swap_arm(self):
                     robot_setup=robot_config)
                 self.env.launch()
                 self.env.shutdown()
+
+    def test_executed_jp_action(self):
+        """Replaying each demo's stored `joint_position_action`s must succeed."""
+        for task_cls in [ReachTarget, TakeLidOffSaucepan]:
+            with self.subTest(task_cls=task_cls):
+                task = self.get_task(
+                    task_cls, JointPosition(True))
+                num_episodes = 20
+                demos = task.get_demos(num_episodes, live_demos=True)
+                total_reward = 0.0
+                for demo in demos:
+                    # The first observation is taken before any action.
+                    self.assertNotIn("joint_position_action", demo[0].misc)
+                    jp_action = []
+                    for t, obs in enumerate(demo):
+                        if t == 0:
+                            continue  # already checked above
+                        self.assertIn("joint_position_action", obs.misc)
+                        jp_action.append(obs.misc["joint_position_action"])
+
+                    task.reset_to_demo(demo)  # replay from the demo's start
+                    reward = 0.0  # stays 0 if the demo recorded no actions
+                    for action in jp_action:
+                        _, reward, term = task.step(action)
+                        if term:
+                            break
+                    total_reward += reward
+
+                success_rate = total_reward / num_episodes
+                self.assertGreaterEqual(success_rate, 0.9)
+                self.env.shutdown()
+                    
\ No newline at end of file