
Commit

this works, ugly code, fix soon
alik-git committed Dec 4, 2024
1 parent 2d09a46 commit 75f61e6
Showing 1 changed file with 77 additions and 28 deletions.
examples/12_load_gpr_dataset.py
@@ -11,6 +11,7 @@
from pprint import pprint
import shutil
import argparse
import numpy as np

from lerobot.common.datasets.push_dataset_to_hub.gpr_h5_format import (
from_raw_to_lerobot_format,
@@ -20,12 +21,12 @@
GPR_FEATURES = {
"observation.joint_pos": {
"dtype": "float32",
"shape": (12,), # Adjust based on your robot's DOF
"shape": (10,),
"names": ["joint_positions"],
},
"observation.joint_vel": {
"dtype": "float32",
"shape": (12,),
"shape": (10,),
"names": ["joint_velocities"],
},
"observation.ang_vel": {
@@ -40,7 +41,7 @@
},
"action": {
"dtype": "float32",
"shape": (12,),
"shape": (10,),
"names": ["joint_commands"],
},
}
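# Aside: a minimal sketch of how one could sanity-check a raw frame against the schema above before
# conversion. `check_frame_against_features` is a hypothetical helper, not part of this example; it
# only relies on the GPR_FEATURES dict and the numpy import added in this commit.
def check_frame_against_features(frame: dict, features: dict = GPR_FEATURES) -> None:
    """Raise if a frame's arrays do not match the declared dtype/shape."""
    for key, spec in features.items():
        value = np.asarray(frame[key])
        if value.shape != tuple(spec["shape"]):
            raise ValueError(f"{key}: expected shape {spec['shape']}, got {value.shape}")
        if value.dtype != np.dtype(spec["dtype"]):
            raise ValueError(f"{key}: expected dtype {spec['dtype']}, got {value.dtype}")

# Example with zero-filled data matching the schema:
# check_frame_against_features({k: np.zeros(v["shape"], dtype=v["dtype"]) for k, v in GPR_FEATURES.items()})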
@@ -50,16 +51,13 @@ def test_gpr_dataset(raw_dir: Path, videos_dir: Path, fps: int):
# Setup paths
videos_dir.mkdir(parents=True, exist_ok=True)

-    # Create temporary repo_id for local testing
-    repo_id = "gpr_test"

# Convert raw data to LeRobot format
print("Converting raw data to LeRobot format...")
hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(
raw_dir=raw_dir,
videos_dir=videos_dir,
fps=fps, # Your simulation fps
-        video=False, # No video data
+        video=False # No video data
)

# Delete the existing dataset folder if it exists
@@ -68,16 +66,67 @@ def test_gpr_dataset(raw_dir: Path, videos_dir: Path, fps: int):
print(f"Deleting existing dataset folder: {dataset_path}")
shutil.rmtree(dataset_path)

# import pdb; pdb.set_trace()



# Create dataset instance
print("\nCreating dataset...")
-    dataset = LeRobotDataset.from_preloaded(
-        repo_id=repo_id,
-        hf_dataset=hf_dataset,
-        episode_data_index=episode_data_index,
-        info=info,
-        videos_dir=videos_dir,
+    dataset = LeRobotDataset.create(
+        repo_id="gpr_test",
+        fps=fps,
+        features=GPR_FEATURES, # Using the features dict defined at the top of the file
+        use_videos=False,
)

episodes = range(len(episode_data_index["from"]))
for ep_idx in episodes:
from_idx = episode_data_index["from"][ep_idx].item()
to_idx = episode_data_index["to"][ep_idx].item()
num_frames = to_idx - from_idx

for frame_idx in range(num_frames):
i = from_idx + frame_idx
frame_data = hf_dataset[i]

# # Debugging: Print data types and shapes
# print(f"Frame {frame_idx} of episode {ep_idx}:")
# for key, value in frame_data.items():
# if isinstance(value, torch.Tensor):
# print(f" {key}: dtype={value.dtype}, shape={value.shape}")
# else:
# print(f" {key}: {value}")

frame = {
"observation.joint_pos": frame_data["observation.joint_pos"].numpy().astype(np.float32),
"observation.joint_vel": frame_data["observation.joint_vel"].numpy().astype(np.float32),
"observation.ang_vel": frame_data["observation.ang_vel"].numpy().astype(np.float32),
"observation.euler_rotation": frame_data["observation.euler_rotation"].numpy().astype(np.float32),
"action": frame_data["action"].numpy().astype(np.float32),
"timestamp": frame_data["timestamp"]
}

# print(f"added frame {frame_idx} of episode {ep_idx}")
dataset.add_frame(frame)

# # Debug print before save_episode
# print("\nBefore save_episode:")
# print("Episode buffer contents:")
# for key, value in dataset.episode_buffer.items():
# if isinstance(value, torch.Tensor):
# print(f" {key}: type={type(value)}, shape={value.shape}")
# elif isinstance(value, np.ndarray):
# print(f" {key}: type={type(value)}, shape={value.shape}")
# elif isinstance(value, list):
# print(f" {key}: value[0]={value[0] if len(value) > 0 else 'N/A'}, type={type(value)}, shape={len(value)}")
# else:
# print(f" {key}: type={type(value)}, value={value}")

# import pdb; pdb.set_trace()
dataset.save_episode(task="GPR Robot Task") # You might want to customize this task description

dataset.consolidate()
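    # Aside: a quick read-back check after consolidation; a minimal sketch assuming the freshly
    # written dataset can be re-opened locally by repo id, as done further below with
    # local_files_only=True.
    reloaded = LeRobotDataset(repo_id="gpr_test", local_files_only=True)
    assert reloaded.num_episodes == len(episode_data_index["from"])
    assert reloaded.num_frames == dataset.num_frames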

#########################################################
    # From this point on, it's copied from lerobot/examples/1_load_lerobot_dataset.py

@@ -87,7 +136,6 @@ def test_gpr_dataset(raw_dir: Path, videos_dir: Path, fps: int):
print(f"Number of frames selected: {dataset.num_frames}")

# Or simply load the entire dataset:
-    dataset = LeRobotDataset(repo_id)
print(f"Number of episodes selected: {dataset.num_episodes}")
print(f"Number of frames selected: {dataset.num_frames}")

@@ -108,37 +156,38 @@ def test_gpr_dataset(raw_dir: Path, videos_dir: Path, fps: int):
to_idx = dataset.episode_data_index["to"][episode_index].item()
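    # Aside: a minimal sketch of stacking every action of this episode into one tensor using only the
    # from/to bounds above; assumes `torch` is imported at the top of the file (the debug comments
    # earlier reference torch.Tensor) and that `from_idx` is defined alongside `to_idx`.
    episode_actions = torch.stack([dataset[idx]["action"] for idx in range(from_idx, to_idx)])
    print(f"{episode_actions.shape=}")  # expected (to_idx - from_idx, 10)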

# Then we grab all the image frames from the first camera:
-    camera_key = dataset.meta.camera_keys[0]
-    frames = [dataset[idx][camera_key] for idx in range(from_idx, to_idx)]
+    # import pdb; pdb.set_trace()
+    # camera_key = dataset.meta.camera_keys[0]
+    # frames = [dataset[idx][camera_key] for idx in range(from_idx, to_idx)]

# The objects returned by the dataset are all torch.Tensors
-    print(type(frames[0]))
-    print(frames[0].shape)
+    # print(type(frames[0]))
+    # print(frames[0].shape)

    # Since we're using PyTorch, the shape follows PyTorch's channel-first convention (c, h, w).
# We can compare this shape with the information available for that feature
-    pprint(dataset.features[camera_key])
-    # In particular:
-    print(dataset.features[camera_key]["shape"])
+    # pprint(dataset.features[camera_key])
+    # # In particular:
+    # print(dataset.features[camera_key]["shape"])
# The shape is in (h, w, c) which is a more universal format.

    # For many machine learning applications we need to load the history of past observations or trajectories of
    # future actions. Our datasets can load previous and future frames for each key/modality, using timestamp
    # differences relative to the currently loaded frame. For instance:
delta_timestamps = {
# loads 4 images: 1 second before current frame, 500 ms before, 200 ms before, and current frame
-        camera_key: [-1, -0.5, -0.20, 0],
+        # camera_key: [-1, -0.5, -0.20, 0],
# loads 8 state vectors: 1.5 seconds before, 1 second before, ... 200 ms, 100 ms, and current frame
"observation.state": [-1.5, -1, -0.5, -0.20, -0.10, 0],
# "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, 0],
# loads 64 action vectors: current frame, 1 frame in the future, 2 frames, ... 63 frames in the future
"action": [t / dataset.fps for t in range(64)],
}
    # Note that these delta_timestamps values need to be multiples of (1/fps) so that, when added to any
    # frame timestamp, they still land on a valid timestamp.
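    # Aside: one way to build offsets that are guaranteed multiples of 1/fps, as required above:
    # n past frames plus the current frame, oldest first (the values here are illustrative only).
    n_past = 5
    past_offsets = [-(n_past - k) / fps for k in range(n_past + 1)]
    # e.g. with fps=50 this gives [-0.10, -0.08, -0.06, -0.04, -0.02, 0.0]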

-    dataset = LeRobotDataset(repo_id, delta_timestamps=delta_timestamps)
-    print(f"\n{dataset[0][camera_key].shape=}") # (4, c, h, w)
-    print(f"{dataset[0]['observation.state'].shape=}") # (6, c)
+    dataset = LeRobotDataset(repo_id="gpr_test", delta_timestamps=delta_timestamps, local_files_only=True)
+    # print(f"\n{dataset[0][camera_key].shape=}") # (4, c, h, w)
+    # print(f"{dataset[0]['observation.state'].shape=}") # (6, c)
print(f"{dataset[0]['action'].shape=}\n") # (64, c)

# Finally, our datasets are fully compatible with PyTorch dataloaders and samplers because they are just
@@ -151,8 +200,8 @@ def test_gpr_dataset(raw_dir: Path, videos_dir: Path, fps: int):
)

for batch in dataloader:
print(f"{batch[camera_key].shape=}") # (32, 4, c, h, w)
print(f"{batch['observation.state'].shape=}") # (32, 5, c)
# print(f"{batch[camera_key].shape=}") # (32, 4, c, h, w)
# print(f"{batch['observation.state'].shape=}") # (32, 5, c)
print(f"{batch['action'].shape=}") # (32, 64, c)
break
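    # Aside: a possible next step, not in the original example: move each batch to a device before
    # feeding it to a policy. Assumes `torch` is imported at the top of the file and that batch values
    # are torch.Tensors, as the shape prints above suggest.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for batch in dataloader:
        batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
        # a policy forward pass would go here
        break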

