Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support PGD and multi-view FCOS3D++ on Waymo #2835

Merged
merged 41 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
b10c39a
support training dsvt
JingweiZhang12 Jun 20, 2023
b00d3fe
Merge remote-tracking branch 'jingwei/dsvt_train' into dsvt_train
sunjiahao1999 Jul 12, 2023
7fecc0f
fix batch_size
sunjiahao1999 Aug 9, 2023
232c3b8
chage cam to lidar
Aug 10, 2023
a81529e
add cam instances
Aug 14, 2023
84d9fc7
add cam_instances
sunjiahao1999 Aug 16, 2023
b7b3b62
add description for skip
sunjiahao1999 Aug 16, 2023
1995b5a
fix num_ins_per_cat
Aug 18, 2023
a9c7cd3
refactor waymo create
sunjiahao1999 Aug 21, 2023
257bcb3
fix waymo create
sunjiahao1999 Aug 23, 2023
53620aa
remove some function not use
sunjiahao1999 Aug 24, 2023
061d9e6
fix cam_instances after refactor
sunjiahao1999 Aug 24, 2023
3c7d4e9
remove unused .py
sunjiahao1999 Aug 24, 2023
38e47b1
add fast eval
sunjiahao1999 Aug 28, 2023
8c6e8c0
add prallel eval
sunjiahao1999 Aug 29, 2023
eadd6fc
fail use parallel
sunjiahao1999 Aug 30, 2023
1705540
remove unused code
sunjiahao1999 Aug 30, 2023
76c9b6f
Merge from waymo_speed
sunjiahao1999 Sep 4, 2023
180d6df
fix create gt database bug
sunjiahao1999 Sep 11, 2023
8342685
fix train iter
sunjiahao1999 Sep 12, 2023
2c9b333
fix dis aug and model init
sunjiahao1999 Sep 15, 2023
9824778
train align
sunjiahao1999 Sep 18, 2023
681423f
fix lint
sunjiahao1999 Sep 18, 2023
fd0825a
fix basepoints in_range_3d
sunjiahao1999 Sep 18, 2023
13affc6
fix idx_all and add description
sunjiahao1999 Sep 18, 2023
e8166b3
Merge branch 'dev-1.x' into waymo_speed
sunjiahao1999 Sep 18, 2023
73b33c6
fix defualt pipeline
sunjiahao1999 Sep 18, 2023
fd2448a
10.27 Merge branch 'dev-1.x' into waymo_speed
sunjiahao1999 Oct 27, 2023
651dbf1
fix pgd fov
sunjiahao1999 Nov 20, 2023
bfb10b6
add mvfcos3d
sunjiahao1999 Dec 4, 2023
943d0ff
merge dsvt
sunjiahao1999 Dec 5, 2023
80a7a77
merge dev-1.x
sunjiahao1999 Dec 27, 2023
d714153
fix pgd config
sunjiahao1999 Dec 28, 2023
43963f8
fix mvfoc3d config && add doc
sunjiahao1999 Dec 28, 2023
3ea3f9f
merge dev-1.x
sunjiahao1999 Dec 28, 2023
6c00213
fix lint & delete unused
sunjiahao1999 Dec 28, 2023
31a59df
del unused
sunjiahao1999 Dec 28, 2023
0ecf88c
resolve comments
sunjiahao1999 Jan 4, 2024
ab31f64
del unused in pgd head
sunjiahao1999 Jan 4, 2024
5ff1df8
fix config comments bug and update waymo infos & mini link
sunjiahao1999 Jan 4, 2024
be43c4d
fix doc
sunjiahao1999 Jan 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions configs/_base_/datasets/waymoD3-fov-mono3d-3class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Dataset settings.
# "D3" in the config name means the whole dataset is divided into 3 folds;
# only one fold is used so that experiments stay efficient.
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
# Camera input only: LiDAR is switched off.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
)

# Examples of using a different file client.
#
# Method 1: simply set the data root and let the file I/O module infer the
# backend automatically from the prefix (LMDB and Memcache are not supported
# yet):
#
#     data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
#
# Method 2: use backend_args (file_client_args in versions before 1.1.0):
#
#     backend_args = dict(
#         backend='petrel',
#         path_mapping=dict({
#             './data/': 's3://openmmlab/datasets/detection3d/',
#             'data/': 's3://openmmlab/datasets/detection3d/'
#         }))
backend_args = None

# Training pipeline: image loading, 2D/3D annotation loading, resize, flip
# and packing of the keys listed at the end.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True,
    ),
    # Base shape (1248, 832), scale ratio range (0.95, 1.05).
    # The test/eval pipelines of this file use ratio_range=(1., 1.) instead.
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img',
            'gt_bboxes',
            'gt_bboxes_labels',
            'gt_bboxes_3d',
            'gt_labels_3d',
            'centers_2d',
            'depths',
        ],
    ),
]

# Test pipeline: load the image, resize with a fixed ratio of 1 and pack the
# image together with the meta keys listed below.
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1.0, 1.0),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d',
            'img_shape',
            'cam2img',
            'scale_factor',
            'sample_idx',
            'context_name',
            'timestamp',
            'lidar2cam',
        ],
    ),
]
# Pipeline for data and GT loading in the show function.
# Keep its loading step consistent with test_pipeline (e.g. the file client).
# The val/test dataloaders of this config also reference this pipeline.
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1.0, 1.0),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d',
            'img_shape',
            'cam2img',
            'scale_factor',
            'sample_idx',
            'context_name',
            'timestamp',
            'lidar2cam',
        ],
    ),
]

# Training dataloader: shuffled sampling over the training infos file.
train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        # One image directory per camera, plus the point cloud directory.
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4',
        ),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # This config uses box_type_3d='Camera'. For reference, 'LiDAR' is
        # used in the kitti and nuscenes datasets and 'Depth' in the sunrgbd
        # and scannet datasets.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # Load one frame every three frames.
        load_interval=3,
        backend_args=backend_args,
    ),
)

# Validation dataloader: one sample per batch, no shuffling, nothing dropped.
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        # One image directory per camera, plus the point cloud directory.
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4',
        ),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # This config uses box_type_3d='Camera'. For reference, 'LiDAR' is
        # used in the kitti and nuscenes datasets and 'Depth' in the sunrgbd
        # and scannet datasets.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # NOTE(review): presumably safe because val_evaluator reads ground
        # truth from waymo_bin_file rather than from the dataset - confirm.
        load_eval_anns=False,
        backend_args=backend_args,
    ),
)

# Test dataloader: one sample per batch, no shuffling, nothing dropped.
# It reads the same infos file ('waymo_infos_val.pkl') as val_dataloader.
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        # One image directory per camera, plus the point cloud directory.
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4',
        ),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # This config uses box_type_3d='Camera'. For reference, 'LiDAR' is
        # used in the kitti and nuscenes datasets and 'Depth' in the sunrgbd
        # and scannet datasets.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # NOTE(review): val_dataloader passes load_eval_anns=False but this
        # dataset does not - confirm whether the difference is intended.
        backend_args=backend_args,
    ),
)

# Evaluator settings; the ground-truth bin file and the prefix for result
# files are given as relative paths.
val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    metric='LET_mAP',
    load_type='fov_image_based',
    result_prefix='./pgd_fov_pred',
)
# Testing reuses the very same evaluator object as validation.
test_evaluator = val_evaluator

# Visualization: a single local backend, handed to the 3D detection
# visualizer.
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    type='Det3DLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)
191 changes: 191 additions & 0 deletions configs/_base_/datasets/waymoD3-mv-mono3d-3class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
# Dataset settings.
# "D3" in the config name means the whole dataset is divided into 3 folds;
# only one fold is used so that experiments stay efficient.
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
# Camera input only: LiDAR is switched off.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
)

# Examples of using a different file client.
#
# Method 1: simply set the data root and let the file I/O module infer the
# backend automatically from the prefix (LMDB and Memcache are not supported
# yet):
#
#     data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
#
# Method 2: use backend_args (file_client_args in versions before 1.1.0):
#
#     backend_args = dict(
#         backend='petrel',
#         path_mapping=dict({
#             './data/': 's3://openmmlab/datasets/detection3d/',
#             'data/': 's3://openmmlab/datasets/detection3d/'
#         }))
backend_args = None

# Training pipeline: image loading, 2D/3D annotation loading, resize, flip
# and packing of the keys listed at the end.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True,
    ),
    # Base shape (1248, 832), scale ratio range (0.95, 1.05).
    # Alternative kept from the original as a disabled setting:
    # ratio_range=(1., 1.)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img',
            'gt_bboxes',
            'gt_bboxes_labels',
            'gt_bboxes_3d',
            'gt_labels_3d',
            'centers_2d',
            'depths',
        ],
    ),
]

# Test pipeline: load the image, resize by a fixed factor of 0.65 and pack
# the image together with the meta keys listed below.
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d',
            'img_shape',
            'cam2img',
            'scale_factor',
            'sample_idx',
            'context_name',
            'timestamp',
            'lidar2cam',
        ],
    ),
]
# Pipeline for data and GT loading in the show function.
# Keep its loading step consistent with test_pipeline (e.g. the file client).
# The val/test dataloaders of this config also reference this pipeline.
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d',
            'img_shape',
            'cam2img',
            'scale_factor',
            'sample_idx',
            'context_name',
            'timestamp',
            'lidar2cam',
        ],
    ),
]

# Training dataloader: shuffled sampling over the training infos file.
train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        # One image directory per camera, plus the point cloud directory.
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4',
        ),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # This config uses box_type_3d='Camera'. For reference, 'LiDAR' is
        # used in the kitti and nuscenes datasets and 'Depth' in the sunrgbd
        # and scannet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # Load one frame every three frames.
        load_interval=3,
        backend_args=backend_args,
    ),
)

# Validation dataloader: one sample per batch, loaded without worker
# processes (num_workers=0), no shuffling, nothing dropped.
val_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        # One image directory per camera, plus the point cloud directory.
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4',
        ),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # This config uses box_type_3d='Camera'. For reference, 'LiDAR' is
        # used in the kitti and nuscenes datasets and 'Depth' in the sunrgbd
        # and scannet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # NOTE(review): the setting below is disabled here although
        # test_dataloader enables it - confirm whether that is intended.
        # load_eval_anns=False,
        backend_args=backend_args,
    ),
)

# Test dataloader: one sample per batch, loaded without worker processes
# (num_workers=0), no shuffling, nothing dropped.
# It reads the same infos file ('waymo_infos_val.pkl') as val_dataloader.
test_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        # One image directory per camera, plus the point cloud directory.
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4',
        ),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # This config uses box_type_3d='Camera'. For reference, 'LiDAR' is
        # used in the kitti and nuscenes datasets and 'Depth' in the sunrgbd
        # and scannet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # NOTE(review): presumably safe because test_evaluator reads ground
        # truth from waymo_bin_file rather than from the dataset - confirm.
        load_eval_anns=False,
        backend_args=backend_args,
    ),
)

# Evaluator settings; the ground-truth bin file and the prefix for result
# files are given as relative paths. Unlike a plain metric config, this one
# also carries an NMS configuration (nms_cfg).
val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    metric='LET_mAP',
    load_type='mv_image_based',
    result_prefix='./pgd_mv_pred',
    nms_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=500,
        nms_thr=0.05,
        score_thr=0.001,
        min_bbox_size=0,
        max_per_frame=100,
    ),
)
# Testing reuses the very same evaluator object as validation.
test_evaluator = val_evaluator

# Visualization: a single local backend, handed to the 3D detection
# visualizer.
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    type='Det3DLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)
Loading
Loading