Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [sc-12401] RT-DETR baseline configuration for gr data #1

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
20 changes: 20 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
  "name": "rt_detr",
  "dockerComposeFile": ["../docker-compose.yaml"],
  "service": "rt_detr",
  "workspaceFolder": "/home/ros/RT-DETR",
  "shutdownAction": "stopCompose",
  "customizations": {
    "vscode": {
      "settings": {
        "remote.autoForwardPorts": false
      },
      "extensions": ["ms-python.python", "ms-python.debugpy"]
    }
  }
}
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,10 @@ rtdetr_pytorch/output/
rtdetr_pytorch/dataset/
rtdetrv2_pytorch/output/
rtdetrv2_pytorch/dataset/
# Local working directories that should not be tracked.
# NOTE(review): presumably model checkpoints, the mounted dataset folder and
# visualisation output — confirm against how the training scripts use them.
checkpoints
data
vis_results
# Image files, ignored by extension anywhere in the repository.
*.png
*.jpg
# Pickle files.
*.pkl
# Any JSON file whose name ends in "2017"
# (presumably COCO 2017 annotation files — confirm).
*2017.json
27 changes: 27 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Single-service compose file for the RT-DETR development container.
services:
  rt_detr:
    image: rt_detr:latest
    container_name: rt_detr

    # Image is built from the repository root using docker/Dockerfile.
    build:
      context: ./
      dockerfile: docker/Dockerfile
      args:
        - TARGET_PATH=.

    env_file: docker/.env
    user: ros
    working_dir: /home/ros/RT-DETR

    # Bind mounts; the first one is the container's working directory.
    volumes:
      - ./rtdetrv2_pytorch:/home/ros/RT-DETR
      - ./benchmark:/home/ros/RT-DETR/benchmark
      - /mnt/gr-nas/visionai-data:/home/ros/RT-DETR/data
      - /tmp/.X11-unix:/tmp/.X11-unix:rw

    # The container shares the host's IPC and network namespaces and runs
    # privileged.
    privileged: true
    ipc: host
    network_mode: host

    # Reserve NVIDIA GPU device '0' for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - '0'
              capabilities:
                - gpu
11 changes: 11 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Development image for RT-DETR, based on the Greenroom ros_builder image
# (jazzy-latest-cuda tag).
FROM ghcr.io/greenroom-robotics/ros_builder:jazzy-latest-cuda

# Python requirements of the rtdetrv2_pytorch package, installed per-user.
COPY rtdetrv2_pytorch/requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt --user

# OpenCV Python bindings from apt.
# `-y` is required: a docker build has no interactive stdin, so without it
# apt-get stops at the confirmation prompt and the build fails.
# The package lists are removed in the same layer to keep the image small.
RUN sudo apt-get update \
    && sudo apt-get install -y python3-opencv \
    && sudo rm -rf /var/lib/apt/lists/*

# Misc dependencies
RUN pip install debugpy

# Keep the container alive with a no-op foreground process.
CMD ["tail", "-f", "/dev/null"]
25 changes: 25 additions & 0 deletions rtdetrv2_pytorch/.vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            // Attaches to an already-running debugpy server on localhost:5678.
            "name": "Python Debugger: Remote Attach",
            "type": "debugpy",
            "request": "attach",
            "connect": {
                "host": "localhost",
                "port": 5678
            },
            // Local and remote roots are the same path, so source paths map 1:1.
            "pathMappings": [
                {
                    "localRoot": "/home/ros/RT-DETR",
                    "remoteRoot": "/home/ros/RT-DETR"
                }
            ],
            // No trailing comma after the last property: it is invalid JSON and
            // is flagged by stricter JSONC tooling.
            "justMyCode": false
        }
    ]
}
42 changes: 42 additions & 0 deletions rtdetrv2_pytorch/configs/dataset/gr_detection.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Dataset definition for the GR detection data, read through the
# CocoDetection dataset type and scored with CocoEvaluator on bounding boxes.
task: detection

evaluator:
  type: CocoEvaluator
  iou_types:
    - 'bbox'

# Has to be the actual number of classes + 1.
num_classes: 5
remap_mscoco_category: False


# Training split. `ops` is null in this file.
# NOTE(review): the "Funcion" spelling of the collate type is used consistently
# across these configs; presumably it matches the registered class name —
# confirm before "correcting" it.
train_dataloader:
  type: DataLoader
  dataset:
    type: CocoDetection
    img_folder: ./data
    ann_file: ./data/datasets/image/experiments/e24-007-d24-002-traintestsplit/coco_labels/split_train.mapped.json
    return_masks: False
    transforms:
      type: Compose
      ops: ~
  shuffle: True
  num_workers: 4
  drop_last: True
  collate_fn:
    type: BatchImageCollateFuncion


# Test split: same image root, no shuffling, last partial batch is kept.
val_dataloader:
  type: DataLoader
  dataset:
    type: CocoDetection
    img_folder: ./data
    ann_file: ./data/datasets/image/experiments/e24-007-d24-002-traintestsplit/coco_labels/split_test.mapped.json
    return_masks: False
    transforms:
      type: Compose
      ops: ~
  shuffle: False
  num_workers: 4
  drop_last: False
  collate_fn:
    type: BatchImageCollateFuncion
38 changes: 38 additions & 0 deletions rtdetrv2_pytorch/configs/gr/include/dataloader.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# Transform pipelines and batching for the train / val dataloaders
# (640x640 input).
train_dataloader:
  dataset:
    transforms:
      ops:
        - type: RandomPhotometricDistort
          p: 0.5
        - type: RandomZoomOut
          fill: 0
        - type: RandomIoUCrop
          p: 0.8
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: RandomHorizontalFlip
        - type: Resize
          size: [640, 640]
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: ConvertPILImage
          dtype: 'float32'
          scale: True
        - type: ConvertBoxes
          fmt: 'cxcywh'
          normalize: True
      policy:
        name: stop_epoch
        # epoch in [71, ~) stop the `ops` listed below
        epoch: 71
        ops:
          - 'RandomPhotometricDistort'
          - 'RandomZoomOut'
          - 'RandomIoUCrop'

  collate_fn:
    type: BatchImageCollateFuncion
    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
    # epoch in [71, ~) stop `multiscales`
    stop_epoch: 71

  shuffle: True
  # total batch size equals 16 (4 * 4)
  total_batch_size: 16
  num_workers: 4


val_dataloader:
  dataset:
    transforms:
      ops:
        - type: Resize
          size: [640, 640]
        - type: ConvertPILImage
          dtype: 'float32'
          scale: True
  shuffle: False
  total_batch_size: 32
  num_workers: 4
37 changes: 37 additions & 0 deletions rtdetrv2_pytorch/configs/gr/include/optimizer.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

# Training-loop settings: AMP, EMA, epoch count, gradient clipping, optimizer
# and learning-rate schedulers.
use_amp: True
use_ema: True
ema:
  type: ModelEMA
  decay: 0.9999
  warmups: 2000


epoches: 72
clip_max_norm: 0.1


optimizer:
  type: AdamW
  # Per-group overrides, selected by a regex in `params`; the lr / betas /
  # weight_decay keys further down apply where a group does not set its own.
  params:
    # "backbone" in the name and no "norm": lower learning rate.
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    # "encoder" or "decoder" together with "norm" or "bn": no weight decay.
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001


lr_scheduler:
  type: MultiStepLR
  milestones:
    - 1000
  gamma: 0.1


lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 2000
83 changes: 83 additions & 0 deletions rtdetrv2_pytorch/configs/gr/include/rtdetrv2_r50vd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Base RT-DETRv2 model definition with a depth-50 PResNet backbone.
task: detection

model: RTDETR
criterion: RTDETRCriterionv2
postprocessor: RTDETRPostProcessor


use_focal_loss: True
# h w
eval_spatial_size: [640, 640]


# Top-level model: names the backbone / encoder / decoder sections below.
RTDETR:
  backbone: PResNet
  encoder: HybridEncoder
  decoder: RTDETRTransformerv2


PResNet:
  depth: 50
  variant: d
  freeze_at: 0
  return_idx: [1, 2, 3]
  num_stages: 4
  freeze_norm: True
  pretrained: True


HybridEncoder:
  in_channels: [512, 1024, 2048]
  feat_strides: [8, 16, 32]

  # intra
  hidden_dim: 256
  use_encoder_idx:
    - 2
  num_encoder_layers: 1
  nhead: 8
  dim_feedforward: 1024
  dropout: 0.0
  enc_act: 'gelu'

  # cross
  expansion: 1.0
  depth_mult: 1
  act: 'silu'


RTDETRTransformerv2:
  feat_channels: [256, 256, 256]
  feat_strides: [8, 16, 32]
  hidden_dim: 256
  num_levels: 3

  num_layers: 6
  num_queries: 300

  num_denoising: 100
  label_noise_ratio: 0.5
  # 1.0 0.4
  box_noise_scale: 1.0

  eval_idx: -1

  # NEW
  # [3,3,3] [2,2,2]
  num_points: [4, 4, 4]
  # default, discrete
  cross_attn_method: default
  # default, agnostic
  query_select_method: default


RTDETRPostProcessor:
  num_top_queries: 300


RTDETRCriterionv2:
  weight_dict:
    loss_vfl: 1
    loss_bbox: 5
    loss_giou: 2
  losses:
    - 'vfl'
    - 'boxes'
  alpha: 0.75
  gamma: 2.0

  matcher:
    type: HungarianMatcher
    weight_dict:
      cost_class: 2
      cost_bbox: 5
      cost_giou: 2
    alpha: 0.25
    gamma: 2.0

67 changes: 67 additions & 0 deletions rtdetrv2_pytorch/configs/gr/rtdetrv2_r18vd_10e_gr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# 10-epoch RT-DETRv2 run on the GR data with a depth-18 PResNet backbone and
# 1280x1280 input. Pulls in the shared dataset / dataloader / optimizer / model
# files listed below; the keys that follow presumably override the included
# values — confirm against the config loader.
__include__:
  - '../dataset/gr_detection.yml'
  - './include/dataloader.yml'
  - './include/optimizer.yml'
  - './include/rtdetrv2_r50vd.yml'
  - '../runtime.yml'


output_dir: ./output/rtdetrv2_r18vd_10e


PResNet:
  depth: 18
  freeze_at: -1
  freeze_norm: False
  pretrained: True


HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5


RTDETRTransformerv2:
  num_layers: 3


epoches: 10

optimizer:
  type: AdamW
  params:
    # Any parameter whose name contains "norm" or "bn": no weight decay.
    - params: '^(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

eval_spatial_size: [1280, 1280]

train_dataloader:
  dataset:
    transforms:
      ops:
        - type: RandomPhotometricDistort
          p: 0.5
        - type: RandomZoomOut
          fill: 0
        - type: RandomIoUCrop
          p: 0.8
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: RandomHorizontalFlip
        - type: Resize
          size: [1280, 1280]
        - type: SanitizeBoundingBoxes
          min_size: 1
        - type: ConvertPILImage
          dtype: 'float32'
          scale: True
        - type: ConvertBoxes
          fmt: 'cxcywh'
          normalize: True
      policy:
        # NOTE(review): 45 is larger than `epoches: 10` set in this file; if
        # this is the epoch at which the policy takes effect, it is never
        # reached in this run — confirm this is intended.
        epoch: 45
  collate_fn:
    scales: ~

  total_batch_size: 6

val_dataloader:
  dataset:
    transforms:
      ops:
        - type: Resize
          size: [1280, 1280]
        - type: ConvertPILImage
          dtype: 'float32'
          scale: True
  total_batch_size: 12
Loading