Discrepancy in AP50 while Re-implementing MetaRCNN #145

Open
mani2002 opened this issue Jan 3, 2024 · 0 comments

I encountered an issue while re-implementing MetaRCNN. When fine-tuning on all classes, I observed a difference of up to 6% in the AP50 values for the novel classes. I ran the implementation on 2 GPUs; because of limited GPU access, I reduced the batch size (samples_per_gpu) to 2 and increased the number of iterations to 36,000 to keep base training manageable.
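For context, the batch size and iteration count are the only intentional deviations from the reference recipe. Below is a minimal sketch of how I understand the linear scaling rule would apply to this change, assuming the reference schedule was tuned for the 8 GPUs x 4 images per GPU implied by the original 8xb4 config name; this is only an illustration of that assumption, not the config I actually trained with:

# Hypothetical linear-scaling-rule adjustment (a sketch, NOT the config I ran):
# the effective batch shrinks from an assumed 8 GPUs * 4 imgs = 32
# down to my 2 GPUs * 2 imgs = 4, so the base lr would shrink by 4 / 32.
effective_batch_ref = 8 * 4   # assumed from the '8xb4' config name
effective_batch_mine = 2 * 2  # my setup: 2 GPUs, samples_per_gpu=2
optimizer = dict(
    type='SGD',
    lr=0.005 * effective_batch_mine / effective_batch_ref,  # = 0.000625
    momentum=0.9,
    weight_decay=0.0001)

If the learning rate (and, correspondingly, the lr_config steps and max_iters) is indeed expected to be rescaled like this, that alone might explain part of the AP50 gap; please correct me if the reference recipe already accounts for the smaller effective batch.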

[image1]

Here is the base-training configuration for split 1:

"img_norm_cfg = dict(
mean=[103.53, 116.28, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_multi_pipelines = dict(
query=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
],
support=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='GenerateMask', target_size=(224, 224)),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
])
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1000, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data_root = 'data/VOCdevkit/'
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type='NWayKShotDataset',
num_support_ways=15,
num_support_shots=1,
one_support_shot_per_image=True,
num_used_support_shots=200,
save_dataset=False,
dataset=dict(
type='FewShotVOCDataset',
ann_cfg=[
dict(
type='ann_file',
ann_file=
'data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'),
dict(
type='ann_file',
ann_file=
'data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt')
],
img_prefix='data/VOCdevkit/',
multi_pipelines=dict(
query=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize', img_scale=(1000, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='DefaultFormatBundle'),
dict(
type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
],
support=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='GenerateMask', target_size=(224, 224)),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='DefaultFormatBundle'),
dict(
type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]),
classes='BASE_CLASSES_SPLIT1',
use_difficult=True,
instance_wise=False,
dataset_name='query_dataset'),
support_dataset=dict(
type='FewShotVOCDataset',
ann_cfg=[
dict(
type='ann_file',
ann_file=
'data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'),
dict(
type='ann_file',
ann_file=
'data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt')
],
img_prefix='data/VOCdevkit/',
multi_pipelines=dict(
query=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize', img_scale=(1000, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='DefaultFormatBundle'),
dict(
type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
],
support=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='GenerateMask', target_size=(224, 224)),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='DefaultFormatBundle'),
dict(
type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]),
classes='BASE_CLASSES_SPLIT1',
use_difficult=False,
instance_wise=False,
dataset_name='support_dataset')),
val=dict(
type='FewShotVOCDataset',
ann_cfg=[
dict(
type='ann_file',
ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/test.txt')
],
img_prefix='data/VOCdevkit/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1000, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
classes='BASE_CLASSES_SPLIT1'),
test=dict(
type='FewShotVOCDataset',
ann_cfg=[
dict(
type='ann_file',
ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/test.txt')
],
img_prefix='data/VOCdevkit/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1000, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
test_mode=True,
classes='BASE_CLASSES_SPLIT1'),
model_init=dict(
copy_from_train_dataset=True,
samples_per_gpu=16,
workers_per_gpu=1,
type='FewShotVOCDataset',
ann_cfg=None,
img_prefix='data/VOCdevkit/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='GenerateMask', target_size=(224, 224)),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
],
use_difficult=False,
instance_wise=True,
classes='BASE_CLASSES_SPLIT1',
dataset_name='model_init_dataset'))
evaluation = dict(interval=12000, metric='mAP')
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=100,
warmup_ratio=0.001,
step=[16000])
runner = dict(type='IterBasedRunner', max_iters=36000)
norm_cfg = dict(type='BN', requires_grad=False)
pretrained = 'open-mmlab://detectron2/resnet101_caffe'
model = dict(
type='MetaRCNN',
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(
type='ResNetWithMetaConv',
depth=101,
num_stages=3,
strides=(1, 2, 2),
dilations=(1, 1, 1),
out_indices=(2, ),
frozen_stages=2,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
rpn_head=dict(
type='RPNHead',
in_channels=1024,
feat_channels=512,
anchor_generator=dict(
type='AnchorGenerator',
scales=[2, 4, 8, 16, 32],
ratios=[0.5, 1.0, 2.0],
scale_major=False,
strides=[16]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='MetaRCNNRoIHead',
shared_head=dict(
type='MetaRCNNResLayer',
pretrained='open-mmlab://detectron2/resnet101_caffe',
depth=50,
stage=3,
stride=2,
dilation=1,
style='caffe',
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=1024,
featmap_strides=[16]),
bbox_head=dict(
type='MetaBBoxHead',
with_avg_pool=False,
roi_feat_size=1,
in_channels=2048,
num_classes=15,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', loss_weight=1.0),
num_meta_classes=15,
meta_cls_in_channels=2048,
with_meta_cls_loss=True,
loss_meta=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
aggregation_layer=dict(
type='AggregationLayer',
aggregator_cfgs=[
dict(
type='DotProductAggregator',
in_channels=2048,
with_fc=False)
])),
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=12000,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=128,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=6000,
max_per_img=300,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.3),
max_per_img=100)))
checkpoint_config = dict(interval=12000)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
use_infinite_sampler = True
seed = 42
work_dir = './work_dirs/meta-rcnn_r101_c4_8xb4_voc-split1_base-training'
gpu_ids = range(0, 2)"

Could you please help me resolve this issue?
