dior-h test performance is very low #38

Open
JiakangSun1 opened this issue Feb 22, 2025 · 2 comments

@JiakangSun1

Hello author, when training DIOR-H I used the train and val splits, and toward the end of training the accuracy on trainval was quite high.

[Image: training log on trainval]
However, the results on the test set are very poor.

[Image: evaluation results on the test set]
What could be the reason for this? I would really appreciate your advice. My full config is below:
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
crop_size = (800, 800)
data_root = 'F:\\Remote_Sensing\\dior'
dataset_type = 'DIORDataset'
default_hooks = dict(
    checkpoint=dict(interval=3, type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
    backbone=dict(
        attn_drop_rate=0.0,
        depth=12,
        drop_path_rate=0.3,
        drop_rate=0.0,
        embed_dim=768,
        img_size=800,
        mlp_ratio=4,
        num_heads=12,
        out_indices=[3, 5, 7, 11],
        patch_size=16,
        pretrained='F:\\Remote_Sensing\\vit-b-checkpoint-1599.pth',
        qk_scale=None,
        qkv_bias=True,
        type='RVSA_MTP',
        use_abs_pos_emb=True,
        use_checkpoint=True),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_size_divisor=32,
        std=[58.395, 57.12, 57.375],
        type='DetDataPreprocessor'),
    neck=dict(
        in_channels=[768, 768, 768, 768],
        num_outs=5,
        out_channels=256,
        type='FPN'),
    roi_head=dict(
        bbox_head=dict(
            bbox_coder=dict(
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
                type='DeltaXYWHBBoxCoder'),
            fc_out_channels=1024,
            in_channels=256,
            loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
            loss_cls=dict(loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
            num_classes=20,
            reg_class_agnostic=False,
            roi_feat_size=7,
            type='Shared2FCBBoxHead'),
        bbox_roi_extractor=dict(
            featmap_strides=[4, 8, 16, 32],
            out_channels=256,
            roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
            type='SingleRoIExtractor'),
        type='StandardRoIHead'),
    rpn_head=dict(
        anchor_generator=dict(
            ratios=[0.5, 1.0, 2.0],
            scales=[8],
            strides=[4, 8, 16, 32, 64],
            type='AnchorGenerator'),
        bbox_coder=dict(
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
            type='DeltaXYWHBBoxCoder'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
        loss_cls=dict(loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
        type='RPNHead'),
    test_cfg=dict(
        rcnn=dict(
            max_per_img=100,
            nms=dict(iou_threshold=0.5, type='nms'),
            score_thr=0.05),
        rpn=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=1000)),
    train_cfg=dict(
        rcnn=dict(
            assigner=dict(
                ignore_iof_thr=-1,
                match_low_quality=False,
                min_pos_iou=0.5,
                neg_iou_thr=0.5,
                pos_iou_thr=0.5,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=True,
                neg_pos_ub=-1,
                num=512,
                pos_fraction=0.25,
                type='RandomSampler')),
        rpn=dict(
            allowed_border=-1,
            assigner=dict(
                ignore_iof_thr=-1,
                match_low_quality=True,
                min_pos_iou=0.3,
                neg_iou_thr=0.3,
                pos_iou_thr=0.7,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=False,
                neg_pos_ub=-1,
                num=256,
                pos_fraction=0.5,
                type='RandomSampler')),
        rpn_proposal=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=2000)),
    type='FasterRCNN')
model_wrapper = dict(
    detect_anomalous_params=False,
    find_unused_parameters=False,
    type='MMDistributedDataParallel')
optim_wrapper = dict(
    constructor='LayerDecayOptimizerConstructor_ViT',
    optimizer=dict(
        betas=(0.9, 0.999),
        lr=0.0001,
        type='AdamW',
        weight_decay=0.05),
    paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=12))
param_scheduler = [
    dict(begin=0, by_epoch=False, end=500, start_factor=1e-06, type='LinearLR'),
    dict(begin=0, by_epoch=True, end=12, gamma=0.1, milestones=[8, 11], type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='DIOR_test_coco.json',
        backend_args=None,
        data_prefix=dict(img='JPEGImages-test'),
        data_root='F:\\Remote_Sensing\\dior',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(800, 800), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='DIORDataset'),
    drop_last=False,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='F:\\Remote_Sensing\\dior/DIOR_test_coco.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
test_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(800, 800), type='Resize'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
        type='PackDetInputs'),
]
train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=3)
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=4,
    dataset=dict(
        ann_file='DIOR_trainval_coco.json',
        backend_args=None,
        data_prefix=dict(img='JPEGImages-trainval'),
        data_root='F:\\Remote_Sensing\\dior',
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(keep_ratio=True, scale=(800, 800), type='Resize'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PackDetInputs'),
        ],
        type='DIORDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(keep_ratio=True, scale=(800, 800), type='Resize'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackDetInputs'),
]
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='DIOR_test_coco.json',
        backend_args=None,
        data_prefix=dict(img='JPEGImages-test'),
        data_root='F:\\Remote_Sensing\\dior',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(800, 800), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='DIORDataset'),
    drop_last=False,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file='F:\\Remote_Sensing\\dior/DIOR_test_coco.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')])
work_dir = './work_dirs\\dior'
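For reference, the test numbers above can be reproduced by loading this config with MMEngine's Runner. A minimal sketch, assuming MMDetection 3.x / MMEngine APIs; the config filename and checkpoint path are placeholders, not files from this repo:

from mmengine.config import Config
from mmengine.runner import Runner

# Placeholder paths: substitute the actual config file and trained checkpoint.
cfg = Config.fromfile('my_dior_faster_rcnn_rvsa.py')
cfg.load_from = './work_dirs/dior/epoch_12.pth'

runner = Runner.from_cfg(cfg)
runner.test()  # runs TestLoop with test_dataloader + CocoMetric on DIOR_test_coco.json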

@DotWang (Collaborator) commented Feb 22, 2025

@JiakangSun1 I can't tell from this figure that the accuracy is high; take a look at my logs to see what they should look like.

@JiakangSun1 (Author)

@DotWang Thank you for the reply. In my config file the model (backbone) type was written as RVSA_MTP; after changing it to RVSA_MTP_branches, training now works normally.

[Image: training log after the fix]
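For anyone hitting the same problem: the fix only touches the backbone type in the config posted above. A minimal sketch of the corrected backbone block, assuming RVSA_MTP_branches accepts the same arguments (all other values copied from the config earlier in this thread):

model = dict(
    backbone=dict(
        type='RVSA_MTP_branches',  # was 'RVSA_MTP'
        attn_drop_rate=0.0,
        depth=12,
        drop_path_rate=0.3,
        drop_rate=0.0,
        embed_dim=768,
        img_size=800,
        mlp_ratio=4,
        num_heads=12,
        out_indices=[3, 5, 7, 11],
        patch_size=16,
        pretrained='F:\\Remote_Sensing\\vit-b-checkpoint-1599.pth',
        qk_scale=None,
        qkv_bias=True,
        use_abs_pos_emb=True,
        use_checkpoint=True),
    # ... remaining model keys unchanged ...
)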
