Skip to content

MMDetection 2的安装与训练

安装

condan环境

shell
conda create -n openmmlab python=3.8
conda activate openmmlab

torch 安装

shell
pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113

安装 mmengin 和 mmcv (这里有玄学的版本对应问题)

shell
pip install -U openmim
mim install mmengine==0.7.0
mim install mmcv==2.0.1

本地安装mmdet,这种方式可以训练

shell
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection
pip install -v -e .
# "-v" 指详细说明,或更多的输出
# "-e" 表示在可编辑模式下安装项目,因此对代码所做的任何本地修改都会生效,从而无需重新安装。

训练

urpc自定义数据集训练

1、在 mmdet/datasets 下新建一个文件 urpc.py 在文件中创建自定义数据类。

python
from mmdet.registry import DATASETS
from .coco import CocoDataset
import copy
import os.path as osp
from typing import List, Union
from mmengine.fileio import get_local_path
from mmdet.registry import DATASETS

@DATASETS.register_module()

class URPCDataset(CocoDataset):
    METAINFO = {
        'classes': ("holothurian", "echinus", "starfish", "scallop")}


    def load_data_list(self) -> List[dict]:
        """Load annotations from an annotation file named as ``self.ann_file``

        Returns:
            List[dict]: A list of annotation.
        """  # noqa: E501

        with get_local_path(
                self.ann_file, backend_args=self.backend_args) as local_path:
            self.coco = self.COCOAPI(local_path)

        # The order of returned `cat_ids` will not

        # change with the order of the `classes`

        self.cat_ids = self.coco.get_cat_ids(
            cat_names=self.metainfo['classes'])
        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
        self.cat_img_map = copy.deepcopy(self.coco.cat_img_map)
        img_ids = self.coco.get_img_ids()
        data_list = []
        total_ann_ids = []

        for img_id in img_ids:
            raw_img_info = self.coco.load_imgs([img_id])[0]
            raw_img_info['img_id'] = img_id
            ann_ids = self.coco.get_ann_ids(img_ids=[img_id])
            raw_ann_info = self.coco.load_anns(ann_ids)
            total_ann_ids.extend(ann_ids)
            parsed_data_info = self.parse_data_info({
                'raw_ann_info':
                raw_ann_info,
                'raw_img_info':
                raw_img_info
            })

            data_list.append(parsed_data_info)

        if self.ANN_ID_UNIQUE:
            assert len(set(total_ann_ids)) == len(
                total_ann_ids
            ), f"Annotation ids in '{self.ann_file}' are not unique!"

        del self.coco

        return data_list


    def parse_data_info(self, raw_data_info: dict) -> Union[dict, List[dict]]:

        """Parse raw annotation to target format.

        Args:
            raw_data_info (dict): Raw data information load from ``ann_file``
        Returns:
            Union[dict, List[dict]]: Parsed annotation.

        """

        img_info = raw_data_info['raw_img_info']
        ann_info = raw_data_info['raw_ann_info']

        data_info = {}

        # TODO: need to change data_prefix['img'] to data_prefix['img_path']

        img_path = osp.join(self.data_prefix['img'], img_info['file_name'])

        if self.data_prefix.get('seg', None):
            seg_map_path = osp.join(
                self.data_prefix['seg'],
                img_info['file_name'].rsplit('.', 1)[0] + self.seg_map_suffix)

        else:
            seg_map_path = None

        data_info['img_path'] = img_path
        data_info['img_id'] = img_info['img_id']
        data_info['seg_map_path'] = seg_map_path
        data_info['height'] = img_info['height']
        data_info['width'] = img_info['width']

        if self.return_classes:
            data_info['text'] = self.metainfo['classes']
            data_info['custom_entities'] = True

        instances = []

        for i, ann in enumerate(ann_info):
            instance = {}
            if ann.get('ignore', False):
                continue

            x1, y1, w, h = ann['bbox']

            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
            if inter_w * inter_h == 0:
                continue
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue

            if ann['category_id'] not in self.cat_ids:
                continue

            bbox = [x1, y1, x1 + w, y1 + h]

            if ann.get('iscrowd', False):
                instance['ignore_flag'] = 1

            else:
                instance['ignore_flag'] = 0

            instance['bbox'] = bbox
            instance['bbox_label'] = self.cat2label[ann['category_id']]

            if ann.get('segmentation', None):
                instance['mask'] = ann['segmentation']

            instances.append(instance)

        data_info['instances'] = instances

        return data_info

    def filter_data(self) -> List[dict]:

        """Filter annotations according to filter_cfg.

        Returns:
            List[dict]: Filtered results.

        """

        if self.test_mode:
            return self.data_list

        if self.filter_cfg is None:
            return self.data_list

        filter_empty_gt = self.filter_cfg.get('filter_empty_gt', False)
        min_size = self.filter_cfg.get('min_size', 0)

        # obtain images that contain annotation

        ids_with_ann = set(data_info['img_id'] for data_info in self.data_list)

        # obtain images that contain annotations of the required categories

        ids_in_cat = set()

        for i, class_id in enumerate(self.cat_ids):
            ids_in_cat |= set(self.cat_img_map[class_id])

        # merge the image id sets of the two conditions and use the merged set

        # to filter out images if self.filter_empty_gt=True

        ids_in_cat &= ids_with_ann
        valid_data_infos = []

        for i, data_info in enumerate(self.data_list):

            img_id = data_info['img_id']
            width = data_info['width']
            height = data_info['height']

            if filter_empty_gt and img_id not in ids_in_cat:
                continue

            if min(width, height) >= min_size:
                valid_data_infos.append(data_info)

        return valid_data_infos

2、在 configs/_base_/datasets 目录下创建 urpc_detection.py

python
dataset_type = 'URPCDataset'  # 新类名

data_root = '/root/Desktop/Detection/URPC2020/'  # 数据集位置(COCO格式)

img_norm_cfg = dict(

    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (1225, 954)  # 数据集中图片的输入大小

backend_args = None

classes = ("holothurian", "echinus", "starfish", "scallop")

train_pipeline = [

    dict(type='LoadImageFromFile', backend_args=backend_args),

    dict(type='LoadAnnotations', with_bbox=True),

    dict(type='Resize', scale=img_scale, keep_ratio=True),

    dict(type='RandomFlip', prob=0.5),

    dict(type='PackDetInputs')

]

test_pipeline = [

    dict(type='LoadImageFromFile', backend_args=backend_args),

    dict(type='Resize', scale=img_scale, keep_ratio=True),

    # If you don't have a gt annotation, delete the pipeline

    dict(type='LoadAnnotations', with_bbox=True),

    dict(

        type='PackDetInputs',

        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',

                   'scale_factor'))

]

train_dataloader = dict(

    batch_size=2,

    num_workers=2,

    persistent_workers=True,

    sampler=dict(type='DefaultSampler', shuffle=True),

    batch_sampler=dict(type='AspectRatioBatchSampler'),

    dataset=dict(

        metainfo=dict(classes=classes),

        type=dataset_type,

        data_root=data_root,

        ann_file='annotations/train.json',

        data_prefix=dict(img='train/'),

        filter_cfg=dict(filter_empty_gt=True, min_size=32),

        pipeline=train_pipeline,

        backend_args=backend_args))

val_dataloader = dict(

    batch_size=1,

    num_workers=2,

    persistent_workers=True,

    drop_last=False,

    sampler=dict(type='DefaultSampler', shuffle=False),

    dataset=dict(

        metainfo=dict(classes=classes),

        type=dataset_type,

        data_root=data_root,

        ann_file='annotations/val.json',

        data_prefix=dict(img='val/'),

        test_mode=True,

        pipeline=test_pipeline,

        backend_args=backend_args))

test_dataloader = val_dataloader

val_evaluator = dict(

    type='CocoMetric',

    ann_file=data_root + 'annotations/val.json',

    metric='bbox',

    format_only=False,

    backend_args=backend_args)

test_evaluator = val_evaluator

# inference on test dataset and

# format the output results for submission.

test_dataloader = dict(

    batch_size=1,

    num_workers=2,

    persistent_workers=True,

    drop_last=False,

    sampler=dict(type='DefaultSampler', shuffle=False),

    dataset=dict(

        metainfo=dict(classes=classes),

        type=dataset_type,

        data_root=data_root,

        ann_file=data_root + 'annotations/test.json',

        data_prefix=dict(img='test2017/'),

        test_mode=True,

        pipeline=test_pipeline))

test_evaluator = dict(

    type='CocoMetric',

    metric='bbox',

    format_only=True,

    ann_file=data_root + 'annotations/test.json',

    outfile_prefix='./work_dirs/coco_detection/test')

3、在工作目录下创建配置文件。这里使用了ssd模型,所以在 configs/ssd 目录下构建 ssd_urpc.py

python

_base_ = [
    '../_base_/models/ssd300.py', '../_base_/datasets/urpc_detection.py',
    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
]

# dataset settings
input_size = 300
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Expand',
        mean={{_base_.model.data_preprocessor.mean}},
        to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=8,
    num_workers=2,
    batch_sampler=None,
    dataset=dict(
        _delete_=True,
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type={{_base_.dataset_type}},
            data_root={{_base_.data_root}},
            ann_file='annotations/train.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args={{_base_.backend_args}})))
val_dataloader = dict(batch_size=8, dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader

# # optimizer
# optim_wrapper = dict(
#     type='OptimWrapper',
#     optimizer=dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4))

# custom_hooks = [
#     dict(type='NumClassCheckHook'),
#     dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')
# ]

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
# auto_scale_lr = dict(base_batch_size=16)
data = dict(samples_per_gpu=2, workers_per_gpu=2)  # 显卡配置

evaluation = dict(interval=1, metric='bbox')

optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)  # 学习率调整

# 训练策略

lr_config = dict(

    policy='step',

    warmup='linear',

    warmup_iters=500,

    warmup_ratio=0.001,

    step=[8, 10, 12])

runner = dict(type='EpochBasedRunner', max_epochs=12)  # 训练轮次

log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])

# load_from = ''

# resume_from = ''

fp16 = dict(loss_scale='dynamic')  # 混合精度训练

work_dir = ''  # 模型、日志保存位置

4、训练

shell
 python ./tools/train.py ./configs/ssd/ssd_urpc.py

构建模型

需要在 configs 下创建一个工作目录或在原来的目录中添加,例如在 atss 中添加 atss_r50_fpn_opixray.py 用于构建模型。

python
# model settings

# 需要修改的

# type='ATSS'

# num_classes

model = dict(

    type='ATSS', # 替换

    data_preprocessor=dict(

        type='DetDataPreprocessor',

        mean=[123.675, 116.28, 103.53],

        std=[58.395, 57.12, 57.375],

        bgr_to_rgb=True,

        pad_size_divisor=32),

    backbone=dict(

        type='ResNet',

        depth=50,

        num_stages=4,

        out_indices=(0, 1, 2, 3),

        frozen_stages=1,

        norm_cfg=dict(type='BN', requires_grad=True),

        norm_eval=True,

        style='pytorch',

        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),

    neck=dict(

        type='FPN',

        in_channels=[256, 512, 1024, 2048],

        out_channels=256,

        start_level=1,

        add_extra_convs='on_output',

        num_outs=5),

    bbox_head=dict(

        type='ATSSHead',

        num_classes=5,  # 类别数修改

        in_channels=256,

        stacked_convs=4,

        feat_channels=256,

        anchor_generator=dict(

            type='AnchorGenerator',

            ratios=[1.0],

            octave_base_scale=8,

            scales_per_octave=1,

            strides=[8, 16, 32, 64, 128]),

        bbox_coder=dict(

            type='DeltaXYWHBBoxCoder',

            target_means=[.0, .0, .0, .0],

            target_stds=[0.1, 0.1, 0.2, 0.2]),

        loss_cls=dict(

            type='FocalLoss',

            use_sigmoid=True,

            gamma=2.0,

            alpha=0.25,

            loss_weight=1.0),

        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),

        loss_centerness=dict(

            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),

    # training and testing settings

    train_cfg=dict(

        assigner=dict(type='ATSSAssigner', topk=9),

        allowed_border=-1,

        pos_weight=-1,

        debug=False),

    test_cfg=dict(

        nms_pre=1000,

        min_bbox_size=0,

        score_thr=0.05,

        nms=dict(type='nms', iou_threshold=0.6),

        max_per_img=100))

# optimizer

optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)

训练时,回到上面自定义数据集的第3步,修改配置。

文章更新时间: