diff --git a/cv/classification/fasternet/pytorch/README.md b/cv/classification/fasternet/pytorch/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96d08a377c964dc589363b43cd1c38ddae39aa34 --- /dev/null +++ b/cv/classification/fasternet/pytorch/README.md @@ -0,0 +1,63 @@ +# FasterNet + +## Model description + +This is the official PyTorch/PyTorch Lightning implementation of the paper:
+> [**Run, Don't Walk: Chasing Higher FLOPS for Faster Neural Networks**](https://arxiv.org/abs/2303.03667) +> Jierun Chen, Shiu-hong Kao, Hao He, Weipeng Zhuo, Song Wen, Chul-Ho Lee, S.-H. Gary Chan +> *IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2023* +> + +--- +We propose a simple yet fast and effective partial convolution (**PConv**), as well as a latency-efficient family of architectures called **FasterNet**. + +## Step 1: Installation +### 1. Dependency Setup +Clone this repo and install the required packages: +``` +pip install -r requirements.txt +``` + +### 2. Dataset Preparation + +Download the [ImageNet-1K](http://image-net.org/) classification dataset and structure the data as follows: +``` +/path/to/imagenet-1k/ + train/ + n01440764/ + n01440764_10026.JPEG + val/ + n01440764/ + ILSVRC2012_val_00000293.JPEG +``` + +## Step 2: Training +**Remark**: Training will prompt for wandb (Weights & Biases) visualization options; you'll need a W&B account to visualize, or choose "3" if you don't need it. + +FasterNet-T0 training on ImageNet-1K with an 8-GPU node: +``` +# Change '--data_dir' to point at your own dataset path if needed +python3 train_test.py -g 0,1,2,3,4,5,6,7 --num_nodes 1 -n 4 -b 4096 -e 2000 \ +--data_dir /home/datasets/cv/imagenet --pin_memory --wandb_project_name fasternet \ +--model_ckpt_dir ./model_ckpt/$(date +'%Y%m%d_%H%M%S') --cfg cfg/fasternet_t0.yaml +``` + +FasterNet-T0 training on ImageNet-1K with a 1-GPU node: +``` +# Change '--data_dir' to point at your own dataset path if needed +python3 train_test.py -g 0 --num_nodes 1 -n 4 -b 512 -e 2000 \ +--data_dir /home/datasets/cv/imagenet --pin_memory --wandb_project_name fasternet \ +--model_ckpt_dir ./model_ckpt/$(date +'%Y%m%d_%H%M%S') --cfg cfg/fasternet_t0.yaml +``` + +To train other FasterNet variants, `--cfg` needs to be changed. You may also want to change the training batch size `-b`. + + +## Result + +| GPU | FP32 | | ----------- | ------------------------------------ | | 8 cards | test_acc1 71.832, val_acc1 71.722 | + +## Reference +This repository is built using the [timm](https://github.com/rwightman/pytorch-image-models), [poolformer](https://github.com/sail-sg/poolformer), [ConvNeXt](https://github.com/facebookresearch/ConvNeXt) and [mmdetection](https://github.com/open-mmlab/mmdetection) repositories.
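For readers new to PConv, the core idea from the model description above is compact enough to sketch: a regular 3×3 convolution is applied to only the first `1/n_div` of the channels (the bundled configs set `n_div: 4`), while the remaining channels pass through untouched. The sketch below is a minimal illustration, not the repo's code; the class name, the default `n_div`, and the tensor sizes are ours, and the actual implementation lives in `models/fasternet.py`.

```
# Minimal PConv sketch; illustrative only (see models/fasternet.py for the
# actual implementation used by train_test.py).
import torch
import torch.nn as nn

class PConvSketch(nn.Module):
    def __init__(self, dim: int, n_div: int = 4):
        super().__init__()
        self.dim_conv = dim // n_div               # channels that are convolved
        self.dim_untouched = dim - self.dim_conv   # channels passed through as-is
        self.conv = nn.Conv2d(self.dim_conv, self.dim_conv, 3, 1, 1, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # "split_cat" variant: convolve 1/n_div of the channels, keep the rest,
        # then concatenate; FLOPs shrink by roughly n_div**2 vs. a full conv.
        x1, x2 = torch.split(x, [self.dim_conv, self.dim_untouched], dim=1)
        return torch.cat((self.conv(x1), x2), dim=1)

x = torch.randn(1, 40, 56, 56)   # 40 = embed_dim of FasterNet-T0 (cfg/fasternet_t0.yaml)
print(PConvSketch(40)(x).shape)  # torch.Size([1, 40, 56, 56])
```

With `n_div: 4`, only a quarter of the channels go through the 3×3 convolution, which is where the FLOPs and memory-access savings come from.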
diff --git a/cv/classification/fasternet/pytorch/cfg/fasternet_l.yaml b/cv/classification/fasternet/pytorch/cfg/fasternet_l.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8932f6cdc719b276d608ad959b01cf4e416e9138 --- /dev/null +++ b/cv/classification/fasternet/pytorch/cfg/fasternet_l.yaml @@ -0,0 +1,72 @@ +strategy: ddp +benchmark: True +pretrained: False +sync_batchnorm: False +dataset_name: imagenet +image_size: 224 +#multi_scale: null +multi_scale: !!str 192_280 # image_size 192 before epoch 280 +test_crop_ratio: 0.9 +# -------------------------------------- +# Optimizer parameters +# -------------------------------------- +opt: adamw +weight_decay: 0.05 +momentum: 0.9 +epochs: 300 +clip_grad: 0.01 +precision: 16 +# -------------------------------------- +# Learning rate schedule parameters +# -------------------------------------- +sched: cosine +lr: 0.002 # for bs=2048 +warmup_lr: 0.000001 +min_lr: 0.00001 +warmup_epochs: 20 +# -------------------------------------- +# Distillation parameters +# -------------------------------------- +teacher_model: regnety_160 +distillation_type: none # do not use KD by default +#distillation_type: hard # should be better than soft +#distillation_type: soft +distillation_alpha: 0.5 +distillation_tau: 1.0 +# -------------------------------------- +# Model parameters +# -------------------------------------- +model_name: fasternet +mlp_ratio: 2 +embed_dim: 192 +depths: [3, 4, 18, 3] +feature_dim: 1280 +patch_size: 4 +patch_stride: 4 +patch_size2: 2 +patch_stride2: 2 +layer_scale_init_value: 0 # no layer scale +drop_path_rate: 0.3 +norm_layer: BN +act_layer: RELU +n_div: 4 +# -------------------------------------- +# Augmentation parameters +# -------------------------------------- +color_jitter: 0 +aa: rand-m7-mstd0.5-inc1 # Use AutoAugment policy, Rand Augment +train_interpolation: bicubic # Training interpolation (random, bilinear, bicubic default: "bicubic") +smoothing: 0.1 # Label smoothing +# Random Erase params +reprob: 0 +remode: pixel +recount: 1 +# -------------------------------------- +# MixUp/CutMix parameters +# -------------------------------------- +mixup: 0.7 +cutmix: 1.0 +cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None) +mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled +mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled +mixup_mode: batch # How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem" \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/cfg/fasternet_m.yaml b/cv/classification/fasternet/pytorch/cfg/fasternet_m.yaml new file mode 100644 index 0000000000000000000000000000000000000000..651976879846e7cc2a2dc6fb61f1fa991c8f6b93 --- /dev/null +++ b/cv/classification/fasternet/pytorch/cfg/fasternet_m.yaml @@ -0,0 +1,72 @@ +strategy: ddp +benchmark: True +pretrained: False +sync_batchnorm: False +dataset_name: imagenet +image_size: 224 +#multi_scale: null +multi_scale: !!str 192_280 # image_size 192 before epoch 280 +test_crop_ratio: 0.9 +# -------------------------------------- +# Optimizer parameters +# -------------------------------------- +opt: adamw +weight_decay: 0.05 +momentum: 0.9 +epochs: 300 +clip_grad: 1 +precision: 16 +# -------------------------------------- +# Learning rate schedule parameters +# -------------------------------------- +sched: cosine +lr: 0.002 # for bs=2048 +warmup_lr: 0.000001 +min_lr: 0.00001 +warmup_epochs: 20 +# -------------------------------------- +# Distillation parameters +# -------------------------------------- +teacher_model: regnety_160 +distillation_type: none # do not use KD by default +#distillation_type: hard # should be better than soft +#distillation_type: soft +distillation_alpha: 0.5 +distillation_tau: 1.0 +# -------------------------------------- +# Model parameters +# -------------------------------------- +model_name: fasternet +mlp_ratio: 2 +embed_dim: 144 +depths: [3, 4, 18, 3] +feature_dim: 1280 +patch_size: 4 +patch_stride: 4 +patch_size2: 2 +patch_stride2: 2 +layer_scale_init_value: 0 # no layer scale +drop_path_rate: 0.2 +norm_layer: BN +act_layer: RELU +n_div: 4 +# -------------------------------------- +# Augmentation parameters +# -------------------------------------- +color_jitter: 0 +aa: rand-m7-mstd0.5-inc1 # Use AutoAugment policy, Rand Augment +train_interpolation: bicubic # Training interpolation (random, bilinear, bicubic default: "bicubic") +smoothing: 0.1 # Label smoothing +# Random Erase params +reprob: 0 +remode: pixel +recount: 1 +# -------------------------------------- +# MixUp/CutMix parameters +# -------------------------------------- +mixup: 0.5 +cutmix: 1.0 +cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None) +mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled +mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled +mixup_mode: batch # How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem" \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/cfg/fasternet_s.yaml b/cv/classification/fasternet/pytorch/cfg/fasternet_s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80b17d2eb488d17dd6e260f5bc15a436195a7794 --- /dev/null +++ b/cv/classification/fasternet/pytorch/cfg/fasternet_s.yaml @@ -0,0 +1,72 @@ +strategy: ddp +benchmark: True +pretrained: False +sync_batchnorm: False +dataset_name: imagenet +image_size: 224 +#multi_scale: null +multi_scale: !!str 192_280 # image_size 192 before epoch 280 +test_crop_ratio: 0.9 +# -------------------------------------- +# Optimizer parameters +# -------------------------------------- +opt: adamw +weight_decay: 0.03 +momentum: 0.9 +epochs: 300 +clip_grad: null +precision: 16 +# -------------------------------------- +# Learning rate schedule parameters +# -------------------------------------- +sched: cosine +lr: 0.004 # for bs=4096 RELU +warmup_lr: 0.000001 +min_lr: 0.00001 +warmup_epochs: 20 +# -------------------------------------- +# Distillation parameters +# -------------------------------------- +teacher_model: regnety_160 +distillation_type: none # do not use KD by default +#distillation_type: hard # should be better than soft +#distillation_type: soft +distillation_alpha: 0.5 +distillation_tau: 1.0 +# -------------------------------------- +# Model parameters +# -------------------------------------- +model_name: fasternet +mlp_ratio: 2 +embed_dim: 128 +depths: [1, 2, 13, 2] +feature_dim: 1280 +patch_size: 4 +patch_stride: 4 +patch_size2: 2 +patch_stride2: 2 +layer_scale_init_value: 0 # no layer scale +drop_path_rate: 0.1 +norm_layer: BN +act_layer: RELU +n_div: 4 +# -------------------------------------- +# Augmentation parameters +# -------------------------------------- +color_jitter: 0 +aa: rand-m7-mstd0.5-inc1 # Use AutoAugment policy, Rand Augment +train_interpolation: bicubic # Training interpolation (random, bilinear, bicubic default: "bicubic") +smoothing: 0.1 # Label smoothing +# Random Erase params +reprob: 0 +remode: pixel +recount: 1 +# -------------------------------------- +# MixUp/CutMix parameters +# -------------------------------------- +mixup: 0.3 +cutmix: 1.0 +cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None) +mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled +mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled +mixup_mode: batch # How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem" \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/cfg/fasternet_t0.yaml b/cv/classification/fasternet/pytorch/cfg/fasternet_t0.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a2c2915ed76d91d7a21830c618f0d7f12203c17 --- /dev/null +++ b/cv/classification/fasternet/pytorch/cfg/fasternet_t0.yaml @@ -0,0 +1,72 @@ +strategy: ddp +benchmark: True +pretrained: False +sync_batchnorm: False +dataset_name: imagenet +image_size: 224 +#multi_scale: null +multi_scale: !!str 192_280 # image_size 192 before epoch 280 +test_crop_ratio: 0.9 +# -------------------------------------- +# Optimizer parameters +# -------------------------------------- +opt: adamw +weight_decay: 0.005 +momentum: 0.9 +epochs: 300 +clip_grad: null +precision: 16 +# -------------------------------------- +# Learning rate schedule parameters +# -------------------------------------- +sched: cosine +lr: 0.004 # for bs=4096 +warmup_lr: 0.000001 +min_lr: 0.00001 +warmup_epochs: 20 +# -------------------------------------- +# Distillation parameters +# -------------------------------------- +teacher_model: regnety_160 +distillation_type: none # do not use KD by default +#distillation_type: hard # should be better than soft +#distillation_type: soft +distillation_alpha: 0.5 +distillation_tau: 1.0 +# -------------------------------------- +# Model parameters +# -------------------------------------- +model_name: fasternet +mlp_ratio: 2 +embed_dim: 40 +depths: [1, 2, 8, 2] +feature_dim: 1280 +patch_size: 4 +patch_stride: 4 +patch_size2: 2 +patch_stride2: 2 +layer_scale_init_value: 0 # no layer scale +drop_path_rate: 0. +norm_layer: BN +act_layer: GELU +n_div: 4 +# -------------------------------------- +# Augmentation parameters +# -------------------------------------- +color_jitter: 0 +aa: null +train_interpolation: bicubic # Training interpolation (random, bilinear, bicubic default: "bicubic") +smoothing: 0.1 # Label smoothing +# Random Erase params +reprob: 0 +remode: pixel +recount: 1 +# -------------------------------------- +# MixUp/CutMix parameters +# -------------------------------------- +mixup: 0.05 +cutmix: 1.0 +cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None) +mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled +mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled +mixup_mode: batch # How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem" \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/cfg/fasternet_t1.yaml b/cv/classification/fasternet/pytorch/cfg/fasternet_t1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9af3f8b13ce59e8fb7206a02e3228141ab0238d --- /dev/null +++ b/cv/classification/fasternet/pytorch/cfg/fasternet_t1.yaml @@ -0,0 +1,72 @@ +strategy: ddp +benchmark: True +pretrained: False +sync_batchnorm: False +dataset_name: imagenet +image_size: 224 +#multi_scale: null +multi_scale: !!str 192_280 # image_size 192 before epoch 280 +test_crop_ratio: 0.9 +# -------------------------------------- +# Optimizer parameters +# -------------------------------------- +opt: adamw +weight_decay: 0.01 +momentum: 0.9 +epochs: 300 +clip_grad: null +precision: 16 +# -------------------------------------- +# Learning rate schedule parameters +# -------------------------------------- +sched: cosine +lr: 0.004 # for bs=4096 +warmup_lr: 0.000001 +min_lr: 0.00001 +warmup_epochs: 20 +# -------------------------------------- +# Distillation parameters +# -------------------------------------- +teacher_model: regnety_160 +distillation_type: none # do not use KD by default +#distillation_type: hard # should be better than soft +#distillation_type: soft +distillation_alpha: 0.5 +distillation_tau: 1.0 +# -------------------------------------- +# Model parameters +# -------------------------------------- +model_name: fasternet +mlp_ratio: 2 +embed_dim: 64 +depths: [1, 2, 8, 2] +feature_dim: 1280 +patch_size: 4 +patch_stride: 4 +patch_size2: 2 +patch_stride2: 2 +layer_scale_init_value: 0 # no layer scale +drop_path_rate: 0.02 +norm_layer: BN +act_layer: GELU +n_div: 4 +# -------------------------------------- +# Augmentation parameters +# -------------------------------------- +color_jitter: 0 +aa: rand-m3-mstd0.5-inc1 # Use AutoAugment policy, Rand Augment +train_interpolation: bicubic # Training interpolation (random, bilinear, bicubic default: "bicubic") +smoothing: 0.1 # Label smoothing +# Random Erase params +reprob: 0 +remode: pixel +recount: 1 +# -------------------------------------- +# MixUp/CutMix parameters +# -------------------------------------- +mixup: 0.1 +cutmix: 1.0 +cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None) +mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled +mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled +mixup_mode: batch # How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem" \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/cfg/fasternet_t2.yaml b/cv/classification/fasternet/pytorch/cfg/fasternet_t2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17d3eb5d966b250ddb374f4372e334363d57a1d9 --- /dev/null +++ b/cv/classification/fasternet/pytorch/cfg/fasternet_t2.yaml @@ -0,0 +1,72 @@ +strategy: ddp +benchmark: True +pretrained: False +sync_batchnorm: False +dataset_name: imagenet +image_size: 224 +#multi_scale: null +multi_scale: !!str 192_280 # image_size 192 before epoch 280 +test_crop_ratio: 0.9 +# -------------------------------------- +# Optimizer parameters +# -------------------------------------- +opt: adamw +weight_decay: 0.02 +momentum: 0.9 +epochs: 300 +clip_grad: null +precision: 16 +# -------------------------------------- +# Learning rate schedule parameters +# -------------------------------------- +sched: cosine +lr: 0.004 # for bs=4096 +warmup_lr: 0.000001 +min_lr: 0.00001 +warmup_epochs: 20 +# -------------------------------------- +# Distillation parameters +# -------------------------------------- +teacher_model: regnety_160 +distillation_type: none # do not use KD by default +#distillation_type: hard # should be better than soft +#distillation_type: soft +distillation_alpha: 0.5 +distillation_tau: 1.0 +# -------------------------------------- +# Model parameters +# -------------------------------------- +model_name: fasternet +mlp_ratio: 2 +embed_dim: 96 +depths: [1, 2, 8, 2] +feature_dim: 1280 +patch_size: 4 +patch_stride: 4 +patch_size2: 2 +patch_stride2: 2 +layer_scale_init_value: 0 # no layer scale +drop_path_rate: 0.05 +norm_layer: BN +act_layer: RELU +n_div: 4 +# -------------------------------------- +# Augmentation parameters +# -------------------------------------- +color_jitter: 0 +aa: rand-m5-mstd0.5-inc1 # Use AutoAugment policy, Rand Augment +train_interpolation: bicubic # Training interpolation (random, bilinear, bicubic default: "bicubic") +smoothing: 0.1 # Label smoothing +# Random Erase params +reprob: 0 +remode: pixel +recount: 1 +# -------------------------------------- +# MixUp/CutMix parameters +# -------------------------------------- +mixup: 0.1 +cutmix: 1.0 +cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None) +mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled +mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled +mixup_mode: batch # How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem" \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/data/__init__.py b/cv/classification/fasternet/pytorch/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/cv/classification/fasternet/pytorch/data/custom_imagenet_data.py b/cv/classification/fasternet/pytorch/data/custom_imagenet_data.py new file mode 100644 index 0000000000000000000000000000000000000000..5ed8ed9c2466b81c17a9d00b791cd22b539a42f6 --- /dev/null +++ b/cv/classification/fasternet/pytorch/data/custom_imagenet_data.py @@ -0,0 +1,237 @@ +# type: ignore[override] +import os +from typing import Any, Callable, Optional + +import torch +from pytorch_lightning import LightningDataModule +from torch.utils.data import DataLoader +from torchvision import datasets +from utils.utils import * + +from pl_bolts.datasets import UnlabeledImagenet +from pl_bolts.transforms.dataset_normalizations import imagenet_normalization +from pl_bolts.utils import _TORCHVISION_AVAILABLE +from pl_bolts.utils.warnings import warn_missing_pkg + +from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from timm.data import create_transform + +if _TORCHVISION_AVAILABLE: + from torchvision import transforms +else: # pragma: no cover + warn_missing_pkg("torchvision") + + +class custom_ImagenetDataModule(LightningDataModule): + """ + The train set is the imagenet train. + The val/test set are the official imagenet validation set. + + """ + + name = "imagenet" + + def __init__( + self, + data_dir: str, + meta_dir: Optional[str] = None, + image_size: int = 224, + num_workers: int = 0, + batch_size: int = 32, + batch_size_eva: int = 32, + # dist_eval: bool = True, + pin_memory: bool = True, + drop_last: bool = False, + train_transforms_multi_scale = None, + scaling_epoch = None, + *args: Any, + **kwargs: Any, + ) -> None: + """ + Args: + data_dir: path to the imagenet dataset file + meta_dir: path to meta.bin file + image_size: final image size + num_workers: how many data workers + batch_size: batch_size + pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before + returning them + drop_last: If true drops the last incomplete batch + """ + super().__init__(*args, **kwargs) + + if not _TORCHVISION_AVAILABLE: # pragma: no cover + raise ModuleNotFoundError( + "You want to use ImageNet dataset loaded from `torchvision` which is not installed yet." + ) + + self.image_size = image_size + self.dims = (3, self.image_size, self.image_size) + self.data_dir = data_dir + self.num_workers = num_workers + self.meta_dir = meta_dir + self.batch_size = batch_size + self.batch_size_eva = batch_size_eva + self.pin_memory = pin_memory + self.drop_last = drop_last + self.num_samples = 1281167 + self.num_tasks = get_world_size() + self.global_rank = get_rank() + self.train_transforms_multi_scale = train_transforms_multi_scale + self.scaling_epoch = scaling_epoch + # self.dist_eval = dist_eval + + @property + def num_classes(self) -> int: + return 1000 + + def _verify_splits(self, data_dir: str, split: str) -> None: + dirs = os.listdir(data_dir) + + if split not in dirs: + raise FileNotFoundError( + f"a {split} Imagenet split was not found in {data_dir}," + f" make sure the folder contains a subfolder named {split}" + ) + + def prepare_data(self) -> None: + """This method already assumes you have imagenet2012 downloaded. It validates the data using the meta.bin. + + .. 
warning:: Please download imagenet on your own first. + To get imagenet: + 1. download yourself from http://www.image-net.org/challenges/LSVRC/2012/downloads + 2. download the devkit (ILSVRC2012_devkit_t12.tar.gz) + """ + self._verify_splits(self.data_dir, "train") + self._verify_splits(self.data_dir, "val") + + def setup(self, stage: Optional[str] = None) -> None: + """Creates train, val, and test dataset.""" + if stage == "fit" or stage is None: + train_transforms = self.train_transform() if self.train_transforms is None else self.train_transforms + val_transforms = self.val_transform() if self.val_transforms is None else self.val_transforms + + self.dataset_train = datasets.ImageFolder(os.path.join(self.data_dir, 'train'), transform=train_transforms) + self.dataset_val = datasets.ImageFolder(os.path.join(self.data_dir, 'val'), transform=val_transforms) + + if self.train_transforms_multi_scale is not None: + self.dataset_train_multi_scale = datasets.ImageFolder(os.path.join(self.data_dir, 'train'), + transform=self.train_transforms_multi_scale) + else: + self.dataset_train_multi_scale = None + + if stage == "test" or stage is None: + val_transforms = self.val_transform() if self.val_transforms is None else self.val_transforms + self.dataset_test = datasets.ImageFolder(os.path.join(self.data_dir, 'val'), transform=val_transforms) + + def train_dataloader(self) -> DataLoader: + if self.dataset_train_multi_scale is not None and \ + self.trainer.current_epoch < self.scaling_epoch: + dataset = self.dataset_train_multi_scale + print("load dataset_train_multi_scale") + else: + dataset = self.dataset_train + print("load dataset_train") + + loader: DataLoader = DataLoader( + dataset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers, + drop_last=self.drop_last, + pin_memory=self.pin_memory + ) + return loader + + def val_dataloader(self) -> DataLoader: + loader: DataLoader = DataLoader( + self.dataset_val, + batch_size=self.batch_size_eva, + # persistent_workers=True, + shuffle=False, + num_workers=self.num_workers, + drop_last=False, + pin_memory=self.pin_memory + ) + return loader + + def test_dataloader(self) -> DataLoader: + """Uses the validation split of imagenet2012 for testing.""" + loader: DataLoader = DataLoader( + self.dataset_test, + batch_size=self.batch_size_eva, + # persistent_workers=True, + shuffle=False, + num_workers=self.num_workers, + drop_last=False, + pin_memory=self.pin_memory + ) + return loader + + def train_transform(self) -> Callable: + preprocessing = transforms.Compose( + [ + transforms.RandomResizedCrop(self.image_size), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + imagenet_normalization(), + ] + ) + + return preprocessing + + def val_transform(self) -> Callable: + + preprocessing = transforms.Compose( + [ + transforms.Resize(self.image_size + 32), + transforms.CenterCrop(self.image_size), + transforms.ToTensor(), + imagenet_normalization(), + ] + ) + return preprocessing + + +def build_imagenet_transform(is_train, args, image_size): + resize_im = image_size > 32 + if is_train: + # this should always dispatch to transforms_imagenet_train + transform = create_transform( + input_size=image_size, + is_training=True, + # use_prefetcher=args.use_prefetcher, + color_jitter=args.color_jitter, + auto_augment=args.aa, + interpolation=args.train_interpolation, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + ) + if not resize_im: + # replace RandomResizedCropAndInterpolation with + # RandomCrop + 
transform.transforms[0] = transforms.RandomCrop( + image_size, padding=4) + return transform + + t = [] + if resize_im: + # warping (no cropping) when evaluated at 384 or larger + if image_size >= 384: + t.append( + transforms.Resize((image_size, image_size), + interpolation=transforms.InterpolationMode.BICUBIC), + ) + print(f"Warping {image_size} size input images...") + else: + # size = int((256 / 224) * image_size) + size = int(1.0*image_size/args.test_crop_ratio) + t.append( + transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC), # to maintain same ratio w.r.t. 224 images + ) + t.append(transforms.CenterCrop(image_size)) + + t.append(transforms.ToTensor()) + t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) + return transforms.Compose(t) \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/data/data_api.py b/cv/classification/fasternet/pytorch/data/data_api.py new file mode 100644 index 0000000000000000000000000000000000000000..082c5a27cdf1217490e36f7642d2ce2473c1c450 --- /dev/null +++ b/cv/classification/fasternet/pytorch/data/data_api.py @@ -0,0 +1,47 @@ +import os +import sys +import inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(currentdir) +sys.path.insert(0, parentdir) + +from utils.utils import str2list +from data.custom_imagenet_data import custom_ImagenetDataModule +from data.custom_imagenet_data import build_imagenet_transform + + +__all__ = ['LitDataModule'] + + +def LitDataModule(hparams): + dm =None + CLASS_NAMES = None + drop_last = True + bs = hparams.batch_size + bs_eva = hparams.batch_size_eva + n_gpus = len(str2list(hparams.gpus)) if hparams.gpus is not None else hparams.devices + n_nodes = hparams.num_nodes + batch_size = int(1.0*bs / n_gpus / n_nodes) if hparams.strategy == 'ddp' else bs + batch_size_eva = int(1.0*bs_eva / n_gpus / n_nodes) if hparams.strategy == 'ddp' else bs_eva + + if hparams.dataset_name == "imagenet": + dm = custom_ImagenetDataModule( + image_size=hparams.image_size, + data_dir=hparams.data_dir, + train_transforms=build_imagenet_transform(is_train=True, args=hparams, image_size=hparams.image_size), + train_transforms_multi_scale=None if hparams.multi_scale is None else build_imagenet_transform( + is_train=True, args=hparams, image_size=int(hparams.multi_scale.split('_')[0])), + scaling_epoch=None if hparams.multi_scale is None else int(hparams.multi_scale.split('_')[1]), + val_transforms=build_imagenet_transform(is_train=False, args=hparams, image_size=hparams.image_size), + num_workers=hparams.num_workers, + pin_memory=hparams.pin_memory, + # dist_eval= True if len(str2list(hparams.gpus))>1 else False, + batch_size=batch_size, + batch_size_eva=batch_size_eva, + drop_last=drop_last + ) + else: + print("Invalid dataset name, exiting...") + exit() + + return dm, CLASS_NAMES \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/detection/README.MD b/cv/classification/fasternet/pytorch/detection/README.MD new file mode 100644 index 0000000000000000000000000000000000000000..fb0355c053b19ce4877e8826db7237718a1b7f53 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/README.MD @@ -0,0 +1,75 @@ +# COCO Object detection with FasterNet + +## Dependency Setup + +After setting up dependency for [Image Classification](https://github.com/JierunChen/FasterNet), install the following packages +``` +pip install mmcv-full==1.6.0 -f 
https://download.openmmlab.com/mmcv/dist/cu113/torch1.11.0/index.html +pip install mmdet==2.25.0 +``` + +## Data Preparation + +Prepare COCO according to the guidelines in [MMDetection v2.25.0](https://mmdetection.readthedocs.io/en/v2.25.0/). + +## Results and models on COCO + +| Method | Backbone | Pretrain | Lr schd | Aug | box AP | mask AP | Config | Download | |------------|----------|-------------|:-------:|:---:|:------:|:-------:|------------------------------------------------------|----------| | Mask R-CNN | FasterNet-S | ImageNet-1K | 1x | No | 39.9 | 36.9 | [config](configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py) | [log](https://github.com/JierunChen/FasterNet/releases/download/v1.0/mask_rcnn_fasternet_s_fpn_1x_coco_20221111_063428.log) & [model](https://github.com/JierunChen/FasterNet/releases/download/v1.0/mask_rcnn_fasternet_s_fpn_1x_coco_20221111_063419.pth) | | Mask R-CNN | FasterNet-M | ImageNet-1K | 1x | No | 43.0 | 39.1 | [config](configs/fasternet/mask_rcnn_fasternet_m_fpn_1x_coco.py) | [log](https://github.com/JierunChen/FasterNet/releases/download/v1.0/mask_rcnn_fasternet_m_fpn_1x_coco_20221107_124415.log) & [model](https://github.com/JierunChen/FasterNet/releases/download/v1.0/mask_rcnn_fasternet_m_fpn_1x_coco_20221107_124408.pth) | | Mask R-CNN | FasterNet-L | ImageNet-1K | 1x | No | 44.0 | 39.9 | [config](configs/fasternet/mask_rcnn_fasternet_l_fpn_1x_coco.py) | [log](https://github.com/JierunChen/FasterNet/releases/download/v1.0/mask_rcnn_fasternet_l_fpn_1x_coco_20221107_004515.log) & [model](https://github.com/JierunChen/FasterNet/releases/download/v1.0/mask_rcnn_fasternet_l_fpn_1x_coco_20221107_004433.pth) | + +## Evaluation + +To evaluate FasterNet-S + Mask R-CNN on COCO val2017 on a single node with 8 GPUs, run +``` +bash ./dist_test.sh configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py \ + ckpts/mask_rcnn_fasternet_s_fpn_1x_coco_20221111_063419.pth 8 --eval bbox segm +``` + +To measure its FLOPs, run +``` +python get_flops.py configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py +``` + +To measure its throughput on a GPU, run +``` +CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --nproc_per_node=1 --master_port=29500 \ +benchmark.py configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py \ +ckpts/mask_rcnn_fasternet_s_fpn_1x_coco_20221111_063419.pth --launcher pytorch --fuse-conv-bn +``` + +**Note**: There is an issue related to throughput measurement in the [paper v1](https://arxiv.org/abs/2303.03667). +Although it does not affect the conclusion that FasterNet achieves higher accuracy-latency efficiency, we clarify the following (a short code sketch of the two forward types follows this note): + +- PConv and FasterNet use the `"slicing"` type for faster inference and throughput measurement. +However, it implicitly modifies the shortcut, making the computation inconsistent with the `"split_cat"` type. +To fix this, we may either + - clone the input via `x = x.clone()` before applying the partial convolution, but this introduces additional latency and can defeat the benefit of using `"slicing"` over `"split_cat"`, or + - move the shortcut after the PConv operator, which resolves the issue and is likely to maintain the effectiveness. The modified models are being retrained and will be released once finished.
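To make the inconsistency concrete, below is a minimal, hedged sketch of the two forward types. The function names, shapes, and demo values are ours, not the repo's code; in this code base the variant is selected via the `pconv_fw_type` argument (see `backbones/fasternet.py`, with the real implementation in `models/fasternet.py`).

```
# Illustrative sketch of the "slicing" vs "split_cat" inconsistency.
import torch
import torch.nn as nn

dim, n_div = 64, 4
dim_conv = dim // n_div
conv = nn.Conv2d(dim_conv, dim_conv, 3, 1, 1, bias=False)

def forward_split_cat(x):
    # Leaves the input untouched, so a shortcut taken on x stays valid.
    x1, x2 = torch.split(x, [dim_conv, dim - dim_conv], dim=1)
    return torch.cat((conv(x1), x2), dim=1)

@torch.no_grad()  # the in-place variant is intended for inference only
def forward_slicing(x):
    # Overwrites the first dim/n_div channels of x in place.
    x[:, :dim_conv] = conv(x[:, :dim_conv])
    return x

with torch.no_grad():
    x = torch.randn(1, dim, 32, 32)
    out_ref = forward_split_cat(x.clone()) + x.clone()  # shortcut sees the original x
    out_slc = forward_slicing(x) + x                    # shortcut sees the mutated x
    print(torch.allclose(out_ref, out_slc))             # False: the two variants diverge
```

Cloning the input before the in-place write restores equality, at the cost of the extra copy described in the first option above.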
+ + +## Training + +To train FasterNet-T0 + Mask R-CNN on COCO train2017 on a single node with 8 GPUs for 12 epochs, run +``` +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ./dist_train.sh \ +configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py 8 \ +--work-dir work_dirs/mask_rcnn_fasternet_s_fpn_1x_coco/$(date +'%Y%m%d_%H%M%S') \ +--cfg-options model.pretrained=../model_ckpt/fasternet_t0-epoch=281-val_acc1=71.9180.pth +``` + +## Acknowledgement +This repository is mainly based on the [mmdetection](https://github.com/open-mmlab/mmdetection) library. + +## Citation +If you find this repository helpful, please consider citing: +``` +@article{chen2023run, + title={Run, Don't Walk: Chasing Higher FLOPS for Faster Neural Networks}, + author={Chen, Jierun and Kao, Shiu-hong and He, Hao and Zhuo, Weipeng and Wen, Song and Lee, Chul-Ho and Chan, S-H Gary}, + journal={arXiv preprint arXiv:2303.03667}, + year={2023} +} +``` diff --git a/cv/classification/fasternet/pytorch/detection/backbones/__init__.py b/cv/classification/fasternet/pytorch/detection/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2249d1d23094aeb622fab47e8250fc88b8f12a2e --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/backbones/__init__.py @@ -0,0 +1 @@ +from .fasternet import * \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/detection/backbones/fasternet.py b/cv/classification/fasternet/pytorch/detection/backbones/fasternet.py new file mode 100644 index 0000000000000000000000000000000000000000..a8ab981c9200548ab2ff44c154bb7a901c404928 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/backbones/fasternet.py @@ -0,0 +1,59 @@ +from mmdet.models.builder import BACKBONES as det_BACKBONES +import os +import sys +import inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parent_parentdir = os.path.dirname(os.path.dirname(currentdir)) +sys.path.insert(0, parent_parentdir) +from models.fasternet import FasterNet + + +@det_BACKBONES.register_module() +def fasternet_s(**kwargs): + model = FasterNet( + mlp_ratio=2.0, + embed_dim=128, + depths=(1, 2, 13, 2), + drop_path_rate=0.15, + act_layer='RELU', + fork_feat=True, + pconv_fw_type='split_cat', + # pconv_fw_type='slicing', # TODO: move the residual connection after the pconv, retrain the model, and then use slicing for inference + **kwargs + ) + + return model + + +@det_BACKBONES.register_module() +def fasternet_m(**kwargs): + model = FasterNet( + mlp_ratio=2.0, + embed_dim=144, + depths=(3, 4, 18, 3), + drop_path_rate=0.2, + act_layer='RELU', + fork_feat=True, + pconv_fw_type='split_cat', + # pconv_fw_type='slicing', # TODO: move the residual connection after the pconv, retrain the model, and then use slicing for inference + **kwargs + ) + + return model + + +@det_BACKBONES.register_module() +def fasternet_l(**kwargs): + model = FasterNet( + mlp_ratio=2.0, + embed_dim=192, + depths=(3, 4, 18, 3), + drop_path_rate=0.3, + act_layer='RELU', + fork_feat=True, + pconv_fw_type='split_cat', + # pconv_fw_type='slicing', # TODO: move the residual connection after the pconv, retrain the model, and then use slicing for inference + **kwargs + ) + + return model diff --git a/cv/classification/fasternet/pytorch/detection/benchmark.py b/cv/classification/fasternet/pytorch/detection/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..d1629070d97baafcc96af76d1c19320eb5db4e3d --- /dev/null +++ 
b/cv/classification/fasternet/pytorch/detection/benchmark.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import copy +import os +import time + +import torch +from mmcv import Config, DictAction +from mmcv.cnn import fuse_conv_bn +from mmcv.parallel import MMDistributedDataParallel +from mmcv.runner import init_dist, load_checkpoint, wrap_fp16_model + +from mmdet.datasets import (build_dataloader, build_dataset, + replace_ImageToTensor) +from mmdet.models import build_detector +from mmdet.utils import replace_cfg_vals, update_data_root + +import backbones + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMDet benchmark a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--repeat-num', + type=int, + default=1, + help='number of repeat times of measurement for averaging the results') + parser.add_argument( + '--max-iter', type=int, default=2000, help='num of max iter') + parser.add_argument( + '--log-interval', type=int, default=50, help='interval of logging') + parser.add_argument( + '--fuse-conv-bn', + action='store_true', + help='Whether to fuse conv and bn, this will slightly increase' + 'the inference speed') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + return args + + +def measure_inference_speed(cfg, checkpoint, max_iter, log_interval, + is_fuse_conv_bn): + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + # build the dataloader + samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) + if samples_per_gpu > 1: + # Replace 'ImageToTensor' to 'DefaultFormatBundle' + cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) + dataset = build_dataset(cfg.data.test) + data_loader = build_dataloader( + dataset, + samples_per_gpu=1, + # Because multiple processes will occupy additional CPU resources, + # FPS statistics will be more unstable when workers_per_gpu is not 0. + # It is reasonable to set workers_per_gpu to 0. 
+ workers_per_gpu=0, + dist=True, + shuffle=False) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + load_checkpoint(model, checkpoint, map_location='cpu') + if is_fuse_conv_bn: + model = fuse_conv_bn(model) + + model = MMDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False) + model.eval() + + # the first several iterations may be very slow so skip them + num_warmup = 5 + pure_inf_time = 0 + fps = 0 + + # benchmark with 2000 image and take the average + for i, data in enumerate(data_loader): + + torch.cuda.synchronize() + start_time = time.perf_counter() + + with torch.no_grad(): + model(return_loss=False, rescale=True, **data) + + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= num_warmup: + pure_inf_time += elapsed + if (i + 1) % log_interval == 0: + fps = (i + 1 - num_warmup) / pure_inf_time + print( + f'Done image [{i + 1:<3}/ {max_iter}], ' + f'fps: {fps:.1f} img / s, ' + f'times per image: {1000 / fps:.1f} ms / img', + flush=True) + + if (i + 1) == max_iter: + fps = (i + 1 - num_warmup) / pure_inf_time + print( + f'Overall fps: {fps:.1f} img / s, ' + f'times per image: {1000 / fps:.1f} ms / img', + flush=True) + break + return fps + + +def repeat_measure_inference_speed(cfg, + checkpoint, + max_iter, + log_interval, + is_fuse_conv_bn, + repeat_num=1): + assert repeat_num >= 1 + + fps_list = [] + + for _ in range(repeat_num): + # + cp_cfg = copy.deepcopy(cfg) + + fps_list.append( + measure_inference_speed(cp_cfg, checkpoint, max_iter, log_interval, + is_fuse_conv_bn)) + + if repeat_num > 1: + fps_list_ = [round(fps, 1) for fps in fps_list] + times_pre_image_list_ = [round(1000 / fps, 1) for fps in fps_list] + mean_fps_ = sum(fps_list_) / len(fps_list_) + mean_times_pre_image_ = sum(times_pre_image_list_) / len( + times_pre_image_list_) + print( + f'Overall fps: {fps_list_}[{mean_fps_:.1f}] img / s, ' + f'times per image: ' + f'{times_pre_image_list_}[{mean_times_pre_image_:.1f}] ms / img', + flush=True) + return fps_list + + return fps_list[0] + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMDET_DATASETS + update_data_root(cfg) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + if args.launcher == 'none': + raise NotImplementedError('Only supports distributed mode') + else: + init_dist(args.launcher, **cfg.dist_params) + + repeat_measure_inference_speed(cfg, args.checkpoint, args.max_iter, + args.log_interval, args.fuse_conv_bn, + args.repeat_num) + + +if __name__ == '__main__': + main() diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/datasets/coco_detection.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/datasets/coco_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..149f590bb45fa65c29fd4c005e4a237d7dd2e117 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/datasets/coco_detection.py @@ -0,0 +1,49 @@ +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + 
dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/datasets/coco_instance.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/datasets/coco_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..9901a858414465d19d8ec6ced316b460166176b4 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/datasets/coco_instance.py @@ -0,0 +1,49 @@ +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/default_runtime.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/default_runtime.py new file mode 100644 index 
0000000000000000000000000000000000000000..5b0b1452c0a625e331be7b1e6c5cf341cc91ff64 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/default_runtime.py @@ -0,0 +1,27 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +custom_hooks = [dict(type='NumClassCheckHook')] + +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] + +# disable opencv multithreading to avoid system being overloaded +opencv_num_threads = 0 +# set multi-process start method as `fork` to speed up the training +mp_start_method = 'fork' + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/models/mask_rcnn_r50_fpn.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/models/mask_rcnn_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..d903e55e2d95135b1448e566d4d5ec8146597a6a --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/models/mask_rcnn_r50_fpn.py @@ -0,0 +1,120 @@ +# model settings +model = dict( + type='MaskRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + 
pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_1x.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..13b3783cbbe93b6c32bc415dc50f633dffa4aec7 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_1x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_20e.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_20e.py new file mode 100644 index 0000000000000000000000000000000000000000..00e859022156dcbef6501c04d03f335639f2c1f6 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_20e.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_2x.py b/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_2x.py new file mode 100644 index 0000000000000000000000000000000000000000..69dc9ee8080649ce3646b5775b0ca2e9c863d0f5 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/_base_/schedules/schedule_2x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_l_fpn_1x_coco.py b/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_l_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1c2134bc434582301a87c53fa7f7761677664715 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_l_fpn_1x_coco.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + 
'../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# optimizer +model = dict( + backbone=dict( + type='fasternet_l', + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='../model_ckpt/fasternet_l-epoch=299-val_acc1=83.5060.pth', + ), + # init_cfg=None, + ), + neck=dict(in_channels=[192, 384, 768, 1536])) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) diff --git a/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_m_fpn_1x_coco.py b/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_m_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..20a896271c16183be4b4171362e296fd1755b99d --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_m_fpn_1x_coco.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# optimizer +model = dict( + backbone=dict( + type='fasternet_m', + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='../model_ckpt/fasternet_m-epoch=291-val_acc1=82.9620.pth', + ), + # init_cfg=None, + ), + neck=dict(in_channels=[144, 288, 576, 1152])) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) diff --git a/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py b/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..f5f84c30a43545b042af7a750a78ecbfa0ad1f9a --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/configs/fasternet/mask_rcnn_fasternet_s_fpn_1x_coco.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] + +# optimizer +model = dict( + backbone=dict( + type='fasternet_s', + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='../model_ckpt/fasternet_s-epoch=299-val_acc1=81.2840.pth', + ), + # init_cfg=None, + ), + neck=dict(in_channels=[128, 256, 512, 1024])) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) diff --git a/cv/classification/fasternet/pytorch/detection/dist_test.sh b/cv/classification/fasternet/pytorch/detection/dist_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..dea131b43ea8f1222661d20603d40c18ea7f28a1 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/dist_test.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CONFIG=$1 +CHECKPOINT=$2 +GPUS=$3 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/test.py \ + $CONFIG \ + $CHECKPOINT \ + --launcher pytorch \ + ${@:4} diff --git a/cv/classification/fasternet/pytorch/detection/dist_train.sh 
b/cv/classification/fasternet/pytorch/detection/dist_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..aa71bf4ae988f5e50f20e4939d178fd2b1355439 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/dist_train.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CONFIG=$1 +GPUS=$2 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/train.py \ + $CONFIG \ + --seed 0 \ + --launcher pytorch ${@:3} diff --git a/cv/classification/fasternet/pytorch/detection/get_flops.py b/cv/classification/fasternet/pytorch/detection/get_flops.py new file mode 100644 index 0000000000000000000000000000000000000000..8b99dfd30b7adeb98d91c282a79951c35c00bb34 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/get_flops.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import numpy as np +import torch +from mmcv import Config, DictAction +from mmdet.models import build_detector + +try: + from mmcv.cnn import get_model_complexity_info +except ImportError: + raise ImportError('Please upgrade mmcv to >0.6.2') +import backbones + + +@torch.no_grad() +def get_complexity_info(model, input_size): + model.eval() + + # using ptflops + macs, params = get_model_complexity_info(model, (3, input_size, input_size), as_strings=True, print_per_layer_stat=False, verbose=False) + + print('{:<30} {:<8}'.format('Computational complexity: ', macs)) + print('{:<30} {:<8}'.format('Number of parameters: ', params)) + macs = float(macs[:-5]) + params = float(params[:-2]) + + return macs, params + + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a detector') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[1280, 800], + help='input image size') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--size-divisor', + type=int, + default=32, + help='Pad the input image, the minimum size that is divisible ' + 'by size_divisor, -1 means do not pad the image.') + args = parser.parse_args() + return args + + +def main(): + + args = parse_args() + + if len(args.shape) == 1: + h = w = args.shape[0] + elif len(args.shape) == 2: + h, w = args.shape + else: + raise ValueError('invalid input shape') + ori_shape = (3, h, w) + divisor = args.size_divisor + if divisor > 0: + h = int(np.ceil(h / divisor)) * divisor + w = int(np.ceil(w / divisor)) * divisor + + input_shape = (3, h, w) + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + model = build_detector( + cfg.model, + train_cfg=cfg.get('train_cfg'), + test_cfg=cfg.get('test_cfg')) + if torch.cuda.is_available(): + model.cuda() + model.eval() + + if hasattr(model, 'forward_dummy'): + model.forward = model.forward_dummy + else: + raise NotImplementedError( + 'FLOPs counter is currently not currently supported with {}'. + format(model.__class__.__name__)) + + flops, params = get_model_complexity_info(model, input_shape) + + split_line = '=' * 30 + + if divisor > 0 and \ + input_shape != ori_shape: + print(f'{split_line}\nUse size divisor set input shape ' + f'from {ori_shape} to {input_shape}\n') + print(f'{split_line}\nInput shape: {input_shape}\n' + f'Flops: {flops}\nParams: {params}\n{split_line}') + print('!!!Please be cautious if you use the results in papers. ' + 'You may need to check if all ops are supported and verify that the ' + 'flops computation is correct.') + + +if __name__ == '__main__': + main() diff --git a/cv/classification/fasternet/pytorch/detection/test.py b/cv/classification/fasternet/pytorch/detection/test.py new file mode 100644 index 0000000000000000000000000000000000000000..d9c5c370632f3cadd9bc2865a996d07644ae13ed --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/test.py @@ -0,0 +1,276 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import os +import os.path as osp +import time +import warnings + +import mmcv +import torch +from mmcv import Config, DictAction +from mmcv.cnn import fuse_conv_bn +from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, + wrap_fp16_model) + +from mmdet.apis import multi_gpu_test, single_gpu_test +from mmdet.datasets import (build_dataloader, build_dataset, + replace_ImageToTensor) +from mmdet.models import build_detector +from mmdet.utils import (build_ddp, build_dp, compat_cfg, get_device, + replace_cfg_vals, setup_multi_processes, + update_data_root) +import backbones + + +def parse_args(): + parser = argparse.ArgumentParser( + description='MMDet test (and eval) a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--work-dir', + help='the directory to save the file containing evaluation metrics') + parser.add_argument('--out', help='output result file in pickle format') + parser.add_argument( + '--fuse-conv-bn', + action='store_true', + help='Whether to fuse conv and bn, this will slightly increase' + 'the inference speed') + parser.add_argument( + '--gpu-ids', + type=int, + nargs='+', + help='(Deprecated, please use --gpu-id) ids of gpus to use ' + '(only applicable to non-distributed training)') + parser.add_argument( + '--gpu-id', + type=int, + default=0, + help='id of gpu to use ' + '(only applicable to non-distributed testing)') + parser.add_argument( + '--format-only', + action='store_true', + help='Format the output results without perform evaluation. It is' + 'useful when you want to format the result to a specific format and ' + 'submit it to the test server') + parser.add_argument( + '--eval', + type=str, + nargs='+', + help='evaluation metrics, which depends on the dataset, e.g., "bbox",' + ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') + parser.add_argument('--show', action='store_true', help='show results') + parser.add_argument( + '--show-dir', help='directory where painted images will be saved') + parser.add_argument( + '--show-score-thr', + type=float, + default=0.3, + help='score threshold (default: 0.3)') + parser.add_argument( + '--gpu-collect', + action='store_true', + help='whether to use gpu to collect results.') + parser.add_argument( + '--tmpdir', + help='tmp directory used for collecting results from multiple ' + 'workers, available when gpu-collect is not specified') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help='custom options for evaluation, the key-value pair in xxx=yyy ' + 'format will be kwargs for dataset.evaluate() function (deprecate), ' + 'change to --eval-options instead.') + parser.add_argument( + '--eval-options', + nargs='+', + action=DictAction, + help='custom options for evaluation, the key-value pair in xxx=yyy ' + 'format will be kwargs for dataset.evaluate() function') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + if args.options and args.eval_options: + raise ValueError( + '--options and --eval-options cannot be both ' + 'specified, --options is deprecated in favor of --eval-options') + if args.options: + warnings.warn('--options is deprecated in favor of --eval-options') + args.eval_options = args.options + return args + + +def main(): + args = parse_args() + + assert args.out or args.eval or args.format_only or args.show \ + or args.show_dir, \ + ('Please specify at least one operation (save/eval/format/show the ' + 'results / save the results) with the argument "--out", "--eval"' + ', "--format-only", "--show" or "--show-dir"') + + if args.eval and args.format_only: + raise ValueError('--eval and --format_only cannot be both specified') + + if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): + raise ValueError('The output file must be a pkl file.') + + cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMDET_DATASETS + update_data_root(cfg) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + cfg = compat_cfg(cfg) + + # set multi-process settings + setup_multi_processes(cfg) + + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + + if 'pretrained' in cfg.model: + cfg.model.pretrained = None + elif 'init_cfg' in cfg.model.backbone: + cfg.model.backbone.init_cfg = None + + if cfg.model.get('neck'): + if isinstance(cfg.model.neck, list): + for neck_cfg in cfg.model.neck: + if neck_cfg.get('rfp_backbone'): + if neck_cfg.rfp_backbone.get('pretrained'): + neck_cfg.rfp_backbone.pretrained = None + elif cfg.model.neck.get('rfp_backbone'): + if cfg.model.neck.rfp_backbone.get('pretrained'): + cfg.model.neck.rfp_backbone.pretrained = None + + if args.gpu_ids is not None: + cfg.gpu_ids = args.gpu_ids[0:1] + warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' + 'Because we only support single GPU mode in ' + 'non-distributed testing. Use the first GPU ' + 'in `gpu_ids` now.') + else: + cfg.gpu_ids = [args.gpu_id] + cfg.device = get_device() + # init distributed env first, since logger depends on the dist info. 
+ if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + test_dataloader_default_args = dict( + samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False) + + # in case the test dataset is concatenated + if isinstance(cfg.data.test, dict): + cfg.data.test.test_mode = True + if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1: + # Replace 'ImageToTensor' to 'DefaultFormatBundle' + cfg.data.test.pipeline = replace_ImageToTensor( + cfg.data.test.pipeline) + elif isinstance(cfg.data.test, list): + for ds_cfg in cfg.data.test: + ds_cfg.test_mode = True + if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1: + for ds_cfg in cfg.data.test: + ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline) + + test_loader_cfg = { + **test_dataloader_default_args, + **cfg.data.get('test_dataloader', {}) + } + + rank, _ = get_dist_info() + # allows not to create + if args.work_dir is not None and rank == 0: + mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + json_file = osp.join(args.work_dir, f'eval_{timestamp}.json') + + # build the dataloader + dataset = build_dataset(cfg.data.test) + data_loader = build_dataloader(dataset, **test_loader_cfg) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') + if args.fuse_conv_bn: + model = fuse_conv_bn(model) + # old versions did not save class info in checkpoints, this walkaround is + # for backward compatibility + if 'CLASSES' in checkpoint.get('meta', {}): + model.CLASSES = checkpoint['meta']['CLASSES'] + else: + model.CLASSES = dataset.CLASSES + + if not distributed: + model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) + outputs = single_gpu_test(model, data_loader, args.show, args.show_dir, + args.show_score_thr) + else: + model = build_ddp( + model, + cfg.device, + device_ids=[int(os.environ['LOCAL_RANK'])], + broadcast_buffers=False) + outputs = multi_gpu_test( + model, data_loader, args.tmpdir, args.gpu_collect + or cfg.evaluation.get('gpu_collect', False)) + + rank, _ = get_dist_info() + if rank == 0: + if args.out: + print(f'\nwriting results to {args.out}') + mmcv.dump(outputs, args.out) + kwargs = {} if args.eval_options is None else args.eval_options + if args.format_only: + dataset.format_results(outputs, **kwargs) + if args.eval: + eval_kwargs = cfg.get('evaluation', {}).copy() + # hard-code way to remove EvalHook args + for key in [ + 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', + 'rule', 'dynamic_intervals' + ]: + eval_kwargs.pop(key, None) + eval_kwargs.update(dict(metric=args.eval, **kwargs)) + metric = dataset.evaluate(outputs, **eval_kwargs) + print(metric) + metric_dict = dict(config=args.config, metric=metric) + if args.work_dir is not None and rank == 0: + mmcv.dump(metric_dict, json_file) + + +if __name__ == '__main__': + main() diff --git a/cv/classification/fasternet/pytorch/detection/train.py b/cv/classification/fasternet/pytorch/detection/train.py new file mode 100644 index 0000000000000000000000000000000000000000..db062a9ad66b99c77eff2827295f7dfefea52209 --- /dev/null +++ b/cv/classification/fasternet/pytorch/detection/train.py @@ -0,0 +1,243 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import copy +import os +import os.path as osp +import time +import warnings + +import mmcv +import torch +import torch.distributed as dist +from mmcv import Config, DictAction +from mmcv.runner import get_dist_info, init_dist +from mmcv.utils import get_git_hash + +from mmdet import __version__ +from mmdet.apis import init_random_seed, set_random_seed, train_detector +from mmdet.datasets import build_dataset +from mmdet.models import build_detector +from mmdet.utils import (collect_env, get_device, get_root_logger, + replace_cfg_vals, setup_multi_processes, + update_data_root) +import backbones + + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a detector') + parser.add_argument('config', help='train config file path') + parser.add_argument('--work-dir', help='the dir to save logs and models') + parser.add_argument( + '--resume-from', help='the checkpoint file to resume from') + parser.add_argument( + '--auto-resume', + action='store_true', + help='resume from the latest checkpoint automatically') + parser.add_argument( + '--no-validate', + action='store_true', + help='whether not to evaluate the checkpoint during training') + group_gpus = parser.add_mutually_exclusive_group() + group_gpus.add_argument( + '--gpus', + type=int, + help='(Deprecated, please use --gpu-id) number of gpus to use ' + '(only applicable to non-distributed training)') + group_gpus.add_argument( + '--gpu-ids', + type=int, + nargs='+', + help='(Deprecated, please use --gpu-id) ids of gpus to use ' + '(only applicable to non-distributed training)') + group_gpus.add_argument( + '--gpu-id', + type=int, + default=0, + help='id of gpu to use ' + '(only applicable to non-distributed training)') + parser.add_argument('--seed', type=int, default=None, help='random seed') + parser.add_argument( + '--diff-seed', + action='store_true', + help='Whether or not set different seeds for different ranks') + parser.add_argument( + '--deterministic', + action='store_true', + help='whether to set deterministic options for CUDNN backend.') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file (deprecate), ' + 'change to --cfg-options instead.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + parser.add_argument( + '--auto-scale-lr', + action='store_true', + help='enable automatically scaling LR.') + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMDET_DATASETS + update_data_root(cfg) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + if args.auto_scale_lr: + if 'auto_scale_lr' in cfg and \ + 'enable' in cfg.auto_scale_lr and \ + 'base_batch_size' in cfg.auto_scale_lr: + cfg.auto_scale_lr.enable = True + else: + warnings.warn('Can not find "auto_scale_lr" or ' + '"auto_scale_lr.enable" or ' + '"auto_scale_lr.base_batch_size" in your' + ' configuration file. Please update all the ' + 'configuration files to mmdet >= 2.24.1.') + + # set multi-process settings + setup_multi_processes(cfg) + + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + + if args.resume_from is not None: + cfg.resume_from = args.resume_from + cfg.auto_resume = args.auto_resume + if args.gpus is not None: + cfg.gpu_ids = range(1) + warnings.warn('`--gpus` is deprecated because we only support ' + 'single GPU mode in non-distributed training. ' + 'Use `gpus=1` now.') + if args.gpu_ids is not None: + cfg.gpu_ids = args.gpu_ids[0:1] + warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' + 'Because we only support single GPU mode in ' + 'non-distributed training. Use the first GPU ' + 'in `gpu_ids` now.') + if args.gpus is None and args.gpu_ids is None: + cfg.gpu_ids = [args.gpu_id] + + # init distributed env first, since logger depends on the dist info. 
+ if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + # re-set gpu_ids with distributed training mode + _, world_size = get_dist_info() + cfg.gpu_ids = range(world_size) + + # create work_dir + mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) + # dump config + cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) + # init the logger before other steps + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + log_file = osp.join(cfg.work_dir, f'{timestamp}.log') + logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) + + # init the meta dict to record some important information such as + # environment info and seed, which will be logged + meta = dict() + # log env info + env_info_dict = collect_env() + env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) + dash_line = '-' * 60 + '\n' + logger.info('Environment info:\n' + dash_line + env_info + '\n' + + dash_line) + meta['env_info'] = env_info + meta['config'] = cfg.pretty_text + # log some basic info + logger.info(f'Distributed training: {distributed}') + logger.info(f'Config:\n{cfg.pretty_text}') + + cfg.device = get_device() + # set random seeds + seed = init_random_seed(args.seed, device=cfg.device) + seed = seed + dist.get_rank() if args.diff_seed else seed + logger.info(f'Set random seed to {seed}, ' + f'deterministic: {args.deterministic}') + set_random_seed(seed, deterministic=args.deterministic) + cfg.seed = seed + meta['seed'] = seed + meta['exp_name'] = osp.basename(args.config) + + model = build_detector( + cfg.model, + train_cfg=cfg.get('train_cfg'), + test_cfg=cfg.get('test_cfg')) + model.init_weights() + + datasets = [build_dataset(cfg.data.train)] + if len(cfg.workflow) == 2: + val_dataset = copy.deepcopy(cfg.data.val) + val_dataset.pipeline = cfg.data.train.pipeline + datasets.append(build_dataset(val_dataset)) + if cfg.checkpoint_config is not None: + # save mmdet version, config file content and class names in + # checkpoints as meta data + cfg.checkpoint_config.meta = dict( + mmdet_version=__version__ + get_git_hash()[:7], + CLASSES=datasets[0].CLASSES) + # add an attribute for visualization convenience + model.CLASSES = datasets[0].CLASSES + train_detector( + model, + datasets, + cfg, + distributed=distributed, + validate=(not args.no_validate), + timestamp=timestamp, + meta=meta) + + +if __name__ == '__main__': + main() diff --git a/cv/classification/fasternet/pytorch/models/__init__.py b/cv/classification/fasternet/pytorch/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..30c63658909e88ef19f837f9ae8228e2ebe24fc3 --- /dev/null +++ b/cv/classification/fasternet/pytorch/models/__init__.py @@ -0,0 +1 @@ +from .registry import * \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/models/fasternet.py b/cv/classification/fasternet/pytorch/models/fasternet.py new file mode 100644 index 0000000000000000000000000000000000000000..5abc16d469b1991cac4a1a4b84d4402dce3e535e --- /dev/null +++ b/cv/classification/fasternet/pytorch/models/fasternet.py @@ -0,0 +1,360 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+import torch +import torch.nn as nn +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from functools import partial +from typing import List +from torch import Tensor +import copy +import os + +try: + from mmdet.models.builder import BACKBONES as det_BACKBONES + from mmdet.utils import get_root_logger + from mmcv.runner import _load_checkpoint + has_mmdet = True +except ImportError: + print("If for detection, please install mmdetection first") + has_mmdet = False + + +class Partial_conv3(nn.Module): + + def __init__(self, dim, n_div, forward): + super().__init__() + self.dim_conv3 = dim // n_div + self.dim_untouched = dim - self.dim_conv3 + self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False) + + if forward == 'slicing': + self.forward = self.forward_slicing + elif forward == 'split_cat': + self.forward = self.forward_split_cat + else: + raise NotImplementedError + + def forward_slicing(self, x: Tensor) -> Tensor: + # only for inference + x = x.clone() # !!! Keep the original input intact for the residual connection later + x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :]) + + return x + + def forward_split_cat(self, x: Tensor) -> Tensor: + # for training/inference + x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1) + x1 = self.partial_conv3(x1) + x = torch.cat((x1, x2), 1) + + return x + + +class MLPBlock(nn.Module): + + def __init__(self, + dim, + n_div, + mlp_ratio, + drop_path, + layer_scale_init_value, + act_layer, + norm_layer, + pconv_fw_type + ): + + super().__init__() + self.dim = dim + self.mlp_ratio = mlp_ratio + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.n_div = n_div + + mlp_hidden_dim = int(dim * mlp_ratio) + + mlp_layer: List[nn.Module] = [ + nn.Conv2d(dim, mlp_hidden_dim, 1, bias=False), + norm_layer(mlp_hidden_dim), + act_layer(), + nn.Conv2d(mlp_hidden_dim, dim, 1, bias=False) + ] + + self.mlp = nn.Sequential(*mlp_layer) + + self.spatial_mixing = Partial_conv3( + dim, + n_div, + pconv_fw_type + ) + + if layer_scale_init_value > 0: + self.layer_scale = nn.Parameter(layer_scale_init_value * torch.ones((dim)), requires_grad=True) + self.forward = self.forward_layer_scale + else: + self.forward = self.forward + + def forward(self, x: Tensor) -> Tensor: + shortcut = x + x = self.spatial_mixing(x) + x = shortcut + self.drop_path(self.mlp(x)) + return x + + def forward_layer_scale(self, x: Tensor) -> Tensor: + shortcut = x + x = self.spatial_mixing(x) + x = shortcut + self.drop_path( + self.layer_scale.unsqueeze(-1).unsqueeze(-1) * self.mlp(x)) + return x + + +class BasicStage(nn.Module): + + def __init__(self, + dim, + depth, + n_div, + mlp_ratio, + drop_path, + layer_scale_init_value, + norm_layer, + act_layer, + pconv_fw_type + ): + + super().__init__() + + blocks_list = [ + MLPBlock( + dim=dim, + n_div=n_div, + mlp_ratio=mlp_ratio, + drop_path=drop_path[i], + layer_scale_init_value=layer_scale_init_value, + norm_layer=norm_layer, + act_layer=act_layer, + pconv_fw_type=pconv_fw_type + ) + for i in range(depth) + ] + + self.blocks = nn.Sequential(*blocks_list) + + def forward(self, x: Tensor) -> Tensor: + x = self.blocks(x) + return x + + +class PatchEmbed(nn.Module): + + def __init__(self, patch_size, patch_stride, in_chans, embed_dim, norm_layer): + super().__init__() + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_stride, bias=False) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + 
self.norm = nn.Identity() + + def forward(self, x: Tensor) -> Tensor: + x = self.norm(self.proj(x)) + return x + + +class PatchMerging(nn.Module): + + def __init__(self, patch_size2, patch_stride2, dim, norm_layer): + super().__init__() + self.reduction = nn.Conv2d(dim, 2 * dim, kernel_size=patch_size2, stride=patch_stride2, bias=False) + if norm_layer is not None: + self.norm = norm_layer(2 * dim) + else: + self.norm = nn.Identity() + + def forward(self, x: Tensor) -> Tensor: + x = self.norm(self.reduction(x)) + return x + + +class FasterNet(nn.Module): + + def __init__(self, + in_chans=3, + num_classes=1000, + embed_dim=96, + depths=(1, 2, 8, 2), + mlp_ratio=2., + n_div=4, + patch_size=4, + patch_stride=4, + patch_size2=2, # for subsequent layers + patch_stride2=2, + patch_norm=True, + feature_dim=1280, + drop_path_rate=0.1, + layer_scale_init_value=0, + norm_layer='BN', + act_layer='RELU', + fork_feat=False, + init_cfg=None, + pretrained=None, + pconv_fw_type='split_cat', + **kwargs): + super().__init__() + + if norm_layer == 'BN': + norm_layer = nn.BatchNorm2d + else: + raise NotImplementedError + + if act_layer == 'GELU': + act_layer = nn.GELU + elif act_layer == 'RELU': + act_layer = partial(nn.ReLU, inplace=True) + else: + raise NotImplementedError + + if not fork_feat: + self.num_classes = num_classes + self.num_stages = len(depths) + self.embed_dim = embed_dim + self.patch_norm = patch_norm + self.num_features = int(embed_dim * 2 ** (self.num_stages - 1)) + self.mlp_ratio = mlp_ratio + self.depths = depths + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, + patch_stride=patch_stride, + in_chans=in_chans, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None + ) + + # stochastic depth decay rule + dpr = [x.item() + for x in torch.linspace(0, drop_path_rate, sum(depths))] + + # build layers + stages_list = [] + for i_stage in range(self.num_stages): + stage = BasicStage(dim=int(embed_dim * 2 ** i_stage), + n_div=n_div, + depth=depths[i_stage], + mlp_ratio=self.mlp_ratio, + drop_path=dpr[sum(depths[:i_stage]):sum(depths[:i_stage + 1])], + layer_scale_init_value=layer_scale_init_value, + norm_layer=norm_layer, + act_layer=act_layer, + pconv_fw_type=pconv_fw_type + ) + stages_list.append(stage) + + # patch merging layer + if i_stage < self.num_stages - 1: + stages_list.append( + PatchMerging(patch_size2=patch_size2, + patch_stride2=patch_stride2, + dim=int(embed_dim * 2 ** i_stage), + norm_layer=norm_layer) + ) + + self.stages = nn.Sequential(*stages_list) + + self.fork_feat = fork_feat + + if self.fork_feat: + self.forward = self.forward_det + # add a norm layer for each output + self.out_indices = [0, 2, 4, 6] + for i_emb, i_layer in enumerate(self.out_indices): + if i_emb == 0 and os.environ.get('FORK_LAST3', None): + raise NotImplementedError + else: + layer = norm_layer(int(embed_dim * 2 ** i_emb)) + layer_name = f'norm{i_layer}' + self.add_module(layer_name, layer) + else: + self.forward = self.forward_cls + # Classifier head + self.avgpool_pre_head = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(self.num_features, feature_dim, 1, bias=False), + act_layer() + ) + self.head = nn.Linear(feature_dim, num_classes) \ + if num_classes > 0 else nn.Identity() + + self.apply(self.cls_init_weights) + self.init_cfg = copy.deepcopy(init_cfg) + if self.fork_feat and (self.init_cfg is not None or pretrained is not None): + self.init_weights() + + def cls_init_weights(self, m): + if isinstance(m, nn.Linear): + 
trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, (nn.Conv1d, nn.Conv2d)): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, (nn.LayerNorm, nn.GroupNorm)): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + # init for mmdetection by loading imagenet pre-trained weights + def init_weights(self, pretrained=None): + logger = get_root_logger() + if self.init_cfg is None and pretrained is None: + logger.warn(f'No pre-trained weights for ' + f'{self.__class__.__name__}, ' + f'training start from scratch') + pass + else: + assert 'checkpoint' in self.init_cfg, f'Only support ' \ + f'specify `Pretrained` in ' \ + f'`init_cfg` in ' \ + f'{self.__class__.__name__} ' + if self.init_cfg is not None: + ckpt_path = self.init_cfg['checkpoint'] + elif pretrained is not None: + ckpt_path = pretrained + + ckpt = _load_checkpoint( + ckpt_path, logger=logger, map_location='cpu') + if 'state_dict' in ckpt: + _state_dict = ckpt['state_dict'] + elif 'model' in ckpt: + _state_dict = ckpt['model'] + else: + _state_dict = ckpt + + state_dict = _state_dict + missing_keys, unexpected_keys = \ + self.load_state_dict(state_dict, False) + + # show for debug + print('missing_keys: ', missing_keys) + print('unexpected_keys: ', unexpected_keys) + + def forward_cls(self, x): + # output only the features of last layer for image classification + x = self.patch_embed(x) + x = self.stages(x) + x = self.avgpool_pre_head(x) # B C 1 1 + x = torch.flatten(x, 1) + x = self.head(x) + + return x + + def forward_det(self, x: Tensor) -> Tensor: + # output the features of four stages for dense prediction + x = self.patch_embed(x) + outs = [] + for idx, stage in enumerate(self.stages): + x = stage(x) + if self.fork_feat and idx in self.out_indices: + norm_layer = getattr(self, f'norm{idx}') + x_out = norm_layer(x) + outs.append(x_out) + + return outs \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/models/model_api.py b/cv/classification/fasternet/pytorch/models/model_api.py new file mode 100644 index 0000000000000000000000000000000000000000..b4021b6abea08285d28dc38494ca7476f284b2c4 --- /dev/null +++ b/cv/classification/fasternet/pytorch/models/model_api.py @@ -0,0 +1,158 @@ +import os +import sys +import inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(currentdir) +sys.path.insert(0, parentdir) + +from torch.optim.lr_scheduler import * +from models import * +from utils.utils import * +import torch +import pytorch_lightning as pl + +from timm.data import Mixup +from timm.models import create_model +from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy +from timm.scheduler import create_scheduler +from timm.optim import create_optimizer +from timm.utils import accuracy +from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR +from utils.loss import DistillationLoss + + +def build_criterion(args): + if args.mixup > 0.: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif args.smoothing: + criterion = LabelSmoothingCrossEntropy(smoothing=args.smoothing) + else: + criterion = torch.nn.CrossEntropyLoss() + return criterion + + +def build_mixup_fn(args, num_classes): + mixup_fn = None + mixup_active = args.mixup > 0 or args.cutmix > 0. 
or args.cutmix_minmax is not None + if mixup_active: + mixup_fn = Mixup( + mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax, + prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode, + label_smoothing=args.smoothing, num_classes=num_classes) + return mixup_fn + + +class LitModel(pl.LightningModule): + def __init__(self, num_classes, hparams): + super().__init__() + + self.save_hyperparameters(hparams) + if 'fasternet' in hparams.model_name: + self.model = create_model( + hparams.model_name, + mlp_ratio=hparams.mlp_ratio, + embed_dim=hparams.embed_dim, + depths=hparams.depths, + pretrained=hparams.pretrained, + n_div=hparams.n_div, + feature_dim=hparams.feature_dim, + patch_size=hparams.patch_size, + patch_stride=hparams.patch_stride, + patch_size2=hparams.patch_size2, + patch_stride2=hparams.patch_stride2, + num_classes=num_classes, + layer_scale_init_value=hparams.layer_scale_init_value, + drop_path_rate=hparams.drop_path_rate, + norm_layer=hparams.norm_layer, + act_layer=hparams.act_layer, + pconv_fw_type=hparams.pconv_fw_type + ) + else: + self.model = create_model( + hparams.model_name, + pretrained=hparams.pretrained, + num_classes=num_classes + ) + + base_criterion = build_criterion(hparams) + self.distillation_type = hparams.distillation_type + if hparams.distillation_type == 'none': + self.criterion = base_criterion + else: + # assert hparams.teacher_path, 'need to specify teacher-path when using distillation' + print(f"Creating teacher model: {hparams.teacher_model}") + teacher_model = create_model( + hparams.teacher_model, + pretrained=True, + num_classes=num_classes, + global_pool='avg', + ) + for param in teacher_model.parameters(): + param.requires_grad = False + teacher_model.eval() + self.criterion = DistillationLoss(base_criterion, + teacher_model, + hparams.distillation_type, + hparams.distillation_alpha, + hparams.distillation_tau + ) + self.criterion_eva = torch.nn.CrossEntropyLoss() + self.mixup_fn = build_mixup_fn(hparams, num_classes) + + def forward(self, x): + return self.model(x) + + def on_train_epoch_start(self): + if self.hparams.multi_scale is not None: + if self.current_epoch == int(self.hparams.multi_scale.split('_')[1]): + # image_size = self.hparams.image_size + self.trainer.reset_train_dataloader(self) + + def _calculate_loss(self, batch, mode="train"): + imgs, labels = batch + if mode == "train" and self.mixup_fn is not None: + imgs, labels = self.mixup_fn(imgs, labels) + preds = self.model(imgs) + + if mode == "train": + if self.distillation_type == 'none': + loss = self.criterion(preds, labels) + else: + loss = self.criterion(imgs, preds, labels) + self.log("%s_loss" % mode, loss) + else: + loss = self.criterion_eva(preds, labels) + acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) + sync_dist = True if torch.cuda.device_count() > 1 else False + self.log("%s_loss" % mode, loss, sync_dist=sync_dist) + self.log("%s_acc1" % mode, acc1, sync_dist=sync_dist) + self.log("%s_acc5" % mode, acc5, sync_dist=sync_dist) + + return loss + + def training_step(self, batch, batch_idx): + loss = self._calculate_loss(batch, mode="train") + return loss + + def validation_step(self, batch, batch_idx): + self._calculate_loss(batch, mode="val") + + def test_step(self, batch, batch_idx): + self._calculate_loss(batch, mode="test") + + def configure_optimizers(self): + optimizer = create_optimizer(self.hparams, self.parameters()) + if self.hparams.sched == 'cosine': + scheduler = LinearWarmupCosineAnnealingLR(optimizer, 
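+                # pl_bolts scheduler: linear warmup from warmup_lr, then cosine decay to min_lr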
+ warmup_epochs=self.hparams.warmup_epochs, + max_epochs=self.hparams.epochs, + warmup_start_lr=self.hparams.warmup_lr, + eta_min=self.hparams.min_lr + ) + else: + # scheduler, _ = create_scheduler(self.hparams, optimizer) + raise NotImplementedError + + return [optimizer], [scheduler] + diff --git a/cv/classification/fasternet/pytorch/models/registry.py b/cv/classification/fasternet/pytorch/models/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..169a6645a68936b1b2871164c6fcd6ef3657897d --- /dev/null +++ b/cv/classification/fasternet/pytorch/models/registry.py @@ -0,0 +1,9 @@ +import torch.nn as nn +from timm.models.registry import register_model +from .fasternet import FasterNet + + +@register_model +def fasternet(**kwargs): + model = FasterNet(**kwargs) + return model \ No newline at end of file diff --git a/cv/classification/fasternet/pytorch/requirements.txt b/cv/classification/fasternet/pytorch/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e02c51aabe2720644ccba80d8ab3834e2c093d5c --- /dev/null +++ b/cv/classification/fasternet/pytorch/requirements.txt @@ -0,0 +1,20 @@ +# conda create -n fasternet python=3.9.12 -y +# conda activate fasternet +# pip install -r requirements.txt + +# ---------------------------------------- +--extra-index-url https://download.pytorch.org/whl/cu113 + +#torch==1.11.0 +#torchvision==0.12.0 +pytorch-lightning==1.6.5 +lightning-bolts==0.5.0 +timm==0.6.5 +wandb>=0.12.21 +matplotlib==3.5.2 +fvcore>=0.1.5.post20220512 + +# ---------------------------------------- +# install the following packages for mmdet +# pip install mmcv-full==1.6.0 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.11.0/index.html +# pip install mmdet==2.25.0 diff --git a/cv/classification/fasternet/pytorch/train_test.py b/cv/classification/fasternet/pytorch/train_test.py new file mode 100644 index 0000000000000000000000000000000000000000..127e13b6c91124565e73321c93b9318eab63c06c --- /dev/null +++ b/cv/classification/fasternet/pytorch/train_test.py @@ -0,0 +1,152 @@ +import os +import sys +import torch +from torch import nn +from argparse import ArgumentParser +import wandb + +import pytorch_lightning as pl +from pytorch_lightning.callbacks import ModelCheckpoint, TQDMProgressBar, StochasticWeightAveraging +from pytorch_lightning.loggers import WandbLogger +from pytorch_lightning import seed_everything +from pytorch_lightning.plugins import DDPPlugin + +from utils.utils import * +from utils.fuse_conv_bn import fuse_conv_bn +from data.data_api import LitDataModule +from models.model_api import LitModel + + +def main(args): + # Init data pipeline + dm, _ = LitDataModule(hparams=args) + + # Init LitModel + if args.checkpoint_path is not None: + PATH = args.checkpoint_path + if PATH[-5:]=='.ckpt': + model = LitModel.load_from_checkpoint(PATH, map_location='cpu', num_classes=dm.num_classes, hparams=args) + print('Successfully load the pl checkpoint file.') + if args.pl_ckpt_2_torch_pth: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = model.model.to(device) + torch.save(model.state_dict(), PATH[:-5]+'.pth') + exit() + elif PATH[-4:] == '.pth': + model = LitModel(num_classes=dm.num_classes, hparams=args) + missing_keys, unexpected_keys = model.model.load_state_dict(torch.load(PATH), False) + # show for debug + print('missing_keys: ', missing_keys) + print('unexpected_keys: ', unexpected_keys) + else: + raise TypeError + else: + model = LitModel(num_classes=dm.num_classes, hparams=args) + 
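+    # model complexity is measured once here (fvcore) and logged to W&B below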
+ flops, params = get_flops_params(model.model, args.image_size) + + if args.fuse_conv_bn: + fuse_conv_bn(model.model) + + if args.measure_latency: + dm.prepare_data() + dm.setup(stage="test") + for idx, (images, _) in enumerate(dm.test_dataloader()): + model = model.model.eval() + throughput, latency = measure_latency(images[:1, :, :, :], model, GPU=False, num_threads=1) + if torch.cuda.is_available(): + throughput, latency = measure_latency(images, model, GPU=True) + exit() + + print_model(model) + + # Callbacks + MONITOR = 'val_acc1' + checkpoint_callback = ModelCheckpoint( + monitor=MONITOR, + dirpath=args.model_ckpt_dir, + filename=args.model_name+'-{epoch}-{val_acc1:.4f}', + save_top_k=1, + save_last=True, + mode='max' if 'acc' in MONITOR else 'min' + ) + refresh_callback = TQDMProgressBar(refresh_rate=20) + callbacks = [ + checkpoint_callback, + refresh_callback + ] + + # Initialize wandb logger + WANDB_ON = True if args.dev+args.test_phase==0 else False + if WANDB_ON: + wandb_logger = WandbLogger( + project=args.wandb_project_name, + save_dir=args.wandb_save_dir, + offline=args.wandb_offline, + log_model=False, + job_type='train') + wandb_logger.log_hyperparams(args) + wandb_logger.log_hyperparams({"flops": flops, "params": params}) + + # Initialize a trainer + find_unused_para = False if args.distillation_type == 'none' else True + trainer = pl.Trainer( + fast_dev_run=args.dev, + logger=wandb_logger if WANDB_ON else None, + max_epochs=args.epochs, + gpus=args.gpus, + accelerator="gpu", + sync_batchnorm=args.sync_batchnorm, + num_nodes=args.num_nodes, + gradient_clip_val=args.clip_grad, + strategy=DDPPlugin(find_unused_parameters=find_unused_para) if args.strategy == 'ddp' else args.strategy, + callbacks=callbacks, + precision=args.precision, + benchmark=args.benchmark + ) + + if bool(args.test_phase): + trainer.test(model, datamodule=dm) + else: + trainer.fit(model, dm) + if args.dev==0: + trainer.test(ckpt_path="best", datamodule=dm) + + # Close wandb run + if WANDB_ON: + wandb.finish() + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument('-c', '--cfg', type=str, default='cfg/fasternet_t0.yaml') + parser.add_argument('-g', "--gpus", type=str, default=None, + help="Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node.") + parser.add_argument('-d', "--dev", type=int, default=0, help='fast_dev_run for debug') + parser.add_argument("--num_nodes", type=int, default=1) + parser.add_argument('-n', "--num_workers", type=int, default=4) + parser.add_argument('-b', "--batch_size", type=int, default=2048) + parser.add_argument('-e', "--batch_size_eva", type=int, default=1000, help='batch_size for evaluation') + parser.add_argument("--model_ckpt_dir", type=str, default="./model_ckpt/") + parser.add_argument("--data_dir", type=str, default="../../data/imagenet") + parser.add_argument('--pin_memory', action='store_true') + parser.add_argument("--checkpoint_path", type=str, default=None) + parser.add_argument("--pconv_fw_type", type=str, default='split_cat', + help="use 'split_cat' for training/inference and 'slicing' only for inference") + parser.add_argument('--measure_latency', action='store_true', help='measure latency or throughput') + parser.add_argument('--test_phase', action='store_true') + parser.add_argument('--fuse_conv_bn', action='store_true') + parser.add_argument("--wandb_project_name", type=str, default="fasternet") + parser.add_argument('--wandb_offline', action='store_true') + 
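+    # note: the W&B flags below only take effect when neither --dev nor
+    # --test_phase is set (see WANDB_ON in main() above)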
parser.add_argument('--wandb_save_dir', type=str, default='./') + parser.add_argument('--pl_ckpt_2_torch_pth', action='store_true', + help='convert pl .ckpt file to torch .pth file, and then exit') + + args = parser.parse_args() + cfg = load_cfg(args.cfg) + args = merge_args_cfg(args, cfg) + + # please change {WANDB_API_KEY} to your personal api_key before using wandb + # os.environ["WANDB_API_KEY"] = "{WANDB_API_KEY}" + + main(args) diff --git a/cv/classification/fasternet/pytorch/utils/__init__.py b/cv/classification/fasternet/pytorch/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/cv/classification/fasternet/pytorch/utils/fuse_conv_bn.py b/cv/classification/fasternet/pytorch/utils/fuse_conv_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..8f1b7686a402fae8f3dd8412c74afcf7053fc65e --- /dev/null +++ b/cv/classification/fasternet/pytorch/utils/fuse_conv_bn.py @@ -0,0 +1,60 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +def _fuse_conv_bn(conv: nn.Module, bn: nn.Module) -> nn.Module: + """Fuse conv and bn into one module. + + Args: + conv (nn.Module): Conv to be fused. + bn (nn.Module): BN to be fused. + + Returns: + nn.Module: Fused module. + """ + conv_w = conv.weight + conv_b = conv.bias if conv.bias is not None else torch.zeros_like( + bn.running_mean) + + factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) + conv.weight = nn.Parameter(conv_w * + factor.reshape([conv.out_channels, 1, 1, 1])) + conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) + return conv + + +def fuse_conv_bn(module: nn.Module) -> nn.Module: + """Recursively fuse conv and bn in a module. + + During inference, the functionary of batch norm layers is turned off + but only the mean and var alone channels are used, which exposes the + chance to fuse it with the preceding conv layers to save computations and + simplify network structures. + + Args: + module (nn.Module): Module to be fused. + + Returns: + nn.Module: Fused module. + """ + last_conv = None + last_conv_name = None + + for name, child in module.named_children(): + if isinstance(child, + (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if last_conv is None: # only fuse BN that is after Conv + continue + fused_conv = _fuse_conv_bn(last_conv, child) + module._modules[last_conv_name] = fused_conv + # To reduce changes, set BN as Identity instead of deleting it. + module._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_conv_bn(child) + return module + diff --git a/cv/classification/fasternet/pytorch/utils/loss.py b/cv/classification/fasternet/pytorch/utils/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..ed725e0aa0ead84e0947cce8b1415dfd020b9775 --- /dev/null +++ b/cv/classification/fasternet/pytorch/utils/loss.py @@ -0,0 +1,71 @@ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +""" +Implements the knowledge distillation loss +""" +import torch +from torch.nn import functional as F + + +class DistillationLoss(torch.nn.Module): + """ + This module wraps a standard criterion and adds an extra knowledge distillation loss by + taking a teacher model prediction and using it as additional supervision. 
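+    The final loss is (1 - alpha) * base_loss + alpha * distillation_loss,
+    with tau acting as the temperature for soft distillation.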
+ """ + def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module, + distillation_type: str, alpha: float, tau: float): + super().__init__() + self.base_criterion = base_criterion + self.teacher_model = teacher_model + assert distillation_type in ['none', 'soft', 'hard'] + self.distillation_type = distillation_type + self.alpha = alpha + self.tau = tau + + def forward(self, inputs, outputs, labels): + """ + Args: + inputs: The original inputs that are feed to the teacher model + outputs: the outputs of the model to be trained. It is expected to be + either a Tensor, or a Tuple[Tensor, Tensor], with the original output + in the first position and the distillation predictions as the second output + labels: the labels for the base criterion + """ + outputs_kd = outputs + # outputs_kd = None + # if not isinstance(outputs, torch.Tensor): + # # assume that the model outputs a tuple of [outputs, outputs_kd] + # outputs, outputs_kd = outputs + base_loss = self.base_criterion(outputs, labels) + # if self.distillation_type == 'none': + # return base_loss + + # if outputs_kd is None: + # raise ValueError("When knowledge distillation is enabled, the model is " + # "expected to return a Tuple[Tensor, Tensor] with the output of the " + # "class_token and the dist_token") + # don't backprop throught the teacher + with torch.no_grad(): + teacher_outputs = self.teacher_model(inputs) + + if self.distillation_type == 'soft': + T = self.tau + # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100 + # with slight modifications + distillation_loss = F.kl_div( + F.log_softmax(outputs_kd / T, dim=1), + #We provide the teacher's targets in log probability because we use log_target=True + #(as recommended in pytorch https://github.com/pytorch/pytorch/blob/9324181d0ac7b4f7949a574dbc3e8be30abe7041/torch/nn/functional.py#L2719) + #but it is possible to give just the probabilities and set log_target=False. In our experiments we tried both. + F.log_softmax(teacher_outputs / T, dim=1), + reduction='sum', + log_target=True + ) * (T * T) / outputs_kd.numel() + #We divide by outputs_kd.numel() to have the legacy PyTorch behavior. 
+ #But we also experiments output_kd.size(0) + #see issue 61(https://github.com/facebookresearch/deit/issues/61) for more details + elif self.distillation_type == 'hard': + distillation_loss = F.cross_entropy(outputs_kd, teacher_outputs.argmax(dim=1)) + + loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha + return loss diff --git a/cv/classification/fasternet/pytorch/utils/utils.py b/cv/classification/fasternet/pytorch/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a4970342ca90588e9a3ab9c2a8b296cda09fc7a0 --- /dev/null +++ b/cv/classification/fasternet/pytorch/utils/utils.py @@ -0,0 +1,188 @@ +import torch +from torch import nn +import os +import numpy as np +import yaml +import time +from argparse import Namespace +import torch.distributed as dist +from pytorch_lightning.utilities import rank_zero_only +from fvcore.nn import FlopCountAnalysis, parameter_count + + +def load_cfg(cfg): + hyp = None + if isinstance(cfg, str): + with open(cfg, errors='ignore') as f: + hyp = yaml.safe_load(f) # load hyps dict + return Namespace(**hyp) + + +def merge_args_cfg(args, cfg): + dict0 = vars(args) + dict1 = vars(cfg) + dict = {**dict0, **dict1} + + return Namespace(**dict) + + +def str2list(string, sperator=','): + li = list(map(int, string.split(sperator))) + return li + +# def check_dir_format(dir): +# if dir.endswith(os.path.sep): +# return dir +# else: +# return dir+os.path.sep +# +# +# +# +# +# def append_path_by_date(model_ckpt_dir): +# os.environ['TZ'] = 'Asia/Hong_Kong' +# time.tzset() +# timestr = time.strftime("%Y%m%d-%H%M%S") +# +# return check_dir_format(model_ckpt_dir) + timestr + os.path.sep +# +@torch.no_grad() +@rank_zero_only +def print_model(model): + print(model) + + +@torch.no_grad() +def replace_layers(model, old, new): + for n, module in model.named_children(): + if len(list(module.children())) > 0: + ## compound module, go inside it + replace_layers(module, old, new) + + if isinstance(module, old): + ## simple module + setattr(model, n, new()) + +# @torch.no_grad() +# @rank_zero_only +# def mk_model_ckpt_dir(model_ckpt_dir): +# if not os.path.exists(model_ckpt_dir): +# os.makedirs(model_ckpt_dir) +# +@torch.no_grad() +def get_flops_params(model, input_size): + model.eval() + + tensor = (torch.rand(1, 3, input_size, input_size), ) + flops = FlopCountAnalysis(model, tensor) + flops = flops.total() / 1000000. + print("FVcore FLOPs(M): ", flops) + + params = parameter_count(model) + params = params[""] / 1000000. 
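+    # parameter_count returns a per-module dict; the '' key is the whole model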
+ print("FVcore params(M): ", params) + + return flops, params + + +@torch.no_grad() +def measure_latency(images, model, GPU=True, chan_last=False, half=False, num_threads=None, iter=200): + """ + :param images: b, c, h, w + :param model: model + :param GPU: whther use GPU + :param chan_last: data_format + :param half: half precision + :param num_threads: for cpu + :return: + """ + + if GPU: + model.cuda() + model.eval() + torch.backends.cudnn.benchmark = True + + images = images.cuda(non_blocking=True) + batch_size = images.shape[0] + if chan_last: + images = images.to(memory_format=torch.channels_last) + model = model.to(memory_format=torch.channels_last) + if half: + images = images.half() + model = model.half() + + for i in range(50): + model(images) + torch.cuda.synchronize() + tic1 = time.time() + for i in range(iter): + model(images) + torch.cuda.synchronize() + tic2 = time.time() + throughput = iter * batch_size / (tic2 - tic1) + latency = 1000 * (tic2 - tic1) / iter + print(f"batch_size {batch_size} throughput on gpu {throughput}") + print(f"batch_size {batch_size} latency on gpu {latency} ms") + + return throughput, latency + else: + model.eval() + if num_threads is not None: + torch.set_num_threads(num_threads) + + batch_size = images.shape[0] + + if chan_last: + images = images.to(memory_format=torch.channels_last) + model = model.to(memory_format=torch.channels_last) + if half: + images = images.half() + model = model.half() + for i in range(10): + model(images) + tic1 = time.time() + for i in range(iter): + model(images) + tic2 = time.time() + throughput = iter * batch_size / (tic2 - tic1) + latency = 1000 * (tic2 - tic1) / iter + print(f"batch_size {batch_size} throughput on cpu {throughput}") + print(f"batch_size {batch_size} latency on cpu {latency} ms") + + return throughput, latency +# +# +# def setup_for_distributed(is_master): +# """ +# This function disables printing when not in master process +# """ +# import builtins as __builtin__ +# builtin_print = __builtin__.print +# +# def print(*args, **kwargs): +# force = kwargs.pop('force', False) +# if is_master or force: +# builtin_print(*args, **kwargs) +# +# __builtin__.print = print +# +# +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() \ No newline at end of file