diff --git a/cv/classification/repvgg/pytorch/LICENSE b/cv/classification/repvgg/pytorch/LICENSE new file mode 100755 index 0000000000000000000000000000000000000000..9b7d31a349b645c2323b96fb4ed7912eabce3884 --- /dev/null +++ b/cv/classification/repvgg/pytorch/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 DingXiaoH + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/cv/classification/repvgg/pytorch/README.md b/cv/classification/repvgg/pytorch/README.md new file mode 100755 index 0000000000000000000000000000000000000000..be9cb1ea0edfc12c90723fb39c055796b3969645 --- /dev/null +++ b/cv/classification/repvgg/pytorch/README.md @@ -0,0 +1,61 @@ + +# RepVGG +## Model description + A simple but powerful architecture of convolutional neural network, which has a VGG-like inference-time body composed of nothing but a stack of 3x3 convolution and ReLU, while the training-time model has a multi-branch topology. Such decoupling of the training-time and inference-time architecture is realized by a structural re-parameterization technique so that the model is named RepVGG. + +## Step 1: Installing + +```bash +pip3 install timm yacs +``` + +## Step 2: Download data + +Download the [ImageNet Dataset](https://www.image-net.org/download.php) + +```bash +# IMAGENET PATH as follow: +ls -al /home/datasets/imagenet_jpeg/ +total 52688 +drwxr-xr-x 1002 root root 24576 Mar 1 15:33 train +-rw-r--r-- 1 root root 43829433 May 16 07:55 train_list.txt +drwxr-xr-x 1002 root root 24576 Mar 1 15:41 val +-rw-r--r-- 1 root root 2144499 May 16 07:56 val_list.txt +----------------------- +# train_list.txt has the following format +train/n01440764/n01440764_10026.JPEG 0 +... + +# val_list.txt has the following format +val/ILSVRC2012_val_00000001.JPEG 65 +----------------------- +``` + +## Step 3: Run RepVGG +``` +python -m torch.distributed.launch --nproc_per_node 8 --master_port 12349 main.py --arch [model name] --data-path [/path/to/imagenet] --batch-size 32 --tag train_from_scratch --output ./ --opts TRAIN.EPOCHS 300 TRAIN.BASE_LR 0.1 TRAIN.WEIGHT_DECAY 1e-4 TRAIN.WARMUP_EPOCHS 5 MODEL.LABEL_SMOOTHING 0.1 AUG.PRESET weak AUG.MIXUP 0.0 DATA.DATASET imagenet DATA.IMG_SIZE 224 +``` +The original RepVGG models were trained in 120 epochs with cosine learning rate decay from 0.1 to 0. 
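+For reference, that schedule matches standard cosine annealing in PyTorch. The sketch below is illustrative only (the training script builds its own scheduler in `train/lr_scheduler.py`; `model` here stands for any RepVGG instance):
+```
+import torch
+
+optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
+# cosine decay from the base LR of 0.1 down to 0 over 120 epochs
+# (the actual recipe excludes biases and some BN weights from weight decay, as noted below)
+scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=120, eta_min=0)
+for epoch in range(120):
+    # ... run one pass over the training set ...
+    scheduler.step()
+```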
We used 8 GPUs, global batch size of 256, weight decay of 1e-4 (no weight decay on fc.bias, bn.bias, rbr_dense.bn.weight and rbr_1x1.bn.weight) (weight decay on rbr_identity.weight makes little difference, and it is better to use it in most of the cases), and the same simple data preprocssing as the PyTorch official example: +``` + trans = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) +``` + +The valid model names include (--arch [model name]) +``` +RepVGGplus-L2pse, RepVGG-A0, RepVGG-A1, RepVGG-A2, RepVGG-B0, RepVGG-B1, RepVGG-B1g2, RepVGG-B1g4, RepVGG-B2, RepVGG-B2g2, RepVGG-B2g4, RepVGG-B3, RepVGG-B3g2, RepVGG-B3g4 +``` + +| model | GPU | FP32 | +|----------| ----------- | ------------------------------------ | +| RepVGG-A0| 8 cards | Acc@1=0.7241 | + + + + + + + diff --git a/cv/classification/repvgg/pytorch/data/__init__.py b/cv/classification/repvgg/pytorch/data/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..70c633ce61849c1600e3effbd1bf46f29f29cb80 --- /dev/null +++ b/cv/classification/repvgg/pytorch/data/__init__.py @@ -0,0 +1 @@ +from .build import build_loader \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/data/build.py b/cv/classification/repvgg/pytorch/data/build.py new file mode 100755 index 0000000000000000000000000000000000000000..faf093957ce8f3de6b4198832aad019d62454185 --- /dev/null +++ b/cv/classification/repvgg/pytorch/data/build.py @@ -0,0 +1,188 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- +import torch +import numpy as np +import torch.distributed as dist +from torchvision import datasets, transforms +from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from timm.data import Mixup +from timm.data import create_transform +try: + from timm.data.transforms import str_to_pil_interp as _pil_interp +except: + from timm.data.transforms import _pil_interp +from .cached_image_folder import CachedImageFolder +from .samplers import SubsetRandomSampler +import os + + +def build_loader(config): + config.defrost() + dataset_train, config.MODEL.NUM_CLASSES = build_dataset(is_train=True, config=config) + config.freeze() + print(f"local rank {config.LOCAL_RANK} / global rank {dist.get_rank()} successfully build train dataset") + dataset_val, _ = build_dataset(is_train=False, config=config) + print(f"local rank {config.LOCAL_RANK} / global rank {dist.get_rank()} successfully build val dataset") + + num_tasks = dist.get_world_size() + global_rank = dist.get_rank() + if config.DATA.ZIP_MODE and config.DATA.CACHE_MODE == 'part': + indices = np.arange(dist.get_rank(), len(dataset_train), dist.get_world_size()) + sampler_train = SubsetRandomSampler(indices) + else: + sampler_train = torch.utils.data.DistributedSampler( + dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True + ) + + if dataset_val is None: + sampler_val = None + else: + indices = np.arange(dist.get_rank(), 
len(dataset_val), dist.get_world_size()) #TODO + sampler_val = SubsetRandomSampler(indices) + + data_loader_train = torch.utils.data.DataLoader( + dataset_train, sampler=sampler_train, + batch_size=config.DATA.BATCH_SIZE, + num_workers=config.DATA.NUM_WORKERS, + pin_memory=config.DATA.PIN_MEMORY, + drop_last=True, + ) + + if dataset_val is None: + data_loader_val = None + else: + data_loader_val = torch.utils.data.DataLoader( + dataset_val, sampler=sampler_val, + batch_size=config.DATA.TEST_BATCH_SIZE, + shuffle=False, + num_workers=config.DATA.NUM_WORKERS, + pin_memory=config.DATA.PIN_MEMORY, + drop_last=False + ) + + # setup mixup / cutmix + mixup_fn = None + mixup_active = config.AUG.MIXUP > 0 or config.AUG.CUTMIX > 0. or config.AUG.CUTMIX_MINMAX is not None + if mixup_active: + mixup_fn = Mixup( + mixup_alpha=config.AUG.MIXUP, cutmix_alpha=config.AUG.CUTMIX, cutmix_minmax=config.AUG.CUTMIX_MINMAX, + prob=config.AUG.MIXUP_PROB, switch_prob=config.AUG.MIXUP_SWITCH_PROB, mode=config.AUG.MIXUP_MODE, + label_smoothing=config.MODEL.LABEL_SMOOTHING, num_classes=config.MODEL.NUM_CLASSES) + + return dataset_train, dataset_val, data_loader_train, data_loader_val, mixup_fn + + +def build_dataset(is_train, config): + if config.DATA.DATASET == 'imagenet': + transform = build_transform(is_train, config) + prefix = 'train' if is_train else 'val' + if config.DATA.ZIP_MODE: + ann_file = prefix + "_map.txt" + prefix = prefix + ".zip@/" + dataset = CachedImageFolder(config.DATA.DATA_PATH, ann_file, prefix, transform, + cache_mode=config.DATA.CACHE_MODE if is_train else 'part') + else: + import torchvision + print('use raw ImageNet data') + #dataset = torchvision.datasets.ImageNet(root=config.DATA.DATA_PATH, split='train' if is_train else 'val', transform=transform) + root = os.path.join(config.DATA.DATA_PATH, prefix) + dataset = datasets.ImageFolder(root, transform=transform) + + nb_classes = 1000 + + elif config.DATA.DATASET == 'cf100': + mean = [0.5070751592371323, 0.48654887331495095, 0.4409178433670343] + std = [0.2673342858792401, 0.2564384629170883, 0.27615047132568404] + if is_train: + transform = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean, std) + ]) + dataset = datasets.CIFAR100(root=config.DATA.DATA_PATH, train=True, download=True, transform=transform) + else: + transform = transforms.Compose( + [transforms.ToTensor(), + transforms.Normalize(mean, std)]) + dataset = datasets.CIFAR100(root=config.DATA.DATA_PATH, train=False, download=True, transform=transform) + nb_classes = 100 + + else: + raise NotImplementedError("We only support ImageNet and CIFAR-100 now.") + + return dataset, nb_classes + + +def build_transform(is_train, config): + resize_im = config.DATA.IMG_SIZE > 32 + if is_train: + # this should always dispatch to transforms_imagenet_train + + if config.AUG.PRESET is None: + transform = create_transform( + input_size=config.DATA.IMG_SIZE, + is_training=True, + color_jitter=config.AUG.COLOR_JITTER if config.AUG.COLOR_JITTER > 0 else None, + auto_augment=config.AUG.AUTO_AUGMENT if config.AUG.AUTO_AUGMENT != 'none' else None, + re_prob=config.AUG.REPROB, + re_mode=config.AUG.REMODE, + re_count=config.AUG.RECOUNT, + interpolation=config.DATA.INTERPOLATION, + ) + print('=============================== original AUG! 
', config.AUG.AUTO_AUGMENT) + if not resize_im: + # replace RandomResizedCropAndInterpolation with + # RandomCrop + transform.transforms[0] = transforms.RandomCrop(config.DATA.IMG_SIZE, padding=4) + + elif config.AUG.PRESET.strip() == 'raug15': + from train.randaug import RandAugPolicy + transform = transforms.Compose([ + transforms.RandomResizedCrop(config.DATA.IMG_SIZE), + transforms.RandomHorizontalFlip(), + RandAugPolicy(magnitude=15), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD), + ]) + print('---------------------- RAND AUG 15 distortion!') + + elif config.AUG.PRESET.strip() == 'weak': + transform = transforms.Compose([ + transforms.RandomResizedCrop(config.DATA.IMG_SIZE), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD), + ]) + elif config.AUG.PRESET.strip() == 'none': + transform = transforms.Compose([ + transforms.Resize(config.DATA.IMG_SIZE, interpolation=_pil_interp(config.DATA.INTERPOLATION)), + transforms.CenterCrop(config.DATA.IMG_SIZE), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD), + ]) + else: + raise ValueError('???' + config.AUG.PRESET) + print(transform) + return transform + + t = [] + if resize_im: + if config.TEST.CROP: + size = int((256 / 224) * config.DATA.TEST_SIZE) + t.append(transforms.Resize(size, interpolation=_pil_interp(config.DATA.INTERPOLATION)), + # to maintain same ratio w.r.t. 224 images + ) + t.append(transforms.CenterCrop(config.DATA.TEST_SIZE)) + else: + # default for testing + t.append(transforms.Resize(config.DATA.TEST_SIZE, interpolation=_pil_interp(config.DATA.INTERPOLATION))) + t.append(transforms.CenterCrop(config.DATA.TEST_SIZE)) + t.append(transforms.ToTensor()) + t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) + trans = transforms.Compose(t) + return trans diff --git a/cv/classification/repvgg/pytorch/data/cached_image_folder.py b/cv/classification/repvgg/pytorch/data/cached_image_folder.py new file mode 100755 index 0000000000000000000000000000000000000000..2f3d013a67f5aac9ca5fac635044ecef8fdbca5a --- /dev/null +++ b/cv/classification/repvgg/pytorch/data/cached_image_folder.py @@ -0,0 +1,244 @@ +import io +import os +import time +import torch.distributed as dist +import torch.utils.data as data +from PIL import Image + +from .zipreader import is_zip_path, ZipReader + + +def has_file_allowed_extension(filename, extensions): + """Checks if a file is an allowed extension. 
+ Args: + filename (string): path to a file + Returns: + bool: True if the filename ends with a known image extension + """ + filename_lower = filename.lower() + return any(filename_lower.endswith(ext) for ext in extensions) + + +def find_classes(dir): + classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + +def make_dataset(dir, class_to_idx, extensions): + images = [] + dir = os.path.expanduser(dir) + for target in sorted(os.listdir(dir)): + d = os.path.join(dir, target) + if not os.path.isdir(d): + continue + + for root, _, fnames in sorted(os.walk(d)): + for fname in sorted(fnames): + if has_file_allowed_extension(fname, extensions): + path = os.path.join(root, fname) + item = (path, class_to_idx[target]) + images.append(item) + + return images + + +def make_dataset_with_ann(ann_file, img_prefix, extensions): + images = [] + with open(ann_file, "r") as f: + contents = f.readlines() + for line_str in contents: + path_contents = [c for c in line_str.split('\t')] + im_file_name = path_contents[0] + class_index = int(path_contents[1]) + + assert str.lower(os.path.splitext(im_file_name)[-1]) in extensions + item = (os.path.join(img_prefix, im_file_name), class_index) + + images.append(item) + + return images + + +class DatasetFolder(data.Dataset): + """A generic data loader where the samples are arranged in this way: :: + root/class_x/xxx.ext + root/class_x/xxy.ext + root/class_x/xxz.ext + root/class_y/123.ext + root/class_y/nsdf3.ext + root/class_y/asd932_.ext + Args: + root (string): Root directory path. + loader (callable): A function to load a sample given its path. + extensions (list[string]): A list of allowed extensions. + transform (callable, optional): A function/transform that takes in + a sample and returns a transformed version. + E.g, ``transforms.RandomCrop`` for images. + target_transform (callable, optional): A function/transform that takes + in the target and transforms it. 
+ Attributes: + samples (list): List of (sample path, class_index) tuples + """ + + def __init__(self, root, loader, extensions, ann_file='', img_prefix='', transform=None, target_transform=None, + cache_mode="no"): + # image folder mode + if ann_file == '': + _, class_to_idx = find_classes(root) + samples = make_dataset(root, class_to_idx, extensions) + # zip mode + else: + samples = make_dataset_with_ann(os.path.join(root, ann_file), + os.path.join(root, img_prefix), + extensions) + + if len(samples) == 0: + raise (RuntimeError("Found 0 files in subfolders of: " + root + "\n" + + "Supported extensions are: " + ",".join(extensions))) + + self.root = root + self.loader = loader + self.extensions = extensions + + self.samples = samples + self.labels = [y_1k for _, y_1k in samples] + self.classes = list(set(self.labels)) + + self.transform = transform + self.target_transform = target_transform + + self.cache_mode = cache_mode + if self.cache_mode != "no": + self.init_cache() + + def init_cache(self): + assert self.cache_mode in ["part", "full"] + n_sample = len(self.samples) + global_rank = dist.get_rank() + world_size = dist.get_world_size() + + samples_bytes = [None for _ in range(n_sample)] + start_time = time.time() + for index in range(n_sample): + if index % (n_sample // 10) == 0: + t = time.time() - start_time + print(f'global_rank {dist.get_rank()} cached {index}/{n_sample} takes {t:.2f}s per block') + start_time = time.time() + path, target = self.samples[index] + if self.cache_mode == "full": + samples_bytes[index] = (ZipReader.read(path), target) + elif self.cache_mode == "part" and index % world_size == global_rank: + samples_bytes[index] = (ZipReader.read(path), target) + else: + samples_bytes[index] = (path, target) + self.samples = samples_bytes + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (sample, target) where target is class_index of the target class. 
+ """ + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + if self.target_transform is not None: + target = self.target_transform(target) + + return sample, target + + def __len__(self): + return len(self.samples) + + def __repr__(self): + fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' + fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) + fmt_str += ' Root Location: {}\n'.format(self.root) + tmp = ' Transforms (if any): ' + fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + tmp = ' Target Transforms (if any): ' + fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + return fmt_str + + +IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'] + + +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + if isinstance(path, bytes): + img = Image.open(io.BytesIO(path)) + elif is_zip_path(path): + data = ZipReader.read(path) + img = Image.open(io.BytesIO(data)) + else: + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +def accimage_loader(path): + import accimage + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_img_loader(path): + from torchvision import get_image_backend + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class CachedImageFolder(DatasetFolder): + """A generic data loader where the images are arranged in this way: :: + root/dog/xxx.png + root/dog/xxy.png + root/dog/xxz.png + root/cat/123.png + root/cat/nsdf3.png + root/cat/asd932_.png + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + Attributes: + imgs (list): List of (image path, class_index) tuples + """ + + def __init__(self, root, ann_file='', img_prefix='', transform=None, target_transform=None, + loader=default_img_loader, cache_mode="no"): + super(CachedImageFolder, self).__init__(root, loader, IMG_EXTENSIONS, + ann_file=ann_file, img_prefix=img_prefix, + transform=transform, target_transform=target_transform, + cache_mode=cache_mode) + self.imgs = self.samples + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is class_index of the target class. 
+ """ + path, target = self.samples[index] + image = self.loader(path) + if self.transform is not None: + img = self.transform(image) + else: + img = image + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target diff --git a/cv/classification/repvgg/pytorch/data/lmdb_dataset.py b/cv/classification/repvgg/pytorch/data/lmdb_dataset.py new file mode 100755 index 0000000000000000000000000000000000000000..640d1824d34be5f1f522918500156fdf498a22df --- /dev/null +++ b/cv/classification/repvgg/pytorch/data/lmdb_dataset.py @@ -0,0 +1,164 @@ +import os +import os.path as osp +from PIL import Image +import six +import lmdb +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) +import pyarrow as pa +import numpy as np +import torch.utils.data as data +from torch.utils.data import DataLoader +from torchvision.datasets import ImageFolder + +train_lmdb_path = '/apdcephfs/share_1290939/0_public_datasets/imageNet_2012/train.lmdb' +val_lmdb_path = '/apdcephfs/share_1290939/0_public_datasets/imageNet_2012/val.lmdb' + +# from data.lmdb_dataset import ImageFolderLMDB, train_lmdb_path, val_lmdb_path +# lmdb_path = train_lmdb_path if is_train else val_lmdb_path +# dataset = ImageFolderLMDB(db_path=lmdb_path, transform=transform) + +def loads_pyarrow(buf): + """ + Args: + buf: the output of `dumps`. + """ + return pa.deserialize(buf) + + +class ImageFolderLMDB(data.Dataset): + def __init__(self, db_path, transform=None, target_transform=None): + self.db_path = db_path + self.env = lmdb.open(db_path, subdir=osp.isdir(db_path), + readonly=True, lock=False, + readahead=False, meminit=False) + with self.env.begin(write=False) as txn: + self.length = loads_pyarrow(txn.get(b'__len__')) + self.keys = loads_pyarrow(txn.get(b'__keys__')) + + self.transform = transform + self.target_transform = target_transform + + def __getstate__(self): + state = self.__dict__ + state["env"] = None + return state + + def __setstate__(self, state): + self.__dict__ = state + self.env = lmdb.open(self.db_path, subdir=osp.isdir(self.db_path), + readonly=True, lock=False, + readahead=False, meminit=False) + with self.env.begin(write=False) as txn: + self.length = loads_pyarrow(txn.get(b'__len__')) + self.keys = loads_pyarrow(txn.get(b'__keys__')) + + def __getitem__(self, index): + env = self.env + with env.begin(write=False) as txn: + byteflow = txn.get(self.keys[index]) + + unpacked = loads_pyarrow(byteflow) + + # load img + imgbuf = unpacked[0] + buf = six.BytesIO() + buf.write(imgbuf) + buf.seek(0) + img = Image.open(buf).convert('RGB') + if self.transform is not None: + img = self.transform(img) + + # load label + target = unpacked[1] + if self.target_transform is not None: + target = self.transform(target) + + return img, target +# if self.transform is not None: +# img = self.transform(img) +# +# # im2arr = np.array(img) +# +# if self.target_transform is not None: +# target = self.target_transform(target) +# +# return img, target + # return im2arr, target + + def __len__(self): + return self.length + + def __repr__(self): + return self.__class__.__name__ + ' (' + self.db_path + ')' + + +def raw_reader(path): + with open(path, 'rb') as f: + bin_data = f.read() + return bin_data + + +def dumps_pyarrow(obj): + """ + Serialize an object. 
+ Returns: + Implementation-dependent bytes-like object + """ + return pa.serialize(obj).to_buffer() + + +def folder2lmdb(dpath, name="train", write_frequency=5000): + directory = osp.expanduser(osp.join(dpath, name)) + print("Loading dataset from %s" % directory) + dataset = ImageFolder(directory, loader=raw_reader) + data_loader = DataLoader(dataset, num_workers=4, collate_fn=lambda x: x) + + lmdb_path = osp.join(dpath, "%s.lmdb" % name) + isdir = os.path.isdir(lmdb_path) + + print("Generate LMDB to %s" % lmdb_path) + db = lmdb.open(lmdb_path, subdir=isdir, + map_size=1099511627776 * 2, readonly=False, + meminit=False, map_async=True) + + txn = db.begin(write=True) + for idx, data in enumerate(data_loader): + image, label = data[0] + + txn.put(u'{}'.format(idx).encode('ascii'), dumps_pyarrow((image, label))) + if idx % write_frequency == 0: + print("[%d/%d]" % (idx, len(data_loader))) + txn.commit() + txn = db.begin(write=True) + + # finish iterating through dataset + txn.commit() + keys = [u'{}'.format(k).encode('ascii') for k in range(idx + 1)] + with db.begin(write=True) as txn: + txn.put(b'__keys__', dumps_pyarrow(keys)) + txn.put(b'__len__', dumps_pyarrow(len(keys))) + + print("Flushing database ...") + db.sync() + db.close() + + + + +if __name__ == "__main__": + # lmdb_path = '/apdcephfs/share_1016399/0_public_datasets/imageNet_2012/train.lmdb' + # from lmdb_dataset import ImageFolderLMDB + # dataset = ImageFolderLMDB(db_path=lmdb_path) + # for x, y in dataset: + # print(type(x), type(y)) + # exit() + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--dir', type=str, required=True, help="The dataset directory to process") + args = parser.parse_args() + # generate lmdb + path = args.dir + folder2lmdb(path, name="train") + folder2lmdb(path, name="val") diff --git a/cv/classification/repvgg/pytorch/data/samplers.py b/cv/classification/repvgg/pytorch/data/samplers.py new file mode 100755 index 0000000000000000000000000000000000000000..fed54b4e27b2df268670ace4cda64687209a5380 --- /dev/null +++ b/cv/classification/repvgg/pytorch/data/samplers.py @@ -0,0 +1,21 @@ +import torch + +class SubsetRandomSampler(torch.utils.data.Sampler): + r"""Samples elements randomly from a given list of indices, without replacement. 
+ + Arguments: + indices (sequence): a sequence of indices + """ + + def __init__(self, indices): + self.epoch = 0 + self.indices = indices + + def __iter__(self): + return (self.indices[i] for i in torch.randperm(len(self.indices))) + + def __len__(self): + return len(self.indices) + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/cv/classification/repvgg/pytorch/data/zipreader.py b/cv/classification/repvgg/pytorch/data/zipreader.py new file mode 100755 index 0000000000000000000000000000000000000000..9d773c3c4e91eae4435faf9d9b297a2d3c21a3d2 --- /dev/null +++ b/cv/classification/repvgg/pytorch/data/zipreader.py @@ -0,0 +1,96 @@ +import os +import zipfile +import io +import numpy as np +from PIL import Image +from PIL import ImageFile + +ImageFile.LOAD_TRUNCATED_IMAGES = True + + +def is_zip_path(img_or_path): + """judge if this is a zip path""" + return '.zip@' in img_or_path + + +class ZipReader(object): + """A class to read zipped files""" + zip_bank = dict() + + def __init__(self): + super(ZipReader, self).__init__() + + @staticmethod + def get_zipfile(path): + zip_bank = ZipReader.zip_bank + if path not in zip_bank: + zfile = zipfile.ZipFile(path, 'r') + zip_bank[path] = zfile + return zip_bank[path] + + @staticmethod + def split_zip_style_path(path): + pos_at = path.index('@') + assert pos_at != -1, "character '@' is not found from the given path '%s'" % path + + zip_path = path[0: pos_at] + folder_path = path[pos_at + 1:] + folder_path = str.strip(folder_path, '/') + return zip_path, folder_path + + @staticmethod + def list_folder(path): + zip_path, folder_path = ZipReader.split_zip_style_path(path) + + zfile = ZipReader.get_zipfile(zip_path) + folder_list = [] + for file_foler_name in zfile.namelist(): + file_foler_name = str.strip(file_foler_name, '/') + if file_foler_name.startswith(folder_path) and \ + len(os.path.splitext(file_foler_name)[-1]) == 0 and \ + file_foler_name != folder_path: + if len(folder_path) == 0: + folder_list.append(file_foler_name) + else: + folder_list.append(file_foler_name[len(folder_path) + 1:]) + + return folder_list + + @staticmethod + def list_files(path, extension=None): + if extension is None: + extension = ['.*'] + zip_path, folder_path = ZipReader.split_zip_style_path(path) + + zfile = ZipReader.get_zipfile(zip_path) + file_lists = [] + for file_foler_name in zfile.namelist(): + file_foler_name = str.strip(file_foler_name, '/') + if file_foler_name.startswith(folder_path) and \ + str.lower(os.path.splitext(file_foler_name)[-1]) in extension: + if len(folder_path) == 0: + file_lists.append(file_foler_name) + else: + file_lists.append(file_foler_name[len(folder_path) + 1:]) + + return file_lists + + @staticmethod + def read(path): + zip_path, path_img = ZipReader.split_zip_style_path(path) + zfile = ZipReader.get_zipfile(zip_path) + data = zfile.read(path_img) + return data + + @staticmethod + def imread(path): + zip_path, path_img = ZipReader.split_zip_style_path(path) + zfile = ZipReader.get_zipfile(zip_path) + data = zfile.read(path_img) + try: + im = Image.open(io.BytesIO(data)) + except: + print("ERROR IMG LOADED: ", path_img) + random_img = np.random.rand(224, 224, 3) * 255 + im = Image.fromarray(np.uint8(random_img)) + return im diff --git a/cv/classification/repvgg/pytorch/example_pspnet.py b/cv/classification/repvgg/pytorch/example_pspnet.py new file mode 100755 index 0000000000000000000000000000000000000000..19ff9abdce2ac9b80d637c5033fb568351564796 --- /dev/null +++ b/cv/classification/repvgg/pytorch/example_pspnet.py @@ 
-0,0 +1,161 @@ +import torch +from torch import nn +import torch.nn.functional as F +from repvgg import get_RepVGG_func_by_name + +# The PSPNet parts are from +# https://github.com/hszhao/semseg + +class PPM(nn.Module): + def __init__(self, in_dim, reduction_dim, bins, BatchNorm): + super(PPM, self).__init__() + self.features = [] + for bin in bins: + self.features.append(nn.Sequential( + nn.AdaptiveAvgPool2d(bin), + nn.Conv2d(in_dim, reduction_dim, kernel_size=1, bias=False), + BatchNorm(reduction_dim), + nn.ReLU(inplace=True) + )) + self.features = nn.ModuleList(self.features) + + def forward(self, x): + x_size = x.size() + out = [x] + for f in self.features: + out.append(F.interpolate(f(x), x_size[2:], mode='bilinear', align_corners=True)) + return torch.cat(out, 1) + + +class PSPNet(nn.Module): + def __init__(self, + backbone_name, backbone_file, deploy, + bins=(1, 2, 3, 6), dropout=0.1, classes=2, + zoom_factor=8, use_ppm=True, criterion=nn.CrossEntropyLoss(ignore_index=255), BatchNorm=nn.BatchNorm2d, + pretrained=True): + super(PSPNet, self).__init__() + assert 2048 % len(bins) == 0 + assert classes > 1 + assert zoom_factor in [1, 2, 4, 8] + self.zoom_factor = zoom_factor + self.use_ppm = use_ppm + self.criterion = criterion + + repvgg_fn = get_RepVGG_func_by_name(backbone_name) + backbone = repvgg_fn(deploy) + if pretrained: + checkpoint = torch.load(backbone_file) + if 'state_dict' in checkpoint: + checkpoint = checkpoint['state_dict'] + ckpt = {k.replace('module.', ''): v for k, v in checkpoint.items()} # strip the names + backbone.load_state_dict(ckpt) + + self.layer0, self.layer1, self.layer2, self.layer3, self.layer4 = backbone.stage0, backbone.stage1, backbone.stage2, backbone.stage3, backbone.stage4 + + # The last two stages should have stride=1 for semantic segmentation + # Note that the stride of 1x1 should be the same as the 3x3 + # Use dilation following the implementation of PSPNet + secondlast_channel = 0 + for n, m in self.layer3.named_modules(): + if ('rbr_dense' in n or 'rbr_reparam' in n) and isinstance(m, nn.Conv2d): + m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1) + print('change dilation, padding, stride of ', n) + secondlast_channel = m.out_channels + elif 'rbr_1x1' in n and isinstance(m, nn.Conv2d): + m.stride = (1, 1) + print('change stride of ', n) + last_channel = 0 + for n, m in self.layer4.named_modules(): + if ('rbr_dense' in n or 'rbr_reparam' in n) and isinstance(m, nn.Conv2d): + m.dilation, m.padding, m.stride = (4, 4), (4, 4), (1, 1) + print('change dilation, padding, stride of ', n) + last_channel = m.out_channels + elif 'rbr_1x1' in n and isinstance(m, nn.Conv2d): + m.stride = (1, 1) + print('change stride of ', n) + + fea_dim = last_channel + aux_in = secondlast_channel + + if use_ppm: + self.ppm = PPM(fea_dim, int(fea_dim/len(bins)), bins, BatchNorm) + fea_dim *= 2 + + self.cls = nn.Sequential( + nn.Conv2d(fea_dim, 512, kernel_size=3, padding=1, bias=False), + BatchNorm(512), + nn.ReLU(inplace=True), + nn.Dropout2d(p=dropout), + nn.Conv2d(512, classes, kernel_size=1) + ) + if self.training: + self.aux = nn.Sequential( + nn.Conv2d(aux_in, 256, kernel_size=3, padding=1, bias=False), + BatchNorm(256), + nn.ReLU(inplace=True), + nn.Dropout2d(p=dropout), + nn.Conv2d(256, classes, kernel_size=1) + ) + + def forward(self, x, y=None): + x_size = x.size() + assert (x_size[2]-1) % 8 == 0 and (x_size[3]-1) % 8 == 0 + h = int((x_size[2] - 1) / 8 * self.zoom_factor + 1) + w = int((x_size[3] - 1) / 8 * self.zoom_factor + 1) + + x = self.layer0(x) + x 
= self.layer1(x) + x = self.layer2(x) + x_tmp = self.layer3(x) + x = self.layer4(x_tmp) + + if self.use_ppm: + x = self.ppm(x) + x = self.cls(x) + if self.zoom_factor != 1: + x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=True) + + if self.training: + aux = self.aux(x_tmp) + if self.zoom_factor != 1: + aux = F.interpolate(aux, size=(h, w), mode='bilinear', align_corners=True) + main_loss = self.criterion(x, y) + aux_loss = self.criterion(aux, y) + return x.max(1)[1], main_loss, aux_loss + else: + return x + + +if __name__ == '__main__': + # 1. Build the PSPNet with RepVGG backbone. Download the ImageNet-pretrained weight file and load it. + model = PSPNet(backbone_name='RepVGG-A0', backbone_file='RepVGG-A0-train.pth', deploy=False, classes=19, pretrained=True) + + # 2. Train it + # seg_train(model) + + # 3. Convert and check the equivalence + input = torch.rand(4, 3, 713, 713) + model.eval() + print(model) + y_train = model(input) + for module in model.modules(): + if hasattr(module, 'switch_to_deploy'): + module.switch_to_deploy() + y_deploy = model(input) + print('output is ', y_deploy.size()) + print('=================== The diff is') + print(((y_deploy - y_train) ** 2).sum()) + + # 4. Save the converted model + torch.save(model.state_dict(), 'PSPNet-RepVGG-A0-deploy.pth') + del model # Or do whatever you want with it + + # 5. For inference, load the saved model. There is no need to load the ImageNet-pretrained weights again. + deploy_model = PSPNet(backbone_name='RepVGG-A0', backbone_file=None, deploy=True, classes=19, pretrained=False) + deploy_model.eval() + deploy_model.load_state_dict(torch.load('PSPNet-RepVGG-A0-deploy.pth')) + + # 6. Check again or do whatever you want + y_deploy = deploy_model(input) + print('=================== The diff is') + print(((y_deploy - y_train) ** 2).sum()) \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/jizhi_submit_train_repvgg.py b/cv/classification/repvgg/pytorch/jizhi_submit_train_repvgg.py new file mode 100755 index 0000000000000000000000000000000000000000..8debce4d26aada27fcad1d47148c04510499636f --- /dev/null +++ b/cv/classification/repvgg/pytorch/jizhi_submit_train_repvgg.py @@ -0,0 +1,34 @@ +import argparse +import datetime +import os +import json + +parser = argparse.ArgumentParser('JIZHI submit', add_help=False) +parser.add_argument('arch', default=None, type=str) +parser.add_argument('tag', default=None, type=str) +parser.add_argument('--config', default='/apdcephfs_cq2/share_1290939/xiaohanding/cnt/default_V100x8_elastic_config.json', type=str, + help='config file') + + +args = parser.parse_args() +run_dir = f'{args.arch}_{args.tag}' + +cmd = f'python3 -m torch.distributed.launch --nproc_per_node 8 --master_port 12349 main.py ' \ + f'--arch {args.arch} --batch-size 32 --tag {args.tag} --output-dir /apdcephfs_cq2/share_1290939/xiaohanding/swin_exps/{args.arch}_{args.tag} --opts TRAIN.EPOCHS 120 TRAIN.BASE_LR 0.1 TRAIN.WEIGHT_DECAY 4e-5 TRAIN.WARMUP_EPOCHS 5 MODEL.LABEL_SMOOTHING 0.1 AUG.PRESET raug15 DATA.DATASET imagenet' + +os.system('cd /apdcephfs_cq2/share_1290939/xiaohanding/RepVGG/') +os.system(f'mkdir runs/{run_dir}') +with open(f'runs/{run_dir}/start.sh', 'w') as f: + f.write(cmd) +with open(args.config, 'r') as f: + json_content = json.load(f) +json_content['model_local_file_path'] = f'/apdcephfs_cq2/share_1290939/xiaohanding/RepVGG/runs/{run_dir}' +config_file_path = f'/apdcephfs_cq2/share_1290939/xiaohanding/RepVGG/runs/{run_dir}/config.json' +with open(config_file_path, 'w') as f: + 
json.dump(json_content, f) + +os.system(f'cp *.py runs/{run_dir}/') +os.system(f'cp -r data runs/{run_dir}/') +os.system(f'cp -r train runs/{run_dir}/') +os.system(f'cd runs/{run_dir}') +os.system(f'jizhi_client start -scfg {config_file_path}') \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/main.py b/cv/classification/repvgg/pytorch/main.py new file mode 100755 index 0000000000000000000000000000000000000000..c721c14bd81484896b0a4f4f72558af96af29790 --- /dev/null +++ b/cv/classification/repvgg/pytorch/main.py @@ -0,0 +1,414 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- +import time +import argparse +import datetime +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.distributed as dist +from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy +from timm.utils import accuracy, AverageMeter +from train.config import get_config +from data import build_loader +from train.lr_scheduler import build_scheduler +from train.logger import create_logger +from utils import load_checkpoint, save_checkpoint, get_grad_norm, auto_resume_helper, reduce_tensor, save_latest, update_model_ema, unwrap_model +import copy +from train.optimizer import build_optimizer +from repvggplus import create_RepVGGplus_by_name + +try: + # noinspection PyUnresolvedReferences + from apex import amp +except ImportError: + amp = None + +def parse_option(): + parser = argparse.ArgumentParser('RepOpt-VGG training script built on the codebase of Swin Transformer', add_help=False) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs='+', + ) + + # easy config modification + parser.add_argument('--arch', default=None, type=str, help='arch name') + parser.add_argument('--batch-size', default=128, type=int, help="batch size for single GPU") + parser.add_argument('--data-path', default='/your/path/to/dataset', type=str, help='path to dataset') + parser.add_argument('--scales-path', default=None, type=str, help='path to the trained Hyper-Search model') + parser.add_argument('--zip', action='store_true', help='use zipped dataset instead of folder dataset') + parser.add_argument('--cache-mode', type=str, default='part', choices=['no', 'full', 'part'], + help='no: no cache, ' + 'full: cache all data, ' + 'part: sharding the dataset into nonoverlapping pieces and only cache one piece') + parser.add_argument('--resume', help='resume from checkpoint') + parser.add_argument('--accumulation-steps', type=int, help="gradient accumulation steps") + parser.add_argument('--use-checkpoint', action='store_true', + help="whether to use gradient checkpointing to save memory") + parser.add_argument('--amp-opt-level', type=str, default='O0', choices=['O0', 'O1', 'O2'], #TODO Note: use amp if you have it + help='mixed precision opt level, if O0, no amp is used') + parser.add_argument('--output', default='/your/path/to/save/dir', type=str, metavar='PATH', + help='root of output folder, the full path is // (default: output)') + parser.add_argument('--tag', help='tag of experiment') + parser.add_argument('--eval', action='store_true', help='Perform evaluation only') + parser.add_argument('--throughput', action='store_true', help='Test throughput only') + + # distributed training + parser.add_argument("--local_rank", type=int, default=0, help='local rank for DistributedDataParallel') + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + + + + +def main(config): + dataset_train, dataset_val, data_loader_train, data_loader_val, mixup_fn = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + + model = create_RepVGGplus_by_name(config.MODEL.ARCH, deploy=False, use_checkpoint=args.use_checkpoint) + optimizer = build_optimizer(config, model) + + logger.info(str(model)) + model.cuda() + + if torch.cuda.device_count() > 1: + if config.AMP_OPT_LEVEL != "O0": + model, optimizer = amp.initialize(model, optimizer, opt_level=config.AMP_OPT_LEVEL) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.LOCAL_RANK], + broadcast_buffers=False) + model_without_ddp = model.module + else: + if config.AMP_OPT_LEVEL != "O0": + model, optimizer = amp.initialize(model, optimizer, opt_level=config.AMP_OPT_LEVEL) + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, 'flops'): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + if config.THROUGHPUT_MODE: + throughput(data_loader_val, model, logger) + return + + if config.EVAL_MODE: + load_weights(model, config.MODEL.RESUME) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info(f"Only eval. 
top-1 acc, top-5 acc, loss: {acc1:.3f}, {acc5:.3f}, {loss:.5f}") + return + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = torch.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + max_ema_accuracy = 0.0 + + if config.TRAIN.EMA_ALPHA > 0 and (not config.EVAL_MODE) and (not config.THROUGHPUT_MODE): + model_ema = copy.deepcopy(model) + else: + model_ema = None + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning(f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}") + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f'auto resuming from {resume_file}') + else: + logger.info(f'no checkpoint found in {config.OUTPUT}, ignoring auto resume') + + if (not config.THROUGHPUT_MODE) and config.MODEL.RESUME: + max_accuracy = load_checkpoint(config, model_without_ddp, optimizer, lr_scheduler, logger, model_ema=model_ema) + + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch(config, model, criterion, data_loader_train, optimizer, epoch, mixup_fn, lr_scheduler, model_ema=model_ema) + if dist.get_rank() == 0: + save_latest(config, epoch, model_without_ddp, max_accuracy, optimizer, lr_scheduler, logger, model_ema=model_ema) + if epoch % config.SAVE_FREQ == 0: + save_checkpoint(config, epoch, model_without_ddp, max_accuracy, optimizer, lr_scheduler, logger, model_ema=model_ema) + + if epoch % config.SAVE_FREQ == 0 or epoch >= (config.TRAIN.EPOCHS - 10): + + if data_loader_val is not None: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info(f"Accuracy of the network at epoch {epoch}: {acc1:.3f}%") + max_accuracy = max(max_accuracy, acc1) + logger.info(f'Max accuracy: {max_accuracy:.2f}%') + if max_accuracy == acc1 and dist.get_rank() == 0: + save_checkpoint(config, epoch, model_without_ddp, max_accuracy, optimizer, lr_scheduler, logger, + is_best=True, model_ema=model_ema) + + if model_ema is not None: + if data_loader_val is not None: + acc1, acc5, loss = validate(config, data_loader_val, model_ema) + logger.info(f"EMAAccuracy of the network at epoch {epoch} test images: {acc1:.3f}%") + max_ema_accuracy = max(max_ema_accuracy, acc1) + logger.info(f'EMAMax accuracy: {max_ema_accuracy:.2f}%') + if max_ema_accuracy == acc1 and dist.get_rank() == 0: + best_ema_path = os.path.join(config.OUTPUT, 'best_ema.pth') + logger.info(f"{best_ema_path} best EMA saving......") + torch.save(unwrap_model(model_ema).state_dict(), best_ema_path) + else: + latest_ema_path = os.path.join(config.OUTPUT, 'latest_ema.pth') + logger.info(f"{latest_ema_path} latest EMA saving......") + torch.save(unwrap_model(model_ema).state_dict(), latest_ema_path) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info('Training time {}'.format(total_time_str)) + + +def train_one_epoch(config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler, model_ema=None): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + batch_time = AverageMeter() 
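+    # running meters (timm AverageMeter: latest value plus running average) for per-iteration
+    # time, loss and gradient norm, reported every PRINT_FREQ iterations below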
+ loss_meter = AverageMeter() + norm_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda(non_blocking=True) + targets = targets.cuda(non_blocking=True) + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if type(outputs) is dict: + loss = 0.0 + for name, pred in outputs.items(): + if 'aux' in name: + loss += 0.1 * criterion(pred, targets) + else: + loss += criterion(pred, targets) + else: + loss = criterion(outputs, targets) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + + loss = loss / config.TRAIN.ACCUMULATION_STEPS + if config.AMP_OPT_LEVEL != "O0": + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + if config.TRAIN.CLIP_GRAD: + grad_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), config.TRAIN.CLIP_GRAD) + else: + grad_norm = get_grad_norm(amp.master_params(optimizer)) + else: + loss.backward() + if config.TRAIN.CLIP_GRAD: + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.TRAIN.CLIP_GRAD) + else: + grad_norm = get_grad_norm(model.parameters()) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step_update(epoch * num_steps + idx) + + else: + + optimizer.zero_grad() + if config.AMP_OPT_LEVEL != "O0": + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + if config.TRAIN.CLIP_GRAD: + grad_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), config.TRAIN.CLIP_GRAD) + else: + grad_norm = get_grad_norm(amp.master_params(optimizer)) + else: + loss.backward() + if config.TRAIN.CLIP_GRAD: + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.TRAIN.CLIP_GRAD) + else: + grad_norm = get_grad_norm(model.parameters()) + optimizer.step() + lr_scheduler.step_update(epoch * num_steps + idx) + + torch.cuda.synchronize() + + loss_meter.update(loss.item(), targets.size(0)) + norm_meter.update(grad_norm) + batch_time.update(time.time() - end) + + if model_ema is not None: + update_model_ema(config, dist.get_world_size(), model=model, model_ema=model_ema, cur_epoch=epoch, cur_iter=idx) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]['lr'] + memory_used = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0) + etas = batch_time.avg * (num_steps - idx) + logger.info( + f'Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t' + f'eta {datetime.timedelta(seconds=int(etas))} lr {lr:.6f}\t' + f'time {batch_time.val:.4f} ({batch_time.avg:.4f})\t' + f'loss {loss_meter.val:.4f} ({loss_meter.avg:.4f})\t' + f'grad_norm {norm_meter.val:.4f} ({norm_meter.avg:.4f})\t' + f'mem {memory_used:.0f}MB') + epoch_time = time.time() - start + logger.info(f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}") + + +@torch.no_grad() +def validate(config, data_loader, model): + criterion = torch.nn.CrossEntropyLoss() + model.eval() + + batch_time = AverageMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + + # compute output + output = model(images) + + # =============================== deepsup part + if type(output) is dict: + output = output['main'] + + # measure accuracy and record loss + loss = 
criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + loss_meter.update(loss.item(), target.size(0)) + acc1_meter.update(acc1.item(), target.size(0)) + acc5_meter.update(acc5.item(), target.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if idx % config.PRINT_FREQ == 0: + memory_used = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0) + logger.info( + f'Test: [{idx}/{len(data_loader)}]\t' + f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + f'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f})\t' + f'Acc@1 {acc1_meter.val:.3f} ({acc1_meter.avg:.3f})\t' + f'Acc@5 {acc5_meter.val:.3f} ({acc5_meter.avg:.3f})\t' + f'Mem {memory_used:.0f}MB') + logger.info(f' * Acc@1 {acc1_meter.avg:.3f} Acc@5 {acc5_meter.avg:.3f}') + return acc1_meter.avg, acc5_meter.avg, loss_meter.avg + + +@torch.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda(non_blocking=True) + + batch_size = images.shape[0] + for i in range(50): + model(images) + torch.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + torch.cuda.synchronize() + tic2 = time.time() + throughput = 30 * batch_size / (tic2 - tic1) + logger.info(f"batch_size {batch_size} throughput {throughput}") + return + + +import os + +if __name__ == '__main__': + args, config = parse_option() + + if config.AMP_OPT_LEVEL != "O0": + assert amp is not None, "amp not installed!" + + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + rank = int(os.environ["RANK"]) + world_size = int(os.environ['WORLD_SIZE']) + else: + rank = -1 + world_size = -1 + torch.cuda.set_device(config.LOCAL_RANK) + torch.distributed.init_process_group(backend='nccl', init_method='env://', world_size=world_size, rank=rank) + torch.distributed.barrier() + seed = config.SEED + dist.get_rank() + + torch.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + if not config.EVAL_MODE: + # linear scale the learning rate according to total batch size, may not be optimal + linear_scaled_lr = config.TRAIN.BASE_LR * config.DATA.BATCH_SIZE * dist.get_world_size() / 256.0 + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR * config.DATA.BATCH_SIZE * dist.get_world_size() / 256.0 + linear_scaled_min_lr = config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * dist.get_world_size() / 256.0 + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + print('==========================================') + print('real base lr: ', config.TRAIN.BASE_LR) + print('==========================================') + + os.makedirs(config.OUTPUT, exist_ok=True) + + logger = create_logger(output_dir=config.OUTPUT, dist_rank=0 if torch.cuda.device_count() == 1 else dist.get_rank(), name=f"{config.MODEL.ARCH}") + + if torch.cuda.device_count() == 1 or dist.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + 
with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/repvgg/pytorch/quantization/quant_qat_train.py b/cv/classification/repvgg/pytorch/quantization/quant_qat_train.py new file mode 100755 index 0000000000000000000000000000000000000000..80e1dcd06bcf20c839b98808d3da54d9cb9616cd --- /dev/null +++ b/cv/classification/repvgg/pytorch/quantization/quant_qat_train.py @@ -0,0 +1,426 @@ +import argparse +import random +import shutil +import time +import warnings +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +from utils import * +import torchvision.transforms as transforms +import PIL + +best_acc1 = 0 + +IMAGENET_TRAINSET_SIZE = 1281167 + +parser = argparse.ArgumentParser(description='PyTorch Whole Model Quant') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('-a', '--arch', metavar='ARCH', default='RepVGG-A0') +parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=8, type=int, metavar='N', + help='number of epochs for each run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--val-batch-size', default=100, type=int, metavar='V', + help='validation batch size') +parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float, + metavar='LR', help='learning rate for finetuning', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://127.0.0.1:23333', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--base-weights', default=None, type=str, + help='weights of the base model.') +parser.add_argument('--tag', default='testtest', type=str, + help='the tag for identifying the log and model files. Just a string.') +parser.add_argument('--fpfinetune', dest='fpfinetune', action='store_true', + help='full precision finetune') +parser.add_argument('--fixobserver', dest='fixobserver', action='store_true', + help='fix observer?') +parser.add_argument('--fixbn', dest='fixbn', action='store_true', + help='fix bn?') +parser.add_argument('--quantlayers', default='all', type=str, choices=['all', 'exclud_first_and_linear', 'exclud_first_and_last'], + help='the tag for identifying the log and model files. Just a string.') + + + +def sgd_optimizer(model, lr, momentum, weight_decay): + params = [] + for key, value in model.named_parameters(): + if not value.requires_grad: + continue + apply_weight_decay = weight_decay + apply_lr = lr + if value.ndimension() < 2: #TODO note this + apply_weight_decay = 0 + print('set weight decay=0 for {}'.format(key)) + if 'bias' in key: + apply_lr = 2 * lr # Just a Caffe-style common practice. Made no difference. + params += [{'params': [value], 'lr': apply_lr, 'weight_decay': apply_weight_decay}] + optimizer = torch.optim.SGD(params, lr, momentum=momentum) + return optimizer + +def main(): + args = parser.parse_args() + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + ngpus_per_node = torch.cuda.device_count() + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + + + +def get_default_train_trans(args): + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + if (not hasattr(args, 'resolution')) or args.resolution == 224: + trans = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + else: + raise ValueError('Not yet implemented.') + return trans + + +def get_default_val_trans(args): + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + if (not hasattr(args, 'resolution')) or args.resolution == 224: + trans = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + else: + trans = transforms.Compose([ + transforms.Resize(args.resolution, interpolation=PIL.Image.BILINEAR), + transforms.CenterCrop(args.resolution), + transforms.ToTensor(), + normalize, + ]) + return trans + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = gpu + log_file = 'quant_{}_exp.txt'.format(args.tag) + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + + # 1. Build and load base model + from repvgg import get_RepVGG_func_by_name + repvgg_build_func = get_RepVGG_func_by_name(args.arch) + base_model = repvgg_build_func(deploy=True) + from tools.insert_bn import directly_insert_bn_without_init + directly_insert_bn_without_init(base_model) + if args.base_weights is not None: + load_checkpoint(base_model, args.base_weights) + + # 2. 
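+    # Build the model to finetune: wrap the re-parameterized base model for quantization-aware
+    # training (QAT) unless --fpfinetune requests a plain full-precision finetune.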
+ if not args.fpfinetune: + from quantization.repvgg_quantized import RepVGGWholeQuant + qat_model = RepVGGWholeQuant(repvgg_model=base_model, quantlayers=args.quantlayers) + qat_model.prepare_quant() + else: + qat_model = base_model + log_msg('===================== not QAT, just full-precision finetune ===========', log_file) + + #=================================================== + # From now on, the code will be very similar to ordinary training + # =================================================== + + is_main = not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0) + + if is_main: + for n, p in qat_model.named_parameters(): + print(n, p.size()) + for n, p in qat_model.named_buffers(): + print(n, p.size()) + log_msg('epochs {}, lr {}, weight_decay {}'.format(args.epochs, args.lr, args.weight_decay), log_file) + # You will see it now has quantization-related parameters (zero-points and scales) + + if not torch.cuda.is_available(): + print('using CPU, this will be slow') + elif args.distributed: + if args.gpu is not None: + torch.cuda.set_device(args.gpu) + qat_model.cuda(args.gpu) + args.batch_size = int(args.batch_size / ngpus_per_node) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + qat_model = torch.nn.parallel.DistributedDataParallel(qat_model, device_ids=[args.gpu]) + else: + qat_model.cuda() + qat_model = torch.nn.parallel.DistributedDataParallel(qat_model) + elif args.gpu is not None: + torch.cuda.set_device(args.gpu) + qat_model = qat_model.cuda(args.gpu) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + qat_model = torch.nn.DataParallel(qat_model).cuda() + + + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + optimizer = sgd_optimizer(qat_model, args.lr, args.momentum, args.weight_decay) + + warmup_epochs = 1 + lr_scheduler = WarmupCosineAnnealingLR(optimizer=optimizer, T_cosine_max=args.epochs * IMAGENET_TRAINSET_SIZE // args.batch_size // ngpus_per_node, + eta_min=0, warmup=warmup_epochs * IMAGENET_TRAINSET_SIZE // args.batch_size // ngpus_per_node) + + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. 
+ loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + qat_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['scheduler']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # todo + train_sampler, train_loader = get_default_ImageNet_train_sampler_loader(args) + val_loader = get_default_ImageNet_val_loader(args) + + if args.evaluate: + validate(val_loader, qat_model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + # train for one epoch + train(train_loader, qat_model, criterion, optimizer, epoch, args, lr_scheduler, is_main=is_main) + + if args.fixobserver and epoch > (3 * args.epochs // 8): + # Freeze quantizer parameters + qat_model.apply(torch.quantization.disable_observer) #TODO testing. May not be useful + log_msg('fix observer after epoch {}'.format(epoch), log_file) + + if args.fixbn and epoch > (2 * args.epochs // 8): #TODO testing. May not be useful + # Freeze batch norm mean and variance estimates + qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats) + log_msg('fix bn after epoch {}'.format(epoch), log_file) + + # evaluate on validation set + if is_main: + acc1 = validate(val_loader, qat_model, criterion, args) + msg = '{}, base{}, quant, epoch {}, QAT acc {}'.format(args.arch, args.base_weights, epoch, acc1) + log_msg(msg, log_file) + + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': qat_model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer' : optimizer.state_dict(), + 'scheduler': lr_scheduler.state_dict(), + }, is_best, + filename = '{}_{}.pth.tar'.format(args.arch, args.tag), + best_filename='{}_{}_best.pth.tar'.format(args.arch, args.tag)) + + +def train(train_loader, model, criterion, optimizer, epoch, args, lr_scheduler, is_main): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5, ], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + images = images.cuda(args.gpu, non_blocking=True) + if torch.cuda.is_available(): + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if lr_scheduler is 
not None: + lr_scheduler.step() + + if is_main and i % args.print_freq == 0: + progress.display(i) + if is_main and i % 1000 == 0 and lr_scheduler is not None: + print('cur lr: ', lr_scheduler.get_lr()[0]) + + + + +def validate(val_loader, model, criterion, args): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + # TODO: this should also be done with the ProgressMeter + print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename, best_filename): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, best_filename) + + + + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/quantization/repvgg_quantized.py b/cv/classification/repvgg/pytorch/quantization/repvgg_quantized.py new file mode 100755 index 0000000000000000000000000000000000000000..9a06a89c745c7824d1e60adc17125da9636c0be7 --- /dev/null +++ b/cv/classification/repvgg/pytorch/quantization/repvgg_quantized.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn +from torch.quantization import QuantStub, DeQuantStub + +class RepVGGWholeQuant(nn.Module): + + def __init__(self, repvgg_model, quantlayers): + super(RepVGGWholeQuant, self).__init__() + assert quantlayers in ['all', 'exclud_first_and_linear', 'exclud_first_and_last'] + self.quantlayers = quantlayers + self.quant = QuantStub() + self.stage0, self.stage1, self.stage2, self.stage3, self.stage4 = repvgg_model.stage0, repvgg_model.stage1, repvgg_model.stage2, repvgg_model.stage3, repvgg_model.stage4 + self.gap, self.linear = repvgg_model.gap, repvgg_model.linear + self.dequant = DeQuantStub() + + + def forward(self, x): + if self.quantlayers == 'all': + x = self.quant(x) + out = self.stage0(x) + else: + out = self.stage0(x) + out = self.quant(out) + out = self.stage1(out) + out = self.stage2(out) + out = self.stage3(out) + if self.quantlayers == 'all': + out = self.stage4(out) + out = self.gap(out).view(out.size(0), -1) + out = self.linear(out) + out = self.dequant(out) + elif self.quantlayers == 'exclud_first_and_linear': + out = self.stage4(out) + out = self.dequant(out) + out = self.gap(out).view(out.size(0), -1) + out = self.linear(out) + else: + out = self.dequant(out) + out = self.stage4(out) + out = self.gap(out).view(out.size(0), -1) + out = self.linear(out) + return out + + # From https://pytorch.org/tutorials/advanced/static_quantization_tutorial.html + def fuse_model(self): + for m in self.modules(): + if type(m) == nn.Sequential and hasattr(m, 'conv'): + # Note that we moved ReLU from "block.nonlinearity" 
into "rbr_reparam" (nn.Sequential). + # This makes it more convenient to fuse operators using off-the-shelf APIs. + torch.quantization.fuse_modules(m, ['conv', 'bn', 'relu'], inplace=True) + + def _get_qconfig(self): + return torch.quantization.get_default_qat_qconfig('fbgemm') + + def prepare_quant(self): + # From https://pytorch.org/tutorials/advanced/static_quantization_tutorial.html + self.fuse_model() + qconfig = self._get_qconfig() + self.qconfig = qconfig + torch.quantization.prepare_qat(self, inplace=True) + + def freeze_quant_bn(self): + self.apply(torch.nn.intrinsic.qat.freeze_bn_stats) \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/repvgg.py b/cv/classification/repvgg/pytorch/repvgg.py new file mode 100755 index 0000000000000000000000000000000000000000..92bd07f462173962326848cb6de01e19ee279c4e --- /dev/null +++ b/cv/classification/repvgg/pytorch/repvgg.py @@ -0,0 +1,303 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +import torch.nn as nn +import numpy as np +import torch +import copy +from se_block import SEBlock +import torch.utils.checkpoint as checkpoint + +def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): + result = nn.Sequential() + result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) + return result + +class RepVGGBlock(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False): + super(RepVGGBlock, self).__init__() + self.deploy = deploy + self.groups = groups + self.in_channels = in_channels + + assert kernel_size == 3 + assert padding == 1 + + padding_11 = padding - kernel_size // 2 + + self.nonlinearity = nn.ReLU() + + if use_se: + # Note that RepVGG-D2se uses SE before nonlinearity. But RepVGGplus models uses SE after nonlinearity. + self.se = SEBlock(out_channels, internal_neurons=out_channels // 16) + else: + self.se = nn.Identity() + + if deploy: + self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) + + else: + self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None + self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups) + self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups) + print('RepVGG Block, identity = ', self.rbr_identity) + + + def forward(self, inputs): + if hasattr(self, 'rbr_reparam'): + return self.nonlinearity(self.se(self.rbr_reparam(inputs))) + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(inputs) + + return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)) + + + # Optional. 
This may improve the accuracy and facilitates quantization in some cases. + # 1. Cancel the original weight decay on rbr_dense.conv.weight and rbr_1x1.conv.weight. + # 2. Use like this. + # loss = criterion(....) + # for every RepVGGBlock blk: + # loss += weight_decay_coefficient * 0.5 * blk.get_cust_L2() + # optimizer.zero_grad() + # loss.backward() + def get_custom_L2(self): + K3 = self.rbr_dense.conv.weight + K1 = self.rbr_1x1.conv.weight + t3 = (self.rbr_dense.bn.weight / ((self.rbr_dense.bn.running_var + self.rbr_dense.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() + t1 = (self.rbr_1x1.bn.weight / ((self.rbr_1x1.bn.running_var + self.rbr_1x1.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() + + l2_loss_circle = (K3 ** 2).sum() - (K3[:, :, 1:2, 1:2] ** 2).sum() # The L2 loss of the "circle" of weights in 3x3 kernel. Use regular L2 on them. + eq_kernel = K3[:, :, 1:2, 1:2] * t3 + K1 * t1 # The equivalent resultant central point of 3x3 kernel. + l2_loss_eq_kernel = (eq_kernel ** 2 / (t3 ** 2 + t1 ** 2)).sum() # Normalize for an L2 coefficient comparable to regular L2. + return l2_loss_eq_kernel + l2_loss_circle + + + +# This func derives the equivalent kernel and bias in a DIFFERENTIABLE way. +# You can get the equivalent kernel and bias at any time and do whatever you want, + # for example, apply some penalties or constraints during training, just like you do to the other models. +# May be useful for quantization or pruning. + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return torch.nn.functional.pad(kernel1x1, [1,1,1,1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, nn.Sequential): + kernel = branch.conv.weight + running_mean = branch.bn.running_mean + running_var = branch.bn.running_var + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn.eps + else: + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, 'id_tensor'): + input_dim = self.in_channels // self.groups + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def switch_to_deploy(self): + if hasattr(self, 'rbr_reparam'): + return + kernel, bias = self.get_equivalent_kernel_bias() + self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, out_channels=self.rbr_dense.conv.out_channels, + kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride, + padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, bias=True) + self.rbr_reparam.weight.data = kernel + self.rbr_reparam.bias.data = bias + self.__delattr__('rbr_dense') + self.__delattr__('rbr_1x1') + if hasattr(self, 'rbr_identity'): + self.__delattr__('rbr_identity') + if 
hasattr(self, 'id_tensor'): + self.__delattr__('id_tensor') + self.deploy = True + + + +class RepVGG(nn.Module): + + def __init__(self, num_blocks, num_classes=1000, width_multiplier=None, override_groups_map=None, deploy=False, use_se=False, use_checkpoint=False): + super(RepVGG, self).__init__() + assert len(width_multiplier) == 4 + self.deploy = deploy + self.override_groups_map = override_groups_map or dict() + assert 0 not in self.override_groups_map + self.use_se = use_se + self.use_checkpoint = use_checkpoint + + self.in_planes = min(64, int(64 * width_multiplier[0])) + self.stage0 = RepVGGBlock(in_channels=3, out_channels=self.in_planes, kernel_size=3, stride=2, padding=1, deploy=self.deploy, use_se=self.use_se) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage(int(64 * width_multiplier[0]), num_blocks[0], stride=2) + self.stage2 = self._make_stage(int(128 * width_multiplier[1]), num_blocks[1], stride=2) + self.stage3 = self._make_stage(int(256 * width_multiplier[2]), num_blocks[2], stride=2) + self.stage4 = self._make_stage(int(512 * width_multiplier[3]), num_blocks[3], stride=2) + self.gap = nn.AdaptiveAvgPool2d(output_size=1) + self.linear = nn.Linear(int(512 * width_multiplier[3]), num_classes) + + def _make_stage(self, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + blocks = [] + for stride in strides: + cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) + blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3, + stride=stride, padding=1, groups=cur_groups, deploy=self.deploy, use_se=self.use_se)) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.ModuleList(blocks) + + def forward(self, x): + out = self.stage0(x) + for stage in (self.stage1, self.stage2, self.stage3, self.stage4): + for block in stage: + if self.use_checkpoint: + out = checkpoint.checkpoint(block, out) + else: + out = block(out) + out = self.gap(out) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26] +g2_map = {l: 2 for l in optional_groupwise_layers} +g4_map = {l: 4 for l in optional_groupwise_layers} + +def create_RepVGG_A0(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[2, 4, 14, 1], num_classes=1000, + width_multiplier=[0.75, 0.75, 0.75, 2.5], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_A1(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[2, 4, 14, 1], num_classes=1000, + width_multiplier=[1, 1, 1, 2.5], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_A2(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[2, 4, 14, 1], num_classes=1000, + width_multiplier=[1.5, 1.5, 1.5, 2.75], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B0(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[1, 1, 1, 2.5], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B1(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[2, 2, 2, 4], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B1g2(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[2, 2, 2, 4], override_groups_map=g2_map, 
deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B1g4(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[2, 2, 2, 4], override_groups_map=g4_map, deploy=deploy, use_checkpoint=use_checkpoint) + + +def create_RepVGG_B2(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B2g2(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g2_map, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B2g4(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g4_map, deploy=deploy, use_checkpoint=use_checkpoint) + + +def create_RepVGG_B3(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[3, 3, 3, 5], override_groups_map=None, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B3g2(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[3, 3, 3, 5], override_groups_map=g2_map, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_B3g4(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[4, 6, 16, 1], num_classes=1000, + width_multiplier=[3, 3, 3, 5], override_groups_map=g4_map, deploy=deploy, use_checkpoint=use_checkpoint) + +def create_RepVGG_D2se(deploy=False, use_checkpoint=False): + return RepVGG(num_blocks=[8, 14, 24, 1], num_classes=1000, + width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=None, deploy=deploy, use_se=True, use_checkpoint=use_checkpoint) + + +func_dict = { +'RepVGG-A0': create_RepVGG_A0, +'RepVGG-A1': create_RepVGG_A1, +'RepVGG-A2': create_RepVGG_A2, +'RepVGG-B0': create_RepVGG_B0, +'RepVGG-B1': create_RepVGG_B1, +'RepVGG-B1g2': create_RepVGG_B1g2, +'RepVGG-B1g4': create_RepVGG_B1g4, +'RepVGG-B2': create_RepVGG_B2, +'RepVGG-B2g2': create_RepVGG_B2g2, +'RepVGG-B2g4': create_RepVGG_B2g4, +'RepVGG-B3': create_RepVGG_B3, +'RepVGG-B3g2': create_RepVGG_B3g2, +'RepVGG-B3g4': create_RepVGG_B3g4, +'RepVGG-D2se': create_RepVGG_D2se, # Updated at April 25, 2021. This is not reported in the CVPR paper. 
+} +def get_RepVGG_func_by_name(name): + return func_dict[name] + + + +# Use this for converting a RepVGG model or a bigger model with RepVGG as its component +# Use like this +# model = create_RepVGG_A0(deploy=False) +# train model or load weights +# repvgg_model_convert(model, save_path='repvgg_deploy.pth') +# If you want to preserve the original model, call with do_copy=True + +# ====================== for using RepVGG as the backbone of a bigger model, e.g., PSPNet, the pseudo code will be like +# train_backbone = create_RepVGG_B2(deploy=False) +# train_backbone.load_state_dict(torch.load('RepVGG-B2-train.pth')) +# train_pspnet = build_pspnet(backbone=train_backbone) +# segmentation_train(train_pspnet) +# deploy_pspnet = repvgg_model_convert(train_pspnet) +# segmentation_test(deploy_pspnet) +# ===================== example_pspnet.py shows an example + +def repvgg_model_convert(model:torch.nn.Module, save_path=None, do_copy=True): + if do_copy: + model = copy.deepcopy(model) + for module in model.modules(): + if hasattr(module, 'switch_to_deploy'): + module.switch_to_deploy() + if save_path is not None: + torch.save(model.state_dict(), save_path) + return model diff --git a/cv/classification/repvgg/pytorch/repvggplus.py b/cv/classification/repvgg/pytorch/repvggplus.py new file mode 100755 index 0000000000000000000000000000000000000000..9f365871befd228d6e7ed5d4c0928a597785af4a --- /dev/null +++ b/cv/classification/repvgg/pytorch/repvggplus.py @@ -0,0 +1,293 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- + +import torch.nn as nn +import torch.utils.checkpoint as checkpoint +from se_block import SEBlock +import torch +import numpy as np + +def conv_bn_relu(in_channels, out_channels, kernel_size, stride, padding, groups=1): + result = nn.Sequential() + result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) + result.add_module('relu', nn.ReLU()) + return result + +def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): + result = nn.Sequential() + result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) + return result + +class RepVGGplusBlock(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', + deploy=False, + use_post_se=False): + super(RepVGGplusBlock, self).__init__() + self.deploy = deploy + self.groups = groups + self.in_channels = in_channels + + assert kernel_size == 3 + assert padding == 1 + + self.nonlinearity = nn.ReLU() + + if use_post_se: + self.post_se = SEBlock(out_channels, internal_neurons=out_channels // 4) + else: + self.post_se = nn.Identity() + + if deploy: + self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, + padding=padding, dilation=dilation, groups=groups, 
bias=True, padding_mode=padding_mode) + else: + if out_channels == in_channels and stride == 1: + self.rbr_identity = nn.BatchNorm2d(num_features=out_channels) + else: + self.rbr_identity = None + self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups) + padding_11 = padding - kernel_size // 2 + self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups) + + def forward(self, x): + if self.deploy: + return self.post_se(self.nonlinearity(self.rbr_reparam(x))) + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(x) + out = self.rbr_dense(x) + self.rbr_1x1(x) + id_out + out = self.post_se(self.nonlinearity(out)) + return out + + + # This func derives the equivalent kernel and bias in a DIFFERENTIABLE way. + # You can get the equivalent kernel and bias at any time and do whatever you want, + # for example, apply some penalties or constraints during training, just like you do to the other models. + # May be useful for quantization or pruning. + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, nn.Sequential): + # For the 1x1 or 3x3 branch + kernel, running_mean, running_var, gamma, beta, eps = branch.conv.weight, branch.bn.running_mean, branch.bn.running_var, branch.bn.weight, branch.bn.bias, branch.bn.eps + else: + # For the identity branch + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, 'id_tensor'): + # Construct and store the identity kernel in case it is used multiple times + input_dim = self.in_channels // self.groups + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel, running_mean, running_var, gamma, beta, eps = self.id_tensor, branch.running_mean, branch.running_var, branch.weight, branch.bias, branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def switch_to_deploy(self): + if hasattr(self, 'rbr_reparam'): + return + kernel, bias = self.get_equivalent_kernel_bias() + self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, + out_channels=self.rbr_dense.conv.out_channels, + kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride, + padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, + groups=self.rbr_dense.conv.groups, bias=True) + self.rbr_reparam.weight.data = kernel + self.rbr_reparam.bias.data = bias + self.__delattr__('rbr_dense') + self.__delattr__('rbr_1x1') + if hasattr(self, 'rbr_identity'): + self.__delattr__('rbr_identity') + if hasattr(self, 'id_tensor'): + self.__delattr__('id_tensor') + self.deploy = True + + + +class RepVGGplusStage(nn.Module): + + def __init__(self, in_planes, planes, num_blocks, 
stride, use_checkpoint, use_post_se=False, deploy=False): + super().__init__() + strides = [stride] + [1] * (num_blocks - 1) + blocks = [] + self.in_planes = in_planes + for stride in strides: + cur_groups = 1 + blocks.append(RepVGGplusBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3, + stride=stride, padding=1, groups=cur_groups, deploy=deploy, use_post_se=use_post_se)) + self.in_planes = planes + self.blocks = nn.ModuleList(blocks) + self.use_checkpoint = use_checkpoint + + def forward(self, x): + for block in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(block, x) + else: + x = block(x) + return x + + +class RepVGGplus(nn.Module): + """RepVGGplus + An official improved version of RepVGG (RepVGG: Making VGG-style ConvNets Great Again) `_. + + Args: + num_blocks (tuple[int]): Depths of each stage. + num_classes (tuple[int]): Num of classes. + width_multiplier (tuple[float]): The width of the four stages + will be (64 * width_multiplier[0], 128 * width_multiplier[1], 256 * width_multiplier[2], 512 * width_multiplier[3]). + deploy (bool, optional): If True, the model will have the inference-time structure. + Default: False. + use_post_se (bool, optional): If True, the model will have Squeeze-and-Excitation blocks following the conv-ReLU units. + Default: False. + use_checkpoint (bool, optional): If True, the model will use torch.utils.checkpoint to save the GPU memory during training with acceptable slowdown. + Do not use it if you have sufficient GPU memory. + Default: False. + """ + def __init__(self, + num_blocks, + num_classes, + width_multiplier, + deploy=False, + use_post_se=False, + use_checkpoint=False): + super().__init__() + + self.deploy = deploy + self.num_classes = num_classes + + in_channels = min(64, int(64 * width_multiplier[0])) + stage_channels = [int(64 * width_multiplier[0]), int(128 * width_multiplier[1]), int(256 * width_multiplier[2]), int(512 * width_multiplier[3])] + self.stage0 = RepVGGplusBlock(in_channels=3, out_channels=in_channels, kernel_size=3, stride=2, padding=1, deploy=self.deploy, use_post_se=use_post_se) + self.stage1 = RepVGGplusStage(in_channels, stage_channels[0], num_blocks[0], stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.stage2 = RepVGGplusStage(stage_channels[0], stage_channels[1], num_blocks[1], stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + # split stage3 so that we can insert an auxiliary classifier + self.stage3_first = RepVGGplusStage(stage_channels[1], stage_channels[2], num_blocks[2] // 2, stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.stage3_second = RepVGGplusStage(stage_channels[2], stage_channels[2], num_blocks[2] - num_blocks[2] // 2, stride=1, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.stage4 = RepVGGplusStage(stage_channels[2], stage_channels[3], num_blocks[3], stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.gap = nn.AdaptiveAvgPool2d(output_size=1) + self.flatten = nn.Flatten() + self.linear = nn.Linear(int(512 * width_multiplier[3]), num_classes) + # aux classifiers + if not self.deploy: + self.stage1_aux = self._build_aux_for_stage(self.stage1) + self.stage2_aux = self._build_aux_for_stage(self.stage2) + self.stage3_first_aux = self._build_aux_for_stage(self.stage3_first) + + def _build_aux_for_stage(self, stage): + stage_out_channels = list(stage.blocks.children())[-1].rbr_dense.conv.out_channels + 
downsample = conv_bn_relu(in_channels=stage_out_channels, out_channels=stage_out_channels, kernel_size=3, stride=2, padding=1) + fc = nn.Linear(stage_out_channels, self.num_classes, bias=True) + return nn.Sequential(downsample, nn.AdaptiveAvgPool2d(1), nn.Flatten(), fc) + + def forward(self, x): + out = self.stage0(x) + out = self.stage1(out) + stage1_aux = self.stage1_aux(out) + out = self.stage2(out) + stage2_aux = self.stage2_aux(out) + out = self.stage3_first(out) + stage3_first_aux = self.stage3_first_aux(out) + out = self.stage3_second(out) + out = self.stage4(out) + y = self.gap(out) + y = self.flatten(y) + y = self.linear(y) + return { + 'main': y, + 'stage1_aux': stage1_aux, + 'stage2_aux': stage2_aux, + 'stage3_first_aux': stage3_first_aux, + } + + def switch_repvggplus_to_deploy(self): + for m in self.modules(): + if hasattr(m, 'switch_to_deploy'): + m.switch_to_deploy() + if hasattr(self, 'stage1_aux'): + self.__delattr__('stage1_aux') + if hasattr(self, 'stage2_aux'): + self.__delattr__('stage2_aux') + if hasattr(self, 'stage3_first_aux'): + self.__delattr__('stage3_first_aux') + self.deploy = True + + +# torch.utils.checkpoint can reduce the memory consumption during training with a minor slowdown. Don't use it if you have sufficient GPU memory. +# Not sure whether it slows down inference +# pse for "post SE", which means using SE block after ReLU +def create_RepVGGplus_L2pse(deploy=False, use_checkpoint=False): + return RepVGGplus(num_blocks=[8, 14, 24, 1], num_classes=1000, + width_multiplier=[2.5, 2.5, 2.5, 5], deploy=deploy, use_post_se=True, + use_checkpoint=use_checkpoint) + +# Will release more +repvggplus_func_dict = { + 'RepVGGplus-L2pse': create_RepVGGplus_L2pse, +} + +def create_RepVGGplus_by_name(name, deploy=False, use_checkpoint=False): + if 'plus' in name: + return repvggplus_func_dict[name](deploy=deploy, use_checkpoint=use_checkpoint) + else: + print('=================== Building the vanila RepVGG ===================') + from repvgg import get_RepVGG_func_by_name + return get_RepVGG_func_by_name(name)(deploy=deploy, use_checkpoint=use_checkpoint) + + + + + + +# Use this for converting a RepVGG model or a bigger model with RepVGG as its component +# Use like this +# model = create_RepVGG_A0(deploy=False) +# train model or load weights +# repvgg_model_convert(model, save_path='repvgg_deploy.pth') +# If you want to preserve the original model, call with do_copy=True + +# ====================== for using RepVGG as the backbone of a bigger model, e.g., PSPNet, the pseudo code will be like +# train_backbone = create_RepVGG_B2(deploy=False) +# train_backbone.load_state_dict(torch.load('RepVGG-B2-train.pth')) +# train_pspnet = build_pspnet(backbone=train_backbone) +# segmentation_train(train_pspnet) +# deploy_pspnet = repvgg_model_convert(train_pspnet) +# segmentation_test(deploy_pspnet) +# ===================== example_pspnet.py shows an example + +def repvgg_model_convert(model:torch.nn.Module, save_path=None, do_copy=True): + import copy + if do_copy: + model = copy.deepcopy(model) + for module in model.modules(): + if hasattr(module, 'switch_to_deploy'): + module.switch_to_deploy() + if save_path is not None: + torch.save(model.state_dict(), save_path) + return model diff --git a/cv/classification/repvgg/pytorch/repvggplus_custom_L2.py b/cv/classification/repvgg/pytorch/repvggplus_custom_L2.py new file mode 100755 index 0000000000000000000000000000000000000000..dd8a15bec7c309bb83fdb702977c2382d2a6f01f --- /dev/null +++ 
b/cv/classification/repvgg/pytorch/repvggplus_custom_L2.py @@ -0,0 +1,268 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +import torch.nn as nn +import torch.utils.checkpoint as checkpoint +from se_block import SEBlock +import torch +import numpy as np + + +def conv_bn_relu(in_channels, out_channels, kernel_size, stride, padding, groups=1): + result = nn.Sequential() + result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) + result.add_module('relu', nn.ReLU()) + return result + +def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): + result = nn.Sequential() + result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) + result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) + return result + +class RepVGGplusBlock(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', + deploy=False, + use_post_se=False): + super(RepVGGplusBlock, self).__init__() + self.deploy = deploy + self.groups = groups + self.in_channels = in_channels + + assert kernel_size == 3 + assert padding == 1 + + self.nonlinearity = nn.ReLU() + + if use_post_se: + self.post_se = SEBlock(out_channels, internal_neurons=out_channels // 4) + else: + self.post_se = nn.Identity() + + if deploy: + self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) + else: + if out_channels == in_channels and stride == 1: + self.rbr_identity = nn.BatchNorm2d(num_features=out_channels) + else: + self.rbr_identity = None + self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups) + padding_11 = padding - kernel_size // 2 + self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups) + + def forward(self, x, L2): + + if self.deploy: + return self.post_se(self.nonlinearity(self.rbr_reparam(x))), None + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(x) + out = self.rbr_dense(x) + self.rbr_1x1(x) + id_out + out = self.post_se(self.nonlinearity(out)) + + # Custom L2 + t3 = (self.rbr_dense.bn.weight / ((self.rbr_dense.bn.running_var + self.rbr_dense.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() + t1 = (self.rbr_1x1.bn.weight / ((self.rbr_1x1.bn.running_var + self.rbr_1x1.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() + K3 = self.rbr_dense.conv.weight + K1 = self.rbr_1x1.conv.weight + + l2_loss_circle = (K3 ** 2).sum() - (K3[:, :, 1:2, 1:2] ** 2).sum() + eq_kernel = K3[:,:,1:2,1:2] * t3 + K1 * t1 + l2_loss_eq_kernel = (eq_kernel ** 2 / (t3 ** 2 + t1 ** 2)).sum() + + return out, L2 + l2_loss_circle + l2_loss_eq_kernel + 
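+    # The per-block term returned above accumulates into the model-level 'L2' output of
+    # RepVGGplus.forward() below. A training loop would typically add it to the task loss in
+    # place of regular weight decay on these kernels, roughly (coefficient name illustrative):
+    #   out = model(images)
+    #   loss = criterion(out['main'], target) + weight_decay_coefficient * 0.5 * out['L2']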
+ + # This func derives the equivalent kernel and bias in a DIFFERENTIABLE way. + # You can get the equivalent kernel and bias at any time and do whatever you want, + # for example, apply some penalties or constraints during training, just like you do to the other models. + # May be useful for quantization or pruning. + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, nn.Sequential): + # For the 1x1 or 3x3 branch + kernel, running_mean, running_var, gamma, beta, eps = branch.conv.weight, branch.bn.running_mean, branch.bn.running_var, branch.bn.weight, branch.bn.bias, branch.bn.eps + else: + # For the identity branch + assert isinstance(branch, nn.BatchNorm2d) + if not hasattr(self, 'id_tensor'): + # Construct and store the identity kernel in case it is used multiple times + input_dim = self.in_channels // self.groups + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel, running_mean, running_var, gamma, beta, eps = self.id_tensor, branch.running_mean, branch.running_var, branch.weight, branch.bias, branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def switch_to_deploy(self): + if hasattr(self, 'rbr_reparam'): + return + kernel, bias = self.get_equivalent_kernel_bias() + self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, + out_channels=self.rbr_dense.conv.out_channels, + kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride, + padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, + groups=self.rbr_dense.conv.groups, bias=True) + self.rbr_reparam.weight.data = kernel + self.rbr_reparam.bias.data = bias + self.__delattr__('rbr_dense') + self.__delattr__('rbr_1x1') + if hasattr(self, 'rbr_identity'): + self.__delattr__('rbr_identity') + if hasattr(self, 'id_tensor'): + self.__delattr__('id_tensor') + self.deploy = True + + + +class RepVGGplusStage(nn.Module): + + def __init__(self, in_planes, planes, num_blocks, stride, use_checkpoint, use_post_se=False, deploy=False): + super().__init__() + strides = [stride] + [1] * (num_blocks - 1) + blocks = [] + self.in_planes = in_planes + for stride in strides: + cur_groups = 1 + blocks.append(RepVGGplusBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3, + stride=stride, padding=1, groups=cur_groups, deploy=deploy, use_post_se=use_post_se)) + self.in_planes = planes + self.blocks = nn.ModuleList(blocks) + self.use_checkpoint = use_checkpoint + + def forward(self, x, L2): + for block in self.blocks: + if self.use_checkpoint: + x, L2 = checkpoint.checkpoint(block, x, L2) + else: + x, L2 = block(x, L2) + return x, L2 + + +class RepVGGplus(nn.Module): + + def __init__(self, num_blocks, num_classes, + width_multiplier, override_groups_map=None, + deploy=False, + use_post_se=False, + 
use_checkpoint=False): + super().__init__() + + self.deploy = deploy + self.override_groups_map = override_groups_map or dict() + self.use_post_se = use_post_se + self.use_checkpoint = use_checkpoint + self.num_classes = num_classes + self.nonlinear = 'relu' + + self.in_planes = min(64, int(64 * width_multiplier[0])) + self.stage0 = RepVGGplusBlock(in_channels=3, out_channels=self.in_planes, kernel_size=3, stride=2, padding=1, deploy=self.deploy, use_post_se=use_post_se) + self.cur_layer_idx = 1 + self.stage1 = RepVGGplusStage(self.in_planes, int(64 * width_multiplier[0]), num_blocks[0], stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.stage2 = RepVGGplusStage(int(64 * width_multiplier[0]), int(128 * width_multiplier[1]), num_blocks[1], stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + # split stage3 so that we can insert an auxiliary classifier + self.stage3_first = RepVGGplusStage(int(128 * width_multiplier[1]), int(256 * width_multiplier[2]), num_blocks[2] // 2, stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.stage3_second = RepVGGplusStage(int(256 * width_multiplier[2]), int(256 * width_multiplier[2]), num_blocks[2] // 2, stride=1, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.stage4 = RepVGGplusStage(int(256 * width_multiplier[2]), int(512 * width_multiplier[3]), num_blocks[3], stride=2, use_checkpoint=use_checkpoint, use_post_se=use_post_se, deploy=deploy) + self.gap = nn.AdaptiveAvgPool2d(output_size=1) + self.linear = nn.Linear(int(512 * width_multiplier[3]), num_classes) + # aux classifiers + if not self.deploy: + self.stage1_aux = self._build_aux_for_stage(self.stage1) + self.stage2_aux = self._build_aux_for_stage(self.stage2) + self.stage3_first_aux = self._build_aux_for_stage(self.stage3_first) + + def _build_aux_for_stage(self, stage): + stage_out_channels = list(stage.blocks.children())[-1].rbr_dense.conv.out_channels + downsample = conv_bn_relu(in_channels=stage_out_channels, out_channels=stage_out_channels, kernel_size=3, stride=2, padding=1) + fc = nn.Linear(stage_out_channels, self.num_classes, bias=True) + return nn.Sequential(downsample, nn.AdaptiveAvgPool2d(1), nn.Flatten(), fc) + + def forward(self, x): + if self.deploy: + out, _ = self.stage0(x, L2=None) + out, _ = self.stage1(out, L2=None) + out, _ = self.stage2(out, L2=None) + out, _ = self.stage3_first(out, L2=None) + out, _ = self.stage3_second(out, L2=None) + out, _ = self.stage4(out, L2=None) + y = self.gap(out) + y = y.view(y.size(0), -1) + y = self.linear(y) + return y + + else: + out, L2 = self.stage0(x, L2=0.0) + out, L2 = self.stage1(out, L2=L2) + stage1_aux = self.stage1_aux(out) + out, L2 = self.stage2(out, L2=L2) + stage2_aux = self.stage2_aux(out) + out, L2 = self.stage3_first(out, L2=L2) + stage3_first_aux = self.stage3_first_aux(out) + out, L2 = self.stage3_second(out, L2=L2) + out, L2 = self.stage4(out, L2=L2) + y = self.gap(out) + y = y.view(y.size(0), -1) + y = self.linear(y) + return { + 'main': y, + 'stage1_aux': stage1_aux, + 'stage2_aux': stage2_aux, + 'stage3_first_aux': stage3_first_aux, + 'L2': L2 + } + + def switch_repvggplus_to_deploy(self): + for m in self.modules(): + if hasattr(m, 'switch_to_deploy'): + m.switch_to_deploy() + if hasattr(m, 'use_checkpoint'): + m.use_checkpoint = False # Disable checkpoint. I am not sure whether using checkpoint slows down inference. 
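+        # The auxiliary classifier heads are removed below because the deploy-time forward()
+        # above returns only the main output and no longer uses them.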
+ if hasattr(self, 'stage1_aux'): + self.__delattr__('stage1_aux') + if hasattr(self, 'stage2_aux'): + self.__delattr__('stage2_aux') + if hasattr(self, 'stage3_first_aux'): + self.__delattr__('stage3_first_aux') + self.deploy = True + + +# torch.utils.checkpoint can reduce the memory consumption during training with a minor slowdown. Don't use it if you have sufficient GPU memory. +# Not sure whether it slows down inference +# pse for "post SE", which means using SE block after ReLU +def create_RepVGGplus_L2pse(deploy=False, use_checkpoint=False): + return RepVGGplus(num_blocks=[8, 14, 24, 1], num_classes=1000, + width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=None, deploy=deploy, use_post_se=True, + use_checkpoint=use_checkpoint) + +repvggplus_func_dict = { +'RepVGGplus-L2pse': create_RepVGGplus_L2pse, +} +def get_RepVGGplus_func_by_name(name): + return repvggplus_func_dict[name] \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/se_block.py b/cv/classification/repvgg/pytorch/se_block.py new file mode 100755 index 0000000000000000000000000000000000000000..e23911e0cae8826711cff19ea16028030ce0e73a --- /dev/null +++ b/cv/classification/repvgg/pytorch/se_block.py @@ -0,0 +1,22 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +# https://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html + +class SEBlock(nn.Module): + + def __init__(self, input_channels, internal_neurons): + super(SEBlock, self).__init__() + self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) + self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) + self.input_channels = input_channels + + def forward(self, inputs): + x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) + x = self.down(x) + x = F.relu(x) + x = self.up(x) + x = torch.sigmoid(x) + x = x.view(-1, self.input_channels, 1, 1) + return inputs * x \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/tools/convert.py b/cv/classification/repvgg/pytorch/tools/convert.py new file mode 100755 index 0000000000000000000000000000000000000000..b239ad0fd806ecabfa42124e753d16dfd45c0fc9 --- /dev/null +++ b/cv/classification/repvgg/pytorch/tools/convert.py @@ -0,0 +1,46 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +import argparse +import os +import torch +import torch.nn.parallel +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +from repvggplus import create_RepVGGplus_by_name, repvgg_model_convert + +parser = argparse.ArgumentParser(description='RepVGG(plus) Conversion') +parser.add_argument('load', metavar='LOAD', help='path to the weights file') +parser.add_argument('save', metavar='SAVE', help='path to the weights file') +parser.add_argument('-a', '--arch', metavar='ARCH', default='RepVGG-A0') + +def convert(): + args = parser.parse_args() + + train_model = create_RepVGGplus_by_name(args.arch, deploy=False) + + if os.path.isfile(args.load): + print("=> loading checkpoint '{}'".format(args.load)) + checkpoint = torch.load(args.load) + 
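+        # Checkpoints may store the weights directly or wrap them under 'state_dict' or 'model';
+        # keys saved from (Distributed)DataParallel carry a 'module.' prefix, stripped below.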
if 'state_dict' in checkpoint: + checkpoint = checkpoint['state_dict'] + elif 'model' in checkpoint: + checkpoint = checkpoint['model'] + ckpt = {k.replace('module.', ''): v for k, v in checkpoint.items()} # strip the names + print(ckpt.keys()) + train_model.load_state_dict(ckpt) + else: + print("=> no checkpoint found at '{}'".format(args.load)) + + if 'plus' in args.arch: + train_model.switch_repvggplus_to_deploy() + torch.save(train_model.state_dict(), args.save) + else: + repvgg_model_convert(train_model, save_path=args.save) + + +if __name__ == '__main__': + convert() \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/tools/insert_bn.py b/cv/classification/repvgg/pytorch/tools/insert_bn.py new file mode 100755 index 0000000000000000000000000000000000000000..5a66f6b4b2e576d86aca48cb1a88076e7dd48c47 --- /dev/null +++ b/cv/classification/repvgg/pytorch/tools/insert_bn.py @@ -0,0 +1,217 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +import argparse +import os +import time +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +from utils import accuracy, ProgressMeter, AverageMeter +from repvgg import get_RepVGG_func_by_name, RepVGGBlock +from utils import load_checkpoint, get_ImageNet_train_dataset, get_default_train_trans + +# Insert BN into an inference-time RepVGG (e.g., for quantization-aware training). +# Get the mean and std on every conv3x3 (before the bias-adding) on the train set. Then use such data to initialize BN layers and insert them after conv3x3. +# May, 07, 2021 + +parser = argparse.ArgumentParser(description='Get the mean and std on every conv3x3 (before the bias-adding) on the train set. 
Then use such data to initialize BN layers and insert them after conv3x3.') +parser.add_argument('data', metavar='DIR', help='path to dataset') +parser.add_argument('weights', metavar='WEIGHTS', help='path to the weights file') +parser.add_argument('save', metavar='SAVE', help='path to save the model with BN') +parser.add_argument('-a', '--arch', metavar='ARCH', default='RepVGG-A0') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('-b', '--batch-size', default=100, type=int, + metavar='N', + help='mini-batch size (default: 100) for test') +parser.add_argument('-n', '--num-batches', default=500, type=int, + metavar='N', + help='number of batches (default: 500) to record the mean and std on the train set') +parser.add_argument('-r', '--resolution', default=224, type=int, + metavar='R', + help='resolution (default: 224) for test') + + +def update_running_mean_var(x, running_mean, running_var, momentum=0.9, is_first_batch=False): + mean = x.mean(dim=(0, 2, 3), keepdim=True) + var = ((x - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True) + if is_first_batch: + running_mean = mean + running_var = var + else: + running_mean = momentum * running_mean + (1.0 - momentum) * mean + running_var = momentum * running_var + (1.0 - momentum) * var + return running_mean, running_var + +# Record the mean and std like a BN layer but do no normalization +class BNStatistics(nn.Module): + def __init__(self, num_features): + super(BNStatistics, self).__init__() + shape = (1, num_features, 1, 1) + self.register_buffer('running_mean', torch.zeros(shape)) + self.register_buffer('running_var', torch.zeros(shape)) + self.is_first_batch = True + + def forward(self, x): + if self.running_mean.device != x.device: + self.running_mean = self.running_mean.to(x.device) + self.running_var = self.running_var.to(x.device) + self.running_mean, self.running_var = update_running_mean_var(x, self.running_mean, self.running_var, momentum=0.9, is_first_batch=self.is_first_batch) + self.is_first_batch = False + return x + +# This is designed to insert BNStat layer between Conv2d(without bias) and its bias +class BiasAdd(nn.Module): + def __init__(self, num_features): + super(BiasAdd, self).__init__() + self.bias = torch.nn.Parameter(torch.Tensor(num_features)) + def forward(self, x): + return x + self.bias.view(1, -1, 1, 1) + +def switch_repvggblock_to_bnstat(model): + for n, block in model.named_modules(): + if isinstance(block, RepVGGBlock): + print('switch to BN Statistics: ', n) + assert hasattr(block, 'rbr_reparam') + stat = nn.Sequential() + stat.add_module('conv', nn.Conv2d(block.rbr_reparam.in_channels, block.rbr_reparam.out_channels, + block.rbr_reparam.kernel_size, + block.rbr_reparam.stride, block.rbr_reparam.padding, + block.rbr_reparam.dilation, + block.rbr_reparam.groups, bias=False)) # Note bias=False + stat.add_module('bnstat', BNStatistics(block.rbr_reparam.out_channels)) + stat.add_module('biasadd', BiasAdd(block.rbr_reparam.out_channels)) # Bias is here + stat.conv.weight.data = block.rbr_reparam.weight.data + stat.biasadd.bias.data = block.rbr_reparam.bias.data + block.__delattr__('rbr_reparam') + block.rbr_reparam = stat + +def switch_bnstat_to_convbn(model): + for n, block in model.named_modules(): + if isinstance(block, RepVGGBlock): + assert hasattr(block, 'rbr_reparam') + assert hasattr(block.rbr_reparam, 'bnstat') + print('switch to ConvBN: ', n) + conv = nn.Conv2d(block.rbr_reparam.conv.in_channels, 
block.rbr_reparam.conv.out_channels, + block.rbr_reparam.conv.kernel_size, + block.rbr_reparam.conv.stride, block.rbr_reparam.conv.padding, + block.rbr_reparam.conv.dilation, + block.rbr_reparam.conv.groups, bias=False) + bn = nn.BatchNorm2d(block.rbr_reparam.conv.out_channels) + bn.running_mean = block.rbr_reparam.bnstat.running_mean.squeeze() # Initialize the mean and var of BN with the statistics + bn.running_var = block.rbr_reparam.bnstat.running_var.squeeze() + std = (bn.running_var + bn.eps).sqrt() + conv.weight.data = block.rbr_reparam.conv.weight.data + bn.weight.data = std + bn.bias.data = block.rbr_reparam.biasadd.bias.data + bn.running_mean # Initialize gamma = std and beta = bias + mean + + convbn = nn.Sequential() + convbn.add_module('conv', conv) + convbn.add_module('bn', bn) + block.__delattr__('rbr_reparam') + block.rbr_reparam = convbn + + +# Insert a BN after conv3x3 (rbr_reparam). With no reasonable initialization of BN, the model may break down. +# So you have to load the weights obtained through the BN statistics (please see the function "insert_bn" in this file). +def directly_insert_bn_without_init(model): + for n, block in model.named_modules(): + if isinstance(block, RepVGGBlock): + print('directly insert a BN with no initialization: ', n) + assert hasattr(block, 'rbr_reparam') + convbn = nn.Sequential() + convbn.add_module('conv', nn.Conv2d(block.rbr_reparam.in_channels, block.rbr_reparam.out_channels, + block.rbr_reparam.kernel_size, + block.rbr_reparam.stride, block.rbr_reparam.padding, + block.rbr_reparam.dilation, + block.rbr_reparam.groups, bias=False)) # Note bias=False + convbn.add_module('bn', nn.BatchNorm2d(block.rbr_reparam.out_channels)) + # ==================== + convbn.add_module('relu', nn.ReLU()) + # TODO we moved ReLU from "block.nonlinearity" into "rbr_reparam" (nn.Sequential). This makes it more convenient to fuse operators (see RepVGGWholeQuant.fuse_model) using off-the-shelf APIs. 
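+ # Added note: with ReLU now the last op inside rbr_reparam, the block-level activation is replaced with nn.Identity() below so ReLU is not applied twice.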
+ block.nonlinearity = nn.Identity() + #========================== + block.__delattr__('rbr_reparam') + block.rbr_reparam = convbn + + +def insert_bn(): + args = parser.parse_args() + + repvgg_build_func = get_RepVGG_func_by_name(args.arch) + + model = repvgg_build_func(deploy=True).cuda() + + load_checkpoint(model, args.weights) + + switch_repvggblock_to_bnstat(model) + + cudnn.benchmark = True + + trans = get_default_train_trans(args) + print('data aug: ', trans) + + train_dataset = get_ImageNet_train_dataset(args, trans) + + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + + progress = ProgressMeter( + min(len(train_loader), args.num_batches), + [batch_time, losses, top1, top5], + prefix='BN stat: ') + + criterion = nn.CrossEntropyLoss().cuda() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(train_loader): + if i >= args.num_batches: + break + images = images.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % 10 == 0: + progress.display(i) + + + print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + switch_bnstat_to_convbn(model) + + torch.save(model.state_dict(), args.save) + + + + +if __name__ == '__main__': + insert_bn() \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/tools/verify.py b/cv/classification/repvgg/pytorch/tools/verify.py new file mode 100755 index 0000000000000000000000000000000000000000..d9f77fda2d17c1fe943b57dac80470f3b2c98192 --- /dev/null +++ b/cv/classification/repvgg/pytorch/tools/verify.py @@ -0,0 +1,30 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# -------------------------------------------------------- +import torch +import torch.nn as nn +from repvgg import create_RepVGG_B1 + +if __name__ == '__main__': + x = torch.randn(1, 3, 224, 224) + model = create_RepVGG_B1(deploy=False) + model.eval() + + for module in model.modules(): + if isinstance(module, torch.nn.BatchNorm2d): + nn.init.uniform_(module.running_mean, 0, 0.1) + nn.init.uniform_(module.running_var, 0, 0.1) + nn.init.uniform_(module.weight, 0, 0.1) + nn.init.uniform_(module.bias, 0, 0.1) + + train_y = model(x) + for module in model.modules(): + if hasattr(module, 'switch_to_deploy'): + module.switch_to_deploy() + + print(model) + deploy_y = model(x) + print('========================== The diff is') + print(((train_y - deploy_y) ** 2).sum()) diff --git a/cv/classification/repvgg/pytorch/train/config.py b/cv/classification/repvgg/pytorch/train/config.py new file mode 100755 index 
0000000000000000000000000000000000000000..e5fd7772801d486a8b60831cf4e2e051f30f062b --- /dev/null +++ b/cv/classification/repvgg/pytorch/train/config.py @@ -0,0 +1,217 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- + +import os +import yaml +from yacs.config import CfgNode as CN + +_C = CN() + +# Base config files +_C.BASE = [''] + +# ----------------------------------------------------------------------------- +# Data settings +# ----------------------------------------------------------------------------- +_C.DATA = CN() +# Batch size for a single GPU, could be overwritten by command line argument +_C.DATA.BATCH_SIZE = 128 +# Path to dataset, could be overwritten by command line argument +_C.DATA.DATA_PATH = '/your/path/to/dataset' + +# Dataset name +_C.DATA.DATASET = 'imagenet' +# Input image size +_C.DATA.IMG_SIZE = 224 +_C.DATA.TEST_SIZE = None +_C.DATA.TEST_BATCH_SIZE = None +# Interpolation to resize image (random, bilinear, bicubic) +_C.DATA.INTERPOLATION = 'bilinear' +# Use zipped dataset instead of folder dataset +# could be overwritten by command line argument +_C.DATA.ZIP_MODE = False +# Cache Data in Memory, could be overwritten by command line argument +_C.DATA.CACHE_MODE = 'part' +# Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU. +_C.DATA.PIN_MEMORY = True +# Number of data loading threads +_C.DATA.NUM_WORKERS = 8 + +# ----------------------------------------------------------------------------- +# Model settings +# ----------------------------------------------------------------------------- +_C.MODEL = CN() +# Model type +_C.MODEL.ARCH = 'RepVGG-L2pse' +# Checkpoint to resume, could be overwritten by command line argument +_C.MODEL.RESUME = '' +# Number of classes, overwritten in data preparation +_C.MODEL.NUM_CLASSES = 1000 +# Label Smoothing +_C.MODEL.LABEL_SMOOTHING = 0.1 + +# ----------------------------------------------------------------------------- +# Training settings +# ----------------------------------------------------------------------------- +_C.TRAIN = CN() +_C.TRAIN.START_EPOCH = 0 +_C.TRAIN.EPOCHS = 300 +_C.TRAIN.WARMUP_EPOCHS = 20 +_C.TRAIN.WEIGHT_DECAY = 0.05 +_C.TRAIN.BASE_LR = 5e-4 +_C.TRAIN.WARMUP_LR = 0.0 +_C.TRAIN.MIN_LR = 0.0 +# Clip gradient norm +_C.TRAIN.CLIP_GRAD = 0.0 +# Auto resume from latest checkpoint +_C.TRAIN.AUTO_RESUME = True +# Gradient accumulation steps +# could be overwritten by command line argument +_C.TRAIN.ACCUMULATION_STEPS = 0 +# Whether to use gradient checkpointing to save memory +# could be overwritten by command line argument +_C.TRAIN.USE_CHECKPOINT = False + +# LR scheduler +_C.TRAIN.LR_SCHEDULER = CN() +_C.TRAIN.LR_SCHEDULER.NAME = 'cosine' +# Epoch interval to decay LR, used in StepLRScheduler +_C.TRAIN.LR_SCHEDULER.DECAY_EPOCHS = 30 +# LR decay rate, used in StepLRScheduler +_C.TRAIN.LR_SCHEDULER.DECAY_RATE = 0.1 + +# Optimizer +_C.TRAIN.OPTIMIZER = CN() +_C.TRAIN.OPTIMIZER.NAME = 'sgd' +# Optimizer Epsilon +_C.TRAIN.OPTIMIZER.EPS = 1e-8 +# Optimizer Betas +_C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999) +# SGD momentum 
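+# Added note: consumed by build_optimizer in train/optimizer.py, which constructs optim.SGD with nesterov=True.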
+_C.TRAIN.OPTIMIZER.MOMENTUM = 0.9 + +# For EMA model +_C.TRAIN.EMA_ALPHA = 0.0 +_C.TRAIN.EMA_UPDATE_PERIOD = 8 + +# For RepOptimizer only +_C.TRAIN.SCALES_PATH = None + +# ----------------------------------------------------------------------------- +# Augmentation settings +# ----------------------------------------------------------------------------- +_C.AUG = CN() +# Mixup alpha, mixup enabled if > 0 +_C.AUG.MIXUP = 0.2 +# Cutmix alpha, cutmix enabled if > 0 +_C.AUG.CUTMIX = 0.0 +# Cutmix min/max ratio, overrides alpha and enables cutmix if set +_C.AUG.CUTMIX_MINMAX = None +# Probability of performing mixup or cutmix when either/both is enabled +_C.AUG.MIXUP_PROB = 1.0 +# Probability of switching to cutmix when both mixup and cutmix enabled +_C.AUG.MIXUP_SWITCH_PROB = 0.5 +# How to apply mixup/cutmix params. Per "batch", "pair", or "elem" +_C.AUG.MIXUP_MODE = 'batch' + +_C.AUG.PRESET = None # If use AUG.PRESET (e.g., 'raug15'), use the pre-defined preprocessing, ignoring the following settings. +# Color jitter factor +_C.AUG.COLOR_JITTER = 0.4 +# Use AutoAugment policy. "v0" or "original" +_C.AUG.AUTO_AUGMENT = 'rand-m9-mstd0.5-inc1' +# Random erase prob +_C.AUG.REPROB = 0.25 +# Random erase mode +_C.AUG.REMODE = 'pixel' +# Random erase count +_C.AUG.RECOUNT = 1 + + +# ----------------------------------------------------------------------------- +# Testing settings +# ----------------------------------------------------------------------------- +_C.TEST = CN() +# Whether to use center crop when testing +_C.TEST.CROP = False + +# ----------------------------------------------------------------------------- +# Misc +# ----------------------------------------------------------------------------- +# Mixed precision opt level, if O0, no amp is used ('O0', 'O1', 'O2') +# overwritten by command line argument +_C.AMP_OPT_LEVEL = '' +# Path to output folder, overwritten by command line argument +_C.OUTPUT = '' +# Tag of experiment, overwritten by command line argument +_C.TAG = 'default' +# Frequency to save checkpoint +_C.SAVE_FREQ = 20 +# Frequency to logging info +_C.PRINT_FREQ = 10 +# Fixed random seed +_C.SEED = 0 +# Perform evaluation only, overwritten by command line argument +_C.EVAL_MODE = False +# Test throughput only, overwritten by command line argument +_C.THROUGHPUT_MODE = False +# local rank for DistributedDataParallel, given by command line argument +_C.LOCAL_RANK = 0 + + +def update_config(config, args): + config.defrost() + if args.opts: + config.merge_from_list(args.opts) + # merge from specific arguments + if args.scales_path: + config.TRAIN.SCALES_PATH = args.scales_path + if args.arch: + config.MODEL.ARCH = args.arch + if args.batch_size: + config.DATA.BATCH_SIZE = args.batch_size + if args.data_path: + config.DATA.DATA_PATH = args.data_path + if args.zip: + config.DATA.ZIP_MODE = True + if args.cache_mode: + config.DATA.CACHE_MODE = args.cache_mode + if args.resume: + config.MODEL.RESUME = args.resume + if args.accumulation_steps: + config.TRAIN.ACCUMULATION_STEPS = args.accumulation_steps + if args.use_checkpoint: + config.TRAIN.USE_CHECKPOINT = True + if args.amp_opt_level: + config.AMP_OPT_LEVEL = args.amp_opt_level + if args.output: + config.OUTPUT = args.output + if args.tag: + config.TAG = args.tag + if args.eval: + config.EVAL_MODE = True + if args.throughput: + config.THROUGHPUT_MODE = True + + if config.DATA.TEST_SIZE is None: + config.DATA.TEST_SIZE = config.DATA.IMG_SIZE + if config.DATA.TEST_BATCH_SIZE is None: + config.DATA.TEST_BATCH_SIZE = 
config.DATA.BATCH_SIZE + # set local rank for distributed training + config.LOCAL_RANK = args.local_rank + # output folder + config.OUTPUT = os.path.join(config.OUTPUT, config.MODEL.ARCH, config.TAG) + config.freeze() + + +def get_config(args): + """Get a yacs CfgNode object with default values.""" + # Return a clone so that the defaults will not be altered + # This is for the "local variable" use pattern + config = _C.clone() + update_config(config, args) + + return config diff --git a/cv/classification/repvgg/pytorch/train/cutout.py b/cv/classification/repvgg/pytorch/train/cutout.py new file mode 100755 index 0000000000000000000000000000000000000000..8592ffc08441857a7b0fd8882d23179cc064fae1 --- /dev/null +++ b/cv/classification/repvgg/pytorch/train/cutout.py @@ -0,0 +1,55 @@ +import numpy as np + +class Cutout: + + def __init__(self, size=16) -> None: + self.size = size + + def _create_cutout_mask(self, img_height, img_width, num_channels, size): + """Creates a zero mask used for cutout of shape `img_height` x `img_width`. + Args: + img_height: Height of image cutout mask will be applied to. + img_width: Width of image cutout mask will be applied to. + num_channels: Number of channels in the image. + size: Size of the zeros mask. + Returns: + A mask of shape `img_height` x `img_width` with all ones except for a + square of zeros of shape `size` x `size`. This mask is meant to be + elementwise multiplied with the original image. Additionally returns + the `upper_coord` and `lower_coord` which specify where the cutout mask + will be applied. + """ + # assert img_height == img_width + + # Sample center where cutout mask will be applied + height_loc = np.random.randint(low=0, high=img_height) + width_loc = np.random.randint(low=0, high=img_width) + + size = int(size) + # Determine upper right and lower left corners of patch + upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2)) + lower_coord = ( + min(img_height, height_loc + size // 2), + min(img_width, width_loc + size // 2), + ) + mask_height = lower_coord[0] - upper_coord[0] + mask_width = lower_coord[1] - upper_coord[1] + assert mask_height > 0 + assert mask_width > 0 + + mask = np.ones((img_height, img_width, num_channels)) + zeros = np.zeros((mask_height, mask_width, num_channels)) + mask[upper_coord[0]: lower_coord[0], upper_coord[1]: lower_coord[1], :] = zeros + return mask, upper_coord, lower_coord + + def __call__(self, pil_img): + pil_img = pil_img.copy() + img_height, img_width, num_channels = (*pil_img.size, 3) + _, upper_coord, lower_coord = self._create_cutout_mask( + img_height, img_width, num_channels, self.size + ) + pixels = pil_img.load() # create the pixel map + for i in range(upper_coord[0], lower_coord[0]): # for every col: + for j in range(upper_coord[1], lower_coord[1]): # For every row + pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly + return pil_img \ No newline at end of file diff --git a/cv/classification/repvgg/pytorch/train/logger.py b/cv/classification/repvgg/pytorch/train/logger.py new file mode 100755 index 0000000000000000000000000000000000000000..a0ae05e487856c285cad5a7cf87cb5e63c30d8f6 --- /dev/null +++ b/cv/classification/repvgg/pytorch/train/logger.py @@ -0,0 +1,41 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# 
Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- + +import os +import sys +import logging +import functools +from termcolor import colored + + +@functools.lru_cache() +def create_logger(output_dir, dist_rank=0, name=''): + # create logger + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + # create formatter + fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' + color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ + colored('(%(filename)s %(lineno)d)', 'yellow') + ': %(levelname)s %(message)s' + + # create console handlers for master process + if dist_rank == 0: + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter( + logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) + logger.addHandler(console_handler) + + # create file handlers + file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{dist_rank}.txt'), mode='a') + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) + logger.addHandler(file_handler) + + return logger diff --git a/cv/classification/repvgg/pytorch/train/lr_scheduler.py b/cv/classification/repvgg/pytorch/train/lr_scheduler.py new file mode 100755 index 0000000000000000000000000000000000000000..029b184c16a03eb2e64017b9518b1e460cd20e28 --- /dev/null +++ b/cv/classification/repvgg/pytorch/train/lr_scheduler.py @@ -0,0 +1,101 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- + +import torch +from timm.scheduler.cosine_lr import CosineLRScheduler +from timm.scheduler.step_lr import StepLRScheduler +from timm.scheduler.scheduler import Scheduler + + +def build_scheduler(config, optimizer, n_iter_per_epoch): + num_steps = int(config.TRAIN.EPOCHS * n_iter_per_epoch) + warmup_steps = int(config.TRAIN.WARMUP_EPOCHS * n_iter_per_epoch) + decay_steps = int(config.TRAIN.LR_SCHEDULER.DECAY_EPOCHS * n_iter_per_epoch) + + lr_scheduler = None + if config.TRAIN.LR_SCHEDULER.NAME == 'cosine': + lr_scheduler = CosineLRScheduler( + optimizer, + t_initial=num_steps, + lr_min=config.TRAIN.MIN_LR, + warmup_lr_init=config.TRAIN.WARMUP_LR, + warmup_t=warmup_steps, + cycle_limit=1, + t_in_epochs=False, + ) + elif config.TRAIN.LR_SCHEDULER.NAME == 'linear': + lr_scheduler = LinearLRScheduler( + optimizer, + t_initial=num_steps, + lr_min_rate=0.01, + warmup_lr_init=config.TRAIN.WARMUP_LR, + warmup_t=warmup_steps, + t_in_epochs=False, + ) + elif config.TRAIN.LR_SCHEDULER.NAME == 'step': + lr_scheduler = StepLRScheduler( + optimizer, + decay_t=decay_steps, + decay_rate=config.TRAIN.LR_SCHEDULER.DECAY_RATE, + warmup_lr_init=config.TRAIN.WARMUP_LR, + warmup_t=warmup_steps, + t_in_epochs=False, + ) + + return lr_scheduler + + +class LinearLRScheduler(Scheduler): + def __init__(self, + optimizer: 
torch.optim.Optimizer, + t_initial: int, + lr_min_rate: float, + warmup_t=0, + warmup_lr_init=0., + t_in_epochs=True, + noise_range_t=None, + noise_pct=0.67, + noise_std=1.0, + noise_seed=42, + initialize=True, + ) -> None: + super().__init__( + optimizer, param_group_field="lr", + noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed, + initialize=initialize) + + self.t_initial = t_initial + self.lr_min_rate = lr_min_rate + self.warmup_t = warmup_t + self.warmup_lr_init = warmup_lr_init + self.t_in_epochs = t_in_epochs + if self.warmup_t: + self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values] + super().update_groups(self.warmup_lr_init) + else: + self.warmup_steps = [1 for _ in self.base_values] + + def _get_lr(self, t): + if t < self.warmup_t: + lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps] + else: + t = t - self.warmup_t + total_t = self.t_initial - self.warmup_t + lrs = [v - ((v - v * self.lr_min_rate) * (t / total_t)) for v in self.base_values] + return lrs + + def get_epoch_values(self, epoch: int): + if self.t_in_epochs: + return self._get_lr(epoch) + else: + return None + + def get_update_values(self, num_updates: int): + if not self.t_in_epochs: + return self._get_lr(num_updates) + else: + return None diff --git a/cv/classification/repvgg/pytorch/train/optimizer.py b/cv/classification/repvgg/pytorch/train/optimizer.py new file mode 100755 index 0000000000000000000000000000000000000000..68abf6d0eb1decfbb61457e86ac0cc955fc30f39 --- /dev/null +++ b/cv/classification/repvgg/pytorch/train/optimizer.py @@ -0,0 +1,71 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- + +from torch import optim as optim + + +def build_optimizer(config, model): + """ + Build optimizer, set weight decay of normalization to 0 by default. 
+ """ + skip = {} + skip_keywords = {} + if hasattr(model, 'no_weight_decay'): + skip = model.no_weight_decay() + if hasattr(model, 'no_weight_decay_keywords'): + skip_keywords = model.no_weight_decay_keywords() + echo = (config.LOCAL_RANK==0) + parameters = set_weight_decay(model, skip, skip_keywords, echo=echo) + opt_lower = config.TRAIN.OPTIMIZER.NAME.lower() + optimizer = None + if opt_lower == 'sgd': + optimizer = optim.SGD(parameters, momentum=config.TRAIN.OPTIMIZER.MOMENTUM, nesterov=True, + lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY) + if echo: + print('================================== SGD nest, momentum = {}, wd = {}'.format(config.TRAIN.OPTIMIZER.MOMENTUM, config.TRAIN.WEIGHT_DECAY)) + elif opt_lower == 'adam': + print('adam') + optimizer = optim.Adam(parameters, eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS, + lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY) + elif opt_lower == 'adamw': + optimizer = optim.AdamW(parameters, eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS, + lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY) + + return optimizer + + +def set_weight_decay(model, skip_list=(), skip_keywords=(), echo=False): + has_decay = [] + no_decay = [] + + for name, param in model.named_parameters(): + if not param.requires_grad: + continue # frozen weights + if 'identity.weight' in name: + has_decay.append(param) + if echo: + print(f"{name} USE weight decay") + elif len(param.shape) == 1 or name.endswith(".bias") or (name in skip_list) or \ + check_keywords_in_name(name, skip_keywords): + no_decay.append(param) + if echo: + print(f"{name} has no weight decay") + else: + has_decay.append(param) + if echo: + print(f"{name} USE weight decay") + + return [{'params': has_decay}, + {'params': no_decay, 'weight_decay': 0.}] + + +def check_keywords_in_name(name, keywords=()): + isin = False + for keyword in keywords: + if keyword in name: + isin = True + return isin diff --git a/cv/classification/repvgg/pytorch/train/randaug.py b/cv/classification/repvgg/pytorch/train/randaug.py new file mode 100755 index 0000000000000000000000000000000000000000..6934fb8059bd1f04129d41964dbae0f8d39f0beb --- /dev/null +++ b/cv/classification/repvgg/pytorch/train/randaug.py @@ -0,0 +1,407 @@ +import math +import random + +import numpy as np +import PIL +from PIL import Image, ImageEnhance, ImageOps + +from train.cutout import Cutout + + +_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) + +_FILL = (128, 128, 128) + +# This signifies the max integer that the controller RNN could predict for the +# augmentation scheme. +_MAX_LEVEL = 10. 
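+# Added note: the _level_to_arg helpers below scale their argument by level / _MAX_LEVEL, e.g. Rotate at magnitude 9 maps to (9 / 10.) * 30 = 27 degrees before the random sign flip.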
+ +_HPARAMS_DEFAULT = dict( + translate_const=250, + img_mean=_FILL, +) + +_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) + + +def _interpolation(kwargs): + interpolation = kwargs.pop('resample', Image.BILINEAR) + if isinstance(interpolation, (list, tuple)): + return random.choice(interpolation) + else: + return interpolation + + +def _check_args_tf(kwargs): + if 'fillcolor' in kwargs and _PIL_VER < (5, 0): + kwargs.pop('fillcolor') + kwargs['resample'] = _interpolation(kwargs) + + +def cutout(img, factor, **kwargs): + _check_args_tf(kwargs) + return Cutout(size=factor)(img) + + +def shear_x(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, factor, 0, 0, 1, 0), **kwargs) + + +def shear_y(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, factor, 1, 0), **kwargs) + + +def translate_x_rel(img, pct, **kwargs): + pixels = pct * img.size[0] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) + + +def translate_y_rel(img, pct, **kwargs): + pixels = pct * img.size[1] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) + + +def translate_x_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) + + +def translate_y_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) + + +def rotate(img, degrees, **kwargs): + _check_args_tf(kwargs) + if _PIL_VER >= (5, 2): + return img.rotate(degrees, **kwargs) + elif _PIL_VER >= (5, 0): + w, h = img.size + post_trans = (0, 0) + rotn_center = (w / 2.0, h / 2.0) + angle = -math.radians(degrees) + matrix = [ + round(math.cos(angle), 15), + round(math.sin(angle), 15), + 0.0, + round(-math.sin(angle), 15), + round(math.cos(angle), 15), + 0.0, + ] + + def transform(x, y, matrix): + (a, b, c, d, e, f) = matrix + return a * x + b * y + c, d * x + e * y + f + + matrix[2], matrix[5] = transform( + -rotn_center[0] - post_trans[0], + - rotn_center[1] - post_trans[1], matrix + ) + matrix[2] += rotn_center[0] + matrix[5] += rotn_center[1] + return img.transform(img.size, Image.AFFINE, matrix, **kwargs) + else: + return img.rotate(degrees, resample=kwargs['resample']) + + +def auto_contrast(img, **__): + return ImageOps.autocontrast(img) + + +def invert(img, **__): + return ImageOps.invert(img) + + +def identity(img, **__): + return img + + +def equalize(img, **__): + return ImageOps.equalize(img) + + +def solarize(img, thresh, **__): + return ImageOps.solarize(img, thresh) + + +def solarize_add(img, add, thresh=128, **__): + lut = [] + for i in range(256): + if i < thresh: + lut.append(min(255, i + add)) + else: + lut.append(i) + if img.mode in ("L", "RGB"): + if img.mode == "RGB" and len(lut) == 256: + lut = lut + lut + lut + return img.point(lut) + else: + return img + + +def posterize(img, bits_to_keep, **__): + if bits_to_keep >= 8: + return img + return ImageOps.posterize(img, bits_to_keep) + + +def contrast(img, factor, **__): + return ImageEnhance.Contrast(img).enhance(factor) + + +def color(img, factor, **__): + return ImageEnhance.Color(img).enhance(factor) + + +def brightness(img, factor, **__): + return ImageEnhance.Brightness(img).enhance(factor) + + +def sharpness(img, factor, **__): + return ImageEnhance.Sharpness(img).enhance(factor) + + +def _randomly_negate(v): + """With 50% prob, 
negate the value""" + return -v if random.random() > 0.5 else v + + +def _cutout_level_to_arg(level, _hparams): + # range [0, 40] + level = max(2, (level / _MAX_LEVEL) * 40.) + return level, + + +def _rotate_level_to_arg(level, _hparams): + # range [-30, 30] + level = (level / _MAX_LEVEL) * 30. + level = _randomly_negate(level) + return level, + + +def _enhance_level_to_arg(level, _hparams): + # range [0.1, 1.9] + return (level / _MAX_LEVEL) * 1.8 + 0.1, + + +def _shear_level_to_arg(level, _hparams): + # range [-0.3, 0.3] + level = (level / _MAX_LEVEL) * 0.3 + level = _randomly_negate(level) + return level, + + +def _translate_abs_level_to_arg(level, hparams): + translate_const = hparams['translate_const'] + level = (level / _MAX_LEVEL) * float(translate_const) + level = _randomly_negate(level) + return level, + + +def _translate_rel_level_to_arg(level, _hparams): + # range [-0.45, 0.45] + level = (level / _MAX_LEVEL) * 0.45 + level = _randomly_negate(level) + return level, + + +def _posterize_original_level_to_arg(level, _hparams): + # As per original AutoAugment paper description + # range [4, 8], 'keep 4 up to 8 MSB of image' + return int((level / _MAX_LEVEL) * 4) + 4, + + +def _posterize_research_level_to_arg(level, _hparams): + # As per Tensorflow models research and UDA impl + # range [4, 0], 'keep 4 down to 0 MSB of original image' + return 4 - int((level / _MAX_LEVEL) * 4), + + +def _posterize_tpu_level_to_arg(level, _hparams): + # As per Tensorflow TPU EfficientNet impl + # range [0, 4], 'keep 0 up to 4 MSB of original image' + return int((level / _MAX_LEVEL) * 4), + + +def _solarize_level_to_arg(level, _hparams): + # range [0, 256] + return int((level / _MAX_LEVEL) * 256), + + +def _solarize_add_level_to_arg(level, _hparams): + # range [0, 110] + return int((level / _MAX_LEVEL) * 110), + + +LEVEL_TO_ARG = { + 'AutoContrast': None, + 'Equalize': None, + 'Invert': None, + 'Identity': None, + 'Rotate': _rotate_level_to_arg, + 'PosterizeOriginal': _posterize_original_level_to_arg, + 'PosterizeResearch': _posterize_research_level_to_arg, + 'PosterizeTpu': _posterize_tpu_level_to_arg, + 'Solarize': _solarize_level_to_arg, + 'SolarizeAdd': _solarize_add_level_to_arg, + 'Color': _enhance_level_to_arg, + 'Contrast': _enhance_level_to_arg, + 'Brightness': _enhance_level_to_arg, + 'Sharpness': _enhance_level_to_arg, + 'ShearX': _shear_level_to_arg, + 'ShearY': _shear_level_to_arg, + 'TranslateX': _translate_abs_level_to_arg, + 'TranslateY': _translate_abs_level_to_arg, + 'TranslateXRel': _translate_rel_level_to_arg, + 'TranslateYRel': _translate_rel_level_to_arg, + 'Cutout': _cutout_level_to_arg, +} + + +NAME_TO_OP = { + 'AutoContrast': auto_contrast, + 'Equalize': equalize, + 'Invert': invert, + 'Identity': identity, + 'Rotate': rotate, + 'PosterizeOriginal': posterize, + 'PosterizeResearch': posterize, + 'PosterizeTpu': posterize, + 'Solarize': solarize, + 'SolarizeAdd': solarize_add, + 'Color': color, + 'Contrast': contrast, + 'Brightness': brightness, + 'Sharpness': sharpness, + 'ShearX': shear_x, + 'ShearY': shear_y, + 'TranslateX': translate_x_abs, + 'TranslateY': translate_y_abs, + 'TranslateXRel': translate_x_rel, + 'TranslateYRel': translate_y_rel, + 'Cutout': cutout, +} + + +class AutoAugmentTransform(object): + """ + AutoAugment from Google. 
+ Implementation adapted from: + https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py + """ + + def __init__(self, name, prob=0.5, magnitude=10, hparams=None): + """ + Args: + name (str): any type of transforms list in _RAND_TRANSFORMS. + prob (float): probability of perform current augmentation. + magnitude (int): intensity / magnitude of each augmentation. + hparams (dict): hyper-parameters required by each augmentation. + """ + hparams = hparams or _HPARAMS_DEFAULT + self.aug_fn = NAME_TO_OP[name] + self.level_fn = LEVEL_TO_ARG[name] + self.prob = prob + self.magnitude = magnitude + self.hparams = hparams.copy() + self.kwargs = dict( + fillcolor=hparams['img_mean'] if 'img_mean' in hparams else _FILL, + resample=hparams['interpolation'] if 'interpolation' in hparams + else _RANDOM_INTERPOLATION, + ) + + # If magnitude_std is > 0, we introduce some randomness + # in the usually fixed policy and sample magnitude from a normal distribution + # with mean `magnitude` and std-dev of `magnitude_std`. + # NOTE This is my own hack, being tested, not in papers or reference impls. + self.magnitude_std = self.hparams.get('magnitude_std', 0) + + def __call__(self, img: PIL.Image) -> PIL.Image: + if random.random() > self.prob: + return img + magnitude = self.magnitude + if self.magnitude_std and self.magnitude_std > 0: + magnitude = random.gauss(magnitude, self.magnitude_std) + # NOTE: magnitude fixed and no boundary + # magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range + level_args = self.level_fn( + magnitude, self.hparams) if self.level_fn is not None else tuple() + return self.aug_fn(img, *level_args, **self.kwargs) + # return np.array(self.aug_fn(Image.fromarray(img), *level_args, **self.kwargs)) + + # def apply_coords(self, coords: np.ndarray) -> np.ndarray: + # return coords + + +_RAND_TRANSFORMS = [ + 'AutoContrast', + 'Equalize', + 'Invert', + 'Rotate', + 'PosterizeTpu', + 'Solarize', + 'SolarizeAdd', + 'Color', + 'Contrast', + 'Brightness', + 'Sharpness', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 'TranslateYRel', + 'Cutout' # FIXME I implement this as random erasing separately +] + +_RAND_TRANSFORMS_CMC = [ + 'AutoContrast', + 'Identity', + 'Rotate', + 'Sharpness', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 'TranslateYRel', + # 'Cutout' # FIXME I implement this as random erasing separately +] + + +# These experimental weights are based loosely on the relative improvements mentioned in paper. +# They may not result in increased performance, but could likely be tuned to so. 
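+# Added note: RandAugPolicy defined below samples ops uniformly via np.random.choice(_RAND_TRANSFORMS), so these weights are not applied there.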
+_RAND_CHOICE_WEIGHTS_0 = { + 'Rotate': 0.3, + 'ShearX': 0.2, + 'ShearY': 0.2, + 'TranslateXRel': 0.1, + 'TranslateYRel': 0.1, + 'Color': .025, + 'Sharpness': 0.025, + 'AutoContrast': 0.025, + 'Solarize': .005, + 'SolarizeAdd': .005, + 'Contrast': .005, + 'Brightness': .005, + 'Equalize': .005, + 'PosterizeTpu': 0, + 'Invert': 0, +} + + +class RandAugPolicy(object): + def __init__(self, layers=2, magnitude=10): + self.layers = layers + self.magnitude = magnitude + + def __call__(self, img): + for _ in range(self.layers): + trans = np.random.choice(_RAND_TRANSFORMS) + # NOTE: prob apply, fixed magnitude + # trans_op = AutoAugmentTransform(trans, prob=np.random.uniform(0.2, 0.8), magnitude=self.magnitude) + # NOTE: always apply, random magnitude + trans_op = AutoAugmentTransform(trans, prob=1.0, magnitude=np.random.choice(self.magnitude)) + img = trans_op(img) + assert img is not None, trans + return img diff --git a/cv/classification/repvgg/pytorch/utils.py b/cv/classification/repvgg/pytorch/utils.py new file mode 100755 index 0000000000000000000000000000000000000000..78ceb52305e8e4619d183f3f6219046a4c2f028a --- /dev/null +++ b/cv/classification/repvgg/pytorch/utils.py @@ -0,0 +1,249 @@ +# -------------------------------------------------------- +# RepVGG: Making VGG-style ConvNets Great Again (https://openaccess.thecvf.com/content/CVPR2021/papers/Ding_RepVGG_Making_VGG-Style_ConvNets_Great_Again_CVPR_2021_paper.pdf) +# Github source: https://github.com/DingXiaoH/RepVGG +# Licensed under The MIT License [see LICENSE for details] +# The training script is based on the code of Swin Transformer (https://github.com/microsoft/Swin-Transformer) +# -------------------------------------------------------- + +import torch +import math +import os + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self, name, fmt=':f'): + self.name = name + self.fmt = fmt + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + +def load_checkpoint(model, ckpt_path): + checkpoint = torch.load(ckpt_path) + if 'model' in checkpoint: + checkpoint = checkpoint['model'] + if 'state_dict' in checkpoint: + checkpoint = checkpoint['state_dict'] + ckpt = {} + for k, v in 
checkpoint.items(): + if k.startswith('module.'): + ckpt[k[7:]] = v + else: + ckpt[k] = v + model.load_state_dict(ckpt) + + +class WarmupCosineAnnealingLR(torch.optim.lr_scheduler._LRScheduler): + + def __init__(self, optimizer, T_cosine_max, eta_min=0, last_epoch=-1, warmup=0): + self.eta_min = eta_min + self.T_cosine_max = T_cosine_max + self.warmup = warmup + super(WarmupCosineAnnealingLR, self).__init__(optimizer, last_epoch) + + def get_lr(self): + if self.last_epoch < self.warmup: + return [self.last_epoch / self.warmup * base_lr for base_lr in self.base_lrs] + else: + return [self.eta_min + (base_lr - self.eta_min) * + (1 + math.cos(math.pi * (self.last_epoch - self.warmup) / (self.T_cosine_max - self.warmup))) / 2 + for base_lr in self.base_lrs] + + +def log_msg(message, log_file): + print(message) + with open(log_file, 'a') as f: + print(message, file=f) + + + + + +try: + # noinspection PyUnresolvedReferences + from apex import amp +except ImportError: + amp = None + +def unwrap_model(model): + """Remove the DistributedDataParallel wrapper if present.""" + wrapped = isinstance(model, torch.nn.parallel.distributed.DistributedDataParallel) + return model.module if wrapped else model + + +def load_checkpoint(config, model, optimizer, lr_scheduler, logger, model_ema=None): + logger.info(f"==============> Resuming form {config.MODEL.RESUME}....................") + if config.MODEL.RESUME.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + config.MODEL.RESUME, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(config.MODEL.RESUME, map_location='cpu') + msg = model.load_state_dict(checkpoint['model'], strict=False) + logger.info(msg) + max_accuracy = 0.0 + if not config.EVAL_MODE and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + config.defrost() + config.TRAIN.START_EPOCH = checkpoint['epoch'] + 1 + config.freeze() + if 'amp' in checkpoint and config.AMP_OPT_LEVEL != "O0" and checkpoint['config'].AMP_OPT_LEVEL != "O0": + amp.load_state_dict(checkpoint['amp']) + logger.info(f"=> loaded successfully '{config.MODEL.RESUME}' (epoch {checkpoint['epoch']})") + if 'max_accuracy' in checkpoint: + max_accuracy = checkpoint['max_accuracy'] + if model_ema is not None: + unwrap_model(model_ema).load_state_dict(checkpoint['ema']) + print('=================================================== EMAloaded') + + del checkpoint + torch.cuda.empty_cache() + return max_accuracy + + +def load_weights(model, path): + checkpoint = torch.load(path, map_location='cpu') + if 'model' in checkpoint: + checkpoint = checkpoint['model'] + if 'state_dict' in checkpoint: + checkpoint = checkpoint['state_dict'] + unwrap_model(model).load_state_dict(checkpoint, strict=False) + print('=================== loaded from', path) + +def save_latest(config, epoch, model, max_accuracy, optimizer, lr_scheduler, logger, model_ema=None): + save_state = {'model': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'max_accuracy': max_accuracy, + 'epoch': epoch, + 'config': config} + if config.AMP_OPT_LEVEL != "O0": + save_state['amp'] = amp.state_dict() + if model_ema is not None: + save_state['ema'] = unwrap_model(model_ema).state_dict() + + save_path = os.path.join(config.OUTPUT, 'latest.pth') + logger.info(f"{save_path} saving......") + torch.save(save_state, save_path) + 
logger.info(f"{save_path} saved !!!") + +def save_checkpoint(config, epoch, model, max_accuracy, optimizer, lr_scheduler, logger, is_best=False, model_ema=None): + save_state = {'model': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'max_accuracy': max_accuracy, + 'epoch': epoch, + 'config': config} + if config.AMP_OPT_LEVEL != "O0": + save_state['amp'] = amp.state_dict() + if model_ema is not None: + save_state['ema'] = unwrap_model(model_ema).state_dict() + + if is_best: + best_path = os.path.join(config.OUTPUT, 'best_ckpt.pth') + torch.save(save_state, best_path) + + save_path = os.path.join(config.OUTPUT, f'ckpt_epoch_{epoch}.pth') + logger.info(f"{save_path} saving......") + torch.save(save_state, save_path) + logger.info(f"{save_path} saved !!!") + + +def get_grad_norm(parameters, norm_type=2): + if isinstance(parameters, torch.Tensor): + parameters = [parameters] + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + total_norm = 0 + for p in parameters: + param_norm = p.grad.data.norm(norm_type) + total_norm += param_norm.item() ** norm_type + total_norm = total_norm ** (1. / norm_type) + return total_norm + + +import torch.distributed as dist + +def auto_resume_helper(output_dir): + checkpoints = os.listdir(output_dir) + checkpoints = [ckpt for ckpt in checkpoints if ckpt.endswith('pth') and 'ema' not in ckpt] + print(f"All checkpoints founded in {output_dir}: {checkpoints}") + if len(checkpoints) > 0: + latest_checkpoint = max([os.path.join(output_dir, d) for d in checkpoints], key=os.path.getmtime) + print(f"The latest checkpoint founded: {latest_checkpoint}") + resume_file = latest_checkpoint + else: + resume_file = None + return resume_file + + +def reduce_tensor(tensor): + rt = tensor.clone() + dist.all_reduce(rt, op=dist.ReduceOp.SUM) + rt /= dist.get_world_size() + return rt + +def update_model_ema(cfg, num_gpus, model, model_ema, cur_epoch, cur_iter): + """Update exponential moving average (ema) of model weights.""" + update_period = cfg.TRAIN.EMA_UPDATE_PERIOD + if update_period is None or update_period == 0 or cur_iter % update_period != 0: + return + # Adjust alpha to be fairly independent of other parameters + total_batch_size = num_gpus * cfg.DATA.BATCH_SIZE + adjust = total_batch_size / cfg.TRAIN.EPOCHS * update_period + # print('ema adjust', adjust) + alpha = min(1.0, cfg.TRAIN.EMA_ALPHA * adjust) + # During warmup simply copy over weights instead of using ema + alpha = 1.0 if cur_epoch < cfg.TRAIN.WARMUP_EPOCHS else alpha + # Take ema of all parameters (not just named parameters) + params = unwrap_model(model).state_dict() + for name, param in unwrap_model(model_ema).state_dict().items(): + param.copy_(param * (1.0 - alpha) + params[name] * alpha) \ No newline at end of file