diff --git a/cv/detection/yolov5/pytorch/models/common.py b/cv/detection/yolov5/pytorch/models/common.py index a26d4bddd3e5791122c456203d41d81b3e48647d..2308ace43b1a4a928abab9705d17058153d8ca7c 100644 --- a/cv/detection/yolov5/pytorch/models/common.py +++ b/cv/detection/yolov5/pytorch/models/common.py @@ -1,3 +1,6 @@ +# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. + # YOLOv5 common modules from copy import copy @@ -46,6 +49,24 @@ class Conv(nn.Module): return self.act(self.conv(x)) +class SPPF(nn.Module): + # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher + def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * 4, c2, 1, 1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) + + class TransformerLayer(nn.Module): # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) def __init__(self, c, num_heads): diff --git a/cv/detection/yolov5/pytorch/train.py b/cv/detection/yolov5/pytorch/train.py index cc456a8400465421e8e71214e3e196832525ee8c..2bc68e9815ce4679668fd47eec8eb55ce98e629a 100644 --- a/cv/detection/yolov5/pytorch/train.py +++ b/cv/detection/yolov5/pytorch/train.py @@ -1,3 +1,18 @@ +# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + """Train a YOLOv5 model on a custom dataset Usage: @@ -382,6 +397,10 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary f'Using {dataloader.num_workers} dataloader workers\n' f'Logging results to {save_dir}\n' f'Starting training for {epochs} epochs...') + + run_steps = 0 + time_step = [] + time_step.append(time.time()) for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() @@ -474,11 +493,16 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Print if RANK in [-1, 0]: + time_step.append(time.time()) mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ( f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]) - pbar.set_description(s) + time_iter = time_step[i+1] - time_step[i] + fps = opt.batch_size / time_iter + performance = " timer: %.6f sec. || fps: %.3f" % (time_iter, fps) + + pbar.set_description(s + performance) if nb > 1000: log_freq = 100 diff --git a/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py b/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py index 5cb5cebc784da8723135329fcbf91c7e705298f2..a1bcb60f5d3db9c11f82d52bae9941fddc7262e3 100755 --- a/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py +++ b/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py @@ -1,4 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. +# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. + import os.path as osp import platform import shutil @@ -12,7 +14,12 @@ from .base_runner import BaseRunner from .builder import RUNNERS from .checkpoint import save_checkpoint from .utils import get_host_info +import torch.distributed as dist +def get_world_size(): + if dist.is_initialized(): + return dist.get_world_size() + return 1 @RUNNERS.register_module() class EpochBasedRunner(BaseRunner): @@ -44,13 +51,20 @@ class EpochBasedRunner(BaseRunner): self._max_iters = self._max_epochs * len(self.data_loader) self.call_hook('before_train_epoch') time.sleep(2) # Prevent possible deadlock during epoch transition + all_fps = [] for i, data_batch in enumerate(self.data_loader): self._inner_iter = i + batchsize=data_batch['img'].shape[0] self.call_hook('before_train_iter') + start_time = time.time() self.run_iter(data_batch, train_mode=True, **kwargs) + end_time = time.time() + fps = batchsize / (end_time - start_time) *get_world_size() + # print ("fps",fps) + all_fps.append(fps) self.call_hook('after_train_iter') self._iter += 1 - + print('Avgfps img/s:', sum(all_fps) / len(all_fps)) self.call_hook('after_train_epoch') self._epoch += 1 diff --git a/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py b/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py index 374f5ccc283bfdfe526b008ef731c4a4e9285443..9ed5cb56b7fe3021d909ca103f05c5131ba81a6b 100644 --- a/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py +++ b/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py @@ -12,6 +12,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -83,6 +84,7 @@ class BaseTrainer(object): num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) end = time.time() + total_num = 0 for iter_id, batch in enumerate(data_loader): if iter_id >= num_iters: break @@ -116,6 +118,8 @@ class BaseTrainer(object): if not opt.hide_data_time: Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) + total_num += batch['input'].shape[0] + Bar.suffix = Bar.suffix + '|Fps {fps:.3f}'.format(fps=batch['input'].shape[0]/batch_time.avg) if opt.print_iter > 0: if iter_id % opt.print_iter == 0: print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) @@ -129,6 +133,7 @@ class BaseTrainer(object): bar.finish() ret = {k: v.avg for k, v in avg_loss_stats.items()} ret['time'] = bar.elapsed_td.total_seconds() / 60. + ret['fps'] = total_num / bar.elapsed_td.total_seconds() return ret, results diff --git a/speech/speech_synthesis/tacotron2/pytorch/train.py b/speech/speech_synthesis/tacotron2/pytorch/train.py index f760b8c8fcc60bdc176fc138133d487a8792cf9a..b862aa567b023f811a5341c66c32acd65ba4b56c 100644 --- a/speech/speech_synthesis/tacotron2/pytorch/train.py +++ b/speech/speech_synthesis/tacotron2/pytorch/train.py @@ -1,3 +1,6 @@ +# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. + import os import sys import time @@ -131,8 +134,13 @@ def validate(model, criterion, valset, iteration, batch_size, n_gpus, val_loss = 0.0 for i, batch in enumerate(val_loader): + start = time.perf_counter() x, y = model.parse_batch(batch) y_pred = model(x) + mel = y_pred[0] + num_mels = mel.size(0) * mel.size(2) + duration = time.perf_counter() - start + throughput = num_mels / duration loss = criterion(y_pred, y) if distributed_run: reduced_val_loss = reduce_tensor(loss.data, n_gpus).item() @@ -143,7 +151,7 @@ def validate(model, criterion, valset, iteration, batch_size, n_gpus, model.train() if rank == 0: - print("Validation loss {}: {:9f} ".format(iteration, val_loss)) + print("Validation loss {}: {:9f} ThroughPut {:.3f}samples/s".format(iteration, val_loss, throughput)) logger.log_validation(val_loss, model, y, y_pred, iteration) if val_loss <= args.target_val_loss: @@ -217,7 +225,10 @@ def train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus, model.zero_grad() x, y = model.parse_batch(batch) + y_pred = model(x) + mel = y_pred[0] + num_mels = mel.size(0) * mel.size(2) loss = criterion(y_pred, y) if hparams.distributed_run: @@ -242,8 +253,9 @@ def train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus, if not is_overflow and rank == 0: duration = time.perf_counter() - start - print("Train loss {} {:.6f} Grad Norm {:.6f} {:.2f}s/it".format( - iteration, reduced_loss, grad_norm, duration)) + throughput = num_mels / duration + print("Train loss {} {:.6f} Grad Norm {:.6f} Timer {:.3f}s/it ThroughPut {:.3f}samples/s".format( + iteration, reduced_loss, grad_norm, duration, throughput)) logger.log_training( reduced_loss, grad_norm, learning_rate, duration, iteration)