diff --git a/cv/detection/yolov5/pytorch/models/common.py b/cv/detection/yolov5/pytorch/models/common.py
index a26d4bddd3e5791122c456203d41d81b3e48647d..2308ace43b1a4a928abab9705d17058153d8ca7c 100644
--- a/cv/detection/yolov5/pytorch/models/common.py
+++ b/cv/detection/yolov5/pytorch/models/common.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+
 # YOLOv5 common modules
 
 from copy import copy
@@ -46,6 +49,24 @@ class Conv(nn.Module):
         return self.act(self.conv(x))
 
 
+class SPPF(nn.Module):
+    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
+    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)  # presumably a 1x1, stride-1 conv from c1 to c_ channels (Conv is defined outside this hunk - confirm)
+        self.cv2 = Conv(c_ * 4, c2, 1, 1)  # input width c_ * 4 matches the four tensors concatenated in forward()
+        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)  # stride 1 with k // 2 padding keeps H x W unchanged for odd k
+
+    def forward(self, x):
+        x = self.cv1(x)
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
+            y1 = self.m(x)  # pooled once
+            y2 = self.m(y1)  # pooled twice (the same pool applied again to y1)
+            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))  # concat x with the 1x/2x/3x pooled maps along dim 1, then project to c2
+
+
 class TransformerLayer(nn.Module):
     # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
     def __init__(self, c, num_heads):
diff --git a/cv/detection/yolov5/pytorch/train.py b/cv/detection/yolov5/pytorch/train.py
index cc456a8400465421e8e71214e3e196832525ee8c..2bc68e9815ce4679668fd47eec8eb55ce98e629a 100644
--- a/cv/detection/yolov5/pytorch/train.py
+++ b/cv/detection/yolov5/pytorch/train.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
 """Train a YOLOv5 model on a custom dataset
 
 Usage:
@@ -382,6 +397,10 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
                 f'Using {dataloader.num_workers} dataloader workers\n'
                 f'Logging results to {save_dir}\n'
                 f'Starting training for {epochs} epochs...')
+
+    run_steps = 0
+    time_step = []
+    time_step.append(time.time())
     for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
         model.train()
 
@@ -474,11 +493,16 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
 
             # Print
             if RANK in [-1, 0]:
+                time_step.append(time.time())  # timestamp marking the end of this iteration
                 mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                 mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                 s = ('%10s' * 2 + '%10.4g' * 6) % (
                     f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])
-                pbar.set_description(s)
+                time_iter = time_step[-1] - time_step[-2]  # newest interval; time_step spans all epochs, so indexing by i (assumed per-epoch batch index) would reuse epoch-0 timings
+                fps = opt.batch_size / time_iter  # images per second for this iteration, based on the configured batch size
+                performance = " timer: %.6f sec. || fps: %.3f" % (time_iter, fps)
+
+                pbar.set_description(s + performance)
 
                 if nb > 1000:
                     log_freq = 100
diff --git a/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py b/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py
index 5cb5cebc784da8723135329fcbf91c7e705298f2..a1bcb60f5d3db9c11f82d52bae9941fddc7262e3 100755
--- a/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py
+++ b/cv/ocr/satrn/pytorch/base/ocrcv/runner/epoch_based_runner.py
@@ -1,4 +1,6 @@
 # Copyright (c) Open-MMLab. All rights reserved.
+# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+
 import os.path as osp
 import platform
 import shutil
@@ -12,7 +14,12 @@ from .base_runner import BaseRunner
 from .builder import RUNNERS
 from .checkpoint import save_checkpoint
 from .utils import get_host_info
+import torch.distributed as dist
 
+def get_world_size():  # number of processes in the default process group, or 1 when torch.distributed is not in use
+    if dist.is_available() and dist.is_initialized():  # is_available() first: builds without distributed support expose no other dist APIs
+        return dist.get_world_size()
+    return 1  # non-distributed (single-process) run
 
 @RUNNERS.register_module()
 class EpochBasedRunner(BaseRunner):
@@ -44,13 +51,20 @@ class EpochBasedRunner(BaseRunner):
         self._max_iters = self._max_epochs * len(self.data_loader)
         self.call_hook('before_train_epoch')
         time.sleep(2)  # Prevent possible deadlock during epoch transition
+        all_fps = []  # per-iteration throughput samples (img/s) collected over this epoch
         for i, data_batch in enumerate(self.data_loader):
             self._inner_iter = i
+            batchsize = data_batch['img'].shape[0]  # size of the first dimension of the 'img' entry
             self.call_hook('before_train_iter')
+            start_time = time.time()
             self.run_iter(data_batch, train_mode=True, **kwargs)
+            end_time = time.time()
+            # Scale the local rate by the world size; assumes every rank processes the same batch size - confirm.
+            fps = batchsize / (end_time - start_time) * get_world_size()
+            all_fps.append(fps)
             self.call_hook('after_train_iter')
             self._iter += 1
-
+        print('Avgfps img/s:', sum(all_fps) / max(len(all_fps), 1))  # max(..., 1) avoids ZeroDivisionError when the loader yields no batches
         self.call_hook('after_train_epoch')
         self._epoch += 1
 
diff --git a/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py b/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py
index 374f5ccc283bfdfe526b008ef731c4a4e9285443..9ed5cb56b7fe3021d909ca103f05c5131ba81a6b 100644
--- a/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py
+++ b/cv/tracking/fairmot/pytorch/src/lib/trains/base_trainer.py
@@ -12,6 +12,7 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -83,6 +84,7 @@ class BaseTrainer(object):
     num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
     bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
     end = time.time()
+    total_num = 0
     for iter_id, batch in enumerate(data_loader):
       if iter_id >= num_iters:
         break
@@ -116,6 +118,8 @@ class BaseTrainer(object):
       if not opt.hide_data_time:
         Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
           '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
+      total_num += batch['input'].shape[0]
+      Bar.suffix = Bar.suffix + '|Fps {fps:.3f}'.format(fps=batch['input'].shape[0]/batch_time.avg)
       if opt.print_iter > 0:
         if iter_id % opt.print_iter == 0:
           print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 
@@ -129,6 +133,7 @@ class BaseTrainer(object):
     bar.finish()
     ret = {k: v.avg for k, v in avg_loss_stats.items()}
     ret['time'] = bar.elapsed_td.total_seconds() / 60.
+    ret['fps'] = total_num / bar.elapsed_td.total_seconds()
     return ret, results
 
   
diff --git a/speech/speech_synthesis/tacotron2/pytorch/train.py b/speech/speech_synthesis/tacotron2/pytorch/train.py
index f760b8c8fcc60bdc176fc138133d487a8792cf9a..b862aa567b023f811a5341c66c32acd65ba4b56c 100644
--- a/speech/speech_synthesis/tacotron2/pytorch/train.py
+++ b/speech/speech_synthesis/tacotron2/pytorch/train.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+
 import os
 import sys
 import time
@@ -131,8 +134,13 @@ def validate(model, criterion, valset, iteration, batch_size, n_gpus,
 
         val_loss = 0.0
         for i, batch in enumerate(val_loader):
+            start = time.perf_counter()
             x, y = model.parse_batch(batch)
             y_pred = model(x)
+            mel = y_pred[0]
+            num_mels = mel.size(0) * mel.size(2)
+            duration = time.perf_counter() - start
+            throughput = num_mels / duration
             loss = criterion(y_pred, y)
             if distributed_run:
                 reduced_val_loss = reduce_tensor(loss.data, n_gpus).item()
@@ -143,7 +151,7 @@ def validate(model, criterion, valset, iteration, batch_size, n_gpus,
 
     model.train()
     if rank == 0:
-        print("Validation loss {}: {:9f}  ".format(iteration, val_loss))
+        print("Validation loss {}: {:9f}  ThroughPut {:.3f}samples/s".format(iteration, val_loss, throughput))
         logger.log_validation(val_loss, model, y, y_pred, iteration)
     
     if val_loss <= args.target_val_loss:
@@ -217,7 +225,10 @@ def train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus,
 
             model.zero_grad()
             x, y = model.parse_batch(batch)
+
             y_pred = model(x)
+            mel = y_pred[0]
+            num_mels = mel.size(0) * mel.size(2)
 
             loss = criterion(y_pred, y)
             if hparams.distributed_run:
@@ -242,8 +253,9 @@ def train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus,
 
             if not is_overflow and rank == 0:
                 duration = time.perf_counter() - start
-                print("Train loss {} {:.6f} Grad Norm {:.6f} {:.2f}s/it".format(
-                    iteration, reduced_loss, grad_norm, duration))
+                throughput = num_mels / duration
+                print("Train loss {} {:.6f} Grad Norm {:.6f} Timer {:.3f}s/it ThroughPut {:.3f}samples/s".format(
+                    iteration, reduced_loss, grad_norm, duration, throughput))
                 logger.log_training(
                     reduced_loss, grad_norm, learning_rate, duration, iteration)