开始讲解之前推荐一下我的专栏，本专栏的内容支持(分类、检测、分割、追踪、关键点检测)，专栏目前为限时折扣，欢迎大家订阅本专栏。本专栏每周更新5-7篇最新机制，更有包含我所有改进的文件和交流群提供给大家，本人定期在群内分享发表论文方法和经验。

一、本文介绍

本文给大家带来的最新改进机制是Damo-YOLO的RepGFPN（重参数化泛化特征金字塔网络），利用其优化YOLOv26的Neck部分，可以在不影响计算量的同时大幅度涨点，亲测在小目标和大目标检测的数据集上效果均表现良好。RepGFPN不同于以往提出的改进模块，其更像是一种结构、一种思想(一种处理事情的方法)，RepGFPN相对于BiFPN和之前的FPN均有一定程度上的优化效果。本文含两个版本：一个是个人总结的使用方法，另一个是官方的使用方法。

专栏链接：YOLOv26有效涨点专栏，包含Conv、注意力机制、主干/Backbone、损失函数、优化器、后处理等改进机制

目录
一、本文介绍
二、RepGFPN的框架原理
三、RepGFPN的核心代码
四、手把手教你添加RepGFPN
4.1 修改一
4.2 修改二
4.3 修改三
4.4 修改四
4.5 修改五
五、正式训练
5.1 yaml文件
5.1.1 yaml文件1
5.1.2 yaml文件2
5.2 训练代码
5.3 训练过程截图
五、本文总结

二、RepGFPN的框架原理

官方论文地址：官方论文地址
官方代码地址：官方代码地址

RepGFPN（重参数化泛化特征金字塔网络）是DAMO-YOLO框架中用于实时目标检测的新方法。其主要原理是：RepGFPN改善了用于目标检测的特征金字塔网络（FPN）的概念，更高效地融合多尺度特征，对于捕捉高层语义和低层空间细节至关重要。其主要改进机制包括：
- 不同尺度通道：它为不同尺度的特征图采用不同的通道维度，优化了计算资源下的性能。
- 优化的皇后融合机制：该方法通过修改的皇后融合机制增强了特征交互，通过去除额外的上采样操作减少延迟。
- 整合CSPNet和ELAN：它结合了CSPNet和高效层聚合网络（ELAN）以及重参数化，改善了特征融合而不显著增加计算需求。

总结：RepGFPN更像是一种结构、一种思想，其中的模块我们是可以用其它的机制替换的。

下面的图片是Damo-YOLO的网络结构图，其中我用红框标出来的部分就是RepGFPN的路径聚合图。

根据图片我们来说一下：GFPN(重参数化特征金字塔网络)作为“颈部”（也就是YOLOv8中的neck），用于优化和融合高层语义和低层空间特征。在左上角的融合块（Fusion Block）中，我们可以看到反复出现的结构单元，它们由多个1x1卷积、一个3x3卷积组成，这些卷积后面通常跟着批量归一化（BN）和激活函数（Act）。这个复合结构在训练时和推理时有所不同，这是通过“简化Rep 3x3”结构来实现的：它在训练时使用3x3卷积与1x1卷积并行的多分支结构，而在推理时则通过重参数化合并为单个3x3卷积以提高效率(现在很多结构都使用这种思想：训练时候用复杂的模块，推理时换为简单的模块，这在大家自己的改进中也可以是一种思想)。

三、RepGFPN的核心代码

下面的代码是GFPN的核心代码，我们将其复制到ultralytics/nn/modules目录下，在其中创建一个文件，我这里起名为GFPN，然后粘贴进去，其余使用方式看章节四。

import torch
import torch.nn as nn
import numpy as np


class swish(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)


def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
    default_act = swish()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Perform transposed convolution of 2D data."""
        return self.act(self.conv(x))


class RepConv(nn.Module):
    default_act = swish()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """Initializes Light Convolution layer with inputs, outputs & optional activation function."""
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
        self.bn = nn.BatchNorm2d(num_features=c1) if bn and c2 == c1 and s == 1 else None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward process."""
        return self.act(self.conv(x))

    def forward(self, x):
        """Forward process."""
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Pads a 1x1 tensor to a 3x3 tensor."""
        if kernel1x1 is None:
            return 0
        else:
            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, "id_tensor"):
                input_dim = self.c1 // self.g
                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.c1):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def fuse_convs(self):
        """Combines two convolution layers into a single layer and removes unused attributes from the class."""
        if hasattr(self, "conv"):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        self.conv = nn.Conv2d(in_channels=self.conv1.conv.in_channels,
                              out_channels=self.conv1.conv.out_channels,
                              kernel_size=self.conv1.conv.kernel_size,
                              stride=self.conv1.conv.stride,
                              padding=self.conv1.conv.padding,
                              dilation=self.conv1.conv.dilation,
                              groups=self.conv1.conv.groups,
                              bias=True).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__("conv1")
        self.__delattr__("conv2")
        if hasattr(self, "nm"):
            self.__delattr__("nm")
        if hasattr(self, "bn"):
            self.__delattr__("bn")
        if hasattr(self, "id_tensor"):
            self.__delattr__("id_tensor")


class BasicBlock_3x3_Reverse(nn.Module):
    def __init__(self, ch_in, ch_hidden_ratio, ch_out, shortcut=True):
        super(BasicBlock_3x3_Reverse, self).__init__()
        assert ch_in == ch_out
        ch_hidden = int(ch_in * ch_hidden_ratio)
        self.conv1 = Conv(ch_hidden, ch_out, 3, s=1)
        self.conv2 = RepConv(ch_in, ch_hidden, 3, s=1)
        self.shortcut = shortcut

    def forward(self, x):
        y = self.conv2(x)
        y = self.conv1(y)
        if self.shortcut:
            return x + y
        else:
            return y


class SPP(nn.Module):
    def __init__(
            self,
            ch_in,
            ch_out,
            k,
            pool_size
    ):
        super(SPP, self).__init__()
        self.pool = []
        for i, size in enumerate(pool_size):
            pool = nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2, ceil_mode=False)
            self.add_module("pool{}".format(i), pool)
            self.pool.append(pool)
        self.conv = Conv(ch_in, ch_out, k)

    def forward(self, x):
        outs = [x]
        for pool in self.pool:
            outs.append(pool(x))
        y = torch.cat(outs, axis=1)
        y = self.conv(y)
        return y


class CSPStage(nn.Module):
    def __init__(self, ch_in, ch_out, n, block_fn="BasicBlock_3x3_Reverse", ch_hidden_ratio=1.0, act="silu",
                 spp=False):
        super(CSPStage, self).__init__()
        split_ratio = 2
        ch_first = int(ch_out // split_ratio)
        ch_mid = int(ch_out - ch_first)
        self.conv1 = Conv(ch_in, ch_first, 1)
        self.conv2 = Conv(ch_in, ch_mid, 1)
        self.convs = nn.Sequential()
        next_ch_in = ch_mid
        for i in range(n):
            if block_fn == "BasicBlock_3x3_Reverse":
                self.convs.add_module(
                    str(i),
                    BasicBlock_3x3_Reverse(next_ch_in, ch_hidden_ratio, ch_mid, shortcut=True))
            else:
                raise NotImplementedError
            if i == (n - 1) // 2 and spp:
                self.convs.add_module("spp", SPP(ch_mid * 4, ch_mid, 1, [5, 9, 13]))
            next_ch_in = ch_mid
        self.conv3 = Conv(ch_mid * n + ch_first, ch_out, 1)

    def forward(self, x):
        y1 = self.conv1(x)
        y2 = self.conv2(x)
        mid_out = [y1]
        for conv in self.convs:
            y2 = conv(y2)
            mid_out.append(y2)
        y = torch.cat(mid_out, axis=1)
        y = self.conv3(y)
        return y

四、手把手教你添加RepGFPN

下面的步骤如果你不会或者不想麻烦操作，可以联系作者获得本专栏添加所有项目文件的源代码，可直接训练。

4.1 修改一

第一还是建立文件，我们找到如下ultralytics/nn文件夹，在其下建立一个目录，名字呢就是Addmodules文件夹。

4.2 修改二

然后在Addmodules文件夹内建立一个新的py文件，将本文章节三中的“核心代码”复制粘贴进去。

4.3 修改三

第二步，我们在该目录下创建一个新的py文件，名字为__init__.py，然后在其内部导入我们的文件，如下图所示。

4.4 修改四

第三步，我们找到如下文件ultralytics/nn/tasks.py，进行导入和注册我们的模块(此处只需要添加一次即可，如果你用我其它的改进机制，这里的步骤只需要添加一次)。

4.5 修改五

在ultralytics/nn/tasks.py文件内的parse_model方法（函数）内，位置大概在1500行左右，按照图示位置添加即可。此处需要自己有一定的判别能力，如果不会可联系作者获得视频教程。

五、正式训练

5.1 yaml文件

5.1.1 yaml文件1

训练信息：YOLO26-Neck-DamoYOLO-1 summary: 287 layers, 2,823,196 parameters, 2,823,196 gradients, 6.2 GFLOPs

# Ultralytics AGPL-3.0 License - https://ultralytics.com/license

# Ultralytics YOLO26 object
# detection model with P3/8 - P5/32 outputs
# Model docs: https://docs.ultralytics.com/models/yolo26
# Task docs: https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
end2end: True # whether to use end-to-end mode
reg_max: 1 # DFL bins
scales: # model compound scaling constants, i.e. 'model=yolo26n.yaml' will call yolo26.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 260 layers, 2,572,280 parameters, 2,572,280 gradients, 6.1 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 260 layers, 10,009,784 parameters, 10,009,784 gradients, 22.8 GFLOPs
  m: [0.50, 1.00, 512] # summary: 280 layers, 21,896,248 parameters, 21,896,248 gradients, 75.4 GFLOPs
  l: [1.00, 1.00, 512] # summary: 392 layers, 26,299,704 parameters, 26,299,704 gradients, 93.8 GFLOPs
  x: [1.00, 1.50, 512] # summary: 392 layers, 58,993,368 parameters, 58,993,368 gradients, 209.5 GFLOPs

# YOLO26n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]]
  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# DAMO-YOLO GFPN Head
head:
  - [-1, 1, Conv, [512, 1, 1]] # 11
  - [6, 1, Conv, [512, 3, 2]]
  - [[-1, 11], 1, Concat, [1]]
  - [-1, 2, C3k2, [512, True]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]] #15
  - [4, 1, Conv, [256, 3, 2]] # 16
  - [[15, -1, 6], 1, Concat, [1]]
  - [-1, 2, C3k2, [512, True]] # 18

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]]
  - [-1, 2, C3k2, [256, True]] # 21

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 18], 1, Concat, [1]]
  - [-1, 2, C3k2, [512, True]] # 24

  - [18, 1, Conv, [256, 3, 2]] # 25
  - [24, 1, Conv, [256, 3, 2]] # 26
  - [[14, 25, -1], 1, Concat, [1]]
  - [-1, 2, C3k2, [1024, True, 0.5, True]] # 28

  - [[21, 24, 28], 1, Detect, [nc]] # Detect(P3, P4, P5)

5.1.2 yaml文件2

训练信息：YOLO26-Neck-DamoYOLO-2 summary: 265 layers, 2,916,444 parameters, 2,916,444 gradients, 6.5 GFLOPs

# Ultralytics AGPL-3.0 License - https://ultralytics.com/license

# Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
# Model docs: https://docs.ultralytics.com/models/yolo26
# Task docs: https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
end2end: True # whether to use end-to-end mode
reg_max: 1 # DFL bins
scales: # model compound scaling constants, i.e. 'model=yolo26n.yaml' will call yolo26.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 260 layers, 2,572,280 parameters, 2,572,280 gradients, 6.1 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 260 layers, 10,009,784 parameters, 10,009,784 gradients, 22.8 GFLOPs
  m: [0.50, 1.00, 512] # summary: 280 layers, 21,896,248 parameters, 21,896,248 gradients, 75.4 GFLOPs
  l: [1.00, 1.00, 512] # summary: 392 layers, 26,299,704 parameters, 26,299,704 gradients, 93.8 GFLOPs
  x: [1.00, 1.50, 512] # summary: 392 layers, 58,993,368 parameters, 58,993,368 gradients, 209.5 GFLOPs

# YOLO26n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]]
  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# DAMO-YOLO GFPN Head
head:
  - [-1, 1, Conv, [512, 1, 1]] # 11
  - [6, 1, Conv, [512, 3, 2]]
  - [[-1, 11], 1, Concat, [1]]
  - [-1, 2, CSPStage, [512]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]] #15
  - [4, 1, Conv, [256, 3, 2]] # 16
  - [[15, -1, 6], 1, Concat, [1]]
  - [-1, 2, CSPStage, [512]] # 18

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]]
  - [-1, 2, CSPStage, [256]] # 21

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 18], 1, Concat, [1]]
  - [-1, 2, CSPStage, [512]] # 24

  - [18, 1, Conv, [256, 3, 2]] # 25
  - [24, 1, Conv, [256, 3, 2]] # 26
  - [[14, 25, -1], 1, Concat, [1]]
  - [-1, 2, CSPStage, [1024]] # 28

  - [[21, 24, 28], 1, Detect, [nc]] # Detect(P3, P4, P5)

5.2 训练代码

大家可以创建一个py文件，将我给的代码复制粘贴进去，配置好自己的文件路径即可运行。

import warnings
warnings.filterwarnings('ignore')
from ultralytics import YOLO

if __name__ == '__main__':
    model = YOLO('模型配置文件地址,也就是5.1你保存到本地文件的地址')
    # 如何切换模型版本, 上面的yaml文件可以改为 yolo26s.yaml就是使用的26s,
    # 类似某个改进的yaml文件名称为yolo26-XXX.yaml那么如果想使用其它版本就把上面的名称改为yolo26l-XXX.yaml即可(改的是上面YOLO中间的名字不是配置文件的)
    # model.load('yolo26n.pt') # 是否加载预训练权重,科研不建议大家加载否则很难提升精度
    model.train(
        data=r'数据集文件地址',
        # 如果大家任务是其它的, 在'ultralytics/cfg/default.yaml'找到这里修改task, 可以改成detect, segment, classify, pose
        cache=False,
        imgsz=640,
        epochs=20,
        single_cls=False,  # 是否是单类别检测
        batch=16,
        close_mosaic=0,
        workers=0,
        device='0',
        optimizer='MuSGD',  # using SGD/MuSGD
        # resume='',  # 这里是填写last.pt地址
        amp=True,  # 如果出现训练损失为Nan可以关闭amp
        project='runs/train',
        name='exp',
    )

5.3 训练过程截图

五、本文总结

到此本文的正式分享内容就结束了，在这里给大家推荐我的YOLOv26改进有效涨点专栏，本专栏目前为新开的，平均质量分98分，后期我会根据各种最新的前沿顶会进行论文复现，也会对一些老的改进机制进行补充，如果大家觉得本文帮助到你了，订阅本专栏，关注后续更多的更新~

专栏链接：