yolov5-5.0加入CBAM,SE,CA,ECA注意力机制
CBAM注意力
注意:yolo.py 和 yaml 文件中引用的模块名必须与 common.py 中定义的类名保持一致(例如下面定义的类名是 CBAMC3;如果你使用的模块类名是 CBAM,那么 yolo.py 和 yaml 中相应的 CBAMC3 也要换成 CBAM),下面的 SE 同理。
class ChannelAttention(nn.Module):
    """Channel-attention gate of CBAM.

    Global average- and max-pooled descriptors are passed through the same
    two-layer 1x1-conv bottleneck (``f1`` -> ReLU -> ``f2``), summed, squashed
    with a sigmoid and multiplied onto the input.

    Args:
        in_planes: number of channels of the input feature map.
        ratio: reduction ratio of the bottleneck. The hidden width is
            ``in_planes // ratio`` but never less than 1.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        # in_planes // ratio is 0 when in_planes < ratio; a zero-channel conv
        # would make the gate degenerate, so clamp the hidden width to 1.
        hidden = max(1, in_planes // ratio)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.f1 = nn.Conv2d(in_planes, hidden, 1, bias=False)
        self.relu = nn.ReLU()
        self.f2 = nn.Conv2d(hidden, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _shared_mlp(self, pooled):
        """Apply the bottleneck shared by the avg- and max-pooled branches."""
        return self.f2(self.relu(self.f1(pooled)))

    def forward(self, x):
        """Return ``x`` scaled per channel by the attention weights."""
        avg_out = self._shared_mlp(self.avg_pool(x))
        max_out = self._shared_mlp(self.max_pool(x))
        out = self.sigmoid(avg_out + max_out)
        return torch.mul(x, out)


class SpatialAttention(nn.Module):
    """Spatial-attention gate of CBAM.

    The channel-wise mean and max maps are concatenated, reduced to a single
    map by one convolution, squashed with a sigmoid and multiplied onto the
    input.

    Args:
        kernel_size: size of the convolution kernel; must be 3 or 7.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        # "same" padding: 1 for a 3x3 kernel, 3 for a 7x7 kernel.
        padding = kernel_size // 2
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled per spatial position by the attention map."""
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.sigmoid(self.conv(out))
        return torch.mul(x, out)


class CBAMC3(nn.Module):
    """CSP bottleneck with 3 convolutions followed by CBAM attention.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of stacked ``Bottleneck`` modules.
        shortcut: forwarded to each ``Bottleneck``.
        g: groups, forwarded to each ``Bottleneck``.
        e: expansion factor used to derive the hidden channel count.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        self.channel_attention = ChannelAttention(c2, 16)
        self.spatial_attention = SpatialAttention(7)

    def forward(self, x):
        """Fuse both CSP branches with ``cv3``, then apply channel and spatial attention."""
        fused = self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
        return self.spatial_attention(self.channel_attention(fused))
1.这里是卷积注意力的代码,我一般喜欢加在common.py的C3模块后面,不需要做改动,傻瓜ctrl+c+v就可以了。
2.在yolo.py里做改动。在parse_model函数里将对应代码用以下代码替换,还是傻瓜ctrl+c+v。
# Fragment of the per-layer loop body in parse_model (yolo.py): derives the
# constructor arguments `args` and the output channel count `c2` for module `m`.
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
         C3, C3TR, CBAMC3):
    # Modules constructed as m(c_in, c_out, *extra).
    c1 = ch[f]
    c2 = args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)

    args = [c1, c2] + list(args[1:])
    if m in (BottleneckCSP, C3, C3TR, CBAMC3):
        # These modules take the repeat count as their third argument.
        args.insert(2, n)  # number of repeats
        n = 1
elif m is nn.BatchNorm2d:
    args = [ch[f]]
elif m is Concat:
    c2 = sum(ch[x] for x in f)
elif m is Detect:
    args.append([ch[x] for x in f])
    if isinstance(args[1], int):  # number of anchors
        args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
    c2 = ch[f] * args[0] ** 2
elif m is Expand:
    c2 = ch[f] // args[0] ** 2
else:
    # Any other module leaves the channel count unchanged.
    c2 = ch[f]
3.在yaml文件里改动。比如你要用s网络,我是这样改的:将骨干网络中的C3模块全部替换为CBAMC3模块(这里需要注意的是,这样改动只能加载少部分预训练权重)。如果不想改动这么大,那么接着往下看。
pytorch中加入注意力机制(CBAM),以yolov5为例_YY_172的博客-CSDN博客_yolov5加注意力
这是首发将CBAM注意力添加到yolov5网络中的博主,我也是看了他的方法,侵删。
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, CBAMC3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, CBAMC3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, CBAMC3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, CBAMC3, [1024, False]],  # 9
  ]
SE注意力
class SE(nn.Module):
    """Squeeze-and-Excitation channel attention.

    The input is globally average-pooled, passed through a two-layer linear
    bottleneck (``l1`` -> ReLU -> ``l2`` -> sigmoid) and the resulting
    per-channel weights are multiplied onto the input.

    Args:
        c1: number of input channels (also the number of output channels).
        c2: accepted so the module can be built as ``m(c1, c2, ...)`` by
            parse_model; not used by the layer itself.
        r: reduction ratio of the bottleneck. The hidden width is ``c1 // r``
            but never less than 1.
    """

    def __init__(self, c1, c2, r=16):
        super().__init__()
        # c1 // r is 0 when c1 < r; clamp so the bottleneck keeps one unit.
        hidden = max(1, c1 // r)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.l1 = nn.Linear(c1, hidden, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.l2 = nn.Linear(hidden, c1, bias=False)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled per channel by the excitation weights."""
        b, c, _, _ = x.size()
        y = self.avgpool(x).view(b, c)  # squeeze: one value per channel
        y = self.sig(self.l2(self.relu(self.l1(y))))  # excitation weights
        return x * y.view(b, c, 1, 1).expand_as(x)


# The parse_model / yaml snippets refer to this module as ``SELayer``.
SELayer = SE
1.这里是SE注意力的代码段,同上一个注意力的加法一样,我喜欢加在C3后面。
2.在yolo.py中做改动。
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Walk the layer specs of a model dict and derive each layer's arguments.

    This is an excerpt of yolo.py's parse_model: it covers reading the model
    hyper-parameters and computing, for every ``[from, number, module, args]``
    entry of ``d['backbone'] + d['head']``, the constructor arguments ``args``
    and the output channel count ``c2``. The remainder of the loop body and
    the function's return statement are not part of this snippet.

    Args:
        d: model dict with the keys 'anchors', 'nc', 'depth_multiple',
            'width_multiple', 'backbone' and 'head'.
        ch: list of channel counts; ``ch[-1]`` seeds ``c2`` and ``ch[f]`` is
            read as the input channel count of a layer.
    """
    logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE(review): eval() runs on strings taken from the model yaml; only use trusted configs.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception:
                # Best effort: a string that is not a valid expression stays a plain string.
                # (Narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed.)
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR, CoordAtt, SELayer, CBAM]:
            # Modules constructed as m(c_in, c_out, *extra).
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is eca_layer:
            # eca_layer(channel, k_size) has no c_out parameter, so it must not go through
            # the [c1, c2, ...] path above (c2 would be passed as k_size). It keeps the
            # channel count of its input.
            c2 = ch[f]
            args = [c2, *args[1:]]
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]
        # ... excerpt ends here; the rest of parse_model is unchanged and not shown in this snippet.
3.在你要用的yaml文件中做改动。
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, C3, [1024, False]],  # 9
   [-1, 1, SELayer, [1024, 4]],  # 10 (new layer: absolute layer indices >= 10 used in the head's `from` fields, e.g. Concat/Detect, must be increased by 1)
  ]
ECA注意力
class eca_layer(nn.Module):
    """Efficient Channel Attention (ECA) module.

    The input is globally average-pooled to one value per channel, a 1-D
    convolution of size ``k_size`` is run across the channel axis, and the
    sigmoid of the result is multiplied onto the input.

    Args:
        channel: number of channels of the input feature map (kept for
            interface compatibility; the layer itself does not use it).
        k_size: kernel size of the 1-D convolution across channels.
    """

    def __init__(self, channel, k_size=3):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled per channel by the attention weights."""
        # Feature descriptor on the global spatial information.
        y = self.avg_pool(x)

        # Treat the channel axis as a sequence for the 1-D conv, then restore the layout.
        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)

        y = self.sigmoid(y)

        # Apply the gate exactly once (the earlier version multiplied by y twice).
        return x * y.expand_as(x)
1.这里是ECA注意力的代码片段;如果代码是以注释形式给出的,放到自己的脚本里把注释取消掉就可以了。添加的位置同上(common.py 的 C3 模块后面),这里就不再重复了。
2.改动yolo.py。看以下代码段。
# Fragment of the per-layer loop body in parse_model (yolo.py): derives the
# constructor arguments `args` and the output channel count `c2` for module `m`.
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
         C3, C3TR]:
    # Modules constructed as m(c_in, c_out, *extra).
    c1, c2 = ch[f], args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)

    args = [c1, c2, *args[1:]]
    # eca_layer was listed here but can never reach this branch (it is not in the outer
    # list); C3TR is restored so it receives its repeat count like in the other variants.
    if m in [BottleneckCSP, C3, C3TR]:
        args.insert(2, n)  # number of repeats
        n = 1
elif m is nn.BatchNorm2d:
    args = [ch[f]]
elif m is Concat:
    c2 = sum([ch[x] for x in f])
elif m is Detect:
    args.append([ch[x] for x in f])
    if isinstance(args[1], int):  # number of anchors
        args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
    c2 = ch[f] * args[0] ** 2
elif m is Expand:
    c2 = ch[f] // args[0] ** 2
elif m is eca_layer:
    channel = args[0]
    channel = make_divisible(channel * gw, 8) if channel != no else channel
    args = [channel]
    # eca_layer only rescales its input, so the channel count is that of the source layer.
    # Without this, c2 silently kept the value left over from the previous layer.
    c2 = ch[f]
else:
    c2 = ch[f]
3.改动你要用的yaml文件。这里我要解释一下为什么交代了两种添加注意力的方法(第一种:将骨干里的C3全部替换掉;第二种:在骨干最后一层加注意力,做一个输出层)。第二种方法的模型目前还在跑,还没出结果,不过结果也能猜个大概:有稳定的微小提升,detect效果不会提升太多。我用第一种方法、用ECA注意力替换掉骨干里的全部C3时,模型的P、R、mAP均出现了下降,大概一两个点;但令人意外的是,它的检测效果很好,能够检测到很多改动前的模型检测不到的目标,当然也会比原模型出现更多的误检和漏检,手动调整阈值后好了很多。因为数据集涉及公司机密,所以这里就不放出来了。我做的是安全帽检测,有兴趣的同学可以尝试一下这种添加注意力的方法。
如果只是求提高模型准确率,推荐第二种方法。
接下来就是发表在CVPR 2021上的注意力了(Coordinate Attention)。
Coordinate Attention(CoordAtt)
class h_sigmoid(nn.Module):
    """Hard sigmoid: ``ReLU6(x + 3) / 6``."""

    def __init__(self, inplace=True):
        super().__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return self.relu(x + 3) / 6


class h_swish(nn.Module):
    """Hard swish: ``x * h_sigmoid(x)``."""

    def __init__(self, inplace=True):
        super().__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        return x * self.sigmoid(x)


class CoordAtt(nn.Module):
    """Coordinate Attention block.

    The input is average-pooled along the width and along the height
    separately; both descriptors go through a shared 1x1 conv + BatchNorm +
    h_swish, are split again and turned into a per-row and a per-column
    sigmoid gate that are multiplied onto the input.

    Args:
        inp: number of input channels.
        oup: number of channels produced by the two gate convolutions. The
            gates are multiplied onto the input, so this has to be
            broadcast-compatible with ``inp`` (normally equal to it).
        reduction: reduction ratio; the hidden width is
            ``max(8, inp // reduction)``.
    """

    def __init__(self, inp, oup, reduction=32):
        super().__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))  # keep height, pool width to 1
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))  # keep width, pool height to 1

        mip = max(8, inp // reduction)

        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = h_swish()

        self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return ``x`` gated by the height-wise and width-wise attention maps."""
        identity = x

        h, w = x.size(2), x.size(3)
        x_h = self.pool_h(x)
        # Move the width axis into dim 2 so both descriptors can be concatenated there.
        x_w = self.pool_w(x).permute(0, 1, 3, 2)

        y = torch.cat([x_h, x_w], dim=2)
        y = self.conv1(y)
        y = self.bn1(y)
        y = self.act(y)

        x_h, x_w = torch.split(y, [h, w], dim=2)
        x_w = x_w.permute(0, 1, 3, 2)  # back to a (.., 1, w) layout

        a_h = self.conv_h(x_h).sigmoid()
        a_w = self.conv_w(x_w).sigmoid()

        out = identity * a_w * a_h

        return out
这是代码段,加在common.py的C3模块后面
这里是改动yolo.py的部分,最后在yaml文件里的改动这里就不说了,前面提供了两种方法供大家使用,大家可以自行选择。
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Walk the layer specs of a model dict and derive each layer's arguments.

    This is an excerpt of yolo.py's parse_model: it covers reading the model
    hyper-parameters and computing, for every ``[from, number, module, args]``
    entry of ``d['backbone'] + d['head']``, the constructor arguments ``args``
    and the output channel count ``c2``. The remainder of the loop body and
    the function's return statement are not part of this snippet.

    Args:
        d: model dict with the keys 'anchors', 'nc', 'depth_multiple',
            'width_multiple', 'backbone' and 'head'.
        ch: list of channel counts; ``ch[-1]`` seeds ``c2`` and ``ch[f]`` is
            read as the input channel count of a layer.
    """
    logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE(review): eval() runs on strings taken from the model yaml; only use trusted configs.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception:
                # Best effort: a string that is not a valid expression stays a plain string.
                # (Narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed.)
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR, CBAMC3, CoordAtt]:
            # Modules constructed as m(c_in, c_out, *extra). CoordAtt(inp, oup, reduction)
            # is handled here, so it needs no separate branch further down (the former
            # `elif m is CoordAtt` could never be reached and has been removed).
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            # CBAMC3 takes the repeat count as its third argument, like C3.
            if m in [BottleneckCSP, C3, C3TR, CBAMC3]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]
        # ... excerpt ends here; the rest of parse_model is unchanged and not shown in this snippet.
后面的ECA和CA注意力添加方法是我对着前两位博主照葫芦画瓢,在我的本地运行多次,就俩字,好用,以后的注意力也可以按照这种方法去添加。
小白学习中,此笔记纯属学习笔记使用,若有侵权,请联系我删除
yolov5-5.0加入CBAM,SE,CA,ECA注意力机制相关推荐
- 文献阅读笔记10——ECA注意力机制
0 写在前面 第一部分内容更针对于精度的提升,第二部分就需要找到一些方法针对速度的提升. 1 ECA注意力机制 天津大学在2020CVPR发表的一篇文章 1.1 Abstract+Conclusion ...
- [YOLOv7/YOLOv5系列算法改进NO.33]引入GAMAttention注意力机制
前 言:作为当前先进的深度学习目标检测算法YOLOv7,已经集合了大量的trick,但是还是有提高和改进的空间,针对具体应用场景下的检测难点,可以不同的改进方法.此后的系列文章,将重点对YOLOv7 ...
- YOLOv5、v7改进之三十一:CrissCrossAttention注意力机制
前 言:作为当前先进的深度学习目标检测算法YOLOv7,已经集合了大量的trick,但是还是有提高和改进的空间,针对具体应用场景下的检测难点,可以不同的改进方法.此后的系列文章,将重点对YOLOv7 ...
- 【CBAM 解读】混合注意力机制:Convolutional Block Attention Module
摘要 本文提出了卷积块注意模块(CBAM),这是一种简单而有效的前馈卷积神经网络注意模块.在给定中间特征图的情况下,我们的模块沿着通道和空间两个不同的维度顺序地推断关注图,然后将关注图与输入特征图相乘 ...
- YOLOv5改进系列(8)——添加SOCA注意力机制
[YOLOv5改进系列]前期回顾: YOLOv5改进系列(0)--重要性能指标与训练结果评价及分析 YOLOv5改进系列(1)--添加SE注意力机制
- 【CBAM Pytorch实现】注意力机制综述阅读推荐
注意力机制推荐阅读:(Attention Mechanism) (博客)Squeeze-and-Excitation Networks(2017) (知乎)CBAM: Convolutional Bl ...
- 改进YOLOv5, YOLOv7系列:1.YOLO超全注意力机制汇总 | S2A, SE,SimAM, SKA,ShA, SOCA, CA, CBAM, CrissCrossA, NAM, GAM等
- PyTorch 1.0 中文官方教程:基于注意力机制的 seq2seq 神经网络翻译
译者:mengfu188 作者: Sean Robertson 在这个项目中,我们将教一个把法语翻译成英语的神经网络. [KEY: > input, = target, < output ...
- 改进YOLOv5、YOLOv8系列:14.添加S2-MLPv2注意力机制
最新创新点改进推荐 -
最新文章
- 面试官:Java 到底是值传递还是引用传递?
- 024:模版查找路径配置
- properties文件如何注解多行加#
- 第一节、Alex 讲解 python+mysql 交互;
- marquee滚动起始位置_巧用喵影关键帧制作滚动水印,让视频小偷无可盗
- 深入了解Delphi 7中的四种消息框
- rabbitmq 更细致的过滤
- CCF NOI1058 统计单词
- ANSI C和Glib C区别(二)
- spring mvc 上传文件
- Ubuntu 自动关机命令
- ucore操作系统 lab1 实验报告
- Java基础笔记day01
- 【转】VB6和VB.NET的区别
- win10高性能模式
- python中积分怎么表示_python中的数值积分与符号积分
- Beaver‘s Calculator
- 如何返回正确与错误信息
- IPv4地址(定义、分类、特殊、公有、私有)
- c++通过宏控制Log日志的显示与否