CBAM注意力

注意:yolo.py 和 yaml 文件中引用的模块名必须与 common.py 中定义的类名保持一致(例如类名若为 CBAM,则这两处的 CBAMC3 也要相应改成 CBAM),下面的 SE 同理。

class ChannelAttention(nn.Module):
    """Channel-attention gate of CBAM.

    Globally average-pooled and max-pooled descriptors of the input are passed
    through the same two 1x1 convolutions (``f1`` -> ReLU -> ``f2``), summed,
    squashed with a sigmoid and multiplied back onto the input.

    Args:
        in_planes: Number of channels of the input feature map.
        ratio: Reduction ratio of the bottleneck between ``f1`` and ``f2``.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        # Keep at least one hidden channel so the gate does not degenerate
        # when in_planes < ratio (in_planes // ratio would be 0).
        hidden = max(1, in_planes // ratio)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.f1 = nn.Conv2d(in_planes, hidden, 1, bias=False)
        self.relu = nn.ReLU()
        self.f2 = nn.Conv2d(hidden, in_planes, 1, bias=False)
        # Alternative formulation using a sequential container:
        # self.sharedMLP = nn.Sequential(
        #     nn.Conv2d(in_planes, hidden, 1, bias=False), nn.ReLU(),
        #     nn.Conv2d(hidden, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` rescaled per channel by the attention gate."""
        avg_out = self.f2(self.relu(self.f1(self.avg_pool(x))))
        max_out = self.f2(self.relu(self.f1(self.max_pool(x))))
        out = self.sigmoid(avg_out + max_out)
        return torch.mul(x, out)


class SpatialAttention(nn.Module):
    """Spatial-attention gate of CBAM.

    The channel-wise mean and max maps are concatenated, convolved down to a
    single-channel map, squashed with a sigmoid and multiplied back onto the
    input.

    Args:
        kernel_size: Size of the convolution kernel; must be 3 or 7.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1  # keeps the spatial size unchanged
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` rescaled per spatial position by the attention gate."""
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.sigmoid(self.conv(out))
        return torch.mul(x, out)


class CBAMC3(nn.Module):
    """CSP Bottleneck with 3 convolutions, followed by CBAM attention.

    Args:
        c1: Input channels (ch_in).
        c2: Output channels (ch_out).
        n: Number of stacked ``Bottleneck`` modules.
        shortcut: Passed through to each ``Bottleneck``.
        g: Groups, passed through to each ``Bottleneck``.
        e: Expansion factor used to derive the hidden channel count.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        self.channel_attention = ChannelAttention(c2, 16)
        self.spatial_attention = SpatialAttention(7)
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        """Fuse both branches with ``cv3``, then apply channel and spatial attention."""
        fused = self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
        return self.spatial_attention(self.channel_attention(fused))

1.这里是卷积注意力的代码,我一般喜欢加在common.py的C3模块后面,不需要做改动,傻瓜ctrl+c+v就可以了。

2.在yolo.py里做改动。在parse_model函数里将对应代码用以下代码替换,还是傻瓜ctrl+c+v。

# Fragment of parse_model's per-layer loop (yolo.py) with CBAMC3 registered.
# It resolves the constructor arguments and the output channel count c2 for module m.
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
         C3, C3TR, CBAMC3]:
    c1, c2 = ch[f], args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)

    args = [c1, c2, *args[1:]]
    if m in [BottleneckCSP, C3, C3TR, CBAMC3]:
        args.insert(2, n)  # number of repeats
        n = 1
elif m is nn.BatchNorm2d:
    args = [ch[f]]
elif m is Concat:
    c2 = sum(ch[x] for x in f)
elif m is Detect:
    args.append([ch[x] for x in f])
    if isinstance(args[1], int):  # number of anchors
        args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
    c2 = ch[f] * args[0] ** 2
elif m is Expand:
    c2 = ch[f] // args[0] ** 2
else:
    c2 = ch[f]

3.在yaml文件里改动。比如你要用s网络,我是这样改的:将骨干网络中的C3模块全部替换为CBAMC3模块(这里需要注意的是,这样改动只能加载少部分预训练权重)。如果不想改动这么大,那么接着往下看。

pytorch中加入注意力机制(CBAM),以yolov5为例_YY_172的博客-CSDN博客_yolov5加注意力

这是首发将CBAM注意力添加到yolov5网络中的博主,我也是看了他的方法,侵删。

backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, CBAMC3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, CBAMC3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, CBAMC3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, CBAMC3, [1024, False]],  # 9
  ]

SE注意力

class SE(nn.Module):
    """Squeeze-and-Excitation channel attention.

    The input is globally average-pooled, passed through two bias-free linear
    layers (``l1`` -> ReLU -> ``l2`` -> sigmoid) and the resulting per-channel
    weights are multiplied back onto the input.

    Args:
        c1: Number of input channels; the output has the same channel count.
        c2: Unused by this module. It is accepted so the class can be built
            with the ``(c1, c2, *args)`` argument list that ``parse_model``
            produces for modules in its generic list.
        r: Reduction ratio of the hidden layer.
    """

    def __init__(self, c1, c2, r=16):
        super().__init__()
        # Keep at least one hidden unit when c1 < r (c1 // r would be 0).
        hidden = max(1, c1 // r)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.l1 = nn.Linear(c1, hidden, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.l2 = nn.Linear(hidden, c1, bias=False)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` rescaled per channel by the learned weights."""
        b, c, _, _ = x.size()
        y = self.avgpool(x).view(b, c)
        y = self.l1(y)
        y = self.relu(y)
        y = self.l2(y)
        y = self.sig(y)
        y = y.view(b, c, 1, 1)
        return x * y.expand_as(x)


# The yolo.py and yaml snippets refer to this module as ``SELayer``;
# expose that name too so both spellings resolve to the same class.
SELayer = SE

1.这里是SE注意力的代码段,同上一个注意力的加法一样,我喜欢加在C3后面。

2.在yolo.py中做改动。

def parse_model(d, ch):  # model_dict, input_channels(3)
    """Excerpt of ``parse_model`` (yolo.py) with extra attention modules registered.

    Only the part that resolves each layer's constructor arguments and output
    channel count is shown; the rest of the function is not part of this excerpt.

    Args:
        d: Model dict. The keys ``anchors``, ``nc``, ``depth_multiple``,
            ``width_multiple``, ``backbone`` and ``head`` are read.
        ch: List of channel counts; ``ch[f]`` is used as a layer's input
            channel count and ``ch[-1]`` seeds ``c2``.
    """
    logger.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE: eval() executes text taken from the model config; only load trusted config files.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except NameError:
                # Plain string arguments that are not Python names stay as strings.
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        # NOTE(review): modules in this list are called as m(c1, c2, *rest). eca_layer's
        # signature is (channel, k_size=3), so it would receive c2 as k_size - confirm
        # before using eca_layer with this version of the list.
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR, CoordAtt, SELayer, eca_layer, CBAM]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

3.在你要用的yaml文件中做改动。

backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, C3, [1024, False]],  # 9
   # NOTE: this extra layer becomes index 10. If the head (not shown here) refers to
   # layers after index 9 by absolute index, those indices need +1 - verify.
   [-1, 1, SELayer, [1024, 4]],  # 10
  ]

 ECA注意力

# class eca_layer(nn.Module):
#     """Constructs an ECA (channel attention) module.
#
#     Args:
#         channel: Number of channels of the input feature map
#                  (accepted for the caller's convenience; not used to size any layer)
#         k_size: Kernel size of the 1-D convolution applied across channels
#     """
#     def __init__(self, channel, k_size=3):
#         super(eca_layer, self).__init__()
#         self.avg_pool = nn.AdaptiveAvgPool2d(1)
#         self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
#         self.sigmoid = nn.Sigmoid()
#
#     def forward(self, x):
#         # Global average pooling: one descriptor per channel
#         # (assumes x is a 4-D (b, c, h, w) feature map, giving (b, c, 1, 1))
#         y = self.avg_pool(x)
#
#         # Treat the channel axis as a 1-D sequence:
#         # (b, c, 1, 1) -> (b, 1, c) -> Conv1d -> (b, c, 1, 1)
#         y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
#
#         # Squash to per-channel gates
#         y = self.sigmoid(y)
#
#         # Apply the gate exactly once. The previous version also reassigned
#         # x = x * y before this return, which scaled the input by y twice.
#         return x * y.expand_as(x)

1.这里是注意力代码片段,放到自己的脚本里把注释取消掉就可以了,添加的位置同上,这里就不说了。

2.改动yolo.py。看以下代码段。

      # Fragment of parse_model's per-layer loop (yolo.py) with a dedicated eca_layer branch.
      # It resolves the constructor arguments and the output channel count c2 for module m.
      if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
               C3, C3TR]:
          c1, c2 = ch[f], args[0]
          if c2 != no:  # if not output
              c2 = make_divisible(c2 * gw, 8)

          args = [c1, c2, *args[1:]]
          # eca_layer used to be listed here, but it can never reach this branch (it is not
          # in the list above). C3TR takes the repeat count like C3, so it belongs here.
          if m in [BottleneckCSP, C3, C3TR]:
              args.insert(2, n)  # number of repeats
              n = 1
      elif m is nn.BatchNorm2d:
          args = [ch[f]]
      elif m is Concat:
          c2 = sum(ch[x] for x in f)
      elif m is Detect:
          args.append([ch[x] for x in f])
          if isinstance(args[1], int):  # number of anchors
              args[1] = [list(range(args[1] * 2))] * len(f)
      elif m is Contract:
          c2 = ch[f] * args[0] ** 2
      elif m is Expand:
          c2 = ch[f] // args[0] ** 2
      elif m is eca_layer:
          # eca_layer only takes the channel count; scale it by the width multiple like other layers.
          channel = args[0]
          channel = make_divisible(channel * gw, 8) if channel != no else channel
          args = [channel]
          c2 = ch[f]  # ECA only rescales its input, so the channel count passes through
      else:
          c2 = ch[f]

3.改动你要用的yaml文件。这里解释一下为什么介绍了两种添加注意力的方法(第一种:将骨干网络里的C3全部替换为注意力模块;第二种:在骨干网络最后一层之后加一层注意力,作为骨干的输出层)。第二种方法的模型目前还在训练,尚未出结果,不过结果大致可以预期:指标会有稳定的微小提升,但detect效果不会提升太多。我用第一种方法把骨干里的C3全部替换为ECA注意力时,模型的P、R、mAP均下降了一到两个点,但令人意外的是实际检测效果很好,能够检测到许多改动前的模型检测不到的目标;当然,误检和漏检也比原模型更多,手动调整阈值后改善了很多。因为数据集涉及公司机密,这里就不放出来了。我做的是安全帽检测,有兴趣的同学可以尝试一下这种添加注意力的方法。

如果只是求提高模型准确率,推荐第二种方法。

接下来是发表在CVPR 2021上的注意力机制——Coordinate Attention(CA)。

CoordAttention(Coordinate Attention,CA)

# class h_sigmoid(nn.Module):
#     """Hard sigmoid: ReLU6(x + 3) / 6."""
#     def __init__(self, inplace=True):
#         super(h_sigmoid, self).__init__()
#         self.relu = nn.ReLU6(inplace=inplace)
#
#     def forward(self, x):
#         return self.relu(x + 3) / 6
#
#
# class h_swish(nn.Module):
#     """Hard swish: x * h_sigmoid(x)."""
#     def __init__(self, inplace=True):
#         super(h_swish, self).__init__()
#         self.sigmoid = h_sigmoid(inplace=inplace)
#
#     def forward(self, x):
#         return x * self.sigmoid(x)
#
#
# class CoordAtt(nn.Module):
#     """Coordinate attention: gates the input with one map per row and one per column.
#
#     Args:
#         inp: Number of input channels
#         oup: Number of channels of the two attention maps
#              (NOTE(review): must broadcast against the input in forward(),
#              so presumably oup == inp - confirm)
#         reduction: Reduction ratio for the shared intermediate conv (at least 8 channels are kept)
#     """
#     def __init__(self, inp, oup, reduction=32):
#         super(CoordAtt, self).__init__()
#         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
#         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
#
#         mip = max(8, inp // reduction)
#
#         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
#         self.bn1 = nn.BatchNorm2d(mip)
#         self.act = h_swish()
#
#         self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
#         self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
#
#     def forward(self, x):
#         identity = x
#
#         n, c, h, w = x.size()
#         # Pool along the width -> (n, c, h, 1); pool along the height and
#         # permute -> (n, c, w, 1), so both can be stacked on dim 2
#         x_h = self.pool_h(x)
#         x_w = self.pool_w(x).permute(0, 1, 3, 2)
#
#         # Shared 1x1 conv + BN + activation over the stacked descriptors
#         y = torch.cat([x_h, x_w], dim=2)
#         y = self.conv1(y)
#         y = self.bn1(y)
#         y = self.act(y)
#
#         # Split back into the height part and the width part
#         x_h, x_w = torch.split(y, [h, w], dim=2)
#         x_w = x_w.permute(0, 1, 3, 2)
#
#         a_h = self.conv_h(x_h).sigmoid()
#         a_w = self.conv_w(x_w).sigmoid()
#
#         # Gate the input with both attention maps
#         out = identity * a_w * a_h
#
#         return out

这是代码段,加在common.py的C3模块后面

这里是改动yolo.py的部分,最后在yaml文件里的改动这里就不说了,前面提供了两种方法供大家使用,大家可以自行选择。

def parse_model(d, ch):  # model_dict, input_channels(3)
    """Excerpt of ``parse_model`` (yolo.py) with CBAMC3 and CoordAtt registered.

    Only the part that resolves each layer's constructor arguments and output
    channel count is shown; the rest of the function is not part of this excerpt.

    Args:
        d: Model dict. The keys ``anchors``, ``nc``, ``depth_multiple``,
            ``width_multiple``, ``backbone`` and ``head`` are read.
        ch: List of channel counts; ``ch[f]`` is used as a layer's input
            channel count and ``ch[-1]`` seeds ``c2``.
    """
    logger.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE: eval() executes text taken from the model config; only load trusted config files.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except NameError:
                # Plain string arguments that are not Python names stay as strings.
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        # CoordAtt is handled here: it is built as CoordAtt(c1, c2, *rest), with c1 taken
        # from the previous layer and c2 scaled by the width multiple.
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
                 C3, C3TR, CBAMC3, CoordAtt]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            # CBAMC3 takes the repeat count as its third argument, like C3.
            if m in [BottleneckCSP, C3, C3TR, CBAMC3]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        # elif m is eca_layer:
        #     channel=args[0]
        #     channel=make_divisible(channel*gw,8)if channel != no else channel
        #     args=[channel]
        # A separate `elif m is CoordAtt` branch used to follow here. It could never run
        # because CoordAtt is already matched by the first branch, so it was removed.
        else:
            c2 = ch[f]

后面的ECA和CA注意力添加方法是我对着前两位博主照葫芦画瓢,在我的本地运行多次,就俩字,好用,以后的注意力也可以按照这种方法去添加。

 小白学习中,此笔记纯属学习笔记使用,若有侵权,请联系我删除

yolov5-5.0加入CBAM,SE,CA,ECA注意力机制相关推荐

  1. 文献阅读笔记10——ECA注意力机制

    0 写在前面 第一部分内容更针对于精度的提升,第二部分就需要找到一些方法针对速度的提升. 1 ECA注意力机制 天津大学在2020CVPR发表的一篇文章 1.1 Abstract+Conclusion ...

  2. [YOLOv7/YOLOv5系列算法改进NO.33]引入GAMAttention注意力机制

     前 言:作为当前先进的深度学习目标检测算法YOLOv7,已经集合了大量的trick,但是还是有提高和改进的空间,针对具体应用场景下的检测难点,可以不同的改进方法.此后的系列文章,将重点对YOLOv7 ...

  3. YOLOv5、v7改进之三十一:CrissCrossAttention注意力机制

     前 言:作为当前先进的深度学习目标检测算法YOLOv7,已经集合了大量的trick,但是还是有提高和改进的空间,针对具体应用场景下的检测难点,可以不同的改进方法.此后的系列文章,将重点对YOLOv7 ...

  4. 【CBAM 解读】混合注意力机制:Convolutional Block Attention Module

    摘要 本文提出了卷积块注意模块(CBAM),这是一种简单而有效的前馈卷积神经网络注意模块.在给定中间特征图的情况下,我们的模块沿着通道和空间两个不同的维度顺序地推断关注图,然后将关注图与输入特征图相乘 ...

  5. YOLOv5改进系列(8)——添加SOCA注意力机制

    [YOLOv5改进系列]前期回顾: YOLOv5改进系列(0)--重要性能指标与训练结果评价及分析 YOLOv5改进系列(1)--添加SE注意力机制

  6. 【CBAM Pytorch实现】注意力机制综述阅读推荐

    注意力机制推荐阅读:(Attention Mechanism) (博客)Squeeze-and-Excitation Networks(2017) (知乎)CBAM: Convolutional Bl ...

  7. 改进YOLOv5, YOLOv7系列:1.YOLO超全注意力机制汇总 | S2A, SE,SimAM, SKA,ShA, SOCA, CA, CBAM, CrissCrossA, NAM, GAM等

  8. PyTorch 1.0 中文官方教程:基于注意力机制的 seq2seq 神经网络翻译

    译者:mengfu188 作者: Sean Robertson 在这个项目中,我们将教一个把把法语翻译成英语的神经网络. [KEY: > input, = target, < output ...

  9. 改进YOLOv5、YOLOv8系列:14.添加S2-MLPv2注意力机制

    最新创新点改进推荐 -

最新文章

  1. 面试官:Java 到底是值传递还是引用传递?
  2. 024:模版查找路径配置
  3. properties文件如何注解多行加#
  4. 第一节、Alex 讲解 python+mysql 交互;
  5. marquee滚动起始位置_巧用喵影关键帧制作滚动水印,让视频小偷无可盗
  6. 深入了解Delphi 7中的四种消息框
  7. rabbitmq 更细致的过滤
  8. CCF NOI1058 统计单词
  9. ANSI C和Glib C区别(二)
  10. spring mvc 上传文件
  11. Ubuntu 自动关机命令
  12. ucore操作系统 lab1 实验报告
  13. Java基础笔记day01
  14. 【转】VB6和VB.NET的区别
  15. win10高性能模式
  16. python中积分怎么表示_python中的数值积分与符号积分
  17. Beaver‘s Calculator
  18. 如何返回正确与错误信息
  19. IPv4地址(定义、分类、特殊、公有、私有)
  20. c++通过宏控制Log日志的显示与否

热门文章

  1. java 抢购代码_Java生鲜电商平台-生鲜电商限时抢购功能设计与代码实战(小程序/APP)...
  2. 第四章:小朱笔记hadoop之源码分析-conf分析
  3. 固态继电器与普通继电器的区别
  4. HM15.0说明文档
  5. 20几岁决定男人的一生(摘抄)
  6. 公交专用道:三环将实现全环连通
  7. 计算机中ups的作用,UPS电源是什么?UPS电源有什么作用?
  8. XenServer假死状态
  9. python的pip如何更新到最新版本
  10. 《京韵大鼓——子期听琴》(骆玉笙)(唱词文本)