%matplotlib inline
import torch
import torchvision
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l#对每个锚框进行类别预测
def cls_predictor(num_inputs, num_anchors, num_classes):
    """Build the class-prediction layer for one feature map.

    The layer is a 3x3 convolution with padding 1, so the spatial size of
    the input is preserved.  For every spatial position it emits
    ``num_anchors * (num_classes + 1)`` channels: one score per anchor and
    per class, with one extra class slot (the original notes describe the
    extra slot as the background class).

    Args:
        num_inputs: Number of channels of the incoming feature map.
        num_anchors: Number of anchor boxes generated per spatial position.
        num_classes: Number of object classes (excluding the extra slot).

    Returns:
        An ``nn.Conv2d`` module.
    """
    out_channels = num_anchors * (num_classes + 1)
    conv = nn.Conv2d(num_inputs, out_channels, kernel_size=3, padding=1)
    return conv

def forward(x, block):
    """Apply ``block`` to ``x`` and return its output."""
    return block(x)


# Class predictions on two feature maps of different scale.  The 3x3/pad-1
# convolution keeps height and width, so only the channel count changes:
# 5 * (10 + 1) = 55 channels for Y1 and 3 * (10 + 1) = 33 channels for Y2.
Y1 = forward(torch.zeros((2, 8, 20, 20)), cls_predictor(8, 5, 10))
Y2 = forward(torch.zeros((2, 16, 10, 10)), cls_predictor(16, 3, 10))
Y1.shape, Y2.shape

down_sample_blk：依次经过两组「3×3 卷积层（padding=1）→ BN 层 → ReLU 层」，最后接一个 2×2、stride=2 的最大池化层。这样，特征图的高和宽就减半了，输出通道数是超参数。

def down_sample_blk(in_channels, out_channels):
    """Build a block that halves the height and width of a feature map.

    The block is two repetitions of (3x3 conv with padding 1 -> BatchNorm
    -> ReLU) followed by a 2x2 max-pooling layer.  The convolutions keep
    the spatial size; the pooling layer halves it.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.

    Returns:
        An ``nn.Sequential`` containing the seven layers.
    """
    layers = []
    for _ in range(2):
        layers.append(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU())
        # The second convolution consumes the output of the first one.
        in_channels = out_channels
    layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

基本网络块

def base_net():
    """Build the base feature extractor.

    Three ``down_sample_blk`` blocks are chained with channel widths
    3 -> 16 -> 32 -> 64.  Each block halves height and width, so the
    spatial size shrinks by a factor of 8 overall.

    Returns:
        An ``nn.Sequential`` of the three down-sampling blocks.
    """
    num_filters = [3, 16, 32, 64]
    # Pair every width with its successor: (3, 16), (16, 32), (32, 64).
    stages = [
        down_sample_blk(c_in, c_out)
        for c_in, c_out in zip(num_filters, num_filters[1:])
    ]
    return nn.Sequential(*stages)


# Shape check on a 256x256 input batch.
forward(torch.zeros((2, 3, 256, 256)), base_net()).shape

def get_blk(i):
    """Return the ``i``-th stage of the detector.

    Stage 0 is the base network, stage 1 a 64 -> 128 down-sampling block,
    stage 4 a global average pooling to 1x1, and every other index a
    128 -> 128 down-sampling block.

    Args:
        i: Stage index.

    Returns:
        The ``nn.Module`` for that stage.
    """
    if i == 0:
        blk = base_net()
    elif i == 1:
        blk = down_sample_blk(64, 128)
    elif i == 4:
        # Last stage: collapse the feature map to a single position.
        blk = nn.AdaptiveAvgPool2d((1, 1))
    else:
        blk = down_sample_blk(128, 128)
    return blk

def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    """Run one detector stage and produce its anchors and predictions.

    Args:
        X: Input feature map (or image batch for the first stage).
        blk: Module that turns ``X`` into this stage's feature map.
        size: Anchor sizes passed to ``d2l.multibox_prior`` as ``sizes``.
        ratio: Anchor ratios passed to ``d2l.multibox_prior`` as ``ratios``.
        cls_predictor: Module producing class predictions from the
            feature map.
        bbox_predictor: Module producing bounding-box predictions from the
            feature map.

    Returns:
        A tuple ``(Y, anchors, cls_preds, bbox_preds)`` where ``Y`` is the
        stage's feature map, to be fed into the next stage.
    """
    Y = blk(X)
    anchors = d2l.multibox_prior(Y, sizes=size, ratios=ratio)
    cls_preds = cls_predictor(Y)
    bbox_preds = bbox_predictor(Y)
    return (Y, anchors, cls_preds, bbox_preds)

# Anchor sizes, one pair per detector stage (five stages in total).
sizes = [
    [0.2, 0.272],
    [0.37, 0.447],
    [0.54, 0.619],
    [0.71, 0.79],
    [0.88, 0.961],
]
# Every stage uses the same three aspect ratios.
ratios = 5 * [[1, 2, 0.5]]
# Anchors per spatial position: len(sizes) + len(ratios) - 1 for one stage.
num_anchors = len(ratios[0]) + len(sizes[0]) - 1

def flatten_pred(pred):
    """Flatten one prediction tensor to shape ``(batch, -1)``.

    The channel axis (dim 1) is moved to the end before flattening, so the
    values belonging to one spatial position end up adjacent in the output.

    Args:
        pred: 4-D tensor; dims are permuted as ``(0, 2, 3, 1)``.

    Returns:
        A 2-D tensor whose first dimension is unchanged.
    """
    channels_last = pred.permute(0, 2, 3, 1)
    return torch.flatten(channels_last, start_dim=1)


def concat_preds(preds):
    """Flatten every tensor in ``preds`` and concatenate them along dim 1.

    This lets predictions from feature maps of different sizes be handled
    as one 2-D tensor.

    Args:
        preds: Iterable of 4-D prediction tensors sharing the batch size.

    Returns:
        A 2-D tensor of shape ``(batch, total_values)``.
    """
    return torch.cat([flatten_pred(p) for p in preds], dim=1)

小型的SSD网络模型

def bbox_predictor(num_inputs, num_anchors):
    """Build the bounding-box prediction layer for one feature map.

    NOTE(review): ``TinySSD`` calls this function but no definition is
    visible in this file, so it is supplied here following the d2l
    tutorial: a 3x3/pad-1 convolution emitting 4 offsets per anchor.

    Args:
        num_inputs: Number of channels of the incoming feature map.
        num_anchors: Number of anchor boxes generated per spatial position.

    Returns:
        An ``nn.Conv2d`` module with ``num_anchors * 4`` output channels.
    """
    return nn.Conv2d(num_inputs, num_anchors * 4, kernel_size=3, padding=1)


class TinySSD(nn.Module):
    """A small single-shot multibox detector with five stages.

    Each stage ``i`` owns three sub-modules, registered as attributes
    ``blk_i`` (feature extractor), ``cls_i`` (class predictor) and
    ``bbox_i`` (bounding-box predictor).
    """

    def __init__(self, num_classes, **kwargs):
        """Create the five stages.

        Args:
            num_classes: Number of object classes to detect.
            **kwargs: Forwarded to ``nn.Module.__init__``.
        """
        super(TinySSD, self).__init__(**kwargs)
        self.num_classes = num_classes
        # Input channel count seen by the predictors of each stage.
        idx_to_in_channels = [64, 128, 128, 128, 128]
        for i in range(5):
            # Equivalent to the assignment `self.blk_i = get_blk(i)`.
            setattr(self, f'blk_{i}', get_blk(i))
            # Class predictor: input channels, anchors per position, classes.
            setattr(self, f'cls_{i}',
                    cls_predictor(idx_to_in_channels[i], num_anchors,
                                  num_classes))
            # Box predictor: input channels, anchors per position.
            setattr(self, f'bbox_{i}',
                    bbox_predictor(idx_to_in_channels[i], num_anchors))

    def forward(self, X):
        """Run all five stages and merge their outputs.

        Args:
            X: Input image batch.

        Returns:
            A tuple ``(anchors, cls_preds, bbox_preds)``.  ``cls_preds`` is
            reshaped to ``(batch, -1, num_classes + 1)``; ``bbox_preds`` is
            left flattened as ``(batch, -1)``.  The original notes quote
            example shapes [1, 5444, 4], [32, 5444, 2] and [32, 21776] for
            a batch of 32 256x256 images with one class.
        """
        # One slot per stage.
        anchors, cls_preds, bbox_preds = [None] * 5, [None] * 5, [None] * 5
        for i in range(5):
            # `getattr(self, f'blk_{i}')` accesses `self.blk_i`.  Each stage
            # returns its feature map (fed to the next stage), its anchors
            # and its class / box predictions.
            X, anchors[i], cls_preds[i], bbox_preds[i] = blk_forward(
                X, getattr(self, f'blk_{i}'), sizes[i], ratios[i],
                getattr(self, f'cls_{i}'), getattr(self, f'bbox_{i}'))
        # Merge the anchors produced by the five stages along dim 1.
        anchors = torch.cat(anchors, dim=1)
        cls_preds = concat_preds(cls_preds)
        # (batch, number of anchors, num_classes + 1); the extra class slot
        # is described as background in the original notes.
        cls_preds = cls_preds.reshape(
            cls_preds.shape[0], -1, self.num_classes + 1)
        bbox_preds = concat_preds(bbox_preds)
        return anchors, cls_preds, bbox_preds

每个像素位置生成 4 个锚框，因此一个图像一共生成 $(32^2 + 16^2 + 8^2 + 4^2 + 1^2) \times 4 = 5444$ 个锚框。

# Shape check: push a dummy batch of 32 RGB 256x256 images through the model.
net = TinySSD(num_classes=1)
# torch.zeros builds a tensor *of that shape*; torch.tensor((32, 3, 256, 256))
# would instead build a 1-D tensor holding those four numbers.
X = torch.zeros((32, 3, 256, 256))
anchors, cls_preds, bbox_preds = net(X)
print('output anchors:', anchors.shape)
print('output class preds:', cls_preds.shape)
print('output bbox preds:', bbox_preds.shape)

output anchors: torch.Size([1, 5444, 4])
output class preds: torch.Size([32, 5444, 2])
output bbox preds: torch.Size([32, 21776])

读取数据集

# Load the banana-detection data and set up the model and optimizer.
batch_size = 32
train_iter, _ = d2l.load_data_bananas(batch_size)
device, net = d2l.try_gpu(), TinySSD(num_classes=1)
trainer = torch.optim.SGD(net.parameters(), lr=0.2, weight_decay=5e-4)

损失函数和评价函数

# reduction='none' keeps the per-element losses (no sum/mean applied), so
# calc_loss can average them per sample itself.
cls_loss = nn.CrossEntropyLoss(reduction='none')
bbox_loss = nn.L1Loss(reduction='none')


def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Compute the per-sample detection loss.

    The loss has two parts: a cross-entropy class loss and an L1
    bounding-box loss.  Both predictions and labels of the box loss are
    multiplied by ``bbox_masks``, so entries whose mask is 0 contribute
    nothing (the original notes say the mask is 0 for background anchors).

    Args:
        cls_preds: Predicted class scores, ``(batch, anchors, classes)``;
            e.g. [32, 5444, 2] in the original notes.
        cls_labels: Class label assigned to each anchor,
            ``(batch, anchors)``; e.g. [32, 5444].
        bbox_preds: Predicted box offsets, flattened; e.g. [32, 21776].
        bbox_labels: Target box offsets, same shape as ``bbox_preds``.
        bbox_masks: Multiplicative mask, same shape as ``bbox_preds``.

    Returns:
        A 1-D tensor with one loss value per sample in the batch.
    """
    batch_size, num_classes = cls_preds.shape[0], cls_preds.shape[2]
    # Cross-entropy wants (N, classes) vs (N,), so fold the anchor axis
    # into the batch axis, then restore it to average per sample.
    cls = cls_loss(cls_preds.reshape(-1, num_classes),
                   cls_labels.reshape(-1)).reshape(batch_size, -1).mean(dim=1)
    bbox = bbox_loss(bbox_preds * bbox_masks,
                     bbox_labels * bbox_masks).mean(dim=1)
    return cls + bbox

def cls_eval(cls_preds, cls_labels):
    """Count how many anchors have a correctly predicted class.

    Args:
        cls_preds: Class scores with the class axis last.
        cls_labels: Ground-truth class label per anchor.

    Returns:
        The number of matching predictions, as a Python float.
    """
    # The class scores live on the last axis, so argmax must use dim=-1.
    predicted = cls_preds.argmax(dim=-1).type(cls_labels.dtype)
    return float((predicted == cls_labels).sum())


def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    """Sum the masked absolute error between predicted and target offsets.

    Args:
        bbox_preds: Predicted box offsets.
        bbox_labels: Target box offsets.
        bbox_masks: Multiplicative mask; entries with mask 0 are ignored.

    Returns:
        The summed absolute error, as a Python float.
    """
    return float((torch.abs((bbox_labels - bbox_preds) * bbox_masks)).sum())


num_epochs, timer = 20, d2l.Timer()
animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                        legend=['class error', 'bbox mae'])
net = net.to(device)

网络模型图

# Training loop.
for epoch in range(num_epochs):
    # Accumulates: correct class predictions, number of class labels,
    # summed absolute box error, number of box labels.
    metric = d2l.Accumulator(4)
    net.train()  # switch to training mode
    for features, target in train_iter:
        timer.start()
        trainer.zero_grad()  # clear gradients from the previous step
        # Move the batch to the compute device.
        X, Y = features.to(device), target.to(device)
        # Generate multi-scale anchors and predict a class and an offset
        # for each of them.
        anchors, cls_preds, bbox_preds = net(X)
        # Assign a class label and a target offset to every anchor.
        bbox_labels, bbox_masks, cls_labels = d2l.multibox_target(anchors, Y)
        # Loss from predicted vs. assigned classes and offsets.
        l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                      bbox_masks)
        l.mean().backward()  # back-propagate
        trainer.step()  # update the parameters
        metric.add(cls_eval(cls_preds, cls_labels), cls_labels.numel(),
                   bbox_eval(bbox_preds, bbox_labels, bbox_masks),
                   bbox_labels.numel())
    # Per-epoch class error rate and mean absolute box error.
    cls_err, bbox_mae = 1 - metric[0] / metric[1], metric[2] / metric[3]
    animator.add(epoch + 1, (cls_err, bbox_mae))  # update the plot
print(f'class err {cls_err:.2e}, bbox mae {bbox_mae:.2e}')
print(f'{len(train_iter.dataset) / timer.stop():.1f} examples/sec on 'f'{str(device)}')

预测

# Read the test image and add a batch axis; the original notes give the
# resulting shape as [1, 3, 256, 256].
X = torchvision.io.read_image('../../pytorch/img/banana.jpg').unsqueeze(0).float()
# Channels-last integer copy of the image, used for plotting.
img = X.squeeze(0).permute(1, 2, 0).long()


def predict(X):
    """Run the detector on ``X`` and return the kept detections.

    Args:
        X: Image batch; only the first image's detections are returned.

    Returns:
        The rows of the first image's detection output whose first column
        (class index) is not -1.  The original notes describe each row as
        class index, confidence and four predicted box coordinates.
    """
    net.eval()
    anchors, cls_preds, bbox_preds = net(X.to(device))
    # Turn the scores into probabilities and move the class axis to dim 1.
    cls_probs = F.softmax(cls_preds, dim=2).permute(0, 2, 1)
    # Pass the probabilities, not the raw scores: the original computed
    # cls_probs and then handed cls_preds to multibox_detection.
    output = d2l.multibox_detection(cls_probs, bbox_preds, anchors)
    # Keep only rows whose class index is not -1 (the original notes call
    # these the non-background anchors).
    idx = [i for i, row in enumerate(output[0]) if row[0] != -1]
    return output[0, idx]


output = predict(X)

去除掉置信度低于阈值的锚框，得到最终预测结果

def display(img, output, threshold):
    """Draw every detection whose confidence reaches ``threshold``.

    Args:
        img: Image to show; its first two dimensions are read as
            height and width.
        output: Detection rows; column 1 is read as the confidence and
            columns 2-5 as the box, which is scaled by (w, h, w, h).
        threshold: Detections with a score below this value are skipped.
    """
    d2l.set_figsize((5, 5))
    fig = d2l.plt.imshow(img)
    # The image size does not change between rows, so read it once.
    h, w = img.shape[0:2]
    for row in output:
        score = float(row[1])
        if score < threshold:
            continue
        # Scale the box to pixel units using the image width and height.
        bbox = [row[2:6] * torch.tensor((w, h, w, h), device=row.device)]
        d2l.show_bboxes(fig.axes, bbox, '%.2f' % score, 'w')


display(img, output.cpu(), threshold=0.9)

目标检测-SSD代码详解相关推荐

深度篇——目标检测史(七) 细说 YOLO-V3目标检测之代码详解
返回主目录返回目标检测史目录上一章:深度篇--目标检测史(六) 细说 YOLO-V3目标检测下一章:深度篇--目标检测史(八) 细说 CornerNet-Lite 目标检测论文地址:< ...
目标检测算法YOLO-V2详解
❝ 上期我们一起学习了YOLO-V1算法的框架原来和损失函数等知识,如下: 目标检测算法YOLO-V1算法详解目标检测模型YOLO-V1损失函数详解 [文末领福利] ❞ 今天,我们一起学习下YOLO ...
Face Paper: 目标检测RSSD论文详解
转载: http://blog.csdn.net/u014380165/article/details/77130922 论文:Enhancement of SSD by concatenating ...
项目一家庭记账软件（目标 + 需求说明 + 代码详解 + 基本金和收支明细记录 + 键盘访问的实现）
家庭记账软件 1. 目标 2. 需求说明 3. 代码详解 4. 基本金和收支明细的记录 5. 键盘访问的实现 1. 目标模拟实现一个基于文本界面的<家庭记账软件> 掌握初步的编程技巧和调 ...
《计算机视觉之目标检测》IOU详解及代码
今天在改模型的时候刚好用到了IOU,因此将IOU说说,记录一下代码,方便以后复用. 1.什么是IOU IoU 的全称为交并比(Intersection over Union),它的计算也比较简单,就是 ...
目标检测指标mAP详解
前言相信刚刚接触目标检测的小伙伴也是有点疑惑吧,目标检测的知识点和模型属实有点多,想要工作找CV的话,目标检测是必须掌握的方向了.我记得在找实习的时候,面试官就问到了我目标检测的指标是什么,答:mA ...
【三维目标检测】Complex-Yolov4详解（一）：数据处理
前面分别介绍了基于点云的三维深度学习算法PointNet.PointNet++,和基于体素的三维深度学习算法VoxelNet.本节将开始介绍基于投影的三维深度学习算法Complex-Yolov4.三维 ...
目标检测 RCNN算法详解
原文:http://blog.csdn.net/shenxiaolu1984/article/details/51066975 [目标检测]RCNN算法详解 Girshick, Ross, et al ...
目标检测算法YOLOv4详解
YOLOv4是精度速度最优平衡, 各种调优手段是真香,本文主要从以下几个方面进行阐述: YOLOv4介绍 YOLOv4框架原理 BackBone训练策略 BackBone推理策略检测头训练策略检测 ...

目标检测-SSD代码详解

目录

读取数据集

损失函数和评价函数

网络模型图

预测

目标检测-SSD代码详解相关推荐

最新文章

热门文章