# 需要注意的地方是:观察模型的主干网络,以及 forward 层存在几个输出;若存在多个输出,

import argparse
import os
import sys
import time

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import torchvision
from PIL import Image
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus
from pytorch_grad_cam.utils.image import preprocess_image, show_cam_on_image
from torch.autograd import Variable
from torchvision import transforms

import hopenet
import utils

t = time.time()


# Environment / command-line parameter setup:
def parse_args(argv=None):
    """Parse the command-line options of the head-pose demo.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what a plain
            ``parse_args()`` call did before this parameter existed.

    Returns:
        argparse.Namespace with ``gpu_id``, ``use_cuda``, ``snapshot``,
        ``picture_path``, ``bboxes``, ``output_path``, ``aug_smooth``,
        ``eigen_smooth`` and ``method``. ``use_cuda`` is forced to ``False``
        when torch reports that no CUDA device is available.
    """
    parser = argparse.ArgumentParser(
        description='Head pose estimation using the Hopenet network.')
    parser.add_argument('--gpu', dest='gpu_id',
                        help='GPU device id to use [0]', default=0, type=int)
    parser.add_argument('--use-cuda', action='store_true', default=True,
                        help='Use NVIDIA GPU acceleration')
    # '--use-cuda' defaults to True and is a store_true flag, so on its own it
    # can never be switched off; this companion flag makes CPU runs possible.
    parser.add_argument('--no-cuda', dest='use_cuda', action='store_false',
                        help='Force CPU computation')
    parser.add_argument('--snapshot', dest='snapshot',
                        help='Path of model snapshot.',
                        default='E://Paper/Reader/Head_pose/Deep-head-pose/'
                                'pre_models/hopenet_robust_alpha1.pkl',
                        type=str)
    parser.add_argument('--picture', dest='picture_path',
                        help='Path of picture',
                        default='E://Paper/Reader/'
                                'Head_pose/Deep-head-pose/'
                                'input/320_2.jpg')
    parser.add_argument('--bboxes', dest='bboxes',
                        help='Bounding box annotations of frames',
                        default='E://Paper/'
                                'Reader/Head_pose/Deep-head-pose/bbox/320_3.txt')
    # Previously hard-coded at the imwrite call; the default keeps that path.
    parser.add_argument('--output', dest='output_path',
                        help='Path of the annotated output picture',
                        default='E://Paper/Reader/Head_pose/Deep-head-pose/'
                                'code/output/pictures/output16.jpg')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument('--eigen_smooth', action='store_true',
                        help='Reduce noise by taking the first principle '
                             'componenet of cam_weights*activations')
    parser.add_argument('--method', type=str, default='gradcam',
                        choices=['gradcam', 'gradcam++', 'scorecam'],
                        help='Can be gradcam / gradcam++ /scorecam')
    args = parser.parse_args(argv)

    # Only keep CUDA enabled when a device is really present.
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print("Using GPU for acceleration")
    else:
        print("Using CPU for computation")
    return args


def _parse_bboxes(tokens):
    """Return the ``(x_min, y_min, x_max, y_max)`` boxes found in *tokens*.

    *tokens* is the whitespace-split content of the bbox file: the first token
    is the picture number and every following group of four tokens is one box.
    A trailing incomplete group is ignored instead of raising IndexError.
    """
    coords = tokens[1:]
    usable = len(coords) - len(coords) % 4
    return [
        tuple(int(float(value)) for value in coords[start:start + 4])
        for start in range(0, usable, 4)
    ]


def _expand_bbox(box, img_width, img_height):
    """Loosen *box* (50 px left/right/top, 30 px bottom) and clamp it to the image."""
    x_min, y_min, x_max, y_max = box
    return (
        max(x_min - 50, 0),
        max(y_min - 50, 0),
        min(x_max + 50, img_width),
        min(y_max + 30, img_height),
    )


def _expected_angle(logits, idx_tensor):
    """Convert one output head's bin logits into a continuous angle in degrees.

    The softmax-weighted mean bin index is scaled by 3 and shifted by -99,
    exactly as the original inline expression did. The result is returned as
    a plain Python float so it can be formatted and handed to drawing code.
    """
    # dim=-1 matches the implicit dimension the dim-less softmax call picked
    # for 1-D and 2-D inputs, without the deprecation warning.
    probabilities = F.softmax(logits, dim=-1)
    return (torch.sum(probabilities * idx_tensor) * 3 - 99).item()


def main():
    """Estimate the head pose of every annotated face in one picture and draw it.

    Reads the picture and its bounding boxes, runs the Hopenet model on each
    loosely cropped face, draws the pose axes and the yaw/pitch/roll values on
    the picture, then saves and shows the annotated result.
    """
    args = parse_args()

    # CAM back-ends selectable through --method.
    # NOTE: the Grad-CAM heat-map visualisation that used these (together with
    # --aug_smooth / --eigen_smooth) is currently disabled, so the options are
    # only validated here and have no further effect.
    methods = {
        "gradcam": GradCAM,
        "scorecam": ScoreCAM,
        "gradcam++": GradCAMPlusPlus,
    }
    cudnn.enabled = True
    out_dir = 'output/pictures'

    if args.method not in methods:
        raise Exception(f"method should be one of {list(methods.keys())}")
    # Create the output directory if it does not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    if not os.path.exists(args.picture_path):
        sys.exit('picture does not exist')

    # Honour the CPU fallback decided in parse_args instead of calling
    # .cuda() unconditionally.
    device = torch.device(f'cuda:{args.gpu_id}' if args.use_cuda else 'cpu')

    # ResNet50 structure with 66 output bins per angle.
    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    print('Loading snapshot.')
    # NOTE(review): torch.load unpickles the file; only load trusted snapshots.
    saved_state_dict = torch.load(args.snapshot, map_location=device)
    model.load_state_dict(saved_state_dict)

    print('Loading data.')
    transformations = transforms.Compose([
        transforms.Resize(224),      # resize (Scale was renamed to Resize)
        transforms.CenterCrop(224),  # center crop
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],  # mean, std of (R, G, B)
                             std=[0.229, 0.224, 0.225]),
    ])

    model.to(device)
    print('Ready to test network.')
    model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).

    # Bin indices 0..65 used to turn bin probabilities into an expected index.
    idx_tensor = torch.arange(66, dtype=torch.float32, device=device)

    picture = cv2.imread(args.picture_path)
    if picture is None:
        # cv2.imread signals an unreadable/undecodable file by returning None.
        sys.exit('picture could not be read')
    width = int(picture.shape[1])   # shape[1] is the picture width
    height = int(picture.shape[0])  # shape[0] is the picture height
    print('width', width)
    print('height', height)

    start = time.time()

    with open(args.bboxes, 'r') as f:
        # split() (not splitlines) flattens the file into whitespace tokens.
        bbox_line_list = f.read().split()
    print('bbox_line_list', bbox_line_list)
    print('length', len(bbox_line_list))

    # Convert once from the untouched picture so that axes drawn for one face
    # never leak into the crop of a later, overlapping face.
    rgb_picture = cv2.cvtColor(picture, cv2.COLOR_BGR2RGB)

    drawn = 0
    for box in _parse_bboxes(bbox_line_list):
        # The axis size is based on the annotated (not the expanded) box.
        bbox_height = abs(box[3] - box[1])
        x_min, y_min, x_max, y_max = _expand_bbox(box, width, height)
        if x_max <= x_min or y_max <= y_min:
            print('skipping empty bbox', box)
            continue

        # Crop the face loosely and prepare a batch of one for the network.
        face = Image.fromarray(rgb_picture[y_min:y_max, x_min:x_max])
        batch = transformations(face).unsqueeze(0).to(device)

        with torch.no_grad():
            yaw, pitch, roll = model(batch)

        # Get continuous predictions in degrees.
        yaw_predicted = _expected_angle(yaw, idx_tensor)
        pitch_predicted = _expected_angle(pitch, idx_tensor)
        roll_predicted = _expected_angle(roll, idx_tensor)

        # Draw the three pose axes on the original picture, centred on the
        # expanded box.
        utils.draw_axis(picture, yaw_predicted, pitch_predicted, roll_predicted,
                        tdx=(x_min + x_max) / 2, tdy=(y_min + y_max) / 2,
                        size=bbox_height / 2)

        # Print the three angles with two decimals next to the face.
        cv2.putText(picture, f"Yaw: {yaw_predicted:.2f}",
                    (x_min + 45, y_min + 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 255, 0), 1)
        cv2.putText(picture, f"Pitch: {pitch_predicted:.2f}",
                    (x_min + 45, y_min + 40),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 255, 0), 1)
        cv2.putText(picture, f"Roll: {roll_predicted:.2f}",
                    (x_min + 45, y_min + 50),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 255, 0), 1)
        drawn += 1

    if drawn:
        # Save the annotated picture at its original size. cv2.imwrite reports
        # failure (e.g. a missing directory) only through its return value.
        if not cv2.imwrite(args.output_path, picture):
            print('warning: could not write', args.output_path)
        cv2.imshow('picture', picture)
        cv2.destroyAllWindows()

    print('time taken=', time.time() - start)


if __name__ == '__main__':
    main()


