2s-AGCN Skeleton-Based Action Recognition 代码学习

一, 大致框架

二, 零散的代码学习

一, 大致框架

data-get(N, C, T, V, M)(已经包含时间和空间信息)(样本数，channel，时间帧数，num_node,人数)。

joint_data；- graph_spatial（A）- (agcn.py)model(A,B,C;AGCN) -（main.py）train - test- softmax score a

bone_data；- graph_spatial（A）- (agcn.py)model(A,B,C;AGCN) -（main.py）train - test- softmax score b。

（ensemble.py）a+b -> fused score , action label。

① 针对graph文件夹，就是为了return A，即得到邻接矩阵。论文中的Ak是N×N的矩阵，其中N（关节点数）即代码中的V（num_node）。

class Graph:def __init__(self, labeling_mode='spatial'):self.A = self.get_adjacency_matrix(labeling_mode)...def get_adjacency_matrix(self, labeling_mode=None):if labeling_mode is None:return self.Aif labeling_mode == 'spatial':A = tools.get_spatial_graph(num_node, self_link, inward, outward)else:raise ValueError()return Atools.py
def get_spatial_graph(num_node, self_link, inward, outward):I = edge2mat(self_link, num_node)In = normalize_digraph(edge2mat(inward, num_node))Out = normalize_digraph(edge2mat(outward, num_node))  #inward, outward是列表，列表里是（a,b）这种坐标类型 的数据A = np.stack((I, In, Out))  #3×V*Vreturn Adef edge2mat(link, num_node):A = np.zeros((num_node, num_node))  #论文中使用的N*N表示Ak，即代码中的Vfor i, j in link:A[j, i] = 1return Adef normalize_digraph(A):  # 除以每列的和（归一化）Dl = np.sum(A, 0)  #对每一列相加h, w = A.shape  #即代码中V*VDn = np.zeros((w, w))for i in range(w):if Dl[i] > 0:Dn[i, i] = Dl[i] ** (-1)AD = np.dot(A, Dn)  #h×w w×w -> h×w  即V*Vreturn AD

$A^{k}$ determines whether there are connections between two vertices; it represents the physical structure of the human body.

ntu_rgb_d.py（kinetics.py，num_node = 18，inward本身从0开始）
num_node = 25
self_link = [(i, i) for i in range(num_node)]  #相同关节点的连接
inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6),(8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1),(14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18),(20, 19), (22, 23), (23, 8), (24, 25), (25, 12)]  #关节点间的可连接方式
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]  #为了从0开始
outward = [(j, i) for (i, j) in inward]  #反过来，为了构建无向图
neighbor = inward + outward

② 针对model文件夹，分别对应论文中 adaptive graph convolutional network > 4.1 layer(unit_gcn, unit_tcn)；4.2 block(TCN_GCN_unit)；4.3 network(Model)

#For the temporal dimension,it is straightforward to perform the graph convolution similar to the classical convolution operation.
class unit_tcn(nn.Module):  #temporal GCN(Kt × 1 convolution on the C ×T ×N feature maps) + bndef __init__(self, in_channels, out_channels, kernel_size=9, stride=1):super(unit_tcn, self).__init__()pad = int((kernel_size - 1) / 2)  #输入输出维度不变self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0),stride=(stride, 1))self.bn = nn.BatchNorm2d(out_channels)...def forward(self, x):x = self.bn(self.conv(x))return xclass unit_gcn(nn.Module):  #spatial GCN + bn + reludef __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True):super(unit_gcn, self).__init__()inter_channels = out_channels // coff_embeddingnum_jpts = A.shape[-1]self.conv_d = nn.ModuleList()  #容器  appendfor i in range(self.num_subset):self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))if adaptive:self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))self.conv_a = nn.ModuleList()self.conv_b = nn.ModuleList()for i in range(self.num_subset):self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))else:self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False)self.adaptive = adaptiveif attention:self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4)nn.init.constant_(self.conv_ta.weight, 0)nn.init.constant_(self.conv_ta.bias, 0)# s attentionker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts  #0 代表 假 ， 1 代表真pad = (ker_jpt - 1) // 2self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad)nn.init.xavier_normal_(self.conv_sa.weight)nn.init.constant_(self.conv_sa.bias, 0)# channel attentionrr = 2self.fc1c = nn.Linear(out_channels, out_channels // rr)self.fc2c = nn.Linear(out_channels // rr, out_channels)nn.init.kaiming_normal_(self.fc1c.weight)nn.init.constant_(self.fc1c.bias, 0)nn.init.constant_(self.fc2c.weight, 0)nn.init.constant_(self.fc2c.bias, 0)self.attention = attentionif in_channels != out_channels:self.down = 
nn.Sequential(nn.Conv2d(in_channels, out_channels, 1),nn.BatchNorm2d(out_channels))else:self.down = lambda x: xself.bn = nn.BatchNorm2d(out_channels)self.soft = nn.Softmax(-2)self.tan = nn.Tanh()self.sigmoid = nn.Sigmoid()self.relu = nn.ReLU(inplace=True)for m in self.modules():if isinstance(m, nn.Conv2d):conv_init(m)elif isinstance(m, nn.BatchNorm2d):bn_init(m, 1)bn_init(self.bn, 1e-6)for i in range(self.num_subset):conv_branch_init(self.conv_d[i], self.num_subset)def forward(self, x):N, C, T, V = x.size()y = Noneif self.adaptive:  #自适应A = A + self.PA  #A+B  #nn.Parameter(torch.from_numpy(A.astype(np.float32)))  #3*V*Vfor i in range(self.num_subset):  #f in Cin*T*N      two embedding functions(one 1 × 1 convolutional layer )A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T)  #N*V*CT（论文中的N*CT）A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)  #N*CT*V（论文中的CT*N）A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1))  # N V V（论文中的N*N）高维矩阵 乘  CkA1 = A[i] + A1  #(论文中N*N) -> A+B+CA2 = x.view(N, C * T, V)  #论文中的CT*Nz = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))  #N*CT*V   (x.size，论文中CT*N，每一个num_subset的输出)y = z + y if y is not None else zelse:A = self.A.cuda(x.get_device()) * self.maskfor i in range(self.num_subset):A1 = A[i]A2 = x.view(N, C * T, V)z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))y = z + y if y is not None else zy = self.bn(y)y += self.down(x)  #residualy = self.relu(y)if self.attention:# spatial attention# temporal attention# channel attention  ...return yclass TCN_GCN_unit(nn.Module):  #Adaptive graph convolutional blockdef __init__(self, in_channels, out_channels, A, stride=1, residual=True, adaptive=True, attention=True):super(TCN_GCN_unit, self).__init__()self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive, attention=attention)self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)  #conv,bn,reluself.relu = nn.ReLU(inplace=True)self.attention = attentionif not 
residual:self.residual = lambda x: 0elif (in_channels == out_channels) and (stride == 1):self.residual = lambda x: xelse:self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride)  #一层conv代表残差def forward(self, x):y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x))return yclass Model(nn.Module):  #Adaptive graph convolutional networkdef __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3,drop_out=0, adaptive=True, attention=True):super(Model, self).__init__()Graph = import_class(graph)self.graph = Graph(**graph_args)A = self.graph.Aself.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)self.l1 = TCN_GCN_unit(3, 64, A, residual=False, adaptive=adaptive, attention=attention)self.l2 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)self.l3 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)self.l4 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)self.l5 = TCN_GCN_unit(64, 128, A, stride=2, adaptive=adaptive, attention=attention)self.l6 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)self.l7 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)self.l8 = TCN_GCN_unit(128, 256, A, stride=2, adaptive=adaptive, attention=attention)self.l9 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)self.l10 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)self.fc = nn.Linear(256, num_class)def forward(self, x):N, C, T, V, M = x.size()x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)x = self.data_bn(x)x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)x = self.l1(x)...x = self.l10(x)# N*M,C,T,Vc_new = x.size(1)x = x.view(N, M, c_new, -1)x = x.mean(3).mean(1)x = self.drop_out(x)return self.fc(x)  #全连接分类

③ data_gen文件夹下，部分可对应4.4. Two-stream networks，

ntu_gendata.py[kinetics_gendata.py]  （get data of joints）fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32)  # N, C, T, V, Mfor i, s in enumerate(tqdm(sample_name)):data = read_xyz(os.path.join(data_path, s), max_body=max_body_kinect, num_joint=num_joint)  #4,25;data-> 3，seq_info['numFrame']，num_joint，max_body[data, label = feeder[i]]fp[i, :, 0:data.shape[1], :, :] = data  #0:data.shape[1]--seq_info['numFrame']fp = pre_normalization(fp)  #N, C, T, V, M  preprocess.pynp.save('{}/{}_data_joint.npy'.format(out_path, part), fp)  #保存关节数据[np.save(data_out_path, fp)]gen_bone_data.py  （calculate the data of bones based on the data of joints）
for dataset in datasets:  #人体关键节点的定义及其连接方式   'ntu/xview', 'ntu/xsub',for set in sets:  # 'train', 'val'print(dataset, set)data = np.load('../data/{}/{}_data_joint.npy'.format(dataset, set))  #下载关节数据N, C, T, V, M = data.shapefp_sp = open_memmap('../data/{}/{}_data_bone.npy'.format(dataset, set),dtype='float32',mode='w+',shape=(N, 3, T, V, M))  #骨骼信息   #创建或加载内存映射.npy文件fp_sp[:, :C, :, :, :] = datafor v1, v2 in tqdm(paris[dataset]):  #paris是不同数据集的 人关节点的 （a,b）连接索引（论文中有人体关键点的图）if dataset != 'kinetics':v1 -= 1  #1~25  -> 0~24v2 -= 1  #1->0fp_sp[:, :, :, v1, :] = data[:, :, :, v1, :] - data[:, :, :, v2, :]  #length information and direction information of the bonemerge_joint_bone_data.py
for dataset in datasets:for set in sets:print(dataset, set)data_jpt = np.load('../data/{}/{}_data_joint.npy'.format(dataset, set))  #关节data_bone = np.load('../data/{}/{}_data_bone.npy'.format(dataset, set))  #骨骼N, C, T, V, M = data_jpt.shapedata_jpt_bone = np.concatenate((data_jpt, data_bone), axis=1)  #沿axis=1（通道维C）拼接，即把关节和骨骼的通道叠在一起np.save('../data/{}/{}_data_joint_bone.npy'.format(dataset, set), data_jpt_bone)  #joint+bonegen_motion_data.py  # gen_motion_data.py处理得到的temporal edges没用到，temporal edges只是为了后边时间上的卷积即可
for dataset in datasets:for set in sets:for part in parts:print(dataset, set, part)data = np.load('../data/{}/{}_data_{}.npy'.format(dataset, set, part))  #下载信息  数据集 训练/验证 关节/骨骼N, C, T, V, M = data.shapefp_sp = open_memmap('../data/{}/{}_data_{}_motion.npy'.format(dataset, set, part),dtype='float32',mode='w+',shape=(N, 3, T, V, M))  #写motion信息for t in tqdm(range(T - 1)):fp_sp[:, :, t, :, :] = data[:, :, t + 1, :, :] - data[:, :, t, :, :]  #temporal 相同点 连接fp_sp[:, :, T - 1, :, :] = 0  #举个例子，只有三个时间点，那么表示相邻时间节点的连接的数据， 就只有两个（即三个点，只连接相邻点，有两条线）

④ main.py

class GradualWarmupScheduler(_LRScheduler):
def init_seed(_):
def get_parser():
class Processor():def __init__(self, arg):def load_data(self):def load_model(self):Model = import_class(self.arg.model)def load_optimizer(self):def save_arg(self):def adjust_learning_rate(self, epoch):def print_time(self):def print_log(self, str, print_time=True):def record_time(self):def split_time(self):def train(self, epoch, save_model=False):def eval(self, epoch, save_score=False, loader_name=['test'], wrong_file=None, result_file=None):def start(self):
def str2bool(v):
def import_class(name):  #import_class(self.arg.feeder)  #default='feeder.feeder', help='data loader will be used'components = name.split('.')  #查找所有的'.'间隔的内容，并用列表放置 --2021.4.12更正mod = __import__(components[0])  # import return model   __import__() 函数用于动态加载类和函数  因为考虑到类名经常会发生变化，这里取第一个名字对应的.py文件  【例如feeder.Feeder--> 就是要找到feeder.py的文件】for comp in components[1:]:mod = getattr(mod, comp)  #getattr() 函数用于返回一个对象comp属性值 【针对上边例子，这里就是要获取feeder.py模块文件中的Feeder类，以供对应位置使用】return mod

⑤ README.md （4.4. Two-stream networks）

Preprocess the data with #先对数据进行处理，得到关节数据

python data_gen/ntu_gendata.py

python data_gen/kinetics_gendata.py
Generate the bone data with: #关节数据转换为骨骼数据

python data_gen/gen_bone_data.py

Change the config file depending on what you want. #分别将关节和骨骼的时空数据送入J-stream 和 B-stream，训练

`python main.py --config ./config/nturgbd-cross-view/train_joint.yaml`

`python main.py --config ./config/nturgbd-cross-view/train_bone.yaml`

To ensemble the results of joints and bones, run test firstly to generate the scores of the softmax layer. #测试，产生各自softmax分数

`python main.py --config ./config/nturgbd-cross-view/test_joint.yaml`

`python main.py --config ./config/nturgbd-cross-view/test_bone.yaml`

Then combine the generated scores with: #两个softmax分数相加，to obtain the fused score and predict the action label

`python ensemble.py --datasets ntu/xview`

二, 零散的代码学习

rotation.py

import numpy as np
import mathdef rotation_matrix(axis, theta):  #axis给定轴，theta给定θ弧度。return np.array()  #逆时旋转，返回旋转矩阵
def unit_vector(vector):return vector / np.linalg.norm(vector)  #向量vector/默认是二范数->单位向量
def angle_between(v1, v2):  #弧度角return np.arccos()
def x_rotation(vector, theta):  #绕x轴旋转三维矢量return np.dot(R, vector)
def y_rotation(vector, theta):  #绕y轴旋转三维矢量return np.dot(R, vector)
def z_rotation(vector, theta):  #绕z轴旋转三维矢量return np.dot(R, vector)

1. vector / np.linalg.norm(vector) 向量vector除以其二范数（np.linalg.norm默认计算二范数，即各元素平方和的平方根）-> 单位向量

2. np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) np.dot：v1_u, v2_u对应项相乘再相加（两个单位向量的点积即夹角的余弦），理论范围为[-1.0, 1.0]；np.clip：把因浮点误差超出该范围的值截断为边界值。 np.arccos返回的是弧度值

preprocess.py

import syssys.path.extend(['../'])
from data_gen.rotation import *
from tqdm import tqdm  #进度条def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]):
#用前面的帧填充空帧 #减去中心关节#1（ntu中的脊柱关节和Kinetics数据集中的颈部关节) #将第一人的髋关节（jpt 0）和脊椎（jpt 1）之间的骨骼与z轴平行 #np.cross求叉积（向量积）；求出的新向量垂直于z轴（用作旋转轴） #使右肩（jpt 8）和左肩（jpt 4）之间的骨头与x轴平行 return data

3.左侧 project 工具栏窗口顶部那个齿轮有个 show member 选项，默认是不开的，勾选后 py 文件会显示内部定义的 class 和 def。每个文件可以自由选择折叠还是展开。

4. 关于tqdm

from tqdm import tqdm     进度条
a=(-1,1,0)
for i,j in enumerate(tqdm(a)):print(i,j)>>>0%|                                                     | 0/3 [00:00<?, ?it/s]
0 -1
1 1
2 0
100%|███████████████████████████████████████████| 3/3 [00:00<00:00, 7281.78it/s]

5. axis = np.cross(joint_top - joint_bottom, [0, 0, 1]) np.cross(a,b)求叉积（向量积）；求出的新向量垂直于a、b形成的平面

6. np.sum()是对所有元素求总和；np.sum(-1)是沿最后一个轴求和（对二维数组来说，就是先求每个[]（每一行）里的和）

>>> np.eye(3)
array([[1., 0., 0.],[0., 1., 0.],[0., 0., 1.]])>>> np.eye(3).sum(-1)
array([1., 1., 1.])
>>> np.eye(3).sum(-1).sum(-1)
3.0
>>> np.eye(3).sum()
3.0>>> np.array([[1,1],[2,2]]).sum(-1)
array([2, 4])
>>> np.array([[1,1],[2,2]]).sum()
6
>>> np.array([[1,1],[2,2]]).sum(0)
array([3, 3])
>>> np.array([[1,1],[2,2]]).sum(1)
array([2, 4])

ntu_gendata.py

import argparse
import pickle
from tqdm import tqdm
import syssys.path.extend(['../'])
from data_gen.preprocess import pre_normalization
import numpy as np
import osdef read_skeleton_filter(file):  #每一个帧，每个人，每一个关节return skeleton_sequence
def get_nonzero_std(s):  # tvcreturn s
def read_xyz(file, max_body=4, num_joint=25):  # 取了前两个bodyreturn data  #3，seq_info['numFrame']，num_joint，max_body
def gendata(data_path, out_path, ignored_sample_path=None, benchmark='xview', part='eval'):fp = pre_normalization(fp)  #N, C, T, V, M

7. index = energy.argsort()[::-1][0:max_body_true]

argsort返回的是数组值从小到大排序后对应的索引；[::-1]将其反转（即变为从大到小）；[0:max_body_true]取前max_body_true个，max_body_true=2（定义好的），即取energy最大的两个body的索引。

argsort(axis=1)表示沿axis=1排序，即对每一行内部的元素分别排序

8. os.listdir(data_path): 返回指定路径下的文件和文件夹列表

kinetics_gendata.py

class Feeder_kinetics(Dataset):def __init__(self,data_path,label_path,ignore_empty_sample=True,window_size=-1,num_person_in=5,num_person_out=2):def load_data(self):def __len__(self):return len(self.sample_name)def __iter__(self):return selfdef __getitem__(self, index):return data_numpy, labeldef gendata(data_path, label_path,data_out_path, label_out_path,num_person_in=num_person_in,  # observe the first 5 personsnum_person_out=num_person_out,  # then choose 2 persons with the highest scoremax_frame=max_frame):

9. data_numpy[1, frame_index, :, m] = pose[1::2] [a::b]表示从下标为a的元素开始，以b为步长取元素（即每b个元素取一个）;

若b为负数（如b=-1），表示从下标a（0,1,2...）开始向前倒序取元素。

main.py

10. super().__init__(optimizer) 调用父类 _LRScheduler 的构造函数，optimizer是传给父类构造函数的输入参数

11. answer = input('delete it? y/n:') 接受一个标准输入数据，返回为 string 类型

12. Python __import__() 函数用于动态加载类和函数

getattr(object, name[, default]) 函数用于返回一个对象属性值。等效于object.name

*self._args 表示接受元组类参数；

**kwargs 表示接受字典类参数；

13. vars([object]) 函数返回对象object的属性和属性值的字典对象

14. localtime = time.asctime(time.localtime(time.time()))

localtime格式化时间戳为本地的时间；asctime() 函数接受时间元组并返回一个可读的形式为"Tue Dec 11 18:07:14 2008"（2008年12月11日周二18时07分14秒）的24个字符的字符串。

15. with open('{}/log.txt'.format(self.arg.work_dir), 'a') as f: 'a'代表追加（append）模式，也就是说，打开这个文件之后直接定位到文件的末尾，新写入的内容接在原有内容之后。

16. round(v * 100 / sum(timer.values())) round() 返回浮点数v * 100 / sum(timer.values())舍入到最近整数的值（注意：Python 3中小数部分恰为.5时取最近的偶数，并非严格的四舍五入）。

17. value, predict_label = torch.max(output.data, 1) value是每行的最大值，predict_label是对应的索引

>>> import torch
>>> import numpy as np
>>> c=np.array([[1,2],[4,3]])
>>> print(c)
[[1 2][4 3]]
>>> b=torch.from_numpy(c)
>>> print(b)
tensor([[1, 2],[4, 3]])
>>> torch.max(b,1)  #这里b相当于softmax函数输出的tensor；1表示dim=1，即返回每行的最大值及其索引
torch.return_types.max(
values=tensor([2, 4]),
indices=tensor([1, 0]))

feeders/tools.py

18. begin = np.random.randint(step) 取[0, step)的随机整数

19. begin = valid_frame.argmax() 返回的是元素最大值所对应的索引值

20. move_time = random.choice(move_time_candidate) choice() 方法返回一个列表，元组或字符串的随机项。

21. np.arange函数

node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)  #round() 方法返回浮点数 的四舍五入值。 np.arange :[0, T)（左闭右开，不包含T）,步长为T * 1.0 / move_time
node = np.append(node, T) #为node添加T
A = np.random.choice(angle_candidate, num_node)  #angle_candidate中选num_node个（注意是np.）

22. self.sample_name, self.label = pickle.load(f, encoding='latin1')

用python2.X pickle写了一个文件，用python3的pickle读取时, 加上encoding='latin1'，代码就可以正确识别编码输出内容了。

23. data.mean(axis=2, keepdims=True) 求均值，axis表示维度，keepdims=True表示保持原来维度

[-top_k:]表示倒数top_k个

24. data = data.reshape((1,) + data.shape) #np.array([1,2,3,4]).reshape((2,)+(2,)) -> array([[1, 2], [3, 4]])

agcn.py

25. self.PA = torch.nn.Parameter(torch.from_numpy(A.astype(np.float32)))

将一个不可训练的类型Tensor转换成可以训练的类型parameter，成为了模型中根据训练可以改动的参数

26. self.bn = nn.BatchNorm2d(out_channels) ； self.relu = nn.ReLU(inplace=True)

inplace=True表示对从上层网络（如bn）传递下来的tensor直接进行修改，这样能够节省运算内存，不用多存储其他变量。