
import numpy as np


def bit_product_sum(x, y):
    """Return the sum of element-wise products (dot product) of ``x`` and ``y``.

    Elements are paired with ``zip``, so trailing elements of the longer
    sequence are ignored.
    """
    return sum(a * b for a, b in zip(x, y))


def cosine_similarity(x, y, norm=False):
    """Compute the cosine similarity of two vectors ``x`` and ``y``.

    Args:
        x: 1-D sequence of numbers (list, tuple or ``numpy.ndarray``).
        y: 1-D sequence of numbers with the same length as ``x``.
        norm: When true, map the result from [-1, 1] into [0, 1] using
            ``0.5 * cos + 0.5``.

    Returns:
        The cosine similarity as a ``float``.  If either vector is all
        zeros the angle is undefined; in that case ``1.0`` is returned when
        both vectors are all zeros and ``0.0`` otherwise (``norm`` is not
        applied to these two special values).

    Raises:
        AssertionError: If ``x`` and ``y`` have different lengths.
    """
    # Raised explicitly (instead of using ``assert``) so the check survives
    # ``python -O`` while keeping the exception type callers may rely on.
    if len(x) != len(y):
        raise AssertionError("len(x) != len(y)")

    x_vec = np.asarray(x, dtype=float)
    y_vec = np.asarray(y, dtype=float)

    # Detect zero vectors on the array form so that tuples and ndarrays are
    # handled the same way as lists (comparing against ``[0] * len(x)`` only
    # matches lists and is ambiguous for ndarrays).
    x_is_zero = not x_vec.any()
    y_is_zero = not y_vec.any()
    if x_is_zero or y_is_zero:
        return 1.0 if (x_is_zero and y_is_zero) else 0.0

    # cos = <x, y> / (|x| * |y|).  This is the same formula as
    # bit_product_sum(x, y) / (sqrt(bit_product_sum(x, x)) * sqrt(bit_product_sum(y, y))),
    # evaluated with vectorised dot products.
    dot_xy = np.dot(x_vec, y_vec)
    cos = float(dot_xy / (np.sqrt(np.dot(x_vec, x_vec)) * np.sqrt(np.dot(y_vec, y_vec))))

    return 0.5 * cos + 0.5 if norm else cos  # optionally rescale into [0, 1]

第一种方法:调用 sklearn 接口

import sklearn.metrics.pairwise as pw
# The aliased import above binds only ``pw``; import the preprocessing
# sub-module explicitly so that ``sklearn.preprocessing.normalize`` resolves.
import sklearn.preprocessing

# Normalise every feature row (sklearn's ``normalize`` defaults to the L2 norm
# along axis 1).
leftfeature = sklearn.preprocessing.normalize(leftfeature)
rightfeature = sklearn.preprocessing.normalize(rightfeature)

print("计算cosdistance")
# pairwise_distances returns a matrix whose entry [i, j] is the cosine
# distance between row i of ``leftfeature`` and row j of ``rightfeature``.
dis = pw.pairwise_distances(leftfeature, rightfeature, metric='cosine')
dis = 1 - dis  # turn cosine distance back into cosine similarity, range [-1, 1]

# Only the matching pairs (row i vs. row i) are needed, i.e. the diagonal of
# the matrix, one value per label.  Fancy indexing keeps the IndexError of the
# element-by-element loop if there are more labels than rows/columns.
pair_idx = np.arange(len(labels))
distance = dis[pair_idx, pair_idx].astype(np.float64)
print('Distance before normalization:\n', distance)
print('Distance max:', np.max(distance), 'Distance min:', np.min(distance), '\n')

# Min-max normalise the scores into [0, 1] so they are comparable with the
# 0/1 labels: (value - min) / (max - min).  The extrema are computed once
# instead of once per element.
distance_min = np.min(distance)
distance_span = np.max(distance) - distance_min
if distance_span > 0:
    distance_norm = (distance - distance_min) / distance_span
else:
    # All scores are identical: avoid a 0/0 division and map them all to 0.
    distance_norm = np.zeros_like(distance)
print('Distance after normalization:\n', distance_norm)

# Compute the accuracy from the normalised scores and the labels.
highestAccuracy, threshold = calculate_accuracy(distance_norm, labels, len(labels))


    # L2-normalise gallery and query features along the last axis so that the
    # dot product of two rows equals their cosine similarity.
    g_feats = g_feats / np.sqrt(np.sum(g_feats ** 2, -1, keepdims=True))
    t_feats = t_feats / np.sqrt(np.sum(t_feats ** 2, -1, keepdims=True))
    # gallery_label = np.concatenate((test_feats30, test_feats40), axis=0)  # concatenate

    # Feature extraction is done; start the comparison (top-k test).
    print("特征的数量维度", img_feats.shape)
    correct10 = 0
    correct1 = 0
    for i, line in enumerate(t_feats):  # one query feature per iteration
        # Broadcasting multiplies the query against every gallery row; this
        # gives the same values as tiling the query to the gallery's shape
        # first, without allocating a gallery-sized copy per query.
        # NOTE(review): `line` now stays 1-D instead of being rebound to the
        # tiled matrix — confirm nothing later in the loop relies on that.
        dis = np.sum(g_feats * line, 1)  # similarity of the query to each gallery row
        # argsort is ascending by default; negating the similarities puts the
        # most similar gallery entry (cosine similarity closest to 1) first.
        sort_index = np.argsort(-dis, axis=0)

#然后 pytorch

两个特征的余弦相似度,就是它们各自经过 L2 归一化之后的内积;L2 归一化计算的就是公式中的 A/|A|。
得到的余弦相似度范围是 [-1, 1],越接近 1 表示越相似;取 1-cos 之后范围变成 [0, 2],和欧式距离表达的含义一样,0 表示最相似,
# A cosine distance of 1 corresponds to a cosine similarity of 0, i.e. the pair
# is essentially dissimilar.  With ``1 - cos`` a smaller value means a closer
# match, and values above 1 practically never indicate a match.  There is no
# need to rescale the result into [0, 1]: a cosine similarity below 0 (or even
# below roughly 0.3) practically never corresponds to a match anyway.
assert metric in ["cosine", "euclidean"], "must choose from [cosine, euclidean], but got {}".format(metric)

# The features arrive as NumPy arrays; convert them once so that both branches
# operate on tensors (ndarrays have no ``.size(0)`` / ``.t()`` methods).
query_tensor = torch.from_numpy(query_features)
gallery_tensor = torch.from_numpy(gallery_features)

if metric == "cosine":
    # Cosine distance = 1 - inner product of the L2-normalised features.
    query_feat = F.normalize(query_tensor, dim=1)
    gallery_feat = F.normalize(gallery_tensor, dim=1)
    dist = 1 - torch.mm(query_feat, gallery_feat.t())  # rows: query, columns: gallery
else:
    # Euclidean distance via ||q - g||^2 = ||q||^2 + ||g||^2 - 2 * q . g
    m, n = query_tensor.size(0), gallery_tensor.size(0)
    xx = torch.pow(query_tensor, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(gallery_tensor, 2).sum(1, keepdim=True).expand(n, m).t()
    dist = xx + yy
    # dist = 1 * dist + (-2) * (query @ gallery^T); beta/alpha are passed as
    # keywords because the positional (beta, alpha, ...) form is no longer accepted.
    dist.addmm_(query_tensor, gallery_tensor.t(), beta=1, alpha=-2)
    dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability

