您的位置:首页 > 其它

余弦相似性及欧氏距离的计算

2018-05-21 20:53 85 查看
def normalize(nparray, order=2, axis=0):
    """Scale an N-D numpy array to unit norm along one axis.

    Args:
      nparray: numpy array to scale.
      order: order of the norm, forwarded to ``np.linalg.norm`` as ``ord``.
      axis: axis along which the norm is taken.

    Returns:
      ``nparray`` divided by its norm along ``axis``; the result has the
      same shape as the input.
    """
    # A float32-sized epsilon keeps the division finite for all-zero slices.
    eps = np.finfo(np.float32).eps
    magnitude = np.linalg.norm(nparray, ord=order, axis=axis, keepdims=True)
    return nparray / (magnitude + eps)

def compute_dist(array1, array2, type='euclidean'):
    """Compute the cosine similarity or euclidean distance of all pairs.

    ``type='euclidean'`` selects the euclidean distance and
    ``type='cosine'`` selects the cosine similarity.

    Args:
      array1: numpy array with shape [m1, n]
      array2: numpy array with shape [m2, n]
      type: one of ['cosine', 'euclidean']

    Returns:
      numpy array with shape [m1, m2]. When both inputs are 1-D vectors
      of length n, the cosine branch returns a scalar and the euclidean
      branch returns an array of shape (1,).

    Raises:
      AssertionError: if ``type`` is not 'cosine' or 'euclidean'.
    """
    assert type in ['cosine', 'euclidean']

    if type == 'cosine':
        # The dot product of unit-length rows is their cosine similarity.
        array1 = normalize(array1, axis=-1)
        array2 = normalize(array2, axis=-1)
        return np.matmul(array1, array2.T)

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for every pair.
    # The squared norms must be summed per row (last axis); summing over
    # the whole array would add one global constant to every pair.
    square1 = np.expand_dims(np.sum(np.square(array1), axis=-1), -1)
    square2 = np.expand_dims(np.sum(np.square(array2), axis=-1), 0)
    squared_dist = square1 + square2 - 2 * np.matmul(array1, array2.T)
    # Floating-point round-off can push tiny distances below zero, which
    # would turn into NaN under the square root.
    squared_dist = np.maximum(squared_dist, 0)
    return np.sqrt(squared_dist)

# Leave-one-out nearest-neighbour check: for every sample, find the other
# sample whose feature vector is closest in euclidean distance and compare
# their labels. A match counts as a correct prediction and increments
# `correct`.

num_samples = 2966  # number of samples being evaluated

correct = 0

for i in range(num_samples):
    # Start from +inf so the first candidate always becomes the best so far.
    min_dist = float('inf')
    for j in range(num_samples):
        if i == j:
            # Same sample: skip it, otherwise it would be its own neighbour.
            continue

        distance = compute_dist(query_output[i], query_output[j], type='euclidean')
        # `<=` means the later index wins when two distances tie.
        if distance <= min_dist:
            min_dist = distance
            index = j

    # Compare labels once per sample, after its nearest neighbour is known.
    if query_label_idx[i] == query_label_idx[index]:
        correct += 1

print('accuracy:  %f' % (correct / num_samples))


阅读更多
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: