您的位置:首页 > 其它

相似度算法:余弦定理

2017-07-15 14:46 92 查看
求两个字符串的余弦相似度:以每个字符在字符串中出现的次数构成向量,再计算两个向量夹角的余弦值。直接贴代码:

/**
 * Cosine similarity between two strings, based on per-character occurrence counts.
 *
 * <p>Each distinct {@code char} (UTF-16 code unit) seen in either string becomes one
 * dimension. For every dimension the class stores a two-element array:
 * index 0 is the number of occurrences in the first string, index 1 the number of
 * occurrences in the second string. The similarity is then
 * {@code (a . b) / (|a| * |b|)}.
 *
 * <p>Because counting is done per {@code char}, characters outside the Basic
 * Multilingual Plane are counted as two separate surrogate code units.
 *
 * <p>The class prints diagnostic lines to {@code System.out} while building the
 * vectors and while computing the result.
 */
public class Similarity {
    /** Index of the first string's count inside each value array. */
    private static final int FIRST = 0;
    /** Index of the second string's count inside each value array. */
    private static final int SECOND = 1;

    /**
     * Character -> {count in string1, count in string2}. Insertion-ordered so the
     * diagnostic dump lists characters in order of first appearance.
     */
    LinkedHashMap<Character, int[]> vectorMap = new LinkedHashMap<Character, int[]>();

    /**
     * Builds the two occurrence vectors and prints one line per distinct character.
     *
     * @param string1 first string to compare
     * @param string2 second string to compare
     * @throws NullPointerException if either argument is {@code null}
     */
    public Similarity(String string1, String string2) {
        for (char c : string1.toCharArray()) {
            count(c, FIRST);
        }
        for (char c : string2.toCharArray()) {
            count(c, SECOND);
        }
        for (Map.Entry<Character, int[]> entry : vectorMap.entrySet()) {
            System.out.println("Key = " + entry.getKey() + ", Value = " + entry.getValue()[0] +","+entry.getValue()[1]);
        }
    }

    /** Increments the count of {@code c} for the given string index, creating the entry on first sight. */
    private void count(char c, int index) {
        int[] counts = vectorMap.get(c);
        if (counts == null) {
            counts = new int[2]; // both counts start at 0
            vectorMap.put(c, counts);
        }
        counts[index]++;
    }

    /**
     * Computes the cosine similarity: cos = a.b / (|a| * |b|).
     *
     * @return a value in [0, 1]; {@code 0} when either string is empty, because the
     *         angle to a zero vector is undefined and 0/0 would otherwise yield NaN
     */
    public double sim() {
        double dotProduct = pointMulti(vectorMap);
        double normProduct = sqrtMulti(vectorMap);
        if (normProduct == 0) {
            return 0;
        }
        return dotProduct / normProduct;
    }

    /**
     * Returns |a| * |b|, computed as sqrt(sum(a_i^2) * sum(b_i^2)).
     *
     * @param paramMap character -> {count in string1, count in string2}
     */
    private double sqrtMulti(Map<Character, int[]> paramMap) {
        double result = Math.sqrt(squares(paramMap));
        System.out.println("sqrtMulti result:"+result);
        return result;
    }

    /**
     * Returns the dot product of the two occurrence vectors.
     *
     * @param paramMap character -> {count in string1, count in string2}
     */
    private double pointMulti(Map<Character, int[]> paramMap) {
        double result = 0;
        for (int[] counts : paramMap.values()) {
            // Widen before multiplying so large counts cannot overflow int.
            result += (double) counts[FIRST] * counts[SECOND];
        }
        System.out.println("pointMulti result:"+result);
        return result;
    }

    /**
     * Returns sum(a_i^2) * sum(b_i^2), i.e. the product of the squared norms.
     *
     * @param paramMap character -> {count in string1, count in string2}
     */
    private double squares(Map<Character, int[]> paramMap) {
        double squaredNorm1 = 0.00; // squared norm of vector 1
        double squaredNorm2 = 0.00; // squared norm of vector 2
        for (int[] counts : paramMap.values()) {
            // Widen before multiplying so large counts cannot overflow int.
            squaredNorm1 += (double) counts[FIRST] * counts[FIRST];
            squaredNorm2 += (double) counts[SECOND] * counts[SECOND];
        }
        return squaredNorm1 * squaredNorm2;
    }

    /** Demo: compares two sample Chinese sentences and prints their similarity. */
    public static void main(String[] args) {
        String s1 = "我不是一个帅哥";
        String s2 = "一个帅哥是我";
        Similarity similarity1 = new Similarity(s1, s2);
        System.out.println(similarity1.sim());
    }
}


输出:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: