您的位置：首页 > 编程语言 > Python开发

java调用python总出现python第三方库没有定义

2017-03-24 17:17 441 查看

我用的是 Anaconda3 来运行 Python 代码：

# -*- coding: utf-8 -*-
"""
Created on Fri Mar 10 11:02:50 2017

@author: Administrator
"""

#用余弦相似性进行比较
import numpy as np
#Numpy是Python的一个科学计算的库，提供了矩阵运算的功能
import  jieba
#python分词器
import  copy
import codecs,sys
#自然语言编码转换
# The two titles whose cosine similarity is computed by the script below.
title2= "张翰怒斥耍大牌被换角谣言"
title1 = "爱剪辑-危机四伏，落水就会被高压电弄死"
# Alternative title pairs and a sample-file path, kept commented out
# (presumably for manual experiments; uncomment a pair to try it).
#title1 = "王凯《跨界歌王》姗姗来迟“低音炮”开嗓献唱 - 搜狐视频"
#title2  = "《跨界歌王》王凯清唱“好久不见” 低音炮名不虚传_视频在线观看 - 56.com"
#title1 = "王凯《跨界歌王》姗姗来迟“低音炮”开嗓献唱 - 搜狐视频"
#title2 = "范爷维权获赔15万全捐赠"
#sampfn = "C:\\Users\\Administrator\\Desktop\\sample.txt"
def get_cossimi(x, y):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        x: Sequence of numbers (for example word counts).
        y: Sequence of numbers with the same length as ``x``.

    Returns:
        ``sum(x * y) / (|x| * |y|)`` as a float. When either vector has a
        zero norm (all zeros, or empty) the ratio is undefined and ``0.0``
        is returned instead of ``nan``.
    """
    # Work in floating point so that large integer counts cannot overflow a
    # fixed-width integer dtype when they are multiplied.
    myx = np.asarray(x, dtype=float)
    myy = np.asarray(y, dtype=float)

    dot = np.sum(myx * myy)
    # Use np.sum for the norms as well: the builtin sum() only reduces the
    # first axis, which would disagree with the numerator for non-1-D input.
    norm_product = np.sqrt(np.sum(myx * myx)) * np.sqrt(np.sum(myy * myy))

    if norm_product == 0:
        # Avoid 0/0 (nan plus a RuntimeWarning) for all-zero or empty vectors.
        return 0.0
    return float(dot / norm_product)

if __name__ == '__main__':
    # Script entry point: tokenize title1 and title2 with jieba, drop stop
    # words, build word-count vectors and print their cosine similarity.
    print("loading...")
    print("working...")

    # Tokenize title1.
    # TODO: add a custom dictionary for words jieba does not know, to get
    # more accurate segmentation.
    f1_seg_list = jieba.cut(title1)

    # Tokenize title2.
    ftest1_seg_list = jieba.cut(title2)

    # Load the stop-word list (one entry per line). The context manager
    # closes the file even if reading fails.
    with codecs.open("C:\\Users\\Administrator\\Desktop\\stopword.txt", "r", "utf-8") as f_stop:
        f_stop_text = f_stop.read()

    # codecs.open() performs no newline translation, so on a file with CRLF
    # line endings split("\n") would leave a trailing "\r" on every entry and
    # no stop word would ever match. splitlines() + strip() handles both
    # styles; a set makes each membership test O(1).
    f_stop_seg_list = set(line.strip() for line in f_stop_text.splitlines())

    # Count every non-stop-word token of title1.
    all_words = {}
    for myword in f1_seg_list:
        token = myword.strip()
        # Skip whitespace-only tokens and stop words.
        if not token or token in f_stop_seg_list:
            continue
        all_words[myword] = all_words.get(myword, 0) + 1

    # Count title2 tokens over title1's vocabulary only. Stop words never
    # became keys above, so the membership test also filters them out.
    mytest1_words = dict.fromkeys(all_words, 0)
    for myword in ftest1_seg_list:
        if myword in mytest1_words:
            mytest1_words[myword] += 1

    # Build the two count vectors in the same key order.
    # NOTE: the vocabulary comes from title1 alone, so words that occur only
    # in title2 are ignored and the score is not symmetric in the two titles.
    sampdate = [all_words[key] for key in all_words]
    test1data = [mytest1_words[key] for key in all_words]
    test1simi = get_cossimi(sampdate, test1data)

    print(u"title1与title2的余弦相似度%f"%(test1simi))

结果为：
runfile('C:/Users/Administrator/Desktop/cosine xiangsi/cosine simlitary.py', wdir='C:/Users/Administrator/Desktop/cosine xiangsi')
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
loading...
working...
Loading model cost 0.658 seconds.
Prefix dict has been built succesfully.
title1与title2的余弦相似度0.301511

我将代码用java进行封装
import org.python.util.PythonInterpreter;

/**
 * Runs the cosine-similarity Python script inside an embedded Jython
 * interpreter.
 *
 * NOTE(review): the script is executed by Jython rather than by the Anaconda
 * CPython installation; the surrounding article reports
 * "ImportError: no module named numpy" for this approach.
 */
public class juli
{
    /** Location of the Python script handed to the interpreter. */
    private static final String SCRIPT_PATH = "D:\\download\\cosine simlitary.py";

    public static void main(String[] args)
    {
        PythonInterpreter jython = new PythonInterpreter();
        jython.execfile(SCRIPT_PATH);
    }
}

出现错误：
Exception in thread "main" Traceback (innermost last):

(no code object) at line 0

SyntaxError: ('Lexical error at line 20, column 35. Encountered: "\\r" (13), after : ""', ('D:\\download\\cosine simlitary.py', 20, 35, u'title1 = "\u95C4\u5823\u4F46\u741A\uE0A6\u734B\u93C4\uE21C\u5C13 \u8930\u64B3\u6E80\u9359\u6226\uE5DE\u93C6\u5B58\u5F3D\u93CB\u6941\u6D3F\u93C2\uFFFD'))

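起初我猜测可能是测试的句子中有空格出现；不过从报错信息看，词法分析器是在字符串内部遇到了回车符 "\r"，并且标题被显示成了乱码，因此更可能的原因是 Jython 没有按 UTF-8 读取源文件，中文字符串被错误解码，导致引号没有正常闭合。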

我进行了一些改正又出现了错误提示：

Exception in thread "main" Traceback (innermost last):

File "D:\download\cosine simlitary.py", line 9, in ?

ImportError: no module named numpy

仔细想了好久还是不行，后来看到了一篇博客，运用他的方法，最后结果终于正确了：http://blog.csdn.net/ztf312/article/details/51338060
package cos;

import java.io.BufferedReader;
import java.io.InputStreamReader;

/**
 * Runs the Python script in a separate Anaconda (CPython) process and echoes
 * its standard output, so third-party packages installed for that interpreter
 * are available to the script.
 */
public class juli {
    /**
     * Starts the interpreter, prints every line the script writes to stdout
     * between "start" and "end", and waits for the process to exit.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("start");
        // Pass the command as separate arguments so that no whitespace-based
        // tokenizing of a single command string is involved.
        ProcessBuilder builder = new ProcessBuilder(
                "C:\\ProgramData\\Anaconda3\\python.exe", "untitled0.py");
        // Forward the child's stderr to this process. If it is left unread,
        // the child can block once the pipe buffer fills up, and Python
        // tracebacks would be silently lost.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        try {
            Process pr = builder.start();
            // try-with-resources closes the reader even if reading fails.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(pr.getInputStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line);
                }
            }
            int exitCode = pr.waitFor();
            if (exitCode != 0) {
                // Report failures instead of ignoring the exit status.
                System.err.println("python exited with code " + exitCode);
            }
            System.out.println("end");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

结果如下：

start

loading...

working...

title1与title2的余弦相似度0.301511

end

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航