java用NLPIR对本地txt进行分词,并将分词结果写入本地
2016-04-09 12:54
621 查看
一:下载资源:
1:使用的是NLPIR-ICTCLAS2016的java接口
2:平台:win7 64位
二:Myeclipse启动工程
1:打开MyEclipse,导入项目:
导入项目后,只有NlpirTest.java,实现分词
另外的MyFileReader.java实现读取本地txt文档
MyFileSave.java实现将分词结果保存到本地txt
2:修改NlpirTest.java类
需要修改2处路径:一处为:CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
另一处为:
String argu = "C:\\NLPIR-ICTCLAS2016";
注意编码格式为:utf-8
3:源码
NlpirTest.java:
package code;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.jar.Attributes.Name;
import utils.SystemParas;
import com.sun.jna.Library;
import com.sun.jna.Native;
public class NlpirTest {
// 定义接口CLibrary,继承自com.sun.jna.Library
public interface CLibrary extends Library {
// 定义并初始化接口的静态变量
CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
public int NLPIR_Init(String sDataPath, int encoding,
String sLicenceCode);
public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);
public String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public int NLPIR_AddUserWord(String sWord);//add by qp 2008.11.10
public int NLPIR_DelUsrWord(String sWord);//add by qp 2008.11.10
public String NLPIR_GetLastErrorMsg();
public void NLPIR_Exit();
}
public static String transString(String aidString, String ori_encoding,
String new_encoding) {
try {
return new String(aidString.getBytes(ori_encoding), new_encoding);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception {
String argu = "C:\\NLPIR-ICTCLAS2016";
// String system_charset = "GBK";//GBK----0
@SuppressWarnings("unused")
String system_charset = "UTF-8";
int charset_type = 1;
int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");
String nativeBytes = null;
String nativeByte = null;
ArrayList<String> name = new ArrayList<String>();
ArrayList<String&
4000
gt; classify = new ArrayList<String>();
if (0 == init_flag) {
nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();
System.err.println("初始化失败!fail reason is "+nativeBytes);
return;
}
try {
nativeByte = CLibrary.Instance.NLPIR_GetFileKeyWords("C:\\专利文献全文获取_xpdf.txt", 10,false);
System.out.println("关键词提取结果是:" + nativeByte);
String file="C:\\专利文献全文获取_xpdf.txt";
String sinputt= MyFileReader.read(file);
nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sinputt, 1);
System.out.println("分词结果为: " + nativeBytes);
CLibrary.Instance.NLPIR_Exit();
//以空格分离,把每个词/v分别存到数组里
String[] nativeBytesArray=nativeBytes.split(" ");
MyFileSave save=new MyFileSave();
save.Save(nativeBytesArray);
}
} catch (Exception ex) {
// TODO Auto-generated catch block
ex.printStackTrace();
}
}
}
MyFileReader.java
package code;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
/**
 * Reads a local UTF-8 text file into a single string.
 */
public class MyFileReader {

    /**
     * Reads the whole file at {@code filePath} as UTF-8 text.
     *
     * <p>Lines are concatenated without any separator, i.e. line breaks in
     * the file are dropped.
     *
     * @param filePath path of the file to read
     * @return the file content with line breaks removed; an empty string for
     *         an empty file; {@code null} if the path is not an existing
     *         regular file or if reading fails (a message is printed in both
     *         cases)
     */
    public static String read(String filePath) {
        String result = null;
        BufferedReader bufferedReader = null;
        try {
            String encoding = "utf-8";
            File file = new File(filePath);
            // isFile() is false for missing paths, so no exists() check is needed.
            if (file.isFile()) {
                bufferedReader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(file), encoding));
                // StringBuilder avoids the quadratic cost of String += in a loop.
                StringBuilder content = new StringBuilder();
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    content.append(lineTxt);
                }
                result = content.toString();
            } else {
                System.out.println("找不到指定的文件");
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        } finally {
            // Close on every path so the file handle is not leaked on errors.
            if (bufferedReader != null) {
                try {
                    bufferedReader.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
        return result;
    }
}
MyFileSave.java
package code;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
/**
 * Appends segmentation tokens to a fixed result file on the local disk.
 */
public class MyFileSave {

    /**
     * Appends every element of {@code a}, each followed by a single space, to
     * the result file. The text is written as UTF-8, matching the encoding
     * used when the input file is read.
     *
     * <p>I/O failures are reported on standard error and are not propagated.
     *
     * @param a the tokens to write
     */
    public void Save(String[] a) {
        FileOutputStream foS = null;
        try {
            // The second argument opens the file in append mode.
            foS = new FileOutputStream("C:\\专利文献全文获取分词结果.txt", true);
            // Explicit charset: PrintWriter(OutputStream) would use the
            // platform default encoding instead.
            PrintWriter pWriter = new PrintWriter(new OutputStreamWriter(foS, "utf-8"));
            for (String token : a) {
                pWriter.write(token + " ");
            }
            pWriter.flush();
            // PrintWriter swallows IOExceptions; checkError() is the only way
            // to learn that a write failed.
            if (pWriter.checkError()) {
                System.err.println("写入分词结果出错");
            }
        } catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException.
            e.printStackTrace();
        } finally {
            // foS is still null when the file could not be opened.
            if (foS != null) {
                try {
                    foS.close();
                } catch (IOException e2) {
                    e2.printStackTrace();
                }
            }
        }
    }
}
四:运行
运行结果展示:完结!
相关文章推荐
- java项目、包、类
- lambdaj工具类中的 Lambda.maxFrom方法的实现
- 安装eclipse并汉化
- 安装jdk/jre:
- java 动态加载
- JAVA开发,MySQL-SQLServer移植几点备注
- Eclipse中出现-访问限制由于对必需的库XX具有一定限制,因此无法访问类型
- 实例详解Spring的事务传播机制(三)
- spring classpath和filesystem
- hadoop集群,如何运行Java jar包---如何运行mapreduce程序
- java检测Windows服务(NT服务)是否安装、启动
- 从零教你如何获取hadoop2.X源码并使用eclipse关联hadoop2.X源码
- LeetCode 169 -Majority Element ( JAVA )
- Java遍历Map对象的4种方法
- java.awt.headless 详解
- LeetCode 217 -Contains Duplicate ( JAVA )
- java工厂模式
- Struts2中 s:select 标签相关使用
- spring mvc 从 controller 调到静态页面 , html
- Java用JavaService实现Windows系统服务