您的位置:首页 > 编程语言 > Java开发

java用NLPIR对本地txt进行分词,并将分词结果写入本地

2016-04-09 12:54 621 查看

一:下载资源:

1:使用的是NLPIR-ICTCLAS2016的java接口



2:平台:win7 64位

二:Myeclipse启动工程

1:打开MyEclipse,导入项目:



      导入项目后,只有NlpirTest.java,用于实现分词;
     另外新增的MyFileReader.java实现读取本地txt文档,
     MyFileSave.java实现将分词结果保存到本地txt。

2:修改NlpirTest.java类

需要修改2处路径:
一处为:CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);

另一处为:
String argu = "C:\\NLPIR-ICTCLAS2016";

注意编码格式为:utf-8

3:源码

NlpirTest.java:

package code;

import java.io.UnsupportedEncodingException;

import java.util.ArrayList;

import java.util.jar.Attributes.Name;

import utils.SystemParas;

import com.sun.jna.Library;

import com.sun.jna.Native;

public class NlpirTest {
// 定义接口CLibrary,继承自com.sun.jna.Library
public interface CLibrary extends Library {
// 定义并初始化接口的静态变量
CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);

public int NLPIR_Init(String sDataPath, int encoding,
String sLicenceCode);

public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);

public String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public int NLPIR_AddUserWord(String sWord);//add by qp 2008.11.10
public int NLPIR_DelUsrWord(String sWord);//add by qp 2008.11.10
public String NLPIR_GetLastErrorMsg();
public void NLPIR_Exit();
}
public static String transString(String aidString, String ori_encoding,
String new_encoding) {
try {
return new String(aidString.getBytes(ori_encoding), new_encoding);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception {
String argu = "C:\\NLPIR-ICTCLAS2016";
// String system_charset = "GBK";//GBK----0
@SuppressWarnings("unused")
String system_charset = "UTF-8";
int charset_type = 1;

int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");
String nativeBytes = null;
String nativeByte = null;
ArrayList<String> name = new ArrayList<String>();

        ArrayList<String&
4000
gt; classify = new ArrayList<String>();
if (0 == init_flag) {
nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();
System.err.println("初始化失败!fail reason is "+nativeBytes);
return;
}
try {
nativeByte = CLibrary.Instance.NLPIR_GetFileKeyWords("C:\\专利文献全文获取_xpdf.txt", 10,false);

System.out.println("关键词提取结果是:" + nativeByte);

String file="C:\\专利文献全文获取_xpdf.txt";
String sinputt= MyFileReader.read(file);
nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sinputt, 1);
System.out.println("分词结果为: " + nativeBytes);
CLibrary.Instance.NLPIR_Exit();

            //以空格分离,把每个词/v分别存到数组里
String[] nativeBytesArray=nativeBytes.split(" ");
MyFileSave save=new MyFileSave();
save.Save(nativeBytesArray);
}
} catch (Exception ex) {
// TODO Auto-generated catch block
ex.printStackTrace();
}

}

}

MyFileReader.java

package code;

import java.io.BufferedInputStream;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import java.io.Reader;

/**
 * Helper that loads a local UTF-8 text file into a single string.
 */
public class MyFileReader {

    /**
     * Reads the whole file as UTF-8 and concatenates its lines.
     *
     * <p>Line terminators are dropped and nothing is inserted in their place, so
     * the text of consecutive lines is joined directly.
     *
     * @param filePath path of the file to read
     * @return the concatenated file content (empty string for an empty file), or
     *         {@code null} when the path is not an existing regular file or when
     *         reading fails; in both cases a message is printed to standard output
     */
    public static String read(String filePath) {
        String result = null;
        try {
            String encoding = "utf-8";
            File file = new File(filePath);

            // isFile() is only true for an existing regular file.
            if (file.isFile()) {
                BufferedReader bufferedReader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(file), encoding));
                try {
                    // StringBuilder avoids re-copying the accumulated text on every line.
                    StringBuilder content = new StringBuilder();
                    String lineTxt;
                    while ((lineTxt = bufferedReader.readLine()) != null) {
                        content.append(lineTxt);
                    }
                    result = content.toString();
                } finally {
                    // Close the stream even when readLine() fails part-way through.
                    bufferedReader.close();
                }
            } else {
                System.out.println("找不到指定的文件");
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }

        return result;
    }

}


MyFileSave.java

package code;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.PrintWriter;

/**
 * Appends segmentation tokens to a local text file.
 */
public class MyFileSave {

    /** Output file used by {@link #Save(String[])}. */
    private static final String DEFAULT_PATH = "C:\\专利文献全文获取分词结果.txt";

    /**
     * Appends the tokens to the default output file.
     *
     * @param a tokens to write; must not be {@code null}
     */
    public void Save(String[] a) {
        Save(a, DEFAULT_PATH);
    }

    /**
     * Appends the tokens to the given file, each followed by two spaces.
     *
     * <p>The file is opened in append mode and written as UTF-8, matching the
     * encoding the reader side of this example uses. If the file cannot be
     * opened the exception is printed and nothing is written.
     *
     * @param a    tokens to write; must not be {@code null}
     * @param path path of the file to append to
     */
    public void Save(String[] a, String path) {
        FileOutputStream foS = null;
        try {
            // Second constructor argument enables append mode.
            foS = new FileOutputStream(path, true);
            // Explicit charset instead of the platform default.
            PrintWriter pWriter = new PrintWriter(
                    new java.io.OutputStreamWriter(foS, "UTF-8"));
            for (int i = 0; i < a.length; i++) {
                pWriter.write(a[i] + "  ");
            }
            pWriter.flush();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (java.io.UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform is required to support.
            e.printStackTrace();
        } finally {
            // foS stays null when the file could not be opened; only close a real stream.
            if (foS != null) {
                try {
                    foS.close();
                } catch (Exception e2) {
                    e2.printStackTrace();
                }
            }
        }
    }

}

三:运行

运行结果展示:





完结!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: