您的位置：首页 > 编程语言

毕业设计数据查找优化代码之一

2007-10-02 22:38 405 查看

/*
* DivWords.java
*
* 创建于 2007年10月2日, 下午9:16
*
* @author 马如林
*
*/

package com.mrl;

import java.io.*;
import java.util.*;

import com.xjt.nlp.word.ICTCLAS;
import com.mrl.FileProcess;

/**
 * Command-line utility that reads every regular file in {@link #FILE_DIR},
 * passes its text through the ICTCLAS segmenter and appends the extracted
 * words, one per line, to {@link #DIV_WORDS}. Each word is also echoed to
 * standard output.
 *
 * @author rulinma
 */
public class DivWords
{
    /** Directory whose files are read and segmented. */
    private static final String FILE_DIR ="D:/blogTestSet";

    /** Text file the segmented words are appended to. */
    private static final String DIV_WORDS ="D:/divWords";

    /**
     * Program entry point: processes every file found in {@link #FILE_DIR}.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args)
    {
        // NOTE(review): this instance is never used. It is still constructed
        // because the FileProcess constructor is not visible from this file and
        // may have side effects -- confirm and remove if it has none.
        FileProcess fileProcess = new FileProcess();

        getFile(FILE_DIR);
    }

    /**
     * Processes each regular file located directly inside the given directory.
     * Sub-directories are not descended into.
     *
     * @param dirPath path of the directory to scan
     */
    private static void getFile(String dirPath)
    {
        File filesDir = new File(dirPath);

        // listFiles() returns null when the path is not a directory or an I/O
        // error occurs, so it must be checked before iterating.
        File[] list = filesDir.listFiles();
        if (list == null)
        {
            System.err.println("Cannot list directory: " + dirPath);
            return;
        }

        for (int i = 0; i < list.length; i++)
        {
            if (list[i].isFile())
            {
                readContent(list[i].getPath());
            }
        }
    }

    /**
     * Reads the whole file into one string and hands it to the segmenter.
     * Lines are concatenated without any separator, because readLine() strips
     * the line terminators. A file that cannot be read, or that yields no text,
     * is skipped.
     *
     * @param filePath path of the file to read
     */
    private static void readContent(String filePath)
    {
        // Start from an empty builder: concatenating onto a null String would
        // prepend the literal text "null" to the content.
        StringBuilder content = new StringBuilder();
        BufferedReader in = null;
        try
        {
            // NOTE(review): FileReader decodes with the platform default
            // charset -- confirm this matches the encoding of the input files.
            in = new BufferedReader(new FileReader(filePath));
            String line;
            while ((line = in.readLine()) != null)
            {
                content.append(line);
            }
        }
        catch (IOException e)
        {
            // Do not segment partially read content.
            System.err.println("Failed to read " + filePath + ": " + e);
            return;
        }
        finally
        {
            closeQuietly(in);
        }

        if (content.length() == 0)
        {
            return;
        }

        divLexical(content.toString());
    }

    /**
     * Segments the given text with ICTCLAS and forwards the result to
     * {@link #filterString(String)}.
     *
     * @param strContent text to segment
     */
    private static void divLexical(String strContent)
    {
        ICTCLAS ictclas = new ICTCLAS();

        // Retry the initialisation once when the first attempt reports failure.
        // NOTE(review): the outcome of the retry is ignored and segmentation
        // proceeds regardless -- confirm against the ICTCLAS documentation.
        if (!ictclas.init(0, 2))
        {
            ictclas.init(0, 2);
        }

        filterString(ictclas.paragraphProcess(strContent));
    }

    /**
     * Splits the segmenter output on spaces and appends the word of every
     * qualifying token to {@link #DIV_WORDS}. The output file is opened once
     * for the whole string instead of once per word.
     *
     * @param srcStr space-separated segmenter output; {@code null} is ignored
     */
    private static void filterString(String srcStr)
    {
        if (srcStr == null)
        {
            return;
        }

        // trim() guards against leading/trailing spaces in the segmenter output.
        StringTokenizer st = new StringTokenizer(srcStr.trim(), " ");
        if (!st.hasMoreTokens())
        {
            return;
        }

        BufferedWriter out = null;
        try
        {
            out = new BufferedWriter(new FileWriter(DIV_WORDS, true));
            while (st.hasMoreTokens())
            {
                divStr(st.nextToken(), out);
            }
            // Close on the success path so that a failing flush is reported
            // by the catch block below instead of being discarded.
            out.close();
        }
        catch (IOException e)
        {
            System.err.println("Failed to append words to " + DIV_WORDS + ": " + e);
        }
        finally
        {
            // No-op when the writer was already closed above.
            closeQuietly(out);
        }
    }

    /**
     * Handles one token of segmenter output. When the token splits on '/'
     * into exactly two non-empty parts, the first part is printed to standard
     * output and written to {@code out} followed by a line separator; the
     * second part is discarded (presumably the part-of-speech tag -- verify
     * against the ICTCLAS output format). Any other token is ignored.
     *
     * @param srcStr a single space-delimited token
     * @param out    writer the word is appended to
     * @throws IOException if writing to {@code out} fails
     */
    private static void divStr(String srcStr, BufferedWriter out) throws IOException
    {
        StringTokenizer st = new StringTokenizer(srcStr, "/");
        if (st.countTokens() != 2)
        {
            return;
        }

        String strPre = st.nextToken();
        System.out.println(strPre);

        out.write(strPre);
        out.newLine();
    }

    /**
     * Closes the resource, ignoring a {@code null} argument and any
     * IOException raised by the close itself.
     *
     * @param resource resource to close; may be {@code null}
     */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done about a failure while closing.
        }
    }
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： 优化 string import file list null

相关文章推荐

新的分享

章节导航