您的位置:首页 > 编程语言 > Java开发

java 词频统计代码

2016-10-18 23:11 597 查看
package hello;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Counts how often each whitespace-separated word occurs in a fixed text file and prints the
 * result to standard output as {@code word:<TAB>count}, one word per line.
 */
public class WordCount {

    /** Path of the text file that {@link #main(String[])} reads. */
    private static final String INPUT_PATH = "src/file80.txt";

    /**
     * Characters that are replaced by a space before a line is split into words.
     *
     * <p>Compiled once instead of once per input line. Inside the character class, {@code !--} is
     * a range from {@code '!'} (U+0021) to {@code '-'} (U+002D), so characters such as
     * {@code '#'}, {@code '$'}, {@code '('} and {@code '+'} are replaced as well.
     * NOTE(review): it is unclear whether that range was intended; the pattern is kept as it was.
     */
    private static final Pattern PUNCTUATION = Pattern.compile("[【】、.。,,。\"!--;:?\'\\]]");

    /**
     * Sorts the array in place in ascending order, then prints every element followed by a single
     * space to standard output (no trailing newline).
     *
     * @param arry the array to sort; it is modified in place
     */
    public void sort(int[] arry) {
        Arrays.sort(arry);
        for (int element : arry) {
            System.out.print(element + " ");
        }
    }

    /**
     * Reads {@code src/file80.txt} line by line, counts every word and prints the counts.
     *
     * <p>If the file cannot be opened or read, the stack trace is printed and no counts are
     * written. The iteration order of the output is that of the underlying {@link HashMap}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Maps each word to the number of times it has been seen so far.
        HashMap<String, Integer> hashMap = new HashMap<String, Integer>();

        // try-with-resources closes the reader on every path, including read errors.
        // NOTE(review): FileReader decodes with the platform default charset on older JDKs;
        // confirm the encoding of the input file if it contains non-ASCII text.
        try (BufferedReader br = new BufferedReader(new FileReader(INPUT_PATH))) {
            String value;
            while ((value = br.readLine()) != null) {
                countWords(value, hashMap);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException.
            e.printStackTrace();
            return;
        }

        for (String word : hashMap.keySet()) {
            System.out.println(word + ":\t" + hashMap.get(word));
        }
    }

    /** Blanks out punctuation in one line, splits it on whitespace and tallies each word. */
    private static void countWords(String line, HashMap<String, Integer> counts) {
        String cleaned = PUNCTUATION.matcher(line).replaceAll(" ");
        StringTokenizer tokenizer = new StringTokenizer(cleaned);
        while (tokenizer.hasMoreTokens()) {
            String word = tokenizer.nextToken();
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
    }
}


  
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: