您的位置:首页 > 编程语言 > Java开发

Java实现统计一篇文章中每个单词出现的次数

2015-09-18 10:35 956 查看
使用 Java 正则表达式和 HashMap(哈希表)统计一篇文章中每个单词出现的次数(去除标点符号和空格)。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints the tally.
 *
 * <p>Words are the maximal runs of characters between whitespace and the punctuation marks
 * {@code , . ; ! ?}. Matching is case-sensitive, so {@code The} and {@code the} are counted
 * separately.
 */
public class Main {

    /**
     * One or more whitespace/punctuation characters. Matching a whole run (the trailing
     * {@code +}) means sequences such as {@code ", "} separate two words without producing
     * an empty token in between.
     */
    private static final Pattern DELIMITERS = Pattern.compile("[\\s,.;!?]+");

    /**
     * Reads {@code file}, echoes its text on one line, prints a {@code word:count} line for
     * every distinct word and finally prints the most frequent word with its count.
     *
     * <p>If the file cannot be read, the problem is reported on {@code System.err} and nothing
     * is counted.
     *
     * @param file the text file to analyse; decoded the way {@link FileReader} decodes files
     */
    public static void Count(File file) {
        String text;
        try {
            text = readText(file);
        } catch (IOException e) {
            System.err.println("Cannot read " + file + ": " + e);
            return;
        }
        System.out.println(text);

        Map<String, Integer> counts = countWords(text);
        println(counts);

        int max = 0;
        String maxWord = "";
        for (Entry<String, Integer> entry : counts.entrySet()) {
            // ">=" keeps the last entry seen among equally frequent words.
            if (entry.getValue() >= max) {
                max = entry.getValue();
                maxWord = entry.getKey();
            }
        }
        System.out.println("出现次数最多的是:" + max + "   " + maxWord);
    }

    /**
     * Returns the content of {@code file} with its lines joined by single spaces.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; re-insert a separator so the last
                // word of one line is not glued to the first word of the next.
                if (text.length() > 0) {
                    text.append(' ');
                }
                text.append(line);
            }
        }
        return text.toString();
    }

    /** Splits {@code text} on {@link #DELIMITERS} and tallies each non-empty token. */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String word : DELIMITERS.split(text)) {
            // split() yields a leading empty token when the text starts with a delimiter.
            if (word.isEmpty()) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /** Prints every entry of {@code map} as {@code key:value}, one entry per line. */
    private static void println(Map<String, Integer> map) {
        for (Entry<String, Integer> entry : map.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }

    /** Runs the word count on a fixed sample file on the author's desktop. */
    public static void main(String[] args) {
        File file = new File("C:/Users/Administrator/Desktop/james.txt");
        Count(file);
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息