Java实现统计一篇文章中每个单词出现的次数
2015-09-18 10:35
956 查看
使用Java正则表达式和HashMap(哈希表)统计一篇文章中每个单词出现的次数(去除标点符号和空格)
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts how often each word occurs in a text file and prints the tally.
 *
 * <p>Words are the non-empty pieces obtained by splitting the text on commas,
 * periods, semicolons, exclamation marks, question marks and spaces.
 */
public class Main {

    /** Word separators; compiled once because the pattern never changes. */
    private static final Pattern SEPARATORS = Pattern.compile("[, . ; ! ? ]");

    /**
     * Reads {@code file}, echoes its text, prints one {@code word:count} line per
     * distinct word and finally prints the most frequent word.
     *
     * <p>This method never throws for an unreadable or {@code null} file: the
     * failure is reported on {@code System.err} and whatever text was read so far
     * (possibly nothing) is processed.
     *
     * @param file the text file to analyse
     */
    public static void Count(File file) {
        String text = readText(file);
        System.out.println(text);

        Map<String, Integer> counts = countWords(text);

        int max = 0;
        String maxWord = "";
        for (Entry<String, Integer> entry : counts.entrySet()) {
            // ">=" keeps the original tie-break: the last entry visited wins.
            if (entry.getValue() >= max) {
                max = entry.getValue();
                maxWord = entry.getKey();
            }
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
        System.out.println("出现次数最多的是" + ":" + max + " " + maxWord);
    }

    /**
     * Returns the lines of {@code file} joined by single spaces.
     *
     * <p>{@code readLine()} strips line terminators, so without the inserted
     * space the last word of a line would fuse with the first word of the next.
     */
    private static String readText(File file) {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (text.length() > 0) {
                    text.append(' ');
                }
                text.append(line);
            }
        } catch (Exception e) {
            // Best effort, as before, but no longer silent.
            System.err.println("Failed to read " + file + ": " + e);
        }
        return text.toString();
    }

    /** Splits {@code text} on the separator pattern and tallies each non-empty token. */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new HashMap<>();
        for (String word : SEPARATORS.split(text)) {
            // Adjacent separators such as ", " yield empty tokens; those are not words.
            if (word.isEmpty()) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Walks every entry of {@code map}, reading its key and value.
     *
     * <p>Despite its name this helper prints nothing and is not called from
     * within this class; it is retained unchanged in behaviour.
     */
    private void println(Map<String, Integer> map) {
        for (Entry<String, Integer> entry : map.entrySet()) {
            String key = entry.getKey();
            int value = entry.getValue();
        }
    }

    /**
     * Runs the word count on the file named by the first argument, or on the
     * original default path when no argument is given.
     *
     * @param args optional: path of the text file to analyse
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "C:/Users/Administrator/Desktop/james.txt";
        Count(new File(path));
    }
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Word-frequency counter: reads a text file, splits it into words and prints
 * how many times each word occurs, followed by the most frequent one.
 */
public class Main {

    /**
     * Prints the text of {@code file}, then a {@code word:count} line for every
     * distinct word, then the word with the highest count.
     *
     * <p>A word is any non-empty run of characters between commas, periods,
     * semicolons, exclamation marks, question marks or spaces. If the file
     * cannot be read (including a {@code null} argument) the problem is reported
     * on {@code System.err} and the text read so far is still processed, so this
     * method does not throw.
     *
     * @param file the text file to analyse
     */
    public static void Count(File file) {
        // Join lines with a space: readLine() drops the line terminator, and
        // gluing lines together directly would merge words across line breaks.
        StringBuilder joined = new StringBuilder();
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            for (String line = in.readLine(); line != null; line = in.readLine()) {
                if (joined.length() != 0) {
                    joined.append(' ');
                }
                joined.append(line);
            }
        } catch (Exception e) {
            // Keep the never-throws contract, but make the failure visible.
            System.err.println("Failed to read " + file + ": " + e);
        }
        String result = joined.toString();
        System.out.println(result);

        Map<String, Integer> frequency = new HashMap<>();
        String[] tokens = Pattern.compile("[, . ; ! ? ]").split(result);
        for (String token : tokens) {
            // Consecutive separators produce empty tokens, which are not words.
            if (!token.isEmpty()) {
                Integer previous = frequency.get(token);
                frequency.put(token, previous == null ? 1 : previous + 1);
            }
        }

        int best = 0;
        String bestWord = "";
        for (Entry<String, Integer> pair : frequency.entrySet()) {
            int occurrences = pair.getValue();
            // ">=" means that among equally frequent words the last one visited wins.
            if (occurrences >= best) {
                best = occurrences;
                bestWord = pair.getKey();
            }
            System.out.println(pair.getKey() + ":" + occurrences);
        }
        System.out.println("出现次数最多的是" + ":" + best + " " + bestWord);
    }

    /**
     * Iterates over {@code map}, reading each key and value.
     *
     * <p>The method has no output and no caller inside this class; its
     * behaviour is left as it was.
     */
    private void println(Map<String, Integer> map) {
        for (Entry<String, Integer> entry : map.entrySet()) {
            String key = entry.getKey();
            int value = entry.getValue();
        }
    }

    /**
     * Entry point. Analyses the file given as the first command-line argument,
     * falling back to the original hard-coded path when none is supplied.
     *
     * @param args optional: path of the text file to analyse
     */
    public static void main(String[] args) {
        File file = new File(
                args.length > 0 ? args[0] : "C:/Users/Administrator/Desktop/james.txt");
        Count(file);
    }
}
相关文章推荐
- java对世界各个时区(TimeZone)的通用转换处理方法(转载)
- java-注解annotation
- java-模拟tomcat服务器
- java-用HttpURLConnection发送Http请求.
- java-WEB中的监听器Listener
- Android IPC进程间通讯机制
- Android之获取手机上的图片和视频缩略图thumbnails
- android string.xml文件中的整型和string型代替
- Android Native 绘图方法
- Android java 与 javascript互访(相互调用)的方法例子
- 介绍一款信息管理系统的开源框架---jeecg
- 聚类算法之kmeans算法java版本
- java实现 PageRank算法
- PropertyChangeListener简单理解
- c语言实现hashmap(转载)
- MySQL 正则表达式
- 正则表达式