您的位置:首页 > 编程语言 > Java开发

java 导入导出excel数据+搜索文章关键词

2017-07-27 16:34 549 查看
数学建模要用到搜索3000文章大约100多个关键词,这里用java编程,也可以使用python开源工具包jieba分词,java中文分词(Chinese Word Segmentation)工具。之前没想到。下面贴一下代码以便以后能记得:

package FileReadProject;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.*;
import java.util.*;

import FileReadProject.ExcelUtil;

import static ExcelUtilDemo.ExcelCreate.*;

/**
* Created by 82024 on 2017/7/20.
*/
public class FileRead {
private static FileRead fileRead;
private static String getWordPath = "F:\\filetable\\关键词.xlsx";
private static String path = "F:\\file";
//民事民法
private static int minshi;
private static int minshiSum;
//经济金融
private static int jingji;
private static int jingjiSum;
//刑事行政
private static int xingshi;
private static int xingshiSum;
//涉外纠纷
private static int shewai;
private static int shewaiSum;
//公司企业
private static int gongsi;
private static int gongsiSum;
//其他类别
private static int qita;
private static int qitaSum;
public static void main(String[] args) {
//这句一开始报了空指针异常,原因是:path写的不对
//这个是目录

fileRead= new FileRead();
//fileRead.getFileInfo(path);

//getFile(path);
getFileInfo(path);

createExcel();
System.out.println("民事均值50:"+minshiSum/3000);
System.out.println("民事60:"+minshiSum/3000/0.5*0.6);
System.out.println("民事70:"+minshiSum/3000/0.5*0.7);
System.out.println("民事80:"+minshiSum/3000/0.5*0.8);

System.out.println("经济均值50:"+jingjiSum/3000);
System.out.println("经济60:"+jingjiSum/3000/0.5*0.6);
System.out.println("经济70:"+jingjiSum/3000/0.5*0.7);
System.out.println("经济80:"+jingjiSum/3000/0.5*0.8);

System.out.println("刑事均值50:"+xingshiSum/3000);
System.out.println("刑事60:"+xingshiSum/3000/0.5*0.6);
System.out.println("刑事70:"+xingshiSum/3000/0.5*0.7);
System.out.println("刑事80:"+xingshiSum/3000/0.5*0.8);

System.out.println("涉外均值50:"+shewaiSum/3000);
System.out.println("涉外60:"+shewaiSum/3000/0.5*0.6);
System.out.println("涉外70:"+shewaiSum/3000/0.5*0.7);
System.out.println("涉外80:"+shewaiSum/3000/0.5*0.8);

System.out.println("公司均值50:"+gongsiSum/3000);
System.out.println("公司60:"+gongsiSum/3000/0.5*0.6);
System.out.println("公司70:"+gongsiSum/3000/0.5*0.7);
System.out.println("公司80:"+gongsiSum/3000/0.5*0.8);

System.out.println("其他均值50:"+qitaSum/3000);
System.out.println("其他60:"+qitaSum/3000/0.5*0.6);
System.out.println("其他70:"+qitaSum/3000/0.5*0.7);
System.out.println("其他80:"+qitaSum/3000/0.5*0.8);
}

/**
* 获取目录 3000份文件
* @param path
*/
/*public static void getFile(String path){
try{
File file = new File(path);
//目录下的总个数
File[] tempList = file.listFiles();
for(int i=0;i<tempList.length;i++){
//文件的个数
String dataPath = path+"\\"+(i+1);
/**
* 思路:每一个文件都遍历getFileInfo这个方法
*/
/*if(tempList[i].isFile()){
System.out.println("文件:"+tempList[i]);
}*/

/* System.out.println("文件"+(i+1)+"\n");
//得到每列的总数
getFileInfo(dataPath);

minshi=0;
jingji=0;
xingshi=0;
shewai=0;
gongsi=0;
qita=0;
System.out.println();
}
}catch(Exception e){
e.printStackTrace();
}
}*/

/**
* 获取文件信息
* path:具体的文件目录
*/
public static void getFileInfo(String path){
try{
String encoding = "utf8";
//map存关键词和关键词的个数
//设置每行的关键字个数 每读完一行就清零
File file = new File(path);
if(file.isFile()&& file.exists()){
InputStreamReader read = new InputStreamReader(
new FileInputStream(file),encoding);
BufferedReader bufferedReader =new BufferedReader(read);
String lineTxt = null;
String[][] array = fileRead.getKeyWord(getWordPath);

Map<String,Integer> counter = new HashMap<>();
while((lineTxt = bufferedReader.readLine())!= null){
//这边思路是:每一行进行全部关键词的遍历,每有一个则计数一个,相同的则累加1,统计出现关键词的频率
for(int i = 0;i< array.length;i++){//行数
//让同一列的count值相加 j变
for(int j = 0; j<array[i].length;j++ ){//列数
if(array[i][j]==null){
continue;
}
if(lineTxt.contains(array[i][j])){
if(counter.containsKey(array[i][j])){
//这个map里面的键值对是关键词出现的次数
//现在将所有的值都放在了map中
//现在要做的是:将一列的值相加存起来
counter.put(array[i][j],counter.get(array[i][j])+1);

if(j==0){
minshi++;
}
if(j==1){
jingji++;
}
if(j==2){
xingshi++;
}
if(j==3){
shewai++;
}
if(j==4){
gongsi++;
}
if(j==5){
qita++;
}

}else{
counter.put(array[i][j],1);
}
}else{
continue;
}
}
}
}
read.close();
/*int num = 0;
for(Iterator<String> it = counter.keySet().iterator(); it.hasNext();){
String key = it.next();
int count = counter.get(key);
num++;
System.out.print(key+":"+count +"  ");
if(num % 10 == 0){
System.out.println();
}
}*/
/*System.out.println(
minshi + " " +
jingji +" " +
xingshi+" " +
shewai+" "+
gongsi+" "+
qita+" ");*/
}
}catch (Exception e){
e.printStackTrace();
}
}

/**
* 获取excel表格内容 获取文章中关键词  已失效
*/
public List<List<String>> getExcelContent(String contentPath){

List<List<String>> dataLst=  new ArrayList<List<String>>();
try {
File file= new File(contentPath);
FileInputStream fis =new FileInputStream(file);
Workbook book = new XSSFWorkbook(fis);
Sheet sheet = book.getSheetAt(0);
//得到行数
int totalRows = sheet.getPhysicalNumberOfRows();
//得到列数
int totalCells = 0;
if(totalRows>=1 && sheet.getRow(0)!= null){
totalCells = sheet.getRow(0).getPhysicalNumberOfCells();
}
String array[][] =new String[totalRows][totalCells];

//循环Excel的行
for(int r= 0;r<totalRows;r++){
Row row = sheet.getRow(r);
if(row == null){
continue;
}
//将获取的关键字放在泛型中
List<String> rowLst = new ArrayList<>();
String cellValue = null;
//循环列
for(int c = 0;c < totalCells;c++){
Cell cell = row.getCell(c);
if(cell != null){
switch(cell.getCellType()){
case HSSFCell.CELL_TYPE_STRING:
cellValue = cell.getStringCellValue();
break;
case HSSFCell.CELL_TYPE_BLANK:
cellValue = "";
break;

e7e8
}
//System.out.println(cell.getStringCellValue());
array[r][c] = cell.getStringCellValue();
}
rowLst.add(cellValue);
}
dataLst.add(rowLst);
}

for(int i =0;i<array.length;i++){
for(int j= 0;j<array[i].length;j++){
if(array[i][j] == null){
continue;
}
System.out.println(array[i][j]);
}
}
} catch (Exception e) {
e.printStackTrace();
}
return dataLst;
}
/**
* 获取Excel内容
*/
public static String[][] getKeyWord(String contentPath) {

String[][] array = new String[0][];
try {
File file = new File(contentPath);
FileInputStream fis = new FileInputStream(file);
Workbook book = new XSSFWorkbook(fis);
Sheet sheet = book.getSheetAt(0);
//得到行数
int totalRows = sheet.getPhysicalNumberOfRows();
//得到列数
int totalCells = 0;
if (totalRows >= 1 && sheet.getRow(0) != null) {
totalCells = sheet.getRow(0).getPhysicalNumberOfCells();
}
array = new String[totalRows][totalCells];

//循环Excel的行
for (int r = 0; r < totalRows; r++) {
Row row = sheet.getRow(r);
if (row == null) {
continue;
}
//将获取的关键字放在泛型中
List<String> rowLst = new ArrayList<>();
String cellValue = null;
//循环列
for (int c = 0; c < totalCells; c++) {
Cell cell = row.getCell(c);
if (cell != null) {
switch (cell.getCellType()) {
case HSSFCell.CELL_TYPE_STRING:
cellValue = cell.getStringCellValue();
break;
case HSSFCell.CELL_TYPE_BLANK:
cellValue = "";
break;
}
array[r][c] = cell.getStringCellValue();
}
rowLst.add(cellValue);
}
}
} catch (Exception e) {
e.printStackTrace();
}
return array;
}

/**
* 返回关键词,验证正确性 无实际作用
*/
public static void getEveryKeyWord(){
String[][] array = fileRead.getKeyWord(getWordPath);
for(int i = 0;i< array.length;i++){
for(int j = 0; j<array[i].length;j++ ){
if(array[i][j]==null){
continue;
}
System.out.println(array[i][j]);
}
}
}

/**
* 创建excel表
*/
public static void createExcel(){
Workbook wb = createWorkbook();
CellStyle style = wb.createCellStyle();
List<String> sheetNames = new ArrayList<>();
sheetNames.add("建模拆迁小队");
List<Sheet> sheets = createSheet(wb,sheetNames);
//Row row = createRow(sheets.get(0),0);
/* Cell cell0 =createCell(row,0);
Cell cell1 =createCell(row,1);
Cell cell2 =createCell(row,2);
Cell cell3 =createCell(row,3);
Cell cell4 =createCell(row,4);
Cell cell5 =createCell(row,5);

cell0 = addValue(cell0,"民事民法");
cell1 = addValue(cell1,"经济金融");
cell2 = addValue(cell2,"刑事行政");
cell3 = addValue(cell3,"涉外纠纷");
cell4 = addValue(cell4,"公司企业");
cell5 = addValue(cell5,"其他类别");*/

//获取到每一行的值

try{
File file = new File(path);
//目录下的总个数
File[] tempList = file.listFiles();
for(int i=0;i<tempList.length;i++){
//文件的个数
String dataPath = path+"\\"+(i+1);
System.out.println("文件"+(i+1)+"\n");
//得到每列的总数
getFileInfo(dataPath);

Row row1 = createRow(sheets.get(0),i);
Cell cell00 =createCell(row1,0);
Cell cell11 =createCell(row1,1);
Cell cell22 =createCell(row1,2);
Cell cell33 =createCell(row1,3);
Cell cell44 =createCell(row1,4);
Cell cell55 =createCell(row1,5);

cell00 = addValue(cell00,String.valueOf(minshi));
cell11 = addValue(cell11,String.valueOf(jingji));
cell22 = addValue(cell22,String.valueOf(xingshi));
cell33 = addValue(cell33,String.valueOf(shewai));
cell44 = addValue(cell44,String.valueOf(gongsi));
cell55 = addValue(cell55,String.valueOf(qita));

minshiSum+= minshi;
jingjiSum+=jingji;
xingshiSum+=xingshi;
shewaiSum+=shewai;
gongsiSum+=gongsi;
qitaSum+=qita;

minshi=0;
jingji=0;
xingshi=0;
shewai=0;
gongsi=0;
qita=0;
System.out.println();
}
}catch(Exception e){
e.printStackTrace();
}

/*for(int i =1;i<3001;i++){
getFile(path);
Row row1 = createRow(sheets.get(0),i);
Cell cell00 =createCell(row1,0);
Cell cell11 =createCell(row1,1);
Cell cell22 =createCell(row1,2);
Cell cell33 =createCell(row1,3);
Cell cell44 =createCell(row1,4);
Cell cell55 =createCell(row1,5);

cell00 = addValue(cell00,String.valueOf(minshi));
cell11 = addValue(cell11,String.valueOf(jingji));
cell22 = addValue(cell22,String.valueOf(xingshi));
cell33 = addValue(cell33,String.valueOf(shewai));
cell44 = addValue(cell44,String.valueOf(gongsi));
cell55 = addValue(cell55,String.valueOf(qita));
}*/

//添加样式
style = styleFont(wb, style, Font.BOLDWEIGHT_BOLD, IndexedColors.BLUE.getIndex(), (short)10, "微软雅黑");
/*addStyle(cell0,style);
addStyle(cell1,style);
addStyle(cell2,style);
addStyle(cell3,style);
addStyle(cell4,style);
addStyle(cell5,style);*/

writeWorkbook(wb,"data.xlsx");
}
}


excel的工具类:

package FileReadProject;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.ss.util.WorkbookUtil;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
* Created by 82024 on 2017/7/21.
*/
public class ExcelUtil {
/**
* 写入工作簿
*
* @param wb
* @param
*/
public static void writeWorkbook(Workbook wb, String pathBookName) {
FileOutputStream fileOut = null;
try {
fileOut = new FileOutputStream(pathBookName);
wb.write(fileOut);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
fileOut.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

/**
* 创建工作簿
*
* @return Workbook
*/
public static Workbook createWorkbook() {
Workbook wb = new XSSFWorkbook();
return wb;
}

/**
* 创建工作表
*
* @param sheetNames
* @param wb
*/
public static List<Sheet> createSheet(Workbook wb, List<String> sheetNames) {
List<Sheet> sheets = new ArrayList<Sheet>();
for (String sheetName : sheetNames) {
// 一个安全的方式来创建有效的名称,这个工具替换无效字符用空格('')
String safeName = WorkbookUtil.createSafeSheetName(sheetName);
Sheet sheet = wb.createSheet(safeName);
sheets.add(sheet);
}
return sheets;
}

/**
* 创建行
*
* @param sheet
* @param rowIndex
* @return
*/
public static Row createRow(Sheet sheet, int rowIndex) {
Row row = sheet.createRow(rowIndex);
return row;
}

/**
* 创建单元格
*
* @param row
* @param cellIndex
* @return Cell
*/
public static Cell createCell(Row row, int cellIndex) {
Cell cell = row.createCell(cellIndex);
return cell;
}

/**
* 单元格赋值
*
* @param cell
* @param cellValue
* @return Cell
*/
public static Cell addValue(Cell cell, String cellValue) {
cell.setCellValue(cellValue);
return cell;
}

/**
* 添加样试
*
* @param cell
* @param style
* @return
*/
public static Cell addStyle(Cell cell, CellStyle style) {
cell.setCellStyle(style);
return cell;
}

/**
* 设置字体颜色
*
* @param wb
* @param style
* @param fontBlod
*            字体粗细
* @param fontColor
*            字体颜色
* @param fontHeight
*            字体大小
* @param fontName
*            字体名字
* @return
*/
public static CellStyle styleFont(Workbook wb, CellStyle style,
short fontBlod, short fontColor, short fontHeight, String fontName) {
Font font = wb.createFont();
font.setBoldweight(fontBlod);// 粗体
font.setColor(fontColor);// 字体颜色
font.setFontHeightInPoints(fontHeight);// 字体大小
font.setFontName(fontName);// 字体名字
style.setFont(font);
return style;
}

/**
* 设置背景色
*
* @param style
* @param foregroundCorlor
* @return CellStyle
*/
public static CellStyle styleForegroundColor(CellStyle style,
short foregroundCorlor) {
style.setFillForegroundColor(foregroundCorlor);
style.setFillPattern(CellStyle.SOLID_FOREGROUND);
return style;
}

/**
* 对齐方式
*
* @param style
* @param halign
* @param valign
* @return CellStyle
*/
public static CellStyle styleAlignment(CellStyle style, short halign,
short valign) {
style.setAlignment(halign);
style.setVerticalAlignment(valign);
return style;
}

/**
* 单元格边框
* @param style
* @return CellStyle
*/
public static CellStyle styleFrame(CellStyle style) {
style.setBorderBottom(CellStyle.BORDER_THIN);
style.setBottomBorderColor(IndexedColors.BLACK.getIndex());
style.setBorderLeft(CellStyle.BORDER_THIN);
style.setLeftBorderColor(IndexedColors.BLACK.getIndex());
style.setBorderRight(CellStyle.BORDER_THIN);
style.setRightBorderColor(IndexedColors.BLACK.getIndex());
style.setBorderTop(CellStyle.BORDER_THIN);
style.setTopBorderColor(IndexedColors.BLACK.getIndex());
return style;
}

/**
* 合并单元格
* @param sheet
* @param firstRow
* @param lastRow
* @param firstColumn
* @param lastColumn
*/
public static void mergeCells(Sheet sheet,int firstRow,int lastRow,int firstColumn,int lastColumn) {
sheet.addMergedRegion(new CellRangeAddress(firstRow, lastRow, firstColumn, lastColumn));
}

/**
* 设置列宽度
* @param sheet
* @param columnIndex
* @param columnWidth
*/
public static void columnWidth(Sheet sheet,int columnIndex,int columnWidth) {
sheet.setColumnWidth(columnIndex, 500*columnWidth);
}
}


这里用poi操作的excel,也可以用jxl去操作excel,导的包不一样。对于workbook,两者同时导入jar,会发生冲突。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: