您的位置:首页 > 其它

POI 读取Excel文档中的数据——兼容Excel2003和Excel2007

2016-01-07 11:24 399 查看
  Apache POI是Apache软件基金会的开放源码函式库,POI提供API给Java程序对Microsoft Office格式档案读和写的功能。

  HSSF - 提供读写Microsoft Excel 97-2003(.xls)格式档案的功能。
  XSSF - 提供读写Microsoft Excel 2007及以上版本OOXML(.xlsx)格式档案的功能。

  以下是项目工程结构图:
  


  使用POI解析Excel文件需要用到POI相关的jar包,这些jar包可以在Apache官网下载:http://poi.apache.org/download.html

这里我使用的jar包版本为poi-3.14-beta1-20151223.jar(注意:解析.xlsx文件所用的XSSF相关类位于poi-ooxml包中,还需要同时引入poi-ooxml、poi-ooxml-schemas以及xmlbeans等依赖jar包)

  相关代码如下:

  Excel文件解析接口 IExcelParse.java

/*
* IExcelParse.java
*
* 2016-1-6 下午4:45:53
*
* RecluseKapoor
*
* Copyright © 2016, RecluseKapoor. All rights reserved.
*
*/
package com.rk.pub.poi.excel;

/**
 * Contract for Excel workbook parsers.
 * <p>
 * Every format-specific parser (one per supported Excel version) implements
 * this interface. In the implementations shipped alongside it, sheet, row and
 * column numbers are 1-based and are converted to 0-based indices internally.
 * <p>
 * Company: Kapoor Studio<br>
 * Website: http://www.cnblogs.com/reclusekapoor/<br>
 * Created: 2016-1-6
 *
 * @author RecluseKapoor
 * @version 1.0
 */
public interface IExcelParse {

    /**
     * Loads the workbook stored at the given location.
     *
     * @param path path (including file name) of the Excel file
     * @throws Exception if the file cannot be opened or parsed
     */
    void loadExcel(String path) throws Exception;

    /**
     * Returns the name of a sheet.
     *
     * @param sheetNo sheet number
     * @return the sheet name
     */
    String getSheetName(int sheetNo);

    /**
     * Returns the number of sheets in the loaded workbook.
     *
     * @return the sheet count
     * @throws Exception if the count cannot be determined or is unacceptable
     *                   to the implementation
     */
    int getSheetCount() throws Exception;

    /**
     * Returns the row count reported for a sheet.
     *
     * @param sheetNo sheet number
     * @return the row count as defined by the implementation
     */
    int getRowCount(int sheetNo);

    /**
     * Returns the row count of a sheet, considering only rows that actually
     * carry content.
     *
     * @param sheetNo sheet number
     * @return the "real" row count as defined by the implementation
     */
    int getRealRowCount(int sheetNo);

    /**
     * Reads a single cell as text.
     *
     * @param sheetNo sheet number
     * @param rowNo   row number
     * @param cellNo  column number
     * @return the textual content of the cell
     * @throws Exception if the cell cannot be read
     */
    String readExcelByRowAndCell(int sheetNo, int rowNo, int cellNo)
            throws Exception;

    /**
     * Reads every cell of one row as text.
     *
     * @param sheetNo sheet number
     * @param rowNo   row number
     * @return the cell values of the row
     * @throws Exception if a cell cannot be read
     */
    String[] readExcelByRow(int sheetNo, int rowNo) throws Exception;

    /**
     * Reads every cell of one column as text.
     *
     * @param sheetNo sheet number
     * @param cellNo  column number
     * @return the cell values of the column
     * @throws Exception if a cell cannot be read
     */
    String[] readExcelByCell(int sheetNo, int cellNo) throws Exception;

    /**
     * Releases the resources held for the loaded workbook.
     */
    void close();
}


/*
* ExcelParse2003.java
*
* 2016-1-6 下午4:45:53
*
* RecluseKapoor
*
* Copyright © 2016, RecluseKapoor. All rights reserved.
*
*/
package com.rk.pub.poi.excel;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Timestamp;
import java.text.DecimalFormat;
import java.util.Date;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * Parser for Excel 97-2003 ({@code .xls}) workbooks, built on Apache POI HSSF.
 * <p>
 * All sheet, row and column numbers accepted by the public methods are
 * 1-based; they are converted to POI's 0-based indices internally.
 * <p>
 * Company: Kapoor Studio<br>
 * Website: http://www.cnblogs.com/reclusekapoor/<br>
 * Created: 2016-1-6
 *
 * @author RecluseKapoor
 * @version 1.0
 */
public class ExcelParse2003 implements IExcelParse {

    /** Value returned when the requested row or cell does not exist. */
    private static final String NO_DATA = "NoData";

    /** The loaded workbook; {@code null} before loading and after {@link #close()}. */
    private HSSFWorkbook wb = null;

    /**
     * Loads the workbook from the given file.
     *
     * @param filePathAndName path (including file name) of the {@code .xls} file
     * @throws FileNotFoundException if the file cannot be opened; the original
     *                               exception is attached as the cause
     * @throws IOException           if the file cannot be read or parsed; the
     *                               original exception is attached as the cause
     */
    @Override
    public void loadExcel(String filePathAndName) throws FileNotFoundException,
            IOException {
        FileInputStream fis = null;
        POIFSFileSystem fs = null;
        try {
            fis = new FileInputStream(filePathAndName);
            fs = new POIFSFileSystem(fis);
            wb = new HSSFWorkbook(fs);
        } catch (FileNotFoundException e) {
            // FileNotFoundException has no (message, cause) constructor.
            FileNotFoundException wrapped = new FileNotFoundException(
                    "加载Excel文件失败:" + e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        } catch (IOException e) {
            throw new IOException("加载Excel文件失败:" + e.getMessage(), e);
        } finally {
            // Nested finally: the file system is closed even if closing the
            // stream itself fails.
            try {
                if (fis != null) {
                    fis.close();
                }
            } finally {
                if (fs != null) {
                    fs.close();
                }
            }
        }
    }

    /**
     * Returns the name of a sheet.
     *
     * @param sheetNo 1-based sheet number
     * @return the sheet name
     */
    @Override
    public String getSheetName(int sheetNo) {
        return wb.getSheetName(sheetNo - 1);
    }

    /**
     * Returns the number of sheets in the workbook.
     *
     * @return the sheet count (always greater than zero)
     * @throws Exception if the workbook contains no sheet at all
     */
    @Override
    public int getSheetCount() throws Exception {
        int sheetCount = wb.getNumberOfSheets();
        if (sheetCount == 0) {
            throw new Exception("Excel中没有SHEET页");
        }
        return sheetCount;
    }

    /**
     * Returns the value of {@code HSSFSheet.getLastRowNum()} for the sheet.
     * <p>
     * Note that this is the 0-based index of the last row, not a true count:
     * a sheet whose last row is row 10 in Excel yields 9.
     *
     * @param sheetNo 1-based sheet number
     * @return the 0-based index of the last row of the sheet
     */
    @Override
    public int getRowCount(int sheetNo) {
        return wb.getSheetAt(sheetNo - 1).getLastRowNum();
    }

    /**
     * Returns the 0-based index of the last row that holds real content.
     * <p>
     * Rows are scanned upwards starting at {@code getLastRowNum()}. A row
     * counts as having content when it has at least one cell that is neither
     * blank nor a string consisting only of whitespace. Row 0 itself is not
     * inspected; 0 is returned when no row above it has content.
     *
     * @param sheetNo 1-based sheet number
     * @return the 0-based index of the last row with content, or 0
     */
    @Override
    public int getRealRowCount(int sheetNo) {
        HSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        for (int rowIndex = sheet.getLastRowNum(); rowIndex >= 1; rowIndex--) {
            HSSFRow row = sheet.getRow(rowIndex);
            if (row != null && hasContent(row)) {
                return rowIndex;
            }
        }
        return 0;
    }

    /**
     * Tells whether a row has at least one cell that is neither blank nor a
     * whitespace-only string.
     */
    private static boolean hasContent(HSSFRow row) {
        short firstCellNum = row.getFirstCellNum();
        short lastCellNum = row.getLastCellNum();
        for (int col = firstCellNum; col < lastCellNum; col++) {
            HSSFCell cell = row.getCell(col);
            if (cell == null
                    || cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) {
                continue;
            }
            if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
                String value = cell.getStringCellValue();
                if (value == null || value.trim().length() == 0) {
                    continue;
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Reads one cell and returns its content as text.
     * <ul>
     * <li>missing row or cell: {@code "NoData"}</li>
     * <li>date-formatted numeric cell: {@code yyyy-MM-dd} when the time part is
     * midnight, otherwise {@code yyyy-MM-dd HH:mm:ss} (fractional seconds are
     * kept when present)</li>
     * <li>other numeric cell: formatted with the pattern {@code 0.########}</li>
     * <li>string cell: the string value</li>
     * <li>formula cell: the numeric result as produced by
     * {@code String.valueOf(double)}</li>
     * <li>blank, boolean, error and any other cell type: the empty string</li>
     * </ul>
     *
     * @param sheetNo 1-based sheet number
     * @param rowNo   1-based row number
     * @param cellNo  1-based column number
     * @return the textual content of the cell
     * @throws Exception if reading the cell fails (for example a formula whose
     *                   result is not numeric); the underlying exception is
     *                   attached as the cause
     */
    @Override
    public String readExcelByRowAndCell(int sheetNo, int rowNo, int cellNo)
            throws Exception {
        int sheetIndex = sheetNo - 1;
        HSSFSheet sheet = wb.getSheetAt(sheetIndex);
        String sheetName = wb.getSheetName(sheetIndex);
        try {
            HSSFRow row = sheet.getRow(rowNo - 1);
            if (row == null) {
                return NO_DATA;
            }
            HSSFCell cell = row.getCell(cellNo - 1);
            if (cell == null) {
                return NO_DATA;
            }
            int cellType = cell.getCellType();
            if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
                // Numeric covers numbers, currency, dates, times, accounting...
                // NOTE(review): isCellDateFormatted does not recognise every
                // date format, so some date cells may be returned as numbers.
                if (HSSFDateUtil.isCellDateFormatted(cell)) {
                    return formatDate(HSSFDateUtil.getJavaDate(cell
                            .getNumericCellValue()));
                }
                return new DecimalFormat("0.########").format(cell
                        .getNumericCellValue());
            }
            if (cellType == HSSFCell.CELL_TYPE_STRING) {
                return cell.getStringCellValue();
            }
            if (cellType == HSSFCell.CELL_TYPE_FORMULA) {
                return String.valueOf(cell.getNumericCellValue());
            }
            // Blank, boolean, error and unknown cell types yield no text.
            return "";
        } catch (Exception e) {
            throw new Exception(sheetName + "sheet页中" + "第" + rowNo + "行" + "第"
                    + cellNo + "列" + "数据不符合要求,请检查sheet页", e);
        }
    }

    /**
     * Formats a date via {@link Timestamp#toString()}, dropping a midnight time
     * part (including the separating space) or a zero fractional-second part.
     */
    private static String formatDate(Date date) {
        String text = new Timestamp(date.getTime()).toString();
        if (text.endsWith("00:00:00.0")) {
            // trim() removes the space that separated date and time.
            return text.substring(0, text.lastIndexOf("00:00:00.0")).trim();
        }
        if (text.endsWith(".0")) {
            return text.substring(0, text.lastIndexOf(".0"));
        }
        return text;
    }

    /**
     * Reads all cells of one row, from the first column up to the last cell
     * of the row.
     *
     * @param sheetNo 1-based sheet number
     * @param rowNo   1-based row number
     * @return the cell values; an empty array when the row does not exist or
     *         contains no cells
     * @throws Exception if a cell cannot be read
     */
    @Override
    public String[] readExcelByRow(int sheetNo, int rowNo) throws Exception {
        HSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        HSSFRow row = sheet.getRow(rowNo - 1);
        // getLastCellNum() is -1 for a row without cells.
        if (row == null || row.getLastCellNum() <= 0) {
            return new String[0];
        }
        int cellCount = row.getLastCellNum();
        String[] rowData = new String[cellCount];
        for (int k = 1; k <= cellCount; k++) {
            rowData[k - 1] = readExcelByRowAndCell(sheetNo, rowNo, k);
        }
        return rowData;
    }

    /**
     * Reads one column, from the first row down to the last row of the sheet.
     *
     * @param sheetNo 1-based sheet number
     * @param cellNo  1-based column number
     * @return the cell values, one entry per row ({@code getLastRowNum() + 1}
     *         entries)
     * @throws Exception if a cell cannot be read
     */
    @Override
    public String[] readExcelByCell(int sheetNo, int cellNo) throws Exception {
        HSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        int lastRowIndex = sheet.getLastRowNum();
        String[] cellData = new String[lastRowIndex + 1];
        for (int i = 0; i <= lastRowIndex; i++) {
            // readExcelByRowAndCell expects 1-based numbers and converts them
            // itself, so pass sheetNo/cellNo through and shift the row index.
            cellData[i] = readExcelByRowAndCell(sheetNo, i + 1, cellNo);
        }
        return cellData;
    }

    /**
     * Closes the workbook and releases its resources. A failure while closing
     * is reported on standard error and otherwise ignored; the workbook
     * reference is cleared in either case.
     */
    @Override
    public void close() {
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                wb = null;
            }
        }
    }
}


/*
* ExcelParse2007.java
*
* 2016-1-6 下午4:45:53
*
* RecluseKapoor
*
* Copyright © 2016, RecluseKapoor. All rights reserved.
*
*/
package com.rk.pub.poi.excel;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Timestamp;
import java.text.DecimalFormat;
import java.util.Date;

import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 * Parser for Excel 2007 ({@code .xlsx}) workbooks, built on Apache POI XSSF.
 * <p>
 * All sheet, row and column numbers accepted by the public methods are
 * 1-based; they are converted to POI's 0-based indices internally.
 * <p>
 * Company: Kapoor Studio<br>
 * Website: http://www.cnblogs.com/reclusekapoor/<br>
 * Created: 2016-1-6
 *
 * @author RecluseKapoor
 * @version 1.0
 */
public class ExcelParse2007 implements IExcelParse {

    /** Value returned when the requested row or cell does not exist. */
    private static final String NO_DATA = "NoData";

    /** The loaded workbook; {@code null} before loading and after {@link #close()}. */
    private XSSFWorkbook wb = null;

    /**
     * Loads the workbook from the given file.
     *
     * @param filePathAndName path (including file name) of the {@code .xlsx} file
     * @throws FileNotFoundException if the file cannot be opened; the original
     *                               exception is attached as the cause
     * @throws IOException           if the file cannot be read or parsed; the
     *                               original exception is attached as the cause
     */
    @Override
    public void loadExcel(String filePathAndName) throws FileNotFoundException,
            IOException {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(filePathAndName);
            wb = new XSSFWorkbook(fis);
        } catch (FileNotFoundException e) {
            // FileNotFoundException has no (message, cause) constructor.
            FileNotFoundException wrapped = new FileNotFoundException(
                    "加载Excel文件失败:" + e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        } catch (IOException e) {
            throw new IOException("加载Excel文件失败:" + e.getMessage(), e);
        } finally {
            if (fis != null) {
                fis.close();
            }
        }
    }

    /**
     * Returns the name of a sheet.
     *
     * @param sheetNo 1-based sheet number
     * @return the sheet name
     */
    @Override
    public String getSheetName(int sheetNo) {
        return wb.getSheetName(sheetNo - 1);
    }

    /**
     * Returns the number of sheets in the workbook.
     *
     * @return the sheet count (always greater than zero)
     * @throws Exception if the workbook contains no sheet at all
     */
    @Override
    public int getSheetCount() throws Exception {
        int sheetCount = wb.getNumberOfSheets();
        if (sheetCount == 0) {
            throw new Exception("Excel中没有SHEET页");
        }
        return sheetCount;
    }

    /**
     * Returns the value of {@code XSSFSheet.getLastRowNum()} for the sheet.
     * <p>
     * Note that this is the 0-based index of the last row, not a true count:
     * a sheet whose last row is row 10 in Excel yields 9.
     *
     * @param sheetNo 1-based sheet number
     * @return the 0-based index of the last row of the sheet
     */
    @Override
    public int getRowCount(int sheetNo) {
        return wb.getSheetAt(sheetNo - 1).getLastRowNum();
    }

    /**
     * Returns the 0-based index of the last row that holds real content.
     * <p>
     * Rows are scanned upwards starting at {@code getLastRowNum()}. A row
     * counts as having content when it has at least one cell that is neither
     * blank nor a string consisting only of whitespace. Row 0 itself is not
     * inspected; 0 is returned when no row above it has content.
     *
     * @param sheetNo 1-based sheet number
     * @return the 0-based index of the last row with content, or 0
     */
    @Override
    public int getRealRowCount(int sheetNo) {
        XSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        for (int rowIndex = sheet.getLastRowNum(); rowIndex >= 1; rowIndex--) {
            XSSFRow row = sheet.getRow(rowIndex);
            if (row != null && hasContent(row)) {
                return rowIndex;
            }
        }
        return 0;
    }

    /**
     * Tells whether a row has at least one cell that is neither blank nor a
     * whitespace-only string.
     */
    private static boolean hasContent(XSSFRow row) {
        short firstCellNum = row.getFirstCellNum();
        short lastCellNum = row.getLastCellNum();
        for (int col = firstCellNum; col < lastCellNum; col++) {
            XSSFCell cell = row.getCell(col);
            if (cell == null
                    || cell.getCellType() == XSSFCell.CELL_TYPE_BLANK) {
                continue;
            }
            if (cell.getCellType() == XSSFCell.CELL_TYPE_STRING) {
                String value = cell.getStringCellValue();
                if (value == null || value.trim().length() == 0) {
                    continue;
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Reads one cell and returns its content as text.
     * <ul>
     * <li>missing row or cell: {@code "NoData"}</li>
     * <li>numeric cell recognised as a date (by POI, or by the Chinese
     * year/month/day data format): {@code yyyy-MM-dd} when the time part is
     * midnight, otherwise {@code yyyy-MM-dd HH:mm:ss} (fractional seconds are
     * kept when present)</li>
     * <li>other numeric cell: formatted with the pattern {@code 0.########}</li>
     * <li>string cell: the string value</li>
     * <li>formula cell: the numeric result as produced by
     * {@code String.valueOf(double)}</li>
     * <li>blank, boolean and error cells: the empty string</li>
     * </ul>
     *
     * @param sheetNo 1-based sheet number
     * @param rowNo   1-based row number
     * @param cellNo  1-based column number
     * @return the textual content of the cell
     * @throws Exception if reading the cell fails (for example a formula whose
     *                   result is not numeric, or an unrecognised cell type);
     *                   the underlying exception is attached as the cause
     */
    @Override
    public String readExcelByRowAndCell(int sheetNo, int rowNo, int cellNo)
            throws Exception {
        XSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        String sheetName = wb.getSheetName(sheetNo - 1);
        try {
            XSSFRow row = sheet.getRow(rowNo - 1);
            if (row == null) {
                return NO_DATA;
            }
            XSSFCell cell = row.getCell(cellNo - 1);
            if (cell == null) {
                return NO_DATA;
            }
            int cellType = cell.getCellType();
            if (cellType == XSSFCell.CELL_TYPE_NUMERIC) {
                // Numeric covers numbers, currency, dates, times, accounting...
                double numeric = cell.getNumericCellValue();
                // The data format string may be null for unknown format ids.
                String format = cell.getCellStyle().getDataFormatString();
                // isCellDateFormatted only recognises some date formats, so
                // the Chinese year/month/day format is checked explicitly.
                if (DateUtil.isCellDateFormatted(cell)
                        || (format != null && format
                                .contains("yyyy\"年\"m\"月\"d\"日\""))) {
                    return formatDate(DateUtil.getJavaDate(numeric));
                }
                return new DecimalFormat("0.########").format(numeric);
            }
            if (cellType == XSSFCell.CELL_TYPE_STRING) {
                return cell.getStringCellValue();
            }
            if (cellType == XSSFCell.CELL_TYPE_FORMULA) {
                return String.valueOf(cell.getNumericCellValue());
            }
            if (cellType == XSSFCell.CELL_TYPE_BLANK
                    || cellType == XSSFCell.CELL_TYPE_BOOLEAN
                    || cellType == XSSFCell.CELL_TYPE_ERROR) {
                return "";
            }
            // Wrapped by the catch below; kept as the cause for diagnosis.
            throw new Exception(sheetName + " sheet页中" + "第" + rowNo + "行"
                    + "第" + cellNo + "列,单元格格式无法识别,请检查sheet页");
        } catch (Exception e) {
            throw new Exception(sheetName + "sheet页中" + "第" + rowNo + "行" + "第"
                    + cellNo + "列" + "数据不符合要求,请检查sheet页", e);
        }
    }

    /**
     * Formats a date via {@link Timestamp#toString()}, dropping a midnight time
     * part (including the separating space) or a zero fractional-second part.
     */
    private static String formatDate(Date date) {
        String text = new Timestamp(date.getTime()).toString();
        if (text.endsWith("00:00:00.0")) {
            // yyyy-MM-dd; trim() removes the space that separated date and time.
            return text.substring(0, text.lastIndexOf("00:00:00.0")).trim();
        }
        if (text.endsWith(".0")) {
            // yyyy-MM-dd HH:mm:ss
            return text.substring(0, text.lastIndexOf(".0"));
        }
        return text;
    }

    /**
     * Reads all cells of one row, from the first column up to the last cell
     * of the row.
     *
     * @param sheetNo 1-based sheet number
     * @param rowNo   1-based row number
     * @return the cell values; an empty array when the row does not exist or
     *         contains no cells
     * @throws Exception if a cell cannot be read
     */
    @Override
    public String[] readExcelByRow(int sheetNo, int rowNo) throws Exception {
        XSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        XSSFRow row = sheet.getRow(rowNo - 1);
        // getLastCellNum() is -1 for a row without cells.
        if (row == null || row.getLastCellNum() <= 0) {
            return new String[0];
        }
        int cellCount = row.getLastCellNum();
        String[] rowData = new String[cellCount];
        for (int k = 1; k <= cellCount; k++) {
            rowData[k - 1] = readExcelByRowAndCell(sheetNo, rowNo, k);
        }
        return rowData;
    }

    /**
     * Reads one column, from the first row down to the last row of the sheet.
     *
     * @param sheetNo 1-based sheet number
     * @param cellNo  1-based column number
     * @return the cell values, one entry per row ({@code getLastRowNum() + 1}
     *         entries)
     * @throws Exception if a cell cannot be read
     */
    @Override
    public String[] readExcelByCell(int sheetNo, int cellNo) throws Exception {
        XSSFSheet sheet = wb.getSheetAt(sheetNo - 1);
        int lastRowIndex = sheet.getLastRowNum();
        String[] cellData = new String[lastRowIndex + 1];
        for (int i = 0; i <= lastRowIndex; i++) {
            // readExcelByRowAndCell expects 1-based numbers and converts them
            // itself, so pass sheetNo/cellNo through and shift the row index.
            cellData[i] = readExcelByRowAndCell(sheetNo, i + 1, cellNo);
        }
        return cellData;
    }

    /**
     * Closes the workbook and releases its resources. A failure while closing
     * is reported on standard error and otherwise ignored; the workbook
     * reference is cleared in either case.
     */
    @Override
    public void close() {
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                wb = null;
            }
        }
    }
}


/*
* ExcelParse.java
*
* 2016-1-6 下午4:45:53
*
* RecluseKapoor
*
* Copyright © 2016, RecluseKapoor. All rights reserved.
*
*/
package com.rk.pub.poi;

import com.rk.pub.poi.excel.ExcelParse2003;
import com.rk.pub.poi.excel.ExcelParse2007;
import com.rk.pub.poi.excel.IExcelParse;

/**
 * Facade for parsing Excel files that works with both Excel 2003
 * ({@code .xls}) and Excel 2007 ({@code .xlsx}) workbooks.
 * <p>
 * The concrete parser is chosen from the file extension when
 * {@link #loadExcel(String)} is called; every other method delegates to it.
 * <p>
 * To support a further Excel format, add another {@link IExcelParse}
 * implementation to the {@code excel} package and select it in
 * {@code getInstance}; callers of this class are not affected.
 * <p>
 * Company: Kapoor Studio<br>
 * Website: http://www.cnblogs.com/reclusekapoor/<br>
 * Created: 2016-1-6
 *
 * @author RecluseKapoor
 * @version 1.0
 */
public class ExcelParse {

    /** Format-specific parser; {@code null} until a supported file was selected. */
    private IExcelParse excelParse = null;

    /**
     * Selects the parser implementation matching the file extension
     * (compared case-insensitively).
     *
     * @param path path of the Excel file
     * @return always {@code true}
     * @throws Exception if the extension is neither {@code .xls} nor
     *                   {@code .xlsx}
     */
    private boolean getInstance(String path) throws Exception {
        path = path.toLowerCase();
        if (path.endsWith(".xls")) {
            excelParse = new ExcelParse2003();
        } else if (path.endsWith(".xlsx")) {
            excelParse = new ExcelParse2007();
        } else {
            throw new Exception("对不起,目前系统不支持对该版本Excel文件的解析。");
        }
        return true;
    }

    /**
     * Selects the parser for the given file and loads the workbook.
     *
     * @param filePathAndName path (including file name) of the Excel file
     * @throws Exception if the format is not supported or loading fails
     */
    public void loadExcel(String filePathAndName) throws Exception {
        getInstance(filePathAndName);
        excelParse.loadExcel(filePathAndName);
    }

    /**
     * Returns the name of a sheet.
     *
     * @param sheetNo sheet number, passed to the underlying parser
     * @return the sheet name
     */
    public String getSheetName(int sheetNo) {
        return excelParse.getSheetName(sheetNo);
    }

    /**
     * Returns the number of sheets in the workbook.
     *
     * @return the sheet count
     * @throws Exception if the underlying parser rejects the workbook
     */
    public int getSheetCount() throws Exception {
        return excelParse.getSheetCount();
    }

    /**
     * Returns the row count reported by the underlying parser for a sheet.
     *
     * @param sheetNo sheet number, passed to the underlying parser
     * @return the value of the parser's {@code getRowCount}
     */
    public int getRowCount(int sheetNo) {
        return excelParse.getRowCount(sheetNo);
    }

    /**
     * Returns the row count of a sheet considering only rows with content,
     * as reported by the underlying parser.
     *
     * @param sheetNo sheet number, passed to the underlying parser
     * @return the value of the parser's {@code getRealRowCount}
     */
    public int getRealRowCount(int sheetNo) {
        return excelParse.getRealRowCount(sheetNo);
    }

    /**
     * Reads a single cell as text.
     *
     * @param sheetNo sheet number
     * @param rowNo   row number
     * @param cellNo  column number
     * @return the textual content of the cell
     * @throws Exception if the cell cannot be read
     */
    public String readExcelByRowAndCell(int sheetNo, int rowNo, int cellNo)
            throws Exception {
        return excelParse.readExcelByRowAndCell(sheetNo, rowNo, cellNo);
    }

    /**
     * Reads all cells of one row of a sheet.
     *
     * @param sheetNo sheet number
     * @param rowNo   row number
     * @return the cell values of the row
     * @throws Exception if a cell cannot be read
     */
    public String[] readExcelByRow(int sheetNo, int rowNo) throws Exception {
        return excelParse.readExcelByRow(sheetNo, rowNo);
    }

    /**
     * Reads all cells of one column of a sheet.
     *
     * @param sheetNo sheet number
     * @param cellNo  column number
     * @return the cell values of the column
     * @throws Exception if a cell cannot be read
     */
    public String[] readExcelByCell(int sheetNo, int cellNo) throws Exception {
        return excelParse.readExcelByCell(sheetNo, cellNo);
    }

    /**
     * Closes the workbook and releases its resources.
     * <p>
     * Does nothing when no parser has been created yet (for example because
     * {@link #loadExcel(String)} rejected the file extension), so it is safe
     * to call from a {@code finally} block.
     */
    public void close() {
        if (excelParse != null) {
            excelParse.close();
        }
    }

    /**
     * Small manual test: prints the sheet count and the first cell of the
     * first sheet.
     *
     * @param args optional; {@code args[0]} is the path of the Excel file to
     *             read. Defaults to {@code E:\2007.xls} when omitted.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0]
                : "E:\\2007.xls";
        ExcelParse parse = new ExcelParse();
        try {
            // Load the Excel file.
            parse.loadExcel(path);
            // Print the number of sheets.
            System.out.println(parse.getSheetCount());
            // Print the content of the first cell.
            System.out.println(parse.readExcelByRowAndCell(1, 1, 1));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release resources.
            parse.close();
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: