您的位置：首页 > 编程语言 > Java开发

Java使用poi读取excel数据（excel可能很大，先转换为csv再读取）

2017-07-26 10:49 1101 查看

	————————————配置————————————
	jdbc.properties中加入： excelUrl=/……xlsx文件目录路径/ (excelUrl + “xxxx.xlsx” 为完整路径)

	导入poi-3.16下的6个jar包，poi-3.16/lib下的5个jar包，poi-3.16/ooxml-lib下的2个jar包

	将Excel_reader.java 和 XLSX2CSV.java 导入项目

	————————————方法————————————

	Excel_reader类中的:

	xlsx_reader(String excel_name , ArrayList<Object> args)

	//excel_name为要读取的xlsx文件名（带后缀） , args为要获取的列号的列表
	//返回二维数组ArrayList<ArrayList<String>> 第一维表示xlsx的行，第二维表示xlsx中该行的单元格

	//空单元格返回null，需要自己处理成—--或0

	//args可以填 int 或者 String ，若args[i]为int,那么返回的二维数组的第I列为xlsx中的第args[i]列
	//若args[i]为String,那么返回的二维数组的第i列为改字符常量


	//例如 xlsx_reader(“崇明县-表15：“夏淡”绿叶菜种植补贴-2014.xlsx”,args)
	// 其中 args=[7,8,9,”绿肥”]
	//那么返回的二维数组内容如下：
	[小明 , 350401219948383**** , null , 绿肥]
	[小红 , 645354354354323**** , null , 绿肥]
	[小兰 , 445353453425643**** , null , 绿肥]
	。。。。。。

XLSX2CSV.java

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
* A rudimentary XLSX -> CSV processor modeled on the
* POI sample program XLS2CSVmra from the package
* org.apache.poi.hssf.eventusermodel.examples.
* As with the HSSF version, this tries to spot missing
* rows and cells, and output empty entries for them.
* <p/>
* Data sheets are read using a SAX parser to keep the
* memory footprint relatively small, so this should be
* able to read enormous workbooks. The styles table and
* the shared-string table must be kept in memory. The
* standard POI styles table class is used, but a custom
* (read-only) class is used for the shared string table
* because the standard POI SharedStringsTable grows very
* quickly with the number of unique strings.
* <p/>
* For a more advanced implementation of SAX event parsing
* of XLSX files, see {@link XSSFEventBasedExcelExtractor}
* and {@link XSSFSheetXMLHandler}. Note that for many cases,
* it may be possible to simply use those with a custom
* {@link SheetContentsHandler} and no SAX code needed of
* your own!
*/
public class XLSX2CSV {
/**
* Uses the XSSF Event SAX helpers to do most of the work
* of parsing the Sheet XML, and outputs the contents
* as a (basic) CSV.
*/
private class SheetToCSV implements SheetContentsHandler {
private boolean firstCellOfRow = false;
private int currentRow = -1;
private int currentCol = -1;

private void outputMissingRows(int number) {
for (int i = 0; i < number; i++) {
curstr = new ArrayList<String>();
for (int j = 0; j < minColumns; j++) {
curstr.add(null);
}
output.add(curstr);
}
}

@Override
public void startRow(int rowNum) {
curstr = new ArrayList<String>();
// If there were gaps, output the missing rows
outputMissingRows(rowNum - currentRow - 1);
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
}

@Override
public void endRow(int rowNum) {
// Ensure the minimum number of columns
for (int i = currentCol; i < minColumns ; i++) {
curstr.add(null);
}
output.add(curstr);
}

@Override
public void cell(String cellReference, String formattedValue,
XSSFComment comment) {
// if (firstCellOfRow) {
// firstCellOfRow = false;
// } else {
// curstr.append(',');
// }

// gracefully handle missing CellRef here in a similar way as XSSFCell does
if (cellReference == null) {
cellReference = new CellAddress(currentRow, currentCol).formatAsString();
}

// Did we miss any cells?
int thisCol = (new CellReference(cellReference)).getCol();
int missedCols = thisCol - currentCol - 1;
for (int i = 0; i < missedCols; i++) {
curstr.add(null);
}
currentCol = thisCol;

// Number or string?
try {
Double.parseDouble(formattedValue);
curstr.add(formattedValue);
} catch (NumberFormatException e) {
// output.append('"');
curstr.add(formattedValue);
// output.append('"');
}
}

@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}

///////////////////////////////////////

private final OPCPackage xlsxPackage;

/**
* Number of columns to read starting with leftmost
*/
private final int minColumns;

/**
* Destination for data
*/

private ArrayList<ArrayList<String>> output;
private ArrayList<String> curstr;

public ArrayList<ArrayList<String>> get_output(){
return output;
}

/**
* Creates a new XLSX -> CSV converter
*
* @param pkg The XLSX package to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XLSX2CSV(OPCPackage pkg, int minColumns) {
this.xlsxPackage = pkg;
this.minColumns = minColumns;
}

/**
* Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetHandler,
InputStream sheetInputStream)
throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}

/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
public void process()
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
output = new ArrayList<ArrayList<String>> ();
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
System.out.println("正在读取sheet： "+sheetName + " [index=" + index + "]:");
processSheet(styles, strings, new SheetToCSV(), stream);
System.out.println("sheet 读取完成!");
stream.close();
++index;
}
}

// public static void main(String[] args) throws Exception {
// /* if (args.length < 1) {
// System.err.println("Use:");
// System.err.println(" XLSX2CSV <xlsx file> [min columns]");
// return;
// }*/
//
// File xlsxFile = new File("F:\\8月数据.xlsx");
// if (!xlsxFile.exists()) {
// System.err.println("Not found or not a file: " + xlsxFile.getPath());
// return;
// }
//
// int minColumns = -1;
// if (args.length >= 2)
// minColumns = Integer.parseInt(args[1]);
//
// // The package open is instantaneous, as it should be.
// OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
// XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
// xlsx2csv.process();
// p.close();
// }
}
Excel_reader.java
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class Excel_reader {

// *************xlsx文件读取函数************************
// 在jdbc.properties上加上 excelUrl：xlsx文件的目录
// excel_name为文件名，arg为需要查询的列号(输入数字则返回对应列 , 输入字符串则固定返回这个字符串)
// 返回
@SuppressWarnings({ "resource", "unused" })
public static ArrayList<ArrayList<String>> xlsx_reader(String excel_name, ArrayList<Object> args)
throws IOException {
// 读取excel文件夹url
Properties properties = new Properties();
InputStream inStream = JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties");
properties.load(inStream);
String excelUrl = properties.getProperty("excelUrl");

File xlsxFile = new File(excelUrl + excel_name);
if (!xlsxFile.exists()) {
System.err.println("Not found or not a file: " + xlsxFile.getPath());
return null;
}
ArrayList<ArrayList<String>> excel_output = new ArrayList<ArrayList<String>>();
try {
OPCPackage p;
p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
XLSX2CSV xlsx2csv = new XLSX2CSV(p, 20); // 20代表最大列数
xlsx2csv.process();
excel_output = xlsx2csv.get_output();
p.close(); //释放
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

System.out.println(excel_name + " 读取完毕");

// //读取xlsx文件
// XSSFWorkbook xssfWorkbook = null;
// //寻找目录读取文件
// System.out.println("开始读取 "+excel_name);
// File excelFile = new File(excelUrl+excel_name);
// InputStream is = new FileInputStream(excelFile);
// xssfWorkbook = new XSSFWorkbook(is);
//
// if(xssfWorkbook==null){
// System.out.println("未读取到内容,请检查路径！");
// return null;
// }else{
// System.out.println(excel_name+" 读取完毕");
// }

ArrayList<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();
// 遍历xlsx中的sheet

// 对于每个sheet，读取其中的每一行
for (int rowNum = 0; rowNum < excel_output.size(); rowNum++) {
ArrayList<String> cur_output = excel_output.get(rowNum);
ArrayList<String> curarr = new ArrayList<String>();
for (int columnNum = 0; columnNum < args.size(); columnNum++) {
Object obj = args.get(columnNum);
if (obj instanceof String) {
curarr.add(obj.toString());
} else if (obj instanceof Integer) {
String cell = cur_output.get((int) obj);
curarr.add(cell);
} else {
System.out.print("类型错误！");
return null;
}
}
ans.add(curarr);
}

return ans;
}

// // 判断后缀为xlsx的excel文件的数据类
// @SuppressWarnings("deprecation")
// private static String getValue(XSSFCell xssfRow) {
// if (xssfRow == null) {
// return null;
// }
// if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) {
// return String.valueOf(xssfRow.getBooleanCellValue());
// } else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) {
// double cur = xssfRow.getNumericCellValue();
// long longVal = Math.round(cur);
// Object inputValue = null;
// if (Double.parseDouble(longVal + ".0") == cur)
// inputValue = longVal;
// else
// inputValue = cur;
// return String.valueOf(inputValue);
// } else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BLANK
// || xssfRow.getCellType() == xssfRow.CELL_TYPE_ERROR) {
// return "";
// } else {
// return String.valueOf(xssfRow.getStringCellValue());
// }
// }

}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航