JAVA解析PDF、WORD、EXCEL文档
2017-01-05 17:54
501 查看
java解析pdf、doc、docx、xls、xlsx格式文档
读取PDF文件jar引用
读取WORD文件jar引用
读取EXCEL文件jar引用
<!-- EXCEL -->
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16-beta1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16-beta1</version>
</dependency>
读取WORD文件方法
读取PDF文件方法
读取EXCEL,xls格式
/**
 * Extracts the text of a legacy binary Excel workbook (.xls) using POI's HSSF API.
 *
 * <p>Every sheet is walked row by row. Numeric and string cells are appended,
 * each followed by a tab, and every non-null row is terminated by a newline.
 * Formula cells and all other cell types contribute nothing to the output, and
 * skipped or missing cells emit no tab, so output columns are not guaranteed to
 * line up with the sheet's columns.
 *
 * <p>I/O failures are printed to standard error; whatever text was collected
 * up to that point (possibly the empty string) is returned.
 *
 * @param filePath path of the .xls file to read
 * @return the extracted text; never {@code null}
 */
@SuppressWarnings("deprecation")
public static String getTextFromExcel(String filePath) {
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder buff = new StringBuilder();
    FileInputStream fis = null;
    HSSFWorkbook wb = null;
    try {
        fis = new FileInputStream(filePath);
        wb = new HSSFWorkbook(fis);
        for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
            HSSFSheet aSheet = wb.getSheetAt(numSheets);
            if (aSheet == null) {
                continue;
            }
            // getLastRowNum() is a 0-based index, so the bound is inclusive.
            for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
                HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
                if (aRow == null) {
                    continue;
                }
                // getLastCellNum() is the last cell index PLUS ONE (-1 for a row
                // without cells), so the bound is exclusive.
                for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
                    HSSFCell aCell = aRow.getCell(cellNumOfRow);
                    if (aCell == null) {
                        continue;
                    }
                    switch (aCell.getCellType()) {
                        case HSSFCell.CELL_TYPE_NUMERIC:
                            buff.append(aCell.getNumericCellValue()).append('\t');
                            break;
                        case HSSFCell.CELL_TYPE_STRING:
                            buff.append(aCell.getStringCellValue()).append('\t');
                            break;
                        default:
                            // Formula cells and every other cell type are skipped.
                            break;
                    }
                }
                buff.append('\n');
            }
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException; best-effort: report and return what we have.
        e.printStackTrace();
    } finally {
        // Release the workbook first, then the underlying stream.
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return buff.toString();
}
读取EXCEL,xlsx格式
/**
 * Extracts the text of an OOXML Excel workbook (.xlsx) using POI's XSSF API.
 *
 * <p>Every sheet is walked row by row. Numeric and string cells are appended,
 * each followed by a tab, and every non-null row is terminated by a newline.
 * Formula cells and all other cell types contribute nothing to the output, and
 * skipped or missing cells emit no tab, so output columns are not guaranteed to
 * line up with the sheet's columns.
 *
 * <p>I/O failures are printed to standard error; whatever text was collected
 * up to that point (possibly the empty string) is returned.
 *
 * @param filePath path of the .xlsx file to read
 * @return the extracted text; never {@code null}
 */
@SuppressWarnings("deprecation")
public static String getTextFromExcel2007(String filePath) {
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder buff = new StringBuilder();
    FileInputStream fis = null;
    XSSFWorkbook wb = null;
    try {
        fis = new FileInputStream(filePath);
        wb = new XSSFWorkbook(fis);
        for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
            XSSFSheet aSheet = wb.getSheetAt(numSheets);
            if (aSheet == null) {
                continue;
            }
            // getLastRowNum() is a 0-based index, so the bound is inclusive.
            for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
                XSSFRow aRow = aSheet.getRow(rowNumOfSheet);
                if (aRow == null) {
                    continue;
                }
                // getLastCellNum() is the last cell index PLUS ONE (-1 for a row
                // without cells), so the bound is exclusive.
                for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
                    XSSFCell aCell = aRow.getCell(cellNumOfRow);
                    if (aCell == null) {
                        continue;
                    }
                    // The cell-type constants are read through XSSFCell so this
                    // method does not reach into the HSSF (.xls) classes.
                    switch (aCell.getCellType()) {
                        case XSSFCell.CELL_TYPE_NUMERIC:
                            buff.append(aCell.getNumericCellValue()).append('\t');
                            break;
                        case XSSFCell.CELL_TYPE_STRING:
                            buff.append(aCell.getStringCellValue()).append('\t');
                            break;
                        default:
                            // Formula cells and every other cell type are skipped.
                            break;
                    }
                }
                buff.append('\n');
            }
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException; best-effort: report and return what we have.
        e.printStackTrace();
    } finally {
        // Release the workbook first, then the underlying stream.
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return buff.toString();
}
读取PDF文件jar引用
<dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>1.8.13</version> </dependency>
读取WORD文件jar引用
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.16-beta1</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.16-beta1</version> </dependency>
读取EXCEL文件jar引用
<!-- EXCEL -->
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16-beta1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16-beta1</version>
</dependency>
读取WORD文件方法
/**
 * Extracts the text of a Word document using POI's {@code WordExtractor}.
 *
 * <p>I/O failures are printed to standard error and {@code null} is returned.
 *
 * @param filePath path of the Word file to read
 * @return the document text, or {@code null} if the file could not be opened or read
 */
public static String getTextFromWord(String filePath) {
    String result = null;
    FileInputStream fis = null;
    WordExtractor wordExtractor = null;
    try {
        fis = new FileInputStream(filePath);
        wordExtractor = new WordExtractor(fis);
        result = wordExtractor.getText();
    } catch (IOException e) {
        // Also covers FileNotFoundException; best-effort: report and return null.
        e.printStackTrace();
    } finally {
        // Release the extractor first, then the underlying stream.
        if (wordExtractor != null) {
            try {
                wordExtractor.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return result;
}
读取PDF文件方法
/**
 * Reads the text content of a PDF file.
 *
 * <p>The file is parsed with {@code PDFParser} and its text is pulled out with
 * a {@code PDFTextStripper}. I/O failures are printed to standard error and
 * {@code null} is returned.
 *
 * @param filePath path of the PDF file to read
 * @return the extracted text, or {@code null} if the file could not be opened or read
 */
public static String getTextFromPdf(String filePath) {
    PDDocument pdf = null;
    FileInputStream input = null;
    String text = null;
    try {
        input = new FileInputStream(filePath);
        PDFParser pdfParser = new PDFParser(input);
        pdfParser.parse();
        pdf = pdfParser.getPDDocument();
        text = new PDFTextStripper().getText(pdf);
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so one handler covers both cases.
        e.printStackTrace();
    } finally {
        // The stream is closed before the document.
        if (null != input) {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (null != pdf) {
            try {
                pdf.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return text;
}
读取EXCEL,xls格式
/**
 * Extracts the text of a legacy binary Excel workbook (.xls) using POI's HSSF API.
 *
 * <p>Every sheet is walked row by row. Numeric and string cells are appended,
 * each followed by a tab, and every non-null row is terminated by a newline.
 * Formula cells and all other cell types contribute nothing to the output, and
 * skipped or missing cells emit no tab, so output columns are not guaranteed to
 * line up with the sheet's columns.
 *
 * <p>I/O failures are printed to standard error; whatever text was collected
 * up to that point (possibly the empty string) is returned.
 *
 * @param filePath path of the .xls file to read
 * @return the extracted text; never {@code null}
 */
@SuppressWarnings("deprecation")
public static String getTextFromExcel(String filePath) {
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder buff = new StringBuilder();
    FileInputStream fis = null;
    HSSFWorkbook wb = null;
    try {
        fis = new FileInputStream(filePath);
        wb = new HSSFWorkbook(fis);
        for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
            HSSFSheet aSheet = wb.getSheetAt(numSheets);
            if (aSheet == null) {
                continue;
            }
            // getLastRowNum() is a 0-based index, so the bound is inclusive.
            for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
                HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
                if (aRow == null) {
                    continue;
                }
                // getLastCellNum() is the last cell index PLUS ONE (-1 for a row
                // without cells), so the bound is exclusive.
                for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
                    HSSFCell aCell = aRow.getCell(cellNumOfRow);
                    if (aCell == null) {
                        continue;
                    }
                    switch (aCell.getCellType()) {
                        case HSSFCell.CELL_TYPE_NUMERIC:
                            buff.append(aCell.getNumericCellValue()).append('\t');
                            break;
                        case HSSFCell.CELL_TYPE_STRING:
                            buff.append(aCell.getStringCellValue()).append('\t');
                            break;
                        default:
                            // Formula cells and every other cell type are skipped.
                            break;
                    }
                }
                buff.append('\n');
            }
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException; best-effort: report and return what we have.
        e.printStackTrace();
    } finally {
        // Release the workbook first, then the underlying stream.
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return buff.toString();
}
读取EXCEL,xlsx格式
/**
 * Extracts the text of an OOXML Excel workbook (.xlsx) using POI's XSSF API.
 *
 * <p>Every sheet is walked row by row. Numeric and string cells are appended,
 * each followed by a tab, and every non-null row is terminated by a newline.
 * Formula cells and all other cell types contribute nothing to the output, and
 * skipped or missing cells emit no tab, so output columns are not guaranteed to
 * line up with the sheet's columns.
 *
 * <p>I/O failures are printed to standard error; whatever text was collected
 * up to that point (possibly the empty string) is returned.
 *
 * @param filePath path of the .xlsx file to read
 * @return the extracted text; never {@code null}
 */
@SuppressWarnings("deprecation")
public static String getTextFromExcel2007(String filePath) {
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder buff = new StringBuilder();
    FileInputStream fis = null;
    XSSFWorkbook wb = null;
    try {
        fis = new FileInputStream(filePath);
        wb = new XSSFWorkbook(fis);
        for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
            XSSFSheet aSheet = wb.getSheetAt(numSheets);
            if (aSheet == null) {
                continue;
            }
            // getLastRowNum() is a 0-based index, so the bound is inclusive.
            for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
                XSSFRow aRow = aSheet.getRow(rowNumOfSheet);
                if (aRow == null) {
                    continue;
                }
                // getLastCellNum() is the last cell index PLUS ONE (-1 for a row
                // without cells), so the bound is exclusive.
                for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
                    XSSFCell aCell = aRow.getCell(cellNumOfRow);
                    if (aCell == null) {
                        continue;
                    }
                    // The cell-type constants are read through XSSFCell so this
                    // method does not reach into the HSSF (.xls) classes.
                    switch (aCell.getCellType()) {
                        case XSSFCell.CELL_TYPE_NUMERIC:
                            buff.append(aCell.getNumericCellValue()).append('\t');
                            break;
                        case XSSFCell.CELL_TYPE_STRING:
                            buff.append(aCell.getStringCellValue()).append('\t');
                            break;
                        default:
                            // Formula cells and every other cell type are skipped.
                            break;
                    }
                }
                buff.append('\n');
            }
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException; best-effort: report and return what we have.
        e.printStackTrace();
    } finally {
        // Release the workbook first, then the underlying stream.
        if (wb != null) {
            try {
                wb.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return buff.toString();
}
相关文章推荐
- java--读取word,excel和pdf文档的包和读取方法
- Java如何操作Word, Excel, PDF文档(1)
- Java处理Word, Excel, PDF文档的4种开源系统的代码例子
- JAVA解析文件pdf、word、excel
- Java通过OpenOffice将word、ppt、excel、图片、txt文档转换成PDF
- 用java获得word,excel,pdf等文档的内容
- Java操作Word,Excel,PDF文档
- 用java获得word,excel,pdf文档的内容
- Java处理OA中常用的word文档,PDf,excel等问题---2
- Java处理Word, Excel, PDF文档的4种开源系统的代码例子
- 用java获得word,excel,pdf等文档的内容
- Java如何操作Word, Excel, PDF文档?
- Java如何操作Word, Excel, PDF文档(3)
- Java程序员从笨鸟到菜鸟之(一百零三)java操作office和pdf文件(一)java读取word,excel和pdf文档内容
- Java程序员从笨鸟到菜鸟之(一百零三)java操作office和pdf文件(一)java读取word,excel和pdf文档内容
- java 技术文档 用java获得word,excel,pdf等文档的内容
- JAVA中如何获得word,excel,pdf等文档的内容
- Java如何操作Word, Excel, PDF文档
- 如何用Java操作Word, Excel, PDF文档(转)
- Java处理OA中常用的word文档,PDf,excel等问题---1