您的位置:首页 > 编程语言 > Java开发

JAVA解析PDF、WORD、EXCEL文档

2017-01-05 17:54 501 查看
java解析pdf、doc、docx、xls、xlsx格式文档

读取PDF文件jar引用

<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>1.8.13</version>
</dependency>


读取WORD文件jar引用



<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.16-beta1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16-beta1</version>
</dependency>


读取EXCEL文件jar引用

<!-- EXCEL -->
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16-beta1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16-beta1</version>
</dependency>

读取WORD文件方法



/**
*
* @Title: getTextFromWord
* @Description: 读取word
* @param filePath
*            文件路径
* @return: String 读出的Word的内容
*/
public static String getTextFromWord(String filePath) {
String result = null;
File file = new File(filePath);
FileInputStream fis = null;
try {
fis = new FileInputStream(file);
@SuppressWarnings("resource")
WordExtractor wordExtractor = new WordExtractor(fis);
result = wordExtractor.getText();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (fis != null) {
try {
fis.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}




读取PDF文件方法


/**
*
* @Title: getTextFromPdf
* @Description: 读取pdf文件内容
* @param filePath
* @return: 读出的pdf的内容
*/
public static String getTextFromPdf(String filePath) {
String result = null;
FileInputStream is = null;
PDDocument document = null;
try {
is = new FileInputStream(filePath);
PDFParser parser = new PDFParser(is);
parser.parse();
document = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
result = stripper.getText(document);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (document != null) {
try {
document.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}

读取EXCEL,xls格式

/**
* @param filePath
* 文件路径
* @return 读出的Excel的内容
*/
@SuppressWarnings({"resource", "deprecation"})
public static String getTextFromExcel(String filePath) {
StringBuffer buff = new StringBuffer();
try {
// 创建对Excel工作簿文件的引用
HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));
// 创建对工作表的引用。
for (int numSheets = 0; numSheets < wb
.getNumberOfSheets(); numSheets++) {
if (null != wb.getSheetAt(numSheets)) {
HSSFSheet aSheet = wb.getSheetAt(numSheets);// 获得一个sheet
for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet
.getLastRowNum(); rowNumOfSheet++) {
if (null != aSheet.getRow(rowNumOfSheet)) {
HSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 获得一个行
for (int cellNumOfRow = 0; cellNumOfRow <= aRow
.getLastCellNum(); cellNumOfRow++) {
if (null != aRow.getCell(cellNumOfRow)) {
HSSFCell aCell = aRow.getCell(cellNumOfRow);// 获得列值
switch (aCell.getCellType()) {
case HSSFCell.CELL_TYPE_FORMULA :
break;
case HSSFCell.CELL_TYPE_NUMERIC :
buff.append(
aCell.getNumericCellValue())
.append('\t');
break;
case HSSFCell.CELL_TYPE_STRING :
buff.append(
aCell.getStringCellValue())
.append('\t');
break;
}
}
}
buff.append('\n');
}
}
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return buff.toString();
}
读取EXCEL,xlxs格式

@SuppressWarnings("deprecation")
public static String getTextFromExcel2007(String filePath) {
StringBuffer buff = new StringBuffer();
try {
// 创建对Excel工作簿文件的引用
@SuppressWarnings("resource")
XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(filePath));
// 创建对工作表的引用。
for (int numSheets = 0; numSheets < wb
.getNumberOfSheets(); numSheets++) {
if (null != wb.getSheetAt(numSheets)) {
XSSFSheet aSheet = wb.getSheetAt(numSheets);// 获得一个sheet
for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet
.getLastRowNum(); rowNumOfSheet++) {
if (null != aSheet.getRow(rowNumOfSheet)) {
XSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 获得一个行
for (int cellNumOfRow = 0; cellNumOfRow <= aRow
.getLastCellNum(); cellNumOfRow++) {
if (null != aRow.getCell(cellNumOfRow)) {
XSSFCell aCell = aRow.getCell(cellNumOfRow);// 获得列值
switch (aCell.getCellType()) {
case HSSFCell.CELL_TYPE_FORMULA :
break;
case HSSFCell.CELL_TYPE_NUMERIC :
buff.append(
aCell.getNumericCellValue())
.append('\t');
break;
case HSSFCell.CELL_TYPE_STRING :
buff.append(
aCell.getStringCellValue())
.append('\t');
break;
}
}
}
buff.append('\n');
}
}
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return buff.toString();
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息