您的位置：首页 > Web前端 > HTML

POI 将Excel2003，Excel2007，Word2003，Word2007转换为html

2016-09-20 00:00 351 查看

上一篇写了关于解析ppt的内容，这一篇是关于Excel、Word的。其实用poi解析Excel是非常好用的，参考了网上大神的东西，自己添加修改了些东西，都是写代码的苦命兄弟，拿出来共同参考下，有意见大家指正。遇到的问题是，如果用json将这些html代码返回到页面是不行的，因为json不支持html格式输出，折衷的办法是通过encodeURI编码，然后decodeURI解码，但是全篇解码会有问题，有些字符如“=”，“；”等无法解析完全。所以不太建议用json，如果非用不可，最好手动解码（一听就知道是个很痛苦的事情），但还是会让html有瑕疵。

package com.ysy.officeRead.controller;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import java.io.OutputStreamWriter;
import java.io.StringWriter;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

public class OfficeBeRead {

/**
*url：标示上传文件在服务器本地的全路径，用来创建图片储存文件夹，使用uuID作为文件夹名称，挺恶心的事情
*projectPath：文件在服务器上的路径
*/
public String poiWord2003ToHtml(String url, String projectPath) {
String pathString = url.substring(0, url.lastIndexOf("."));
String proString2 = projectPath.substring(0, projectPath.lastIndexOf("."))+"/";
String file = "1.doc";
String content = "";

//创建文件夹

try {

InputStream inputStream = new FileInputStream(url);
HWPFDocument worDocument = new HWPFDocument(inputStream);

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory
.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager() {

public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
float widthInches, float heightInches) {
// TODO Auto-generated method stub
return suggestedName;
}
});

wordToHtmlConverter.processDocument(worDocument);
List pics = worDocument.getPicturesTable().getAllPictures();
if(pics!=null){
for (int i = 0; i < pics.size(); i++) {
Picture picture = (Picture) pics.get(i);

File file2 = new File(pathString,picture.suggestFullFileName());
if(!file2.exists()&&!file2.isDirectory()){
file2.getParentFile().mkdirs();
file2.createNewFile();
}
picture.writeImageContent(new FileOutputStream(pathString+"/"+picture.suggestFullFileName()));
}
}

Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outputStream);

TransformerFactory tfFactory = TransformerFactory.newInstance();
Transformer serializer = tfFactory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outputStream.close();

content = new String(outputStream.toByteArray());

//图片路径替换

FileUtils.write(new File(pathString, "1.html"), content, "utf-8");

content = replaceAllStr(content, proString2);

} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}

return content;

}

/**
* url为文件上传后所在路径
* projectPath 为文件所在项目下的访问路径
*/
public String poiWord2007ToHtml(String url,String projectPath){

String sourceFileNameString = url; //目标文件路径
String imagePathString = url.substring(0, url.lastIndexOf("."));
String targetFileNameString = imagePathString+"1.html";

String proString2 = projectPath.substring(0, projectPath.lastIndexOf("."))+"/";

String out = "";
FileOutputStream outputStream = null;
OutputStreamWriter outputStreamWriter = null;
try {
XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileNameString));
XHTMLOptions options = XHTMLOptions.create();
//存放图片的文件夹
options.setExtractor(new FileImageExtractor(new File(imagePathString)));
//html中图片的路径
options.URIResolver(new BasicURIResolver("/"));

File file2 = new File(targetFileNameString);
if(!file2.exists()&&!file2.isDirectory()){
file2.getParentFile().mkdirs();
file2.createNewFile();
}

outputStream = new FileOutputStream(targetFileNameString);
outputStreamWriter = new OutputStreamWriter(outputStream);
XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
xhtmlConverter.convert(document, outputStreamWriter, options);

FileInputStream file = new FileInputStream( new File(targetFileNameString));
// size 为字串的长度，这里一次性读完

int size=file.available();

byte[] buffer=new byte[size];

file.read(buffer);

file.close();

out=new String(buffer);

//这是用来解决生成的汉字是Uncio十进制码的
out = StringEscapeUtils.unescapeHtml(out);

System.out.println(out);

out = replaceAllStr(out, proString2);

} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}finally{
if(outputStream != null){
try {
outputStream.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

if(outputStreamWriter != null){
try {
outputStreamWriter.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

}

return out;

}

/* public static void main(String[] args) {
System.out.println(new OfficeBeRead().poiWord2003ToHtml());
}*/

/**
*替换字符串中指定字符位置添加指定的字符串
*
*在此用来替换图片源路径
*
*/
public String replaceAllStr(String content,String imgurl){

String[] ss = content.split("<img src="+"\"");
String sssString = "";
if (ss.length>1) {
for (int i = 0; i < ss.length-1; i++) {
sssString = sssString+ss[i]+"<img src="+"\""+imgurl;
}

sssString = sssString + ss[ss.length-1];
}

return sssString;

}

/**
*poi将Excel转换为html
*该方法无法解析图片
*
*/
public String PoiExcel2003ToHtml(String url,String projectPath){

File excelFile = new File(url);

InputStream iStream = null;
FileOutputStream outputStream = null;
StringWriter writer = null;
String imagePathString = url.substring(0, url.lastIndexOf("."));
String htmlFile = imagePathString+"1.html";
File htmlfile2 = new File(htmlFile);
File filep = new File(htmlfile2.getParent());
String content = "";
try {
if(excelFile.exists()){
if(!filep.exists()){
filep.mkdirs();
}
iStream = new FileInputStream(excelFile); //初始化文件
HSSFWorkbook workbook = new HSSFWorkbook(iStream);
ExcelToHtmlConverter converter = new ExcelToHtmlConverter(DocumentBuilderFactory
.newInstance().newDocumentBuilder().newDocument());
converter.processWorkbook(workbook);

writer = new StringWriter();
Transformer serializer = TransformerFactory.newInstance().newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(
new DOMSource(converter.getDocument()),
new StreamResult(writer));
outputStream = new FileOutputStream(htmlFile);
outputStream.write(writer.toString().getBytes("UTF-8"));

FileInputStream fis = new FileInputStream(htmlfile2); //获取html文件输入流

int size = fis.available();
byte[] buffer=new byte[size];

fis.read(buffer);

fis.close();

content = new String(buffer);
System.out.println(content);

outputStream.flush();
outputStream.close();
writer.close();

}

} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
} finally{
if(iStream != null){
try {
iStream.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
if(outputStream != null){
try {
outputStream.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
if(writer!=null){
try {
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

}

return content;

}

/**
* POI 解析Excel2007版，生成HTML
* @param fileName 文件(含地址)
* @return 解析出来的HTML页面String
*/
public String PoiExcel2007ToHtml(String url,String projectPath){
StringBuffer content = new StringBuffer();
XSSFWorkbook xwb = null;
try{
// 构造 XSSFWorkbook 对象，strPath 传入文件路径
xwb = new XSSFWorkbook(url);
content.append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><title>Parse Excel With POI</title></head><body>");
// 循环工作表Sheet
for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) {
XSSFSheet xSheet = xwb.getSheetAt(numSheet);
if (xSheet == null) {
continue;
}

content.append("<h3 valign='middle' align='center'>"+xSheet.getSheetName()+"</h3>");

content.append("<table valign='middle' align='center' border=1 cellspacing=0 cellpadding=1>");

// 循环行Row
for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) {
XSSFRow xRow = xSheet.getRow(rowNum);
if (xRow == null) {
continue;
}
content.append("<tr align='middle'>");
// 循环列Cell
for (int cellNum = 0; cellNum <= xRow.getLastCellNum(); cellNum++) {
XSSFCell xCell = xRow.getCell(cellNum);
if (xCell == null || "".equals(xCell)) {
content.append("<td>").append(" ").append("</td>");
}else if (xCell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) {
content.append("<td>").append(" ").append(xCell.getBooleanCellValue()).append("</td>");
} else if (xCell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {
content.append("<td>").append(" ").append(this.doubleToString(xCell.getNumericCellValue())).append("</td>");
} else{
content.append("<td>").append(" ").append(xCell.getStringCellValue()).append("</td>");
}
}
content.append("</tr>");
}
content.append("</table>");
}
content.append("</body></html>");

}catch(Exception e){
e.printStackTrace();
System.out.println("POI解析Excel2007错误");
}
return content.toString();
}

/**
* change double variable into string type
* @param d
* @return
*/
public String doubleToString(double d){
String str = Double.valueOf(d).toString();
String temp = str;
String result = "";
if(str.indexOf("E")>2)
result = str.substring(0,1) + temp.substring(2, str.indexOf("E"));
else{
if(str.indexOf(".0")>0)
result = str.substring(0,str.indexOf(".0")) ;
else
result = str;
}
return result;
}
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： poi word2003 word2007 excle2003 excle2007

相关文章推荐

新的分享

章节导航