POI实现word文档转html文件
2016-09-30 17:43
375 查看
[b]POI word文件转html[/b]
package com.feiruo.officeConvert;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.poi.hwpf.usermodel.Picture;

/**
 * Base class for office-document converters.
 *
 * <p>It holds the converted content together with the output directory, the
 * image directory and the output encoding, and knows how to write the
 * converted content to disk. Subclasses supply the actual conversion in
 * {@link #convert(String)}.
 */
public abstract class OfficeConvert {

    /** Location where images are stored. */
    private String imgPath = null;

    /** Location where the output file is stored. */
    private String parentPath = null;

    /** Converted file content. */
    private String fileContent = null;

    /** Character encoding used when writing the output file. */
    private String encode = "UTF-8";

    /**
     * Converts the given document into the target format.
     *
     * @param docPath path of the document to convert
     * @throws FileNotFoundException see the implementing class
     * @throws IOException see the implementing class
     * @throws ParserConfigurationException see the implementing class
     * @throws TransformerException see the implementing class
     */
    public abstract void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException;

    /**
     * Writes the current file content to disk using the configured encoding.
     *
     * <p>The configured parent directory (if any) and the directory that will
     * contain {@code filepath} are created when missing. I/O failures are
     * reported on standard error and do not propagate.
     *
     * @param filepath path of the file to write
     * @throws IllegalStateException if no file content has been set yet
     */
    public void writeFile(String filepath) {
        if (fileContent == null) {
            // Fail before touching the disk instead of leaving an empty file behind.
            throw new IllegalStateException(
                    "No content to write: call convert(...) or setFileContent(...) first");
        }

        if (parentPath != null) {
            File parentDir = new File(parentPath);
            if (!parentDir.exists()) {
                parentDir.mkdirs();
            }
        }

        File target = new File(filepath);
        // filepath is not required to live under parentPath, so make sure its own directory exists too.
        File targetDir = target.getAbsoluteFile().getParentFile();
        if (targetDir != null && !targetDir.exists()) {
            targetDir.mkdirs();
        }

        // Two resources so the file stream is closed even if the encoding name is rejected.
        try (FileOutputStream fos = new FileOutputStream(target);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, encode))) {
            writer.write(fileContent);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * Normalizes a directory path before it is stored.
     *
     * <p>The path is trimmed, every double quote and every {@code *} character
     * is removed, and a trailing {@code /} is appended when the result is
     * non-empty and does not already end with one. Angle brackets are left
     * untouched.
     *
     * @param path raw path; may be {@code null}
     * @return the normalized path, or {@code null} when {@code path} is {@code null}
     */
    public String checkSetPath(String path) {
        if (path == null) {
            return null;
        }
        // String.replace is literal; the regex-based replaceAll("/*", "") would strip slashes instead.
        String cleaned = path.trim().replace("\"", "").replace("*", "");
        if (!cleaned.isEmpty() && !cleaned.endsWith("/")) {
            cleaned += "/";
        }
        return cleaned;
    }

    /**
     * Returns the output encoding.
     *
     * @return the encoding name
     */
    public String getEncode() {
        return encode;
    }

    /**
     * Sets the output encoding.
     *
     * @param encode the encoding name
     */
    public void setEncode(String encode) {
        this.encode = encode;
    }

    /**
     * Returns the image storage path.
     *
     * @return the normalized image path, or {@code null} if none was set
     */
    public String getImgPath() {
        return imgPath;
    }

    /**
     * Sets the folder in which images are stored.
     *
     * @param imgPath image folder; normalized with {@link #checkSetPath(String)}
     */
    public void setImgPath(String imgPath) {
        this.imgPath = checkSetPath(imgPath);
    }

    /**
     * Returns the directory in which the output file is stored.
     *
     * @return the normalized directory path, or {@code null} if none was set
     */
    public String getParentPath() {
        return parentPath;
    }

    /**
     * Sets the directory in which the output file is stored.
     *
     * @param parentPath output directory; normalized with {@link #checkSetPath(String)}
     */
    public void setParentPath(String parentPath) {
        this.parentPath = checkSetPath(parentPath);
    }

    /**
     * Returns the converted file content.
     *
     * @return the content, or {@code null} if none was set
     */
    public String getFileContent() {
        return fileContent;
    }

    /**
     * Sets the converted file content.
     *
     * @param content the content that {@link #writeFile(String)} will write
     */
    public void setFileContent(String content) {
        this.fileContent = content;
    }
}
package com.feiruo.officeConvert;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Converts a *.doc document into an *.html file.
 *
 * @author Jdk.feiruo.
 * @since JDK 1.7 POI 3.8
 * @version 1.0
 */
public class DocToHtml extends OfficeConvert implements IOfficeConvert {

    /** Opening of an image tag as it is matched in the serialized HTML. */
    private static final String IMG_SRC_PREFIX = "<img src=\"";

    /** Pictures extracted by the last {@link #convert(String)} call; {@code null} before that. */
    private List<Picture> pics = null;

    /**
     * Creates a converter with its output locations and encoding configured.
     *
     * @param parentPath directory in which the html file is stored
     * @param imageppth directory (relative to {@code parentPath}) in which images are stored
     * @param encoding encoding of the generated html
     */
    public DocToHtml(String parentPath, String imageppth, String encoding) {
        // The setters already normalize through checkSetPath.
        setParentPath(parentPath);
        setImgPath(imageppth);
        setEncode(encoding);
    }

    /** Creates a converter with no paths configured and the default encoding. */
    public DocToHtml() {
    }

    /**
     * Converts a *.doc document to html and keeps the result in memory.
     *
     * <p>The generated markup is stored via {@link #setFileContent(String)} and
     * the document's pictures are remembered so that
     * {@link #writeWithName(String)} can save them. When an image path is
     * configured it is prepended to every {@code <img src="...">} value.
     *
     * @param docPath location of the *.doc document
     * @throws FileNotFoundException if {@code docPath} cannot be opened
     * @throws IOException if reading the document or decoding the output fails
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException if serializing the html DOM fails
     */
    @Override
    public void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException {
        Document htmlDocument;
        // try-with-resources guarantees the input stream is closed on every path.
        try (FileInputStream in = new FileInputStream(docPath)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    // Only the name is needed here; the bytes are written later by writeWithName.
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);
            pics = wordDocument.getPicturesTable().getAllPictures();
            htmlDocument = wordToHtmlConverter.getDocument();
        }

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, this.getEncode());
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same charset the serializer wrote, not the platform default.
        String htmlContent = out.toString(this.getEncode());

        String imgPath = getImgPath();
        if (imgPath != null) {
            // Literal replace: the path must not be interpreted as a regex replacement ($, \).
            htmlContent = htmlContent.replace(IMG_SRC_PREFIX, IMG_SRC_PREFIX + imgPath);
        }
        setFileContent(htmlContent);
    }

    /**
     * Saves the pictures of the converted document, then the html file itself.
     *
     * <p>Pictures go into the image directory below the parent directory; the
     * html is written to {@code <parentPath><fileName>.html}. A picture that
     * cannot be written is reported on standard error and skipped.
     *
     * @param fileName name of the html file, without extension
     */
    @Override
    public void writeWithName(String fileName) {
        String parent = getParentPath() == null ? "" : getParentPath();

        // Save the pictures first so the html never references missing files.
        if (pics != null && !pics.isEmpty()) {
            String images = getImgPath() == null ? "" : getImgPath();
            File imgDir = new File(parent + images);
            if (!imgDir.exists()) {
                imgDir.mkdirs();
            }
            for (Picture pic : pics) {
                File target = new File(imgDir, pic.suggestFullFileName());
                try (FileOutputStream imgOut = new FileOutputStream(target)) {
                    pic.writeImageContent(imgOut);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        // Save the html source file.
        this.writeFile(parent + fileName + ".html");
    }
}
package com.feiruo.Test;

import java.io.FileNotFoundException;
import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import com.feiruo.officeConvert.DocToHtml;

/**
 * Small manual driver: converts {@code D://test//test.doc} and writes the
 * result as {@code feiruo.html} under {@code C://test}, with images in the
 * {@code f} sub-folder.
 */
public class Test {

    /**
     * Runs the sample conversion.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new Test();
    }

    /**
     * Performs the sample conversion. Nothing is written when the conversion
     * fails; the failure is reported on standard error instead.
     */
    public Test() {
        DocToHtml dth = new DocToHtml("C://test", "f", "UTF-8");
        try {
            dth.convert("D://test//test.doc");
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            // IOException also covers FileNotFoundException.
            e.printStackTrace();
            // Without converted content there is nothing to write.
            return;
        }
        dth.writeWithName("feiruo");
    }
}
package com.feiruo.officeConvert;

/**
 * Contract for converters that can write their output to disk under a given
 * file name.
 */
public interface IOfficeConvert {

    /**
     * Writes the file to disk.
     *
     * @param fileName name of the file to write
     */
    void writeWithName(String fileName);
}
相关文章推荐
- Java引用POI实现Word转Html方法
- C# web实现word 转Html、office转Html、pdf转图片 在线预览文件
- asp.net 实现在线浏览word文档(word转html)
- Java:封装POI实现word的docx文件的简单模板功能
- 利用POI将word转换成html实现在线阅读
- 将WORD文档转换成为HTML网页文件的C#代码
- 利用POI将word转换成html实现在线阅读
- java html内容生成word文件实现代码
- java+poi实现word转html显示
- poi转excel,word文档为html
- Java引用POI实现Word转Html方法
- 利用Office2003实现PDF文件转Word文档
- poi实现word文档的导入(针对.doc .docx rtf)
- php实现将上传word文件转为html的方法
- apache poi操作office文档---- POI Word DOC格式转Html
- asp.net导出excel-一行代码实现excel、xml、pdf、word、html、csv等7种格式文件导出功能而且美观-SNF快速开发平台
- java+poi实现word转html显示
- poi操作word文档文件操作
- (四) POI word 转 html 文档
- php实现word转html文档卡死的问题