解析html 生成word文档
2015-11-21 11:24
501 查看
工具:
jsoup+itext
iTextAsian.jar itext-rtf-2.1.7.jar iText-2.1.7.jar jsoup-1.8.3.jar
html内容
<p>
adfadfs<img src="/lab-app/pics/image/20151121/20151121105307_666.jpg" alt="" />
</p>
<p>
asdfadfasdf
</p>
<p>
<img src="/lab-app/pics/image/20151121/20151121105324_144.jpg" alt="" />
</p>
解析 HTML 标签:提取每个 <p> 中的文本和图片地址,生成字符串列表
/**
 * Flattens an HTML fragment into an ordered list of strings, one entry per
 * piece of paragraph text or image reference.
 *
 * <p>Only content inside {@code <p>} elements is considered. For each
 * paragraph, its text (if any) is emitted first, followed by one entry per
 * {@code <img>} with a non-empty {@code src}; image entries are prefixed with
 * {@code "dy_doc::img="} so a consumer can tell them apart from text. The
 * inline position of an image inside the paragraph text is not preserved.
 */
public class HtmlToStringUtils {

    /** Prefix that marks a list entry as an image reference rather than text. */
    private static final String IMAGE_MARKER = "dy_doc::img=";

    /**
     * Parses {@code content} and returns the text and image entries of its
     * {@code <p>} elements in document order.
     *
     * @param content HTML fragment to parse; {@code null} is treated as empty
     * @return a new mutable list, never {@code null}; empty when there is no
     *         paragraph content
     */
    public static List<String> htmlStringToList(String content) {
        List<String> htmls = new ArrayList<String>();
        if (content == null) {
            // Jsoup.parse rejects null input; treat a missing field as empty.
            return htmls;
        }
        Document doc = Jsoup.parse(content);
        for (Element paragraph : doc.getElementsByTag("p")) {
            // text() returns "" (not null) for image-only paragraphs, so test
            // for blank text to avoid emitting empty entries.
            String text = paragraph.text();
            if (text != null && text.trim().length() > 0) {
                htmls.add(text);
            }
            for (Element img : paragraph.getElementsByTag("img")) {
                String src = img.attr("src");
                if (src.length() > 0) {
                    htmls.add(IMAGE_MARKER + src);
                }
            }
        }
        return htmls;
    }
}
根据课件(Courseware)对象生成 Word 文档
/**
 * Exports a {@code Courseware} to a file named {@code <name>.doc}, written
 * through iText's {@code RtfWriter2}.
 *
 * <p>The output consists of a centered heading, two single-line fields and
 * four multi-entry sections whose entries come from
 * {@code HtmlToStringUtils.htmlStringToList}. Entries starting with
 * {@code "dy_doc::img="} are resolved to image files under {@code picpath};
 * all other entries are written as plain paragraphs.
 *
 * <p>NOTE(review): the single {@code Document} created in the constructor is
 * closed at the end of {@link #startCreateWord}, so an instance is presumably
 * good for one export only - confirm before reusing instances.
 */
public class CreateCoursewareToWord {

    /** Prefix marking a content entry as an image reference instead of text. */
    private static final String IMAGE_MARKER = "dy_doc::img=";

    /** URL fragment whose "image/..." tail is the path relative to picpath. */
    private static final String IMAGE_DIR_TOKEN = "/image/";

    /** iText alignment value for left-aligned paragraphs. */
    private static final int ALIGN_LEFT = 0;

    /** iText alignment value for centered paragraphs. */
    private static final int ALIGN_CENTER = 1;

    private final Document document;
    private final String picpath;
    private final String docpath;

    /**
     * @param picpath directory that contains the {@code image/...} tree
     *                referenced by image entries
     * @param docpath prefix for the output file; it is concatenated with the
     *                courseware name verbatim, so it must already end with a
     *                path separator
     */
    public CreateCoursewareToWord(String picpath, String docpath) {
        this.document = new Document(PageSize.A4);
        this.picpath = picpath;
        this.docpath = docpath;
    }

    /**
     * Writes the whole courseware to {@code docpath + name + ".doc"}.
     *
     * <p>A {@code FileNotFoundException} while opening the output file is
     * printed and the export is abandoned. The document and the output stream
     * are always closed, even when writing fails part-way.
     *
     * @param courseware the courseware to export
     */
    public void startCreateWord(Courseware courseware) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(docpath + courseware.getName() + ".doc");
            RtfWriter2.getInstance(document, out);
            document.open();
            writeHead(courseware.getName(), document);
            writeTitleToWord("111", courseware.getName(), document);
            writeTitleToWord("222", courseware.getCourse().getName(), document);
            writeMultiTitleToWord("333", HtmlToStringUtils.htmlStringToList(courseware.getDesc()), document);
            writeMultiTitleToWord("444", HtmlToStringUtils.htmlStringToList(courseware.getTarget()), document);
            writeMultiTitleToWord("555", HtmlToStringUtils.htmlStringToList(courseware.getDevice()), document);
            writeMultiTitleToWord("666", HtmlToStringUtils.htmlStringToList(courseware.getSteps()), document);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            try {
                document.close();
            } finally {
                // Closing the document normally closes the stream as well, but
                // not if the document was never opened; closing twice is harmless.
                closeQuietly(out);
            }
        }
    }

    /**
     * Adds a single left-aligned line of the form {@code title:content}.
     *
     * @param title    label placed before the colon
     * @param content  value placed after the colon; {@code null} is written as
     *                 an empty value instead of the literal text "null"
     * @param document target document
     */
    public void writeTitleToWord(String title, String content, Document document) {
        try {
            String value = content == null ? "" : content;
            Paragraph p = new Paragraph(title + ":" + value, newFont(13, Font.NORMAL));
            p.setAlignment(ALIGN_LEFT);
            document.add(p);
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds a bold section title followed by the section's entries.
     *
     * <p>Entries that start with {@code "dy_doc::img="} are inserted as
     * images; every other entry becomes a plain paragraph. An image that
     * cannot be resolved or loaded is reported and skipped so the remaining
     * entries are still written.
     *
     * @param title    section title
     * @param content  entries to write; {@code null} (list or entry) is skipped
     * @param document target document
     */
    public void writeMultiTitleToWord(String title, List<String> content, Document document) {
        try {
            Paragraph heading = new Paragraph(title, newFont(13, Font.BOLD));
            heading.setAlignment(ALIGN_LEFT);
            document.add(heading);
            if (content == null) {
                return;
            }
            for (String raw : content) {
                if (raw == null) {
                    continue;
                }
                String entry = raw.trim();
                if (entry.startsWith(IMAGE_MARKER)) {
                    addImage(entry.substring(IMAGE_MARKER.length()), document);
                } else {
                    document.add(new Paragraph(entry));
                }
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds the centered, bold document heading.
     *
     * @param head     heading text
     * @param document target document
     */
    public void writeHead(String head, Document document) {
        try {
            Paragraph p = new Paragraph(head, newFont(16, Font.BOLD));
            p.setAlignment(ALIGN_CENTER);
            document.add(p);
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /** Loads the image referenced by {@code src} and appends it; failures skip only this image. */
    private void addImage(String src, Document document) {
        String imagePath = resolveImagePath(src);
        if (imagePath == null) {
            System.err.println("Skipping image with unsupported src: " + src);
            return;
        }
        try {
            Image img = Image.getInstance(imagePath);
            // Kept from the original. NOTE(review): confirm whether the RTF
            // writer honours absolute positions at all.
            img.setAbsolutePosition(0, 0);
            document.add(img);
        } catch (Exception e) {
            // Best-effort boundary: a missing or corrupt image must not abort
            // the rest of the section.
            e.printStackTrace();
        }
    }

    /**
     * Maps an image URL such as {@code /app/pics/image/2015/a.jpg} to
     * {@code picpath/image/2015/a.jpg} using the platform separator.
     *
     * @return the file path, or {@code null} when {@code src} has no
     *         {@code /image/} part or contains a {@code ..} segment
     */
    private String resolveImagePath(String src) {
        int position = src.indexOf(IMAGE_DIR_TOKEN);
        if (position < 0) {
            return null;
        }
        // +1 drops the leading slash so the first segment is "image".
        String[] segments = src.substring(position + 1).split("/");
        StringBuilder path = new StringBuilder().append(picpath);
        for (String segment : segments) {
            if ("..".equals(segment)) {
                // src comes from user-authored HTML; refuse to leave picpath.
                return null;
            }
            path.append(File.separator).append(segment);
        }
        return path.toString();
    }

    /**
     * Creates the black font used for all headings and fields.
     * NOTE(review): the first constructor argument is the font family slot but
     * receives the style constant Font.NORMAL; kept unchanged to preserve the
     * generated output - confirm the intended family.
     */
    private static Font newFont(float size, int style) {
        return new Font(Font.NORMAL, size, style, new Color(0, 0, 0));
    }

    /** Closes {@code out} if it was opened, ignoring close failures. */
    private static void closeQuietly(FileOutputStream out) {
        if (out == null) {
            return;
        }
        try {
            out.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing the output file fails here.
        }
    }
}
jsoup+itext
iTextAsian.jar itext-rtf-2.1.7.jar iText-2.1.7.jar jsoup-1.8.3.jar
html内容
<p>
adfadfs<img src="/lab-app/pics/image/20151121/20151121105307_666.jpg" alt="" />
</p>
<p>
asdfadfasdf
</p>
<p>
<img src="/lab-app/pics/image/20151121/20151121105324_144.jpg" alt="" />
</p>
解析 HTML 标签:提取每个 <p> 中的文本和图片地址,生成字符串列表
/**
 * Flattens an HTML fragment into an ordered list of strings, one entry per
 * piece of paragraph text or image reference.
 *
 * <p>Only content inside {@code <p>} elements is considered. For each
 * paragraph, its text (if any) is emitted first, followed by one entry per
 * {@code <img>} with a non-empty {@code src}; image entries are prefixed with
 * {@code "dy_doc::img="} so a consumer can tell them apart from text. The
 * inline position of an image inside the paragraph text is not preserved.
 */
public class HtmlToStringUtils {

    /** Prefix that marks a list entry as an image reference rather than text. */
    private static final String IMAGE_MARKER = "dy_doc::img=";

    /**
     * Parses {@code content} and returns the text and image entries of its
     * {@code <p>} elements in document order.
     *
     * @param content HTML fragment to parse; {@code null} is treated as empty
     * @return a new mutable list, never {@code null}; empty when there is no
     *         paragraph content
     */
    public static List<String> htmlStringToList(String content) {
        List<String> htmls = new ArrayList<String>();
        if (content == null) {
            // Jsoup.parse rejects null input; treat a missing field as empty.
            return htmls;
        }
        Document doc = Jsoup.parse(content);
        for (Element paragraph : doc.getElementsByTag("p")) {
            // text() returns "" (not null) for image-only paragraphs, so test
            // for blank text to avoid emitting empty entries.
            String text = paragraph.text();
            if (text != null && text.trim().length() > 0) {
                htmls.add(text);
            }
            for (Element img : paragraph.getElementsByTag("img")) {
                String src = img.attr("src");
                if (src.length() > 0) {
                    htmls.add(IMAGE_MARKER + src);
                }
            }
        }
        return htmls;
    }
}
根据课件(Courseware)对象生成 Word 文档
/**
 * Exports a {@code Courseware} to a file named {@code <name>.doc}, written
 * through iText's {@code RtfWriter2}.
 *
 * <p>The output consists of a centered heading, two single-line fields and
 * four multi-entry sections whose entries come from
 * {@code HtmlToStringUtils.htmlStringToList}. Entries starting with
 * {@code "dy_doc::img="} are resolved to image files under {@code picpath};
 * all other entries are written as plain paragraphs.
 *
 * <p>NOTE(review): the single {@code Document} created in the constructor is
 * closed at the end of {@link #startCreateWord}, so an instance is presumably
 * good for one export only - confirm before reusing instances.
 */
public class CreateCoursewareToWord {

    /** Prefix marking a content entry as an image reference instead of text. */
    private static final String IMAGE_MARKER = "dy_doc::img=";

    /** URL fragment whose "image/..." tail is the path relative to picpath. */
    private static final String IMAGE_DIR_TOKEN = "/image/";

    /** iText alignment value for left-aligned paragraphs. */
    private static final int ALIGN_LEFT = 0;

    /** iText alignment value for centered paragraphs. */
    private static final int ALIGN_CENTER = 1;

    private final Document document;
    private final String picpath;
    private final String docpath;

    /**
     * @param picpath directory that contains the {@code image/...} tree
     *                referenced by image entries
     * @param docpath prefix for the output file; it is concatenated with the
     *                courseware name verbatim, so it must already end with a
     *                path separator
     */
    public CreateCoursewareToWord(String picpath, String docpath) {
        this.document = new Document(PageSize.A4);
        this.picpath = picpath;
        this.docpath = docpath;
    }

    /**
     * Writes the whole courseware to {@code docpath + name + ".doc"}.
     *
     * <p>A {@code FileNotFoundException} while opening the output file is
     * printed and the export is abandoned. The document and the output stream
     * are always closed, even when writing fails part-way.
     *
     * @param courseware the courseware to export
     */
    public void startCreateWord(Courseware courseware) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(docpath + courseware.getName() + ".doc");
            RtfWriter2.getInstance(document, out);
            document.open();
            writeHead(courseware.getName(), document);
            writeTitleToWord("111", courseware.getName(), document);
            writeTitleToWord("222", courseware.getCourse().getName(), document);
            writeMultiTitleToWord("333", HtmlToStringUtils.htmlStringToList(courseware.getDesc()), document);
            writeMultiTitleToWord("444", HtmlToStringUtils.htmlStringToList(courseware.getTarget()), document);
            writeMultiTitleToWord("555", HtmlToStringUtils.htmlStringToList(courseware.getDevice()), document);
            writeMultiTitleToWord("666", HtmlToStringUtils.htmlStringToList(courseware.getSteps()), document);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            try {
                document.close();
            } finally {
                // Closing the document normally closes the stream as well, but
                // not if the document was never opened; closing twice is harmless.
                closeQuietly(out);
            }
        }
    }

    /**
     * Adds a single left-aligned line of the form {@code title:content}.
     *
     * @param title    label placed before the colon
     * @param content  value placed after the colon; {@code null} is written as
     *                 an empty value instead of the literal text "null"
     * @param document target document
     */
    public void writeTitleToWord(String title, String content, Document document) {
        try {
            String value = content == null ? "" : content;
            Paragraph p = new Paragraph(title + ":" + value, newFont(13, Font.NORMAL));
            p.setAlignment(ALIGN_LEFT);
            document.add(p);
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds a bold section title followed by the section's entries.
     *
     * <p>Entries that start with {@code "dy_doc::img="} are inserted as
     * images; every other entry becomes a plain paragraph. An image that
     * cannot be resolved or loaded is reported and skipped so the remaining
     * entries are still written.
     *
     * @param title    section title
     * @param content  entries to write; {@code null} (list or entry) is skipped
     * @param document target document
     */
    public void writeMultiTitleToWord(String title, List<String> content, Document document) {
        try {
            Paragraph heading = new Paragraph(title, newFont(13, Font.BOLD));
            heading.setAlignment(ALIGN_LEFT);
            document.add(heading);
            if (content == null) {
                return;
            }
            for (String raw : content) {
                if (raw == null) {
                    continue;
                }
                String entry = raw.trim();
                if (entry.startsWith(IMAGE_MARKER)) {
                    addImage(entry.substring(IMAGE_MARKER.length()), document);
                } else {
                    document.add(new Paragraph(entry));
                }
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds the centered, bold document heading.
     *
     * @param head     heading text
     * @param document target document
     */
    public void writeHead(String head, Document document) {
        try {
            Paragraph p = new Paragraph(head, newFont(16, Font.BOLD));
            p.setAlignment(ALIGN_CENTER);
            document.add(p);
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /** Loads the image referenced by {@code src} and appends it; failures skip only this image. */
    private void addImage(String src, Document document) {
        String imagePath = resolveImagePath(src);
        if (imagePath == null) {
            System.err.println("Skipping image with unsupported src: " + src);
            return;
        }
        try {
            Image img = Image.getInstance(imagePath);
            // Kept from the original. NOTE(review): confirm whether the RTF
            // writer honours absolute positions at all.
            img.setAbsolutePosition(0, 0);
            document.add(img);
        } catch (Exception e) {
            // Best-effort boundary: a missing or corrupt image must not abort
            // the rest of the section.
            e.printStackTrace();
        }
    }

    /**
     * Maps an image URL such as {@code /app/pics/image/2015/a.jpg} to
     * {@code picpath/image/2015/a.jpg} using the platform separator.
     *
     * @return the file path, or {@code null} when {@code src} has no
     *         {@code /image/} part or contains a {@code ..} segment
     */
    private String resolveImagePath(String src) {
        int position = src.indexOf(IMAGE_DIR_TOKEN);
        if (position < 0) {
            return null;
        }
        // +1 drops the leading slash so the first segment is "image".
        String[] segments = src.substring(position + 1).split("/");
        StringBuilder path = new StringBuilder().append(picpath);
        for (String segment : segments) {
            if ("..".equals(segment)) {
                // src comes from user-authored HTML; refuse to leave picpath.
                return null;
            }
            path.append(File.separator).append(segment);
        }
        return path.toString();
    }

    /**
     * Creates the black font used for all headings and fields.
     * NOTE(review): the first constructor argument is the font family slot but
     * receives the style constant Font.NORMAL; kept unchanged to preserve the
     * generated output - confirm the intended family.
     */
    private static Font newFont(float size, int style) {
        return new Font(Font.NORMAL, size, style, new Color(0, 0, 0));
    }

    /** Closes {@code out} if it was opened, ignoring close failures. */
    private static void closeQuietly(FileOutputStream out) {
        if (out == null) {
            return;
        }
        try {
            out.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing the output file fails here.
        }
    }
}
相关文章推荐
- HTML之基本布局设计之三栏式、两栏式设计
- HTML 5 断点续上传
- PHPCMS V9 使str_cut可以输出html源代码
- Html中有关定位于浮动
- 如何在html文件中导入header、footer等
- html中a标签的跳转问题
- html基础试题
- HTML特殊符号对照表、常用的字符实体
- HtmlAgilityPack 抓取网页信息
- C#关于iTextSharp将html转换为pdf不支持中文问题
- canvas-6font.html
- canvas-5Bezier-QuadraticCurveTo.html
- canvas-4fillstyle.html
- canvas-4createPattern.html
- canvas-3radialGradient.html
- canvas-3linearGradient.html
- canvas-2arcTo.html
- canvas-star7.html
- canvas-star6.html
- canvas-star6-drawMoon.html