您的位置:首页 > 运维架构 > 网站架构

解决一些网站文字不能复制的问题——Java 应用

2015-12-23 01:13 489 查看
package we;

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.InputStreamReader;

import java.io.OutputStreamWriter;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Command-line utility that pulls the inner content of every {@code <p>…</p>}
 * element out of a saved HTML page and writes it to a text file, one paragraph
 * per line. Paragraphs whose markup starts with {@code <p><img} or
 * {@code <p><em>} are left out.
 */
public class Copy {

    /**
     * Matches one paragraph element on a single line; group 1 is its inner content.
     * NOTE(review): {@code <p.*?>} also accepts other tags beginning with "p"
     * (e.g. {@code <pre>}); kept as in the original expression.
     */
    private static final Pattern PARAGRAPH = Pattern.compile("<p.*?>(.*?)</p>");

    /** Opening markup of a paragraph that directly wraps an image; such paragraphs are skipped. */
    private static final String IMAGE_PARAGRAPH_PREFIX = "<p><img";

    /** Opening markup of a paragraph that directly wraps emphasised text; such paragraphs are skipped. */
    private static final String EMPHASIS_PARAGRAPH_PREFIX = "<p><em>";

    /** Line terminator appended after every extracted paragraph. */
    private static final String LINE_END = "\r\n";

    /**
     * Reads {@code H:\333.htm} line by line and writes the extracted paragraph
     * text to {@code 321.txt} (UTF-8) in the working directory.
     *
     * @param args ignored
     * @throws Exception if the source cannot be read or the target cannot be written
     */
    public static void main(String[] args) throws Exception {
        File source = new File("H:\\333.htm");
        File desc = new File("321.txt");

        // try-with-resources closes (and flushes) both streams even when reading
        // or writing fails part-way through.
        // NOTE(review): the input is decoded with the platform default charset, as
        // before; confirm the encoding of the saved page and pass it explicitly.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                     new FileInputStream(source)));
             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(desc), "UTF-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                out.write(extractParagraphs(line));
            }
        }
    }

    /**
     * Extracts the inner content of every paragraph element found in one line of HTML.
     *
     * <p>The skip decision is made on the matched paragraph itself, so a line that
     * holds several paragraphs is filtered paragraph by paragraph rather than by
     * what appears elsewhere on the line.
     *
     * @param line a single line of HTML; must not be {@code null}
     * @return the content of each kept paragraph followed by {@code "\r\n"},
     *         concatenated in document order; empty when nothing is kept
     */
    static String extractParagraphs(String line) {
        StringBuilder extracted = new StringBuilder();
        Matcher paragraphs = PARAGRAPH.matcher(line);
        while (paragraphs.find()) {
            String element = paragraphs.group();
            if (element.startsWith(IMAGE_PARAGRAPH_PREFIX)
                    || element.startsWith(EMPHASIS_PARAGRAPH_PREFIX)) {
                continue;
            }
            extracted.append(paragraphs.group(1)).append(LINE_END);
        }
        return extracted.toString();
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: