实现Java读取网页内容并下载网页中出现的图片
2017-02-19 23:01
441 查看
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads the HTML of a web page and downloads the images it references
 * (via {@code src=} / {@code background=} attributes) into the local
 * {@code ./pic/} directory.
 */
public class GetContentPicture {

    /** Directory (relative to the working directory) that receives downloaded images. */
    private static final String PICTURE_DIR = "./pic/";

    /** Size of the copy buffer used when saving an image. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Matches a quoted relative or root-relative image path in a {@code src}/{@code background}
     * attribute. Group 1 is the path, including a leading {@code /} when present, so that it can
     * be resolved correctly against the page URL.
     */
    private static final Pattern RELATIVE_IMAGE = Pattern.compile(
            "(?:src|SRC|background|BACKGROUND)=['\"]"
                    + "(/?(?:[\\w-]+/)*[\\w-]+\\.(?:jpg|JPG|png|PNG|gif|GIF))"
                    + "['\"]");

    /**
     * Matches a quoted absolute http/https image URL in a {@code src}/{@code background}
     * attribute. Group 1 is the full URL.
     */
    private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
            "(?:src|SRC|background|BACKGROUND)=['\"]"
                    + "(https?://(?:[\\w-]+\\.)+[\\w-]+(?::[0-9]+)*(?:/[\\w-]+)*"
                    + "/[\\w-]+\\.(?:jpg|JPG|png|PNG|gif|GIF))"
                    + "['\"]");

    /**
     * Downloads the resource at {@code httpUrl} into {@code ./pic/}, naming the local file after
     * the text following the last {@code /} of the URL. The target directory is created when
     * missing.
     *
     * <p>This is a best-effort operation: I/O failures are reported on {@code System.err} and do
     * not propagate, so one broken image does not abort a batch of downloads.
     *
     * @param httpUrl absolute URL of the image to download
     */
    public void getHtmlPicture(String httpUrl) {
        try {
            System.out.println("取网络图片");

            // lastIndexOf returns -1 when there is no '/', so "+ 1" safely yields the whole string.
            String fileName = httpUrl.substring(httpUrl.lastIndexOf('/') + 1);
            if (fileName.isEmpty()) {
                throw new FileNotFoundException("URL has no file name component: " + httpUrl);
            }

            File directory = new File(PICTURE_DIR);
            if (!directory.isDirectory() && !directory.mkdirs()) {
                throw new IOException("Cannot create directory " + directory);
            }

            URL url = new URL(httpUrl);
            // The input stream is opened first so that a failed connection leaves no empty file.
            try (InputStream in = url.openStream();
                 OutputStream out = new FileOutputStream(new File(directory, fileName))) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
            System.out.println("图片获取成功");
        } catch (IOException e) {
            // MalformedURLException and FileNotFoundException are IOExceptions as well.
            System.err.println("Failed to download " + httpUrl + ": " + e);
        }
    }

    /**
     * Fetches the document at {@code httpUrl} and returns its text with all lines joined
     * together (line terminators are dropped, nothing is inserted in their place).
     *
     * <p>The content is decoded as UTF-8.
     *
     * @param httpUrl URL of the page to read
     * @return the page content as a single line
     * @throws IOException if the URL is malformed or the content cannot be read
     */
    public String getHtmlCode(String httpUrl) throws IOException {
        URL pageUrl = new URL(httpUrl);
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pageUrl.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Prints the HTML of the page at {@code url}, then downloads every jpg/png/gif image it
     * references: first those given by relative paths (resolved against the page URL), then
     * those given by absolute http/https URLs. Each image reference is printed before it is
     * downloaded.
     *
     * @param url URL of the page to scan
     * @throws IOException if the page cannot be read or an image reference cannot be resolved
     */
    public void get(String url) throws IOException {
        String content = this.getHtmlCode(url);
        System.out.println(content);

        URL base = new URL(url);
        Matcher relative = RELATIVE_IMAGE.matcher(content);
        while (relative.find()) {
            String reference = relative.group(1);
            System.out.println(reference);
            // Standard URL resolution handles both "img/a.png" and "/img/a.png", and does not
            // depend on the page URL ending with a slash.
            this.getHtmlPicture(new URL(base, reference).toString());
        }

        Matcher absolute = ABSOLUTE_IMAGE.matcher(content);
        while (absolute.find()) {
            String imageUrl = absolute.group(1);
            System.out.println(imageUrl);
            this.getHtmlPicture(imageUrl);
        }
    }

    /**
     * Demo entry point: downloads the images referenced by the Baidu home page.
     *
     * @param args ignored
     * @throws IOException if the page cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.baidu.com/";
        GetContentPicture gcp = new GetContentPicture();
        gcp.get(url);
    }
}
相关文章推荐
- Java读取网页内容并下载图片的实例
- java代码实现从网上直接下载图片或者网页
- Java实现网页截屏功能(图片下载功能)的几种方式(整理)
- 需求(Java语言实现):对存有图片URL的txt文件进行逐行读取,自行下载txt文件中的图片到指定文件夹中
- java下载网页并读取内容
- java 实现根据url,将url对应的网页存为一张完整的图片,不是截图,也就是图片是整个网页的内容
- java下载网页内容和网络图片
- java下载网页内容和网络图片
- java读取网页图片路径并下载到本地
- java下载读取网页内容方式
- Java读取网页内容并生成静态页面的简单实现
- JAVA实现图片下载至缓存处理后,再上传FTP
- java读取(正则表达式分析)网页内容
- 用.net实现网页图片以幻灯片形式播放,图片从数据库中读取前5张!!
- gd库图片下载类实现下载网页所有图片
- 使用JAVA读取ORACLE BLOB字段实现上传下载
- Java实现HTTP文件下载和图片(验证码)浏览
- java 实现图片下载
- java 实现文件下载的时候,避免浏览器自动打开文件,而是出现另存为对话框
- JS中实现网页中禁止下载图片