jsoup实现网页图片下载
2015-04-20 17:46
316 查看
/* *2015/4/14 */ public class DownImages { private static int COUNT = 0; private static int DOWN_COUNT = 0; public static void jsoupHTML(String urlPath, String saveDz) throws Exception{ Document doc = Jsoup.connect(urlPath).timeout(1000000).get(); //:当前页中的图片 Elements srcLinks = doc.select("img[src$=.jpg]"); for (Element link : srcLinks) { //:剔除标签,只剩链接路径 String imagesPath = link.attr("src"); System.out.println("当前访问路径:"+imagesPath); getImages(urlPath,imagesPath, saveDz+ ++COUNT +".jpg"); } //:当前页中的图片 Elements srcLinks_gifs = doc.select("img[src$=.gif]"); for (Element link : srcLinks_gifs) { //:剔除标签,只剩链接路径 String imagesPath = link.attr("src"); System.out.println("当前访问路径:"+imagesPath); getImages(urlPath,imagesPath, saveDz+ ++COUNT +".gif"); } //:当前页中的图片 Elements srcLinks_pngs = doc.select("img[src$=.png]"); for (Element link : srcLinks_pngs) { //:剔除标签,只剩链接路径 String imagesPath = link.attr("src"); System.out.println("当前访问路径:"+imagesPath); getImages(urlPath,imagesPath, saveDz+ ++COUNT +".png"); } //:提取网站中所有的href连接 Elements linehrefs = doc.select("a[href]"); for (Element linehref : linehrefs) { String lihr = linehref.attr("href"); if(lihr.length()>4){ String ht = lihr.substring(0, 4); String htt = lihr.substring(0, 1); if(!ht.equals("http") && htt.equals("/")){ lihr = urlPath + lihr; } if(lihr.substring(0, 4).equals("http")){ Document docs = Jsoup.connect(lihr).timeout(1000000).get(); Elements links = docs.select("img[src$=.jpg]"); for (Element link : links) { //:剔除标签,只剩链接路径 String imagesPath = link.attr("src"); System.out.println("当前访问路径:"+imagesPath); // getImages(urlPath+imagesPath, "d://images//0000"+ COUNT++ +".jpg"); getImages(urlPath,imagesPath, saveDz+ COUNT++ +".jpg"); } Elements links_gifs = docs.select("img[src$=.gif]"); for (Element link : links_gifs) { //:剔除标签,只剩链接路径 String imagesPath = link.attr("src"); System.out.println("当前访问路径:"+imagesPath); // getImages(urlPath+imagesPath, "d://images//0000"+ COUNT++ +".gif"); getImages(urlPath,imagesPath, saveDz+ COUNT++ +".gif"); } Elements links_pngs = docs.select("img[src$=.png]"); for (Element link : links_pngs) { //:剔除标签,只剩链接路径 String imagesPath = link.attr("src"); System.out.println("当前访问路径:"+imagesPath); //getImages(urlPath+imagesPath, "d://images//0000"+ COUNT++ +".png"); getImages(urlPath,imagesPath, saveDz+ COUNT++ +".png"); } } } } } /** 43 * @param urlPath 图片路径 44 * @throws Exception 45 */ public static void getImages(String urlPath,String imagePath,String fileName) throws Exception{ String realUrl =""; if(imagePath.startsWith("http")){ realUrl = imagePath; }else{ realUrl = urlPath+imagePath; } URL url = new URL(realUrl);//:获取的路径 //:http协议连接对象 HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); conn.setReadTimeout(6 * 10000); //HTTP 响应消息获取状态码 getResponseCode 200 401等 getResponseMessage 正常 等 if (conn.getResponseCode() <10000){ InputStream inputStream = conn.getInputStream(); byte[] data = readStream(inputStream); if(data.length>(1024*10)){ FileOutputStream outputStream = new FileOutputStream(fileName); outputStream.write(data); System.err.println("第"+ ++DOWN_COUNT +"图片下载成功"); //err优先级比out高, log4j outputStream.close(); } } } /** 66 * 读取url中数据,并以字节的形式返回 67 * @param inputStream 68 * @return 69 * @throws Exception 70 */ public static byte[] readStream(InputStream inputStream) throws Exception{ ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = -1; while((len = inputStream.read(buffer)) !=-1){ outputStream.write(buffer, 0, len); } outputStream.close(); inputStream.close(); return outputStream.toByteArray(); } /* * 文件重命名 */ public static String rename(){ SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss"); String name = sdf.format(new Date()); return name; } public static void main(String[] args) { String saveDz = "F://images1//"; File file = new File(saveDz); if(!file.exists()){ file.mkdir(); } try { //String urlPath = "http://www.22mm.cc/"; // String urlPath = "http://www.22mm.cc/"; //http://www.521auto.com/ // String urlPath = "http://www.163.com/"; String urlPath = "http://www.521auto.com"; jsoupHTML(urlPath,saveDz+"0000"); // jsoupHTML(urlPath,saveDz+rename()); } catch (Exception e) { e.printStackTrace(); }finally{ System.out.println("共访问"+COUNT+"张图片,其中下载"+DOWN_COUNT+"张图片"); } } }
相关文章推荐
- Servlet实现将图片写入到网页和实现图片下载的功能
- java代码实现从网上直接下载图片或者网页
- Servlet实现将图片写入到网页和实现图片下载的功能
- 动态生成二维码图片后通过js(JavaScript)或jq实现网页图片转base64格式下载
- android开发实现下载网页图片到本地并显示
- php中使用gd库实现下载网页中所有图片
- Python 下载网页 Jpg 图片以及 Gif 图片实现
- Java实现网页截屏功能(图片下载功能)的几种方式(整理)
- C#实现下载网页图片文件
- dreamweaver实现图片超链接(点击图片按钮实现下载指定apk功能,可以用于在静态网页里做公司宣传页等)
- Jsoup实现新闻网页的爬取,标题,正文,图片,新闻时间,网页链接的解析示例
- JS中实现网页中禁止下载图片
- 【Python】python3实现网页爬虫下载图片
- Python3实现Web网页图片下载
- gd库图片下载类实现下载网页所有图片
- Python3实现Web网页图片下载
- 利用jsoup 如何从网页中下载图片
- php中使用gd库实现下载网页中所有图片
- 实现Java读取网页内容并下载网页中出现的图片