您的位置:首页 > Web前端 > JavaScript

jsoup实现网页图片下载

2015-04-20 17:46 316 查看
/*
*2015/4/14
*/
/**
 * Small jsoup-based crawler that downloads the JPG, GIF and PNG images referenced by a
 * start page and by every page the start page links to (one level deep).
 *
 * <p>Downloaded files are named {@code <prefix><n><extension>}, where {@code n} is a
 * running number shared by the whole crawl. Images of 10 KB or less are fetched but not
 * saved. The class keeps its counters in static fields and is not thread-safe.
 */
public class DownImages {
    /** Timeout, in milliseconds, used when fetching HTML pages through jsoup. */
    private static final int PAGE_TIMEOUT_MS = 1000000;
    /** Connect/read timeout, in milliseconds, used when fetching a single image. */
    private static final int IMAGE_TIMEOUT_MS = 6 * 10000;
    /** Responses of this size or smaller are not written to disk. */
    private static final int MIN_IMAGE_BYTES = 1024 * 10;
    /** File extensions that are harvested, in the order they are processed per page. */
    private static final String[] IMAGE_EXTENSIONS = {".jpg", ".gif", ".png"};

    /** Number of image references visited so far; also the source of file numbers. */
    private static int COUNT = 0;
    /** Number of images actually written to disk so far. */
    private static int DOWN_COUNT = 0;

    /**
     * Downloads the images of {@code urlPath} and of every page it links to.
     *
     * <p>Only links whose {@code href} is longer than four characters and starts with
     * {@code http} or {@code /} are followed. Relative links are resolved against
     * {@code urlPath}. A linked page that cannot be fetched is reported on standard error
     * and skipped, so one broken link does not abort the crawl.
     *
     * @param urlPath absolute URL of the start page
     * @param saveDz  file-name prefix (directory plus leading name part) for saved images
     * @throws Exception if the start page itself cannot be fetched or {@code urlPath} is
     *                   not a valid URL
     */
    public static void jsoupHTML(String urlPath, String saveDz) throws Exception {
        Document startPage = Jsoup.connect(urlPath).timeout(PAGE_TIMEOUT_MS).get();
        downloadPageImages(startPage, urlPath, saveDz);

        URL base = new URL(urlPath);
        Elements anchors = startPage.select("a[href]");
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            boolean followable = href.length() > 4
                    && (href.startsWith("http") || href.startsWith("/"));
            if (!followable) {
                continue;
            }
            try {
                // Resolve properly instead of concatenating strings, so root-relative and
                // protocol-relative links work even when urlPath has a path of its own.
                String pageUrl = new URL(base, href).toString();
                if (!pageUrl.startsWith("http")) {
                    continue;
                }
                Document linkedPage = Jsoup.connect(pageUrl).timeout(PAGE_TIMEOUT_MS).get();
                // Images are resolved against the page they appear on, not the start page.
                downloadPageImages(linkedPage, pageUrl, saveDz);
            } catch (Exception e) {
                System.err.println("页面抓取失败,已跳过:" + href + " (" + e + ")");
            }
        }
    }

    /**
     * Downloads every JPG, GIF and PNG image referenced by {@code page}.
     *
     * <p>Each image reference consumes one number from {@link #COUNT}, whether or not the
     * download succeeds. A failing image is reported on standard error and skipped.
     *
     * @param page    parsed HTML document to scan for {@code img} elements
     * @param pageUrl URL the document was loaded from; base for relative image paths
     * @param saveDz  file-name prefix for saved images
     */
    private static void downloadPageImages(Document page, String pageUrl, String saveDz) {
        for (String extension : IMAGE_EXTENSIONS) {
            Elements images = page.select("img[src$=" + extension + "]");
            for (Element image : images) {
                // Keep only the link itself, without the surrounding tag.
                String imagesPath = image.attr("src");
                System.out.println("当前访问路径:" + imagesPath);

                // Always pre-increment so that every image gets its own file number.
                String fileName = saveDz + (++COUNT) + extension;
                try {
                    getImages(pageUrl, imagesPath, fileName);
                } catch (Exception e) {
                    System.err.println("图片下载失败,已跳过:" + imagesPath + " (" + e + ")");
                }
            }
        }
    }

    /**
     * Fetches one image over HTTP(S) and stores it in {@code fileName}.
     *
     * <p>Nothing is written when the server does not answer with status 200 or when the
     * body is 10 KB or smaller.
     *
     * @param urlPath   URL of the page the image was found on; base for relative paths
     * @param imagePath value of the image's {@code src} attribute, absolute or relative
     * @param fileName  path of the file to write
     * @throws Exception if the URL is malformed, is not an HTTP(S) URL, or an I/O error
     *                   occurs while downloading or writing
     */
    public static void getImages(String urlPath, String imagePath, String fileName) throws Exception {
        // An absolute imagePath is returned unchanged by this constructor.
        URL url = new URL(new URL(urlPath), imagePath);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(IMAGE_TIMEOUT_MS);
            conn.setReadTimeout(IMAGE_TIMEOUT_MS);
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return;
            }
            byte[] data = readStream(conn.getInputStream());
            if (data.length > MIN_IMAGE_BYTES) {
                FileOutputStream outputStream = new FileOutputStream(fileName);
                try {
                    outputStream.write(data);
                } finally {
                    outputStream.close();
                }
                System.err.println("第" + ++DOWN_COUNT + "图片下载成功");
            }
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Reads {@code inputStream} to its end and returns everything that was read.
     *
     * <p>The stream is closed before this method returns, including when reading fails.
     *
     * @param inputStream stream to drain
     * @return all bytes read from the stream; empty if the stream was already at its end
     * @throws Exception if reading from or closing the stream fails
     */
    public static byte[] readStream(InputStream inputStream) throws Exception {
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = inputStream.read(buffer)) != -1) {
                collected.write(buffer, 0, len);
            }
            return collected.toByteArray();
        } finally {
            inputStream.close();
        }
    }

    /**
     * Builds a file-name part from the current local time.
     *
     * @return the current time formatted as {@code yyyyMMddHHmmss}
     */
    public static String rename() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        return sdf.format(new Date());
    }

    /**
     * Crawls the hard-coded start page and saves its images below the hard-coded
     * directory, then prints how many images were visited and how many were saved.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String saveDz = "F://images1//";
        File file = new File(saveDz);
        if (!file.exists() && !file.mkdirs()) {
            System.err.println("无法创建保存目录:" + saveDz);
            return;
        }
        try {
            String urlPath = "http://www.521auto.com";
            jsoupHTML(urlPath, saveDz + "0000");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("共访问" + COUNT + "张图片,其中下载" + DOWN_COUNT + "张图片");
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: