您的位置:首页 > 编程语言 > Go语言

Golang爬虫下载资源到本地

2021-12-24 19:37 1246 查看

通过 Golang 爬取猛男图片并保存到本地。

package main

import (
"fmt"
"io"
"net/http"
"os"
"regexp"
"strconv"
)

// HttpGet issues an HTTP GET request for url and returns the complete response
// body as a string.
//
// result holds the body text and is empty whenever err is non-nil. err is the
// error returned by http.Get or by reading the body; io.EOF marks the normal
// end of the body and is not reported as an error.
func HttpGet(url string) (result string, err error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Collect raw bytes and convert to a string once at the end; appending to a
	// string chunk by chunk would re-copy everything read so far on every step.
	var body []byte
	buf := make([]byte, 4096)
	for {
		n, readErr := resp.Body.Read(buf)
		// A Read may hand back data together with an error, so keep the data
		// before looking at the error.
		body = append(body, buf[:n]...)
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			// This also covers errors reported with n == 0 (for example a
			// body that ends before the announced Content-Length).
			return "", readErr
		}
	}
	return string(body), nil
}

// path is the directory prefix that saveImg prepends to every image file name.
// It ends with a slash because the file name is appended by plain string
// concatenation.
var path = "D:/test/Gundam/"

// saveImg downloads the image at url and stores it as "<index>.gif" under the
// package-level path prefix.
//
// index is sent on page exactly once when the function finishes, whether the
// image was saved, skipped, or failed, so a receiver that waits for one
// message per call never blocks forever. Failures are printed to stdout.
func saveImg(index int, url string, page chan int) {
	// Registered first so it runs last, after the response body is closed.
	defer func() { page <- index }()

	fmt.Println(index, "-----", url)

	// The crawled site reuses this image on many pages; do not download it.
	if url == "http://images.17173.com/gd/images/ms/15019.gif" {
		return
	}

	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("http get err:", err)
		return
	}
	defer resp.Body.Close()

	// Open the destination file.
	picName := path + strconv.Itoa(index) + ".gif"
	f, err := os.Create(picName)
	if err != nil {
		fmt.Println("os create err:", err)
		return
	}

	// Stream the HTTP body into the file. Close is checked as well because a
	// failed write can be reported only when the file is closed.
	_, err = io.Copy(f, resp.Body)
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		fmt.Println("save img err:", err)
	}
}

// spiderPic crawls the pictures linked from the index page HTML in data.
//
// For every link matching `<A href="..." target=_blank><IMG` it fetches the
// linked page with HttpGet, takes the first src attribute that is followed by
// width="120" (or, failing that, width=120), and starts a saveImg goroutine
// for it. A page fetch error stops the scan for further links; a page with no
// matching image is skipped. Before returning, the function waits for one
// message on the page channel per goroutine it started.
func spiderPic(data string) {
	// Compile every pattern once instead of once per link.
	linkRe := regexp.MustCompile(`<A href="(.*?)" target=_blank><IMG`)
	quotedWidthRe := regexp.MustCompile(`src="(.*?)" width="120"`)
	bareWidthRe := regexp.MustCompile(`src="(.*?)" width=120`)

	alls := linkRe.FindAllStringSubmatch(data, -1)

	page := make(chan int)
	started := 0

	for index, value := range alls {
		result, err := HttpGet(value[1])
		if err != nil {
			fmt.Println("HttpGet err3:", err)
			// Stop scanning, but still wait below for downloads in flight.
			break
		}

		// Only the first match is used; fall back to the unquoted width form.
		picData := quotedWidthRe.FindStringSubmatch(result)
		if picData == nil {
			picData = bareWidthRe.FindStringSubmatch(result)
		}
		if picData == nil {
			fmt.Println("no picture found in:", value[1])
			continue
		}

		go saveImg(index, picData[1], page)
		started++
	}

	// Keep the calling goroutine alive until every started download reports
	// back. Counting launches (not links) avoids waiting for goroutines that
	// were never started.
	for i := 0; i < started; i++ {
		fmt.Printf("Download %d gif\n", <-page)
	}
}

// working fetches the hard-coded index page and passes its HTML to spiderPic.
// If the fetch fails, the error is printed and nothing else happens.
func working() {
	const indexURL = "http://gd.17173.com/mechanics/index.shtml"

	html, fetchErr := HttpGet(indexURL)
	if fetchErr == nil {
		spiderPic(html)
		return
	}

	fmt.Println("HttpGet err1:", fetchErr)
}

// main is the program entry point; it runs the crawl by calling working.
func main() {
working()
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: