package main
import (
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
"time"
)
func test() {
/pageStr := GetPageStr("https://www.umei.cc/bizhitupian/meinvbizhi/")
fmt.Println(pageStr)/
GetLink("https://www.umei.cc/bizhitupian/weimeibizhi/")
}
// DownLoadFile fetches url with an HTTP GET and writes the response body to
// filename inside the fixed directory D:/mypath/blogweb_gin/img/.
//
// It returns true only when the request succeeded with status 200, the body
// was read completely, and the file was written. Request and read errors are
// still passed to HandleError (declared outside this block) before the
// function returns false.
func DownLoadFile(url string, filename string) (ok bool) {
	resp, err := http.Get(url)
	HandleError(err, "http.Get")
	if err != nil {
		// On error resp is nil, so touching resp.Body would panic.
		return false
	}
	defer resp.Body.Close()

	// Do not store an error page under an image file name.
	if resp.StatusCode != http.StatusOK {
		return false
	}

	data, err := ioutil.ReadAll(resp.Body)
	HandleError(err, "resp.Body")
	if err != nil {
		// A partially read body would produce a truncated image.
		return false
	}

	filename = "D:/mypath/blogweb_gin/img/" + filename
	// Write the downloaded bytes to disk.
	err = ioutil.WriteFile(filename, data, 0666)
	return err == nil
}
// Package-level state shared by the crawler, monitor, and download goroutines.
var (
	// waitGroup is incremented in main for every goroutine it starts; each
	// goroutine calls Done when it finishes.
	waitGroup sync.WaitGroup

	// chanImageUrls is the data channel that carries image links.
	chanImageUrls chan string

	// chanTask is used to monitor the crawler goroutines: each one sends
	// its page URL here when it has finished.
	chanTask chan string
)
// main sets up and runs the crawl pipeline:
//
//   - it initialises the image-link channel and the task-completion channel;
//   - it starts 26 crawler goroutines, one per listing page, which add image
//     links to the channel;
//   - it starts one task-counting goroutine (CheckOK) that checks whether all
//     26 tasks have finished and, if so, closes the data channel;
//   - it starts 5 download goroutines that read links from the channel and
//     download them.
//
// It then blocks until every started goroutine has called waitGroup.Done.
func main() {
	// Manual debugging entry points, left disabled:
	// test()
	// DownLoadFile("http://kr.shanghai-jiuxin.com/file/2020/1031/small191468637cab2f0206f7d1d9b175ac81.jpg","1.jpg")

	chanImageUrls = make(chan string, 1000000)
	chanTask = make(chan string, 26)

	// Crawler goroutines: listing pages 1 through 26.
	for page := 1; page <= 26; page++ {
		waitGroup.Add(1)
		pageURL := "https://www.umei.cc/p/gaoqing/cn/" + strconv.Itoa(page) + ".htm"
		go getImgUrls(pageURL)
	}

	// Monitor goroutine.
	waitGroup.Add(1)
	go CheckOK()

	// Download goroutines.
	for worker := 0; worker < 5; worker++ {
		waitGroup.Add(1)
		go DownloadImage()
	}

	waitGroup.Wait()
}
// getImgUrls collects every image link that getImages returns for the page
// at url and sends each one on chanImageUrls. When all links are queued it
// sends url on chanTask and calls waitGroup.Done.
func getImgUrls(url string) {
	links := getImages(url)
	for idx := range links {
		chanImageUrls <- links[idx]
	}

	// Mark this crawler goroutine as finished.
	chanTask <- url
	waitGroup.Done()
}
//获取当前页的图片链接
func getImages(url string)(urls[]string) {
pageStr := GetPageStr(url)
re := regexp.MustCompile(reImage)
results := re.FindAllStringSubmatch(pageStr, -1)
fmt.Printf("共找到%d条结果\n",len(results))
for , result := range results {
url = result[0]
urls = append(urls,url)
}
return urls
}
// CheckOK receives 26 values from chanTask, printing a completion message
// for each. After the 26th it closes chanImageUrls and calls waitGroup.Done.
func CheckOK() {
	for finished := 0; finished < 26; finished++ {
		page := <-chanTask
		fmt.Printf("%s完成了爬取任务\n", page)
	}
	// All 26 tasks have reported, so no more links will be sent.
	close(chanImageUrls)
	waitGroup.Done()
}
func DownloadImage() {
for url:= range chanImageUrls {
filename:=GetName(url)
OK := DownLoadFile(url, filename)
if OK {
fmt.Printf("%s下载成功",filename)
}else {
fmt.Printf("%s失败",filename)
}
}
waitGroup.Done()
}
// GetName builds a local file name for url: the current Unix time in
// nanoseconds, immediately followed by everything after the last "/" in
// url. If url has no "/", the whole string is used as the suffix.
//
// The timestamp prefix is presumably there so files with the same base name
// do not overwrite each other.
func GetName(url string) (filename string) {
	lastIndex := strings.LastIndex(url, "/")
	base := url[lastIndex+1:] // lastIndex is -1 without a "/", giving url[0:]

	// Format the int64 directly; converting to int first would truncate the
	// nanosecond timestamp on platforms where int is 32 bits.
	timePrefix := strconv.FormatInt(time.Now().UnixNano(), 10)
	return timePrefix + base
}
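// Note from the hosting site (not the author of this article): if you have
// questions, contact the site administrator on WeChat.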