package main
import(
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strings"
"os"
"image/png"
"image/jpeg"
"image"
)
// filePath is the prefix that downloaded images are saved under. File names
// are appended to it by plain string concatenation, so it has to end with a
// path separator.
var filePath = "F:/girls/"
// fileExist reports whether os.Stat succeeds for fileName. Any Stat error,
// not only "does not exist", is treated as the file being absent.
func fileExist(fileName string) bool {
	_, err := os.Stat(fileName)
	return err == nil
}
// Substr returns the part of str covering length runes starting at rune index
// start. Indexing is by rune, so multi-byte characters count as one position.
//
// A negative start is rebased to len-1+start (so start=-2 with length=3 yields
// the final three runes). If length is negative the two bounds are swapped.
// Both bounds are finally clamped to [0, len], so the call never panics.
func Substr(str string, start, length int) string {
	runes := []rune(str)
	total := len(runes)

	if start < 0 {
		start += total - 1
	}

	lo, hi := start, start+length
	if lo > hi {
		lo, hi = hi, lo
	}

	// clamp forces an index into the valid slicing range of runes.
	clamp := func(i int) int {
		switch {
		case i < 0:
			return 0
		case i > total:
			return total
		}
		return i
	}

	return string(runes[clamp(lo):clamp(hi)])
}
// pageLinkRe matches the gallery page links found in a listing page. It is
// compiled once at package level, and the dots are escaped so they only match
// a literal '.'.
var pageLinkRe = regexp.MustCompile(`http://www\.meizitu\.com/a/[0-9]+\.html`)

// getImageList fetches the listing page at url, extracts every gallery page
// link from it and hands the links to getImageLink.
//
// c is the completion channel the caller waits on. When the listing page
// cannot be fetched this function sends the completion value itself, so the
// receiver is never left waiting for a signal that will not come; otherwise
// signalling completion is left to getImageLink.
func getImageList(url string, c chan int) {
	fmt.Println("get page link url==>", url)
	body := getUrl(url)
	if body == "" {
		// Nothing to process, but the caller still counts this page as done.
		c <- 1
		return
	}
	getImageLink(pageLinkRe.FindAllString(body, -1), c)
}
// imageURLRe matches the full-size image URLs embedded in a gallery page. It
// is compiled once at package level rather than once per page.
var imageURLRe = regexp.MustCompile(`http://pic\.meizitu\.com/wp-content/uploads/[^\.]+\.(jpg|png|gif)`)

// getImageLink visits every gallery page in links, extracts the image URLs it
// contains and passes them to downloadImage.
//
// Exactly one value is sent on c when the function returns, whichever path it
// leaves by, so the goroutine waiting on c always gets its completion signal.
// A gallery page that cannot be fetched is skipped; the remaining pages are
// still processed.
func getImageLink(links []string, c chan int) {
	// Deferred so that no return path can forget to report completion.
	defer func() { c <- 1 }()

	for _, uri := range links {
		fmt.Println("Get images url, page link==>", uri)
		body := getUrl(uri)
		if body == "" {
			continue
		}
		downloadImage(imageURLRe.FindAllString(body, -1))
	}
}
func downloadImage(images []string){
for _,v:=range images{
fmt.Println("Download image, url==>", v)
imageType:=Substr(v, -2, 3)
resp,ok:=http.Get(v)
if nil!=ok{
continue
}
defer resp.Body.Close()
flag:=false
var iImage image.Image
content,ok:=ioutil.ReadAll(resp.Body)
body:=string(content)
if imageType=="jpg"{
iImage,ok=jpeg.Decode(strings.NewReader(body))
flag=true
if nil!=ok{
continue
}
} else if imageType == "png"{
iImage,ok=png.Decode(strings.NewReader(body))
flag=true
if nil!=ok{
continue
}
}
if flag{
rect:=iImage.Bounds()
if rect.Max.X < 200 || rect.Max.Y < 200{
//只下载大图,小图跳过
fmt.Println("Skip download image, url ==>", v)
continue
}
}
// body:=getUrl(v)
if nil!=ok || "" == body{
fmt.Println("content is null")
continue
}
paths:=strings.Split(v,"/")
len:=len(paths)
fileName:=filePath + paths[len-4]+ paths[len-3]+ paths[len-2] + paths[len-1]
if fileExist(fileName){
continue
}
f,ok:=os.Create(fileName)
if ok!=nil{
fmt.Println("open file error")
return
}
defer f.Close()
f.WriteString(body)
}
}
// getUrl performs an HTTP GET on url and returns the response body as a
// string. If the request or reading the body fails, it returns "".
func getUrl(url string) string {
	response, err := http.Get(url)
	if err != nil {
		return ""
	}
	defer response.Body.Close()

	if data, readErr := ioutil.ReadAll(response.Body); readErr == nil {
		return string(data)
	}
	return ""
}
// main starts one goroutine per listing page, each running getImageList, and
// then blocks until it has received one completion value per page before
// printing "done!".
func main() {
	const (
		urlPattern = "http://www.meizitu.com/a/sifang_5_%d.html"
		firstPage  = 1
		pageCount  = 10
	)

	// Buffered to pageCount so a worker's completion send does not block.
	done := make(chan int, pageCount)

	for p := firstPage; p < firstPage+pageCount; p++ {
		go func(page int) {
			url := fmt.Sprintf(urlPattern, page)
			fmt.Println("Parse url:", url)
			getImageList(url, done)
		}(p)
	}

	// Wait for one completion value per page that was started.
	for finished := 0; finished < pageCount; finished++ {
		<-done
	}

	fmt.Println("done!")
}
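// Blog footer (not part of the program): if you have questions, contact the site administrator on WeChat (not the author of this article).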