go版下载妹子图

qii · · 1474 次点击 · · 开始浏览

这是一篇较早创建的文章，其中的信息可能已经有所发展或是发生改变。

package main

import(
   "fmt"
   "io/ioutil"
   "net/http"
   "regexp"
   "strings"
   "os"
   "image/png"
   "image/jpeg"
   "image"
)

// filePath is the prefix prepended to every saved image's file name. The file
// name is appended directly, so the value must end with a path separator.
var filePath = "F:/girls/"

// fileExist reports whether os.Stat succeeds for fileName. Any Stat failure,
// not only "file does not exist", is reported as false.
func fileExist(fileName string) bool {
	_, err := os.Stat(fileName)
	return err == nil
}

// Substr returns a rune-based slice of str.
//
// The window starts at rune index start and spans length runes. A negative
// start is resolved relative to the end as len-1+start, so Substr(s, -2, 3)
// yields the last three runes. A negative length makes the window extend
// backwards from start. Both ends are clamped to [0, len], so the function
// never panics and returns "" when the window falls outside the string.
func Substr(str string, start, length int) string {
	runes := []rune(str)
	n := len(runes)

	if start < 0 {
		start += n - 1
	}

	// Order the two ends so the slice expression below is always valid.
	lo, hi := start, start+length
	if lo > hi {
		lo, hi = hi, lo
	}

	clamp := func(v int) int {
		switch {
		case v < 0:
			return 0
		case v > n:
			return n
		}
		return v
	}

	return string(runes[clamp(lo):clamp(hi)])
}

// getImageList fetches the listing page at url, extracts every album link of
// the form http://www.meizitu.com/a/<digits>.html and passes them on to
// getImageLink together with c.
//
// c is the completion channel for this page. When the page cannot be fetched
// (getUrl returns ""), a completion value is sent on c here so that a receiver
// counting one signal per page is not left waiting; otherwise signalling is
// delegated to getImageLink.
func getImageList(url string, c chan int) {
	fmt.Println("get page link url==>", url)
	body := getUrl(url)
	if body == "" {
		// Nothing to crawl, but the page is still "finished".
		c <- 1
		return
	}
	reg := regexp.MustCompile("http://www.meizitu.com/a/[0-9]+.html")
	links := reg.FindAllString(body, -1)
	getImageLink(links, c)
}

// getImageLink visits every album page in links, extracts the image URLs
// (jpg, png or gif under pic.meizitu.com/wp-content/uploads/) and hands them to
// downloadImage.
//
// Exactly one value is sent on c after all links have been processed. An album
// page that cannot be fetched is skipped; it neither aborts the remaining
// albums nor suppresses the completion signal.
func getImageLink(links []string, c chan int) {
	// Compile once instead of once per album page.
	reg := regexp.MustCompile("http://pic.meizitu.com/wp-content/uploads/[^\\.]+\\.(jpg|png|gif)")
	for _, uri := range links {
		fmt.Println("Get images url, page link==>", uri)
		body := getUrl(uri)
		if body == "" {
			continue
		}
		downloadImage(reg.FindAllString(body, -1))
	}
	c <- 1
}

func downloadImage(images []string){
   for _,v:=range images{
      fmt.Println("Download image, url==>", v)
      imageType:=Substr(v, -2, 3)
      resp,ok:=http.Get(v)
      if nil!=ok{
         continue
      }
      defer resp.Body.Close()
      flag:=false
      var iImage image.Image
      content,ok:=ioutil.ReadAll(resp.Body)
      body:=string(content)
      if imageType=="jpg"{
         iImage,ok=jpeg.Decode(strings.NewReader(body))
         flag=true
         if nil!=ok{
            continue
         }
      } else if imageType == "png"{
         iImage,ok=png.Decode(strings.NewReader(body))
         flag=true
         if nil!=ok{
            continue
         }
      }
      if flag{
         rect:=iImage.Bounds()
         if rect.Max.X < 200 || rect.Max.Y < 200{
            //只下载大图，小图跳过
            fmt.Println("Skip download image, url ==>", v)
            continue
         }
      }
      // body:=getUrl(v)
      if nil!=ok || "" == body{
         fmt.Println("content is null")
         continue
      }
      paths:=strings.Split(v,"/")
      len:=len(paths)
      fileName:=filePath + paths[len-4]+  paths[len-3]+  paths[len-2] +  paths[len-1]
      if fileExist(fileName){
         continue
      }
      f,ok:=os.Create(fileName)
      if ok!=nil{
         fmt.Println("open file error")
         return
      }
      defer f.Close()
      f.WriteString(body)
   }
}

// getUrl performs an HTTP GET on url and returns the response body as a
// string. It returns "" when the request or reading the body fails.
func getUrl(url string) string {
	resp, err := http.Get(url)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	if data, err := ioutil.ReadAll(resp.Body); err == nil {
		return string(data)
	}
	return ""
}

// main crawls listing pages 1 through 10 concurrently, one goroutine per page,
// and waits until one completion value per page has been received before
// printing "done!".
func main() {
	const pageFormat = "http://www.meizitu.com/a/sifang_5_%d.html"
	const pageCount = 10
	firstPage := 1

	// Buffered so that finished workers never block on the send.
	done := make(chan int, pageCount)
	for page := firstPage; page < firstPage+pageCount; page++ {
		go func(page int) {
			url := fmt.Sprintf(pageFormat, page)
			fmt.Println("Parse url:", url)
			getImageList(url, done)
		}(page)
	}

	for finished := 0; finished < pageCount; finished++ {
		<-done
	}
	fmt.Println("done!")
}

重构的代码：

package main

import(
   "fmt"
   "io/ioutil"
   "net/http"
   "regexp"
   "strings"
   "os"
   "image/png"
   "image/jpeg"
   "image/gif"
   "image"
   "errors"
)

// Config holds the settings for one crawl. The field order is significant:
// NewConfig fills the struct with a positional literal.
type Config struct {
	// SavePath is the prefix prepended to every generated file name.
	SavePath string
	// MinWidth and MinHeight are the smallest image dimensions accepted by
	// the size check; when both are <= 0 the check is skipped.
	MinWidth, MinHeight int
	// Overwrite controls whether an image whose file already exists is
	// downloaded again (true) or skipped (false).
	Overwrite bool
	// MaxPage is the number of listing pages to visit, starting at StartPage.
	MaxPage, StartPage int
}

// NewConfig builds a Config from its individual settings. Note that the
// parameter order (maxPage, startPage, overwrite) differs from the order in
// which the fields are declared on Config.
func NewConfig(savePath string, minWidth, minHeight, maxPage, startPage int, overwrite bool) *Config {
	return &Config{
		SavePath:  savePath,
		MinWidth:  minWidth,
		MinHeight: minHeight,
		Overwrite: overwrite,
		MaxPage:   maxPage,
		StartPage: startPage,
	}
}

const (
   // PAGE_URL is the fmt format string for a listing page; %d receives the
   // page number.
   PAGE_URL string = "http://www.meizitu.com/a/sifang_5_%d.html"
   // IMAGE_LIST_LINKS is the regular expression used to find album page
   // links inside a listing page.
   IMAGE_LIST_LINKS string = "http://www.meizitu.com/a/[0-9]+.html"
   // IMAGE_IMAGE_LINKS is the regular expression used to find image links
   // (jpg, png or gif) inside an album page.
   IMAGE_IMAGE_LINKS string = "http://pic.meizitu.com/wp-content/uploads/[^\\.]+\\.(jpg|png|gif)"
)

// Webpage carries the crawl operations (fetching pages, extracting links,
// downloading and saving images) as methods around a shared Config.
type Webpage struct {
   Config *Config
}

// NewWebpage returns a Webpage that uses config for all of its operations.
// The pointer is stored as-is; it is not copied.
func NewWebpage(config *Config) *Webpage {
	page := new(Webpage)
	page.Config = config
	return page
}

// ParsePage expands url, a fmt format string with one integer verb, into one
// URL per page number, for Config.MaxPage consecutive pages starting at
// Config.StartPage. It returns nil when MaxPage is not positive.
func (self *Webpage) ParsePage(url string) []string {
	first := self.Config.StartPage
	count := self.Config.MaxPage

	var pages []string
	for i := 0; i < count; i++ {
		pages = append(pages, fmt.Sprintf(url, first+i))
	}
	return pages
}

// Get performs an HTTP GET on url and returns the response body as a string.
// It returns "" when the request or reading the body fails.
func (self *Webpage) Get(url string) (body string) {
	resp, err := http.Get(url)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	if data, err := ioutil.ReadAll(resp.Body); err == nil {
		return string(data)
	}
	return ""
}

// ParseUrl fetches url and returns every substring of the body that matches
// the regular expression pattern. It returns an empty, non-nil slice when the
// page could not be fetched; in that case pattern is not compiled. An invalid
// pattern panics (regexp.MustCompile) once a body is available.
func (self *Webpage) ParseUrl(url, pattern string) (links []string) {
	fmt.Println("Parse url ==>", url)
	if content := self.Get(url); content != "" {
		return regexp.MustCompile(pattern).FindAllString(content, -1)
	}
	return []string{}
}

// GetSaveName derives the local file name for an image URL: Config.SavePath
// followed by the URL's last four "/"-separated segments concatenated without
// separators (e.g. ".../2014a/01/02/03.jpg" becomes "2014a010203.jpg").
//
// A URL with fewer than four segments uses all of its segments instead of
// panicking on an out-of-range index.
func (self *Webpage) GetSaveName(url string) string {
	segments := strings.Split(url, "/")
	if extra := len(segments) - 4; extra > 0 {
		segments = segments[extra:]
	}
	return self.Config.SavePath + strings.Join(segments, "")
}

// Download processes each image URL in turn: it derives the target file name,
// skips files that already exist unless Config.Overwrite is set, fetches the
// image, rejects it when CheckSize fails, and finally writes it with
// SaveImage. A failure on one URL is logged (except for an empty fetch, which
// is silent) and does not stop the remaining URLs.
func (self *Webpage) Download(urls []string) {
	for _, link := range urls {
		fmt.Println("Start download image from url ==>", link)
		target := self.GetSaveName(link)

		if exists := self.FileExist(target); exists && !self.Config.Overwrite {
			fmt.Println("Image already exists, skip download ==>", link)
			continue
		}

		content := self.Get(link)
		// Cases are tried in order and stop at the first match, so each
		// later step only runs when every earlier one succeeded.
		switch {
		case content == "":
			// Nothing was fetched; move on without a message.
		case !self.CheckSize(content, self.GetExt(link)):
			fmt.Println("Image size too small, skip download ==>", link)
		case !self.SaveImage(content, target):
			fmt.Println("Save image failed ==>", link)
		}
	}
}

// SaveImage writes body to the file name, creating or truncating it, and
// reports whether the whole content was stored.
//
// The file is closed before returning and a Close error counts as a failure.
// When writing or closing fails the file is removed, so that a truncated image
// is not later mistaken for a finished download by an existence check.
func (self *Webpage) SaveImage(body, name string) bool {
	f, err := os.Create(name)
	if err != nil {
		fmt.Println("open file error")
		return false
	}

	_, writeErr := f.WriteString(body)
	closeErr := f.Close()
	if writeErr != nil || closeErr != nil {
		// Best effort cleanup; the failure itself is reported via the
		// return value.
		_ = os.Remove(name)
		return false
	}
	return true
}

// GetExt returns the part of url after its last "." (the file extension
// without the dot). When url contains no "." the whole string is returned, and
// an empty url yields "".
func (self *Webpage) GetExt(url string) string {
	// LastIndex is -1 when there is no dot, which makes the slice start at 0.
	return url[strings.LastIndex(url, ".")+1:]
}

// CheckSize reports whether the image held in body is at least
// Config.MinWidth by Config.MinHeight.
//
// ext selects the decoder ("jpg", "png" or "gif"); any other value is logged
// and rejected. When both minimums are <= 0 the image is accepted without
// being decoded. An image that fails to decode is rejected.
func (self *Webpage) CheckSize(body, ext string) bool {
	minW, minH := self.Config.MinWidth, self.Config.MinHeight
	if minW <= 0 && minH <= 0 {
		return true
	}

	var (
		img image.Image
		err = errors.New("Unknow image type")
	)
	src := strings.NewReader(body)
	switch ext {
	case "jpg":
		img, err = jpeg.Decode(src)
	case "png":
		img, err = png.Decode(src)
	case "gif":
		img, err = gif.Decode(src)
	default:
		fmt.Println("Unknow image format")
		return false
	}
	if err != nil {
		return false
	}

	bounds := img.Bounds()
	return minW <= bounds.Max.X && minH <= bounds.Max.Y
}


// FileExist reports whether os.Stat succeeds for name. Any Stat failure, not
// only "file does not exist", is reported as false.
func (self *Webpage) FileExist(name string) bool {
	_, err := os.Stat(name)
	return err == nil
}

// RunTask crawls every listing page produced by ParsePage(PAGE_URL). Each page
// is handled in its own goroutine: the album links are extracted from the
// page, the image links are extracted from each album, and the images are
// passed to Download. RunTask returns once every page has finished.
func (self *Webpage) RunTask() {
	pages := self.ParsePage(PAGE_URL)
	total := len(pages)
	// Buffered so that finished workers never block on the send.
	done := make(chan int, total)

	crawl := func(page string) {
		for _, album := range self.ParseUrl(page, IMAGE_LIST_LINKS) {
			self.Download(self.ParseUrl(album, IMAGE_IMAGE_LINKS))
		}
		done <- 1
	}
	for _, page := range pages {
		go crawl(page)
	}

	// A counted receive returns immediately when there are no pages at
	// all, instead of blocking on a channel nobody will ever send to.
	for finished := 0; finished < total; finished++ {
		<-done
	}
}

func main() {
   config := NewConfig(
      "F:/girls/",
      400,
      400,
      1,
      11,
      false,
   )

   webpage := NewWebpage(config)
   webpage.RunTask()

   fmt.Println("done!")
}

有疑问加站长微信联系（非本文作者）