package main
import (
"fmt"
"github.com/gocolly/colly"
)
// main visits a fang.com second-hand listing page, and for every anchor
// matched by the "#kesfxqxq_A01_03_01 a" selector prints its href attribute
// and its text content.
func main() {
	c := colly.NewCollector()

	// Let colly detect the body encoding when the response does not declare
	// a charset, so that element text is converted to UTF-8 before printing.
	// NOTE(review): presumably the target page is GBK/GB2312-encoded, which
	// would explain garbled output without this setting — confirm.
	c.DetectCharset = true

	c.OnHTML("#kesfxqxq_A01_03_01 a", func(e *colly.HTMLElement) {
		link := e.Attr("href")
		content := e.Text
		fmt.Printf("link : %s \t", link)
		fmt.Printf("content : %s \t", content)
		fmt.Println()
	})

	// Log every outgoing request URL before it is sent.
	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL.String())
	})

	// Visit returns an error on failure; report it instead of dropping it so
	// a failed fetch is not mistaken for an empty page.
	if err := c.Visit("http://fulixinchengtj.fang.com/esf/"); err != nil {
		fmt.Println("visit failed:", err)
	}
}
这是抓取房天下小区详情页的 demo,content 打印出来是乱码,各位有解决方法的请不吝赐教!
go get github.com/axgle/mahonia
// ConvertToString runs src through the mahonia decoder named by srcCode and
// then passes the resulting bytes through the Translate method of the decoder
// named by tagCode, returning the translated bytes as a string.
//
// The byte count and error returned by Translate are discarded.
// NOTE(review): the decoders returned by mahonia.NewDecoder are used without
// a nil check — confirm how unknown charset names should be handled.
func ConvertToString(src string, srcCode string, tagCode string) string {
	decoded := mahonia.NewDecoder(srcCode).ConvertString(src)
	_, translated, _ := mahonia.NewDecoder(tagCode).Translate([]byte(decoded), true)
	return string(translated)
}
#2
更多评论