package main
import ( // "errors" "fmt" "io/ioutil" "net/http" "os" "regexp" "strings" )
// Mall is the root of the scraped category tree: a shop name plus its
// top-level categories in the order they were found.
type Mall struct {
	name string
	cat  []*Catagory
}

// Catagory is a top-level category. main fills in name and subCat; id and
// link are not assigned by any code visible in this file.
// (The "Catagory" spelling is kept because the rest of the file uses it.)
type Catagory struct {
	id     int64
	name   string
	link   string
	subCat []*SubCatagory
}

// SubCatagory is a second-level category. main fills in name, link and
// detailCat; id is not assigned by any code visible in this file.
type SubCatagory struct {
	id        int64
	name      string
	link      string
	detailCat []*DetailCatagory
}

// DetailCatagory is a third-level (leaf) category. main fills in name and
// link; id and goods are not assigned by any code visible in this file.
type DetailCatagory struct {
	id    int64
	name  string
	link  string
	goods map[string]interface{}
}

// Jd is the package-level Mall that main populates and then prints.
var Jd = Mall{name: "Jd"}
// topCatagoryStart is tested against every line of the page; on a match main
// sets its top flag and clears the sub and detail flags.
// NOTE(review): the argument below is not a valid Go string literal - the
// raw-string backquotes and most of the expression appear to have been lost.
// Restore the pattern from the original source before building.
var topCatagoryStart = regexp.MustCompile([[:space:]]*\
)
// topCatagoryFetch is applied while the top flag is set; main uses submatch 1
// of a matching line as the name of a new Catagory and then sets the sub flag.
// NOTE(review): the argument below is not a valid Go string literal - it stops
// right after the opening of a named group ("(?P"), so the rest of the
// expression appears to have been lost. Restore it from the original source.
var topCatagoryFetch = regexp.MustCompile([[:space:]]*.*\(?P
)
// topCatagoryEnd is tested only while main's detail flag is set; on a match
// main clears the top, sub and detail flags.
// NOTE(review): the argument below is not a valid Go string literal - the
// raw-string backquotes and most of the expression appear to have been lost.
// Restore the pattern from the original source before building.
var topCatagoryEnd = regexp.MustCompile([[:space:]]*\
)
// subCatagoryFetch is compiled from an empty pattern and is not referenced by
// any other code visible in this file.
// NOTE(review): possibly a placeholder, or a pattern whose text was lost - confirm.
var subCatagoryFetch = regexp.MustCompile(``)
// detailCatagoryStart is applied while the sub flag is set; for a matching line
// main stores submatch 1 as the link and submatch 2 as the name of a new
// SubCatagory, then sets the detail flag.
// NOTE(review): the argument below is not a valid Go string literal - the
// raw-string backquotes and most of the expression (including both capture
// groups) appear to have been lost. Restore it from the original source.
var detailCatagoryStart = regexp.MustCompile([[:space:]]*\
)
// detailCatagoryFetch is applied while the detail flag is set; for a matching
// line main stores submatch 1 as the link and submatch 2 as the name of a new
// DetailCatagory.
// NOTE(review): the argument below is not a valid Go string literal - parts of
// the expression (the start of the first group and everything after the
// second "(?P") appear to have been lost. Restore it from the original source.
var detailCatagoryFetch = regexp.MustCompile([[:space:]]*.*\.*)\"[[:space:]]+target="_blank"\>(?P
)
func dumpJdCatagory(mall *Mall) { fmt.Println(mall.name) for , c := range Jd.cat { fmt.Println(c) fmt.Printf("Catagory: %s\n", c.name) for , sc := range c.subCat { fmt.Printf("SubCatagory: %s, Link: %s\n", sc.name, sc.link) for _, dc := range sc.detailCat { fmt.Printf("DetailCatagory: %s. Link: %s\n", dc.name, dc.link) } } } }
func main() { resp, err := http.Get("http://www.jd.com/allSort.aspx") if err != nil { panic(err.Error()) }
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
panic(err.Error())
}
// fmt.Println(string(body))
file, err := os.Create("jd_list.html")
if err != nil {
panic(err.Error())
}
_, err = file.Write(body)
if err != nil {
panic(err.Error())
}
Jd.cat = make([]*Catagory, 0, 100)
var top = false
var sub = false
var detail = false
var cat *Catagory
var subCat *SubCatagory
var detailCat *DetailCatagory
s := strings.Split(string(body), "\n")
for _, line := range s {
if topCatagoryStart.MatchString(line) {
top = true
sub = false
detail = false
}
if top == true {
if topCatagoryFetch.MatchString(line) {
sub = true
/*
fmt.Println(topCatagoryFetch.FindStringSubmatch(line)[1])
cat = &Catagory{name: topCatagoryFetch.FindStringSubmatch(line)[1]}
cat.subCat = make([]*SubCatagory, 40, 100)
*/
cat = new(Catagory)
cat.name = topCatagoryFetch.FindStringSubmatch(line)[1]
cat.subCat = make([]*SubCatagory, 0, 100)
Jd.cat = append(Jd.cat, cat)
//fmt.Println("Catagory")
//fmt.Println(cat)
}
}
if sub == true {
if detailCatagoryStart.MatchString(line) {
/*
fmt.Println(detailCatagoryStart.FindStringSubmatch(line)[1])
fmt.Println(detailCatagoryStart.FindStringSubmatch(line)[2])
subCat = &SubCatagory{name: detailCatagoryStart.FindStringSubmatch(line)[2], link: detailCatagoryStart.FindStringSubmatch(line)[1]}
subCat.detailCat = make([]*DetailCatagory, 50, 100)
*/
subCat = new(SubCatagory)
subCat.name = detailCatagoryStart.FindStringSubmatch(line)[2]
subCat.link = detailCatagoryStart.FindStringSubmatch(line)[1]
subCat.detailCat = make([]*DetailCatagory, 0, 100)
cat.subCat = append(cat.subCat, subCat)
//fmt.Println("SubCatagory")
//fmt.Println(subCat)
detail = true
}
}
if detail == true {
if detailCatagoryFetch.MatchString(line) {
/*
fmt.Println(detailCatagoryFetch.FindStringSubmatch(line)[1])
fmt.Println(detailCatagoryFetch.FindStringSubmatch(line)[2])
detailCat = &DetailCatagory{name: detailCatagoryFetch.FindStringSubmatch(line)[2], link: detailCatagoryFetch.FindStringSubmatch(line)[1]}
*/
detailCat = new(DetailCatagory)
detailCat.name = detailCatagoryFetch.FindStringSubmatch(line)[2]
detailCat.link = detailCatagoryFetch.FindStringSubmatch(line)[1]
subCat.detailCat = append(subCat.detailCat, detailCat)
//fmt.Println("DetailCatagory")
// fmt.Println(detailCat)
}
if topCatagoryEnd.MatchString(line) {
top = false
sub = false
detail = false
}
}
}
dumpJdCatagory(&Jd)
//fmt.Println(s)
//fmt.Printf("%d bytes has been write to jd_list.html", n)
}
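// Site notice (not part of the program): for questions, contact the site administrator on WeChat (not the author of this article).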
