package main
import (
// "errors"
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
"strings"
)
// Mall is the root of a scraped shop catalogue: a display name plus the
// list of top-level categories found for it.
type Mall struct {
	// name is the label printed first by dumpJdCatagory.
	name string
	// cat holds the top-level categories.
	cat []*Catagory
}
// Catagory is one top-level category of a Mall together with the
// subcategories grouped under it.
type Catagory struct {
	// id is not assigned anywhere in the visible code.
	id int64
	// name is the category title; link is its URL (link is not assigned
	// anywhere in the visible code).
	name, link string
	// subCat lists the second-level categories under this one.
	subCat []*SubCatagory
}
// SubCatagory is a second-level category: a named link plus the detail
// categories listed under it.
type SubCatagory struct {
	// id is not assigned anywhere in the visible code.
	id int64
	// name is the subcategory title and link is the URL captured with it
	// (without the leading "//").
	name, link string
	// detailCat lists the third-level categories under this one.
	detailCat []*DetailCatagory
}
// DetailCatagory is a third-level (leaf) category: a named link plus an
// open-ended bag of goods.
type DetailCatagory struct {
	// id is not assigned anywhere in the visible code.
	id int64
	// name is the category title and link is the URL captured with it
	// (without the leading "//").
	name, link string
	// goods is not populated anywhere in the visible code.
	goods map[string]interface{}
}
// Jd is the package-level catalogue for the JD mall. It starts with only
// its name set; main fills in the categories.
var Jd = Mall{
	name: "Jd",
}
// Line-oriented patterns used to pick the category tree out of the JD
// "all categories" HTML page. None of them is anchored, so each one matches
// anywhere within a line.
var (
	// topCatagoryStart matches the <div> that opens a top-level category.
	topCatagoryStart = regexp.MustCompile(`[[:space:]]*\<div[[:space:]]+class="category-item m"\>`)

	// topCatagoryFetch captures the text between <span> and </span> as the
	// top-level category name (group "topC").
	topCatagoryFetch = regexp.MustCompile(`[[:space:]]*.*\<span\>(?P<topC>.*)\</span\>`)

	// topCatagoryEnd matches a closing </div>.
	topCatagoryEnd = regexp.MustCompile(`[[:space:]]*\</div\>`)

	// subCatagoryFetch is an empty pattern (it matches every string) and is
	// not referenced by the visible code.
	subCatagoryFetch = regexp.MustCompile(``)

	// detailCatagoryStart matches a <dt><a ...>name</a></dt> header and
	// captures the link after "//" (group "cat2link") and the Han-script
	// name (group "cat2name").
	detailCatagoryStart = regexp.MustCompile(`[[:space:]]*\<dt\>\<a[[:space:]]+href=\"//(?P<cat2link>.*)\"[[:space:]]+target="_blank"\>(?P<cat2name>[\p{Han}]+)\</a\>\</dt\>`)

	// detailCatagoryFetch matches any <a ...>name</a> link on a line and
	// captures the same two groups; a line matched by detailCatagoryStart
	// is matched by this pattern as well.
	detailCatagoryFetch = regexp.MustCompile(`[[:space:]]*.*\<a[[:space:]]+href=\"//(?P<cat2link>.*)\"[[:space:]]+target="_blank"\>(?P<cat2name>[\p{Han}]+)\</a\>`)
)
// dumpJdCatagory prints the category tree of mall to standard output.
//
// The mall name is printed first. For every top-level category it prints the
// raw struct value followed by a "Catagory:" line, then one "SubCatagory:"
// line per subcategory and one "DetailCatagory:" line per detail category,
// each with its name and link.
//
// mall must be non-nil.
func dumpJdCatagory(mall *Mall) {
	fmt.Println(mall.name)
	// Walk the mall that was passed in, so the printed tree always belongs
	// to the printed name.
	for _, c := range mall.cat {
		fmt.Println(c) // raw dump of the category value, kept from the original output
		fmt.Printf("Catagory: %s\n", c.name)
		for _, sc := range c.subCat {
			fmt.Printf("SubCatagory: %s, Link: %s\n", sc.name, sc.link)
			for _, dc := range sc.detailCat {
				fmt.Printf("DetailCatagory: %s. Link: %s\n", dc.name, dc.link)
			}
		}
	}
}
// main downloads the JD "all categories" page, saves the raw HTML to
// jd_list.html in the working directory, parses the category tree into
// Jd.cat and prints it with dumpJdCatagory. Any failure aborts with a panic.
func main() {
	resp, err := http.Get("http://www.jd.com/allSort.aspx")
	if err != nil {
		panic(err.Error())
	}
	defer resp.Body.Close()

	// Without this check an error page would be saved and parsed as if it
	// were the category listing.
	if resp.StatusCode != http.StatusOK {
		panic("unexpected HTTP status: " + resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err.Error())
	}
	if err := savePage("jd_list.html", body); err != nil {
		panic(err.Error())
	}

	Jd.cat = parseJdCatagories(string(body))
	dumpJdCatagory(&Jd)
}

// savePage writes data to the file at path, creating or truncating it, and
// always closes the file. It returns the first error from create, write or
// close.
func savePage(path string, data []byte) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so report it unless an
		// earlier error is already being returned.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()
	_, err = f.Write(data)
	return err
}

// parseJdCatagories scans page line by line and builds the three-level
// category tree using the package-level patterns. It returns the top-level
// categories in the order they were found.
func parseJdCatagories(page string) []*Catagory {
	var (
		cats   []*Catagory
		cat    *Catagory    // category currently being filled
		subCat *SubCatagory // subcategory currently being filled

		// Scanner state: inside a top-level block, past its title, and
		// past at least one subcategory header, respectively.
		inTop, inSub, inDetail bool
	)

	for _, line := range strings.Split(page, "\n") {
		if topCatagoryStart.MatchString(line) {
			inTop, inSub, inDetail = true, false, false
		}

		if inTop {
			if m := topCatagoryFetch.FindStringSubmatch(line); m != nil {
				cat = &Catagory{name: m[1]}
				cats = append(cats, cat)
				inSub = true
			}
		}

		// inSub is only set after cat has been assigned, so cat is non-nil.
		if inSub {
			if m := detailCatagoryStart.FindStringSubmatch(line); m != nil {
				subCat = &SubCatagory{name: m[2], link: m[1]}
				cat.subCat = append(cat.subCat, subCat)
				inDetail = true
			}
		}

		if !inDetail {
			continue
		}

		// NOTE(review): a <dt> header line matched just above also matches
		// detailCatagoryFetch, so each subcategory is recorded as its own
		// first detail entry. Kept as-is; confirm whether that is intended.
		if m := detailCatagoryFetch.FindStringSubmatch(line); m != nil {
			subCat.detailCat = append(subCat.detailCat, &DetailCatagory{name: m[2], link: m[1]})
		}

		// NOTE(review): the closing </div> is only looked for while in the
		// detail state, exactly as before; confirm against the real page.
		if topCatagoryEnd.MatchString(line) {
			inTop, inSub, inDetail = false, false, false
		}
	}
	return cats
}
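// Site notice (not part of the program): if you have questions, contact the site administrator via WeChat (not the author of this article).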