// Command jd downloads a single JD.com product page, extracts the product
// attributes embedded in the page with regular expressions, looks up the
// price from the price endpoint, stores the result in MySQL and finally
// saves the raw HTML to jd.html.
package main

import (
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"
	"unicode/utf16"

	_ "github.com/go-sql-driver/mysql"
)

// JdProduct holds the attributes scraped from a product page plus the price
// returned by the price endpoint.
type JdProduct struct {
	skuid        string
	name         string
	skuidkey     string
	href         string
	src          string
	cat          []int64
	catName      []string
	brand        string
	pType        string
	venderId     string
	shopId       string
	specialAttrs string
	price        string
}

const (
	// offTheShelf is the marker looked for inside specialAttrs.
	offTheShelf = "SaleNo"

	// NOTE(review): credentials are hard-coded in the DSN; consider moving
	// them to configuration before using this outside a local setup.
	mysqlDSN = "root:leeweop@/jd"

	databaseName = "jd"
	tableName    = "test"

	productURL     = "http://item.jd.com/2788767.html"
	priceURLPrefix = "http://p.3.cn/prices/mgets?skuIds=J_"
	htmlDumpPath   = "jd.html"

	httpTimeout = 30 * time.Second

	insertProductSQL = "INSERT INTO test (skuid, name, skuidkey, href, src, cat1, cat2, cat3, brand, pType, venderId, shopId, specialAttrs, price)values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)

// db is declared at package scope but is never assigned in this file; every
// function below receives its handle as a parameter.
var db *sql.DB

// Regular expressions that pull individual attributes out of the product
// page. Each has exactly one named capture group.
var (
	productSkuidFetch        = regexp.MustCompile(`[[:space:]]*skuid:[[:space:]]+(?P<skuid>[0-9]+),`)
	productNameFetch         = regexp.MustCompile(`[[:space:]]*name:[[:space:]]+\'(?P<name>.*)\',`)
	productSkuidkeyFetch     = regexp.MustCompile(`[[:space:]]*skuidkey:[[:space:]]*\'(?P<skuidkey>.*)\',`)
	productHrefFetch         = regexp.MustCompile(`[[:space:]]*href:[[:space:]]+\'(?P<href>.*)\',`)
	productSrcFetch          = regexp.MustCompile(`[[:space:]]*src:[[:space:]]+\'(?P<src>.*)\',`)
	productCatFetch          = regexp.MustCompile(`[[:space:]]*cat:[[:space:]]+\[(?P<cat>.*)\],`)
	productCatNameFetch      = regexp.MustCompile(`[[:space:]]*catName:[[:space:]]+\[(?P<catname>.*)\],`)
	productBrandFetch        = regexp.MustCompile(`[[:space:]]*brand:[[:space:]]+(?P<brand>[0-9]+),`)
	productPTypeFetch        = regexp.MustCompile(`[[:space:]]*pType:[[:space:]]+(?P<ptype>[0-9]+),`)
	productVenderIdFetch     = regexp.MustCompile(`[[:space:]]*venderId:[[:space:]]*(?P<venderId>[0-9]+),`)
	productShopIdFetch       = regexp.MustCompile(`[[:space:]]*shopId:[[:space:]]*\'(?P<shopId>[0-9]+)\',`)
	productSpecialAttrsFetch = regexp.MustCompile(`[[:space:]]*specialAttrs:[[:space:]]*\[(?P<specialAttrs>.*)\],`)

	// sqlIdentifier restricts database/table names that get concatenated
	// into SQL text, since identifiers cannot be bound as parameters.
	sqlIdentifier = regexp.MustCompile(`^[0-9A-Za-z_]+$`)
)

var (
	errInvalidIdentifier = errors.New("invalid SQL identifier")
	errTooFewCategories  = errors.New("product has fewer than three category ids")
	errEmptyPriceResult  = errors.New("price response contains no entries")
	errMissingSkuid      = errors.New("product page did not contain a skuid")
)

// connectDB opens the MySQL handle and verifies that the server is reachable.
// sql.Open alone only validates its arguments, so Ping is used to surface
// connection problems here instead of at the first query.
func connectDB() (db *sql.DB, err error) {
	db, err = sql.Open("mysql", mysqlDSN)
	if err != nil {
		return nil, fmt.Errorf("open mysql handle: %w", err)
	}
	if err = db.Ping(); err != nil {
		// The ping failure is the error worth reporting; a close error
		// on an unusable handle adds nothing.
		_ = db.Close()
		return nil, fmt.Errorf("ping mysql: %w", err)
	}
	return db, nil
}

// createDatabase creates the database called name if it does not exist yet.
// name must consist of letters, digits and underscores only.
func createDatabase(db *sql.DB, name string) error {
	if !sqlIdentifier.MatchString(name) {
		return fmt.Errorf("create database %q: %w", name, errInvalidIdentifier)
	}
	_, err := db.Exec("CREATE DATABASE IF NOT EXISTS " + name + " DEFAULT CHARSET utf8 COLLATE utf8_general_ci")
	if err != nil {
		return fmt.Errorf("create database %q: %w", name, err)
	}
	return nil
}

// createTable creates the product table called name if it does not exist yet
// and prints the statement it executes. name must consist of letters, digits
// and underscores only.
func createTable(db *sql.DB, name string) error {
	if !sqlIdentifier.MatchString(name) {
		return fmt.Errorf("create table %q: %w", name, errInvalidIdentifier)
	}
	command := "CREATE TABLE IF NOT EXISTS " + name + " (skuid BIGINT(64) NOT NULL PRIMARY KEY, name VARCHAR(256) NOT NULL, skuidkey VARCHAR(64) NOT NULL, href VARCHAR(128) NOT NULL, src VARCHAR(128) NOT NULL, cat1 INT(32) NOT NULL, cat2 INT(32) NOT NULL, cat3 INT(32) NOT NULL, brand VARCHAR(128) NOT NULL, pType INT(32) NOT NULL, venderId VARCHAR(64) NOT NULL, shopId VARCHAR(64) NOT NULL, specialAttrs VARCHAR(256) NULL, price DOUBLE NOT NULL)"
	fmt.Println(command)
	if _, err := db.Exec(command); err != nil {
		return fmt.Errorf("create table %q: %w", name, err)
	}
	return nil
}

// insertIntoDB stores product in the test table. The first three entries of
// product.cat fill the cat1..cat3 columns, so at least three are required.
func insertIntoDB(db *sql.DB, product *JdProduct) error {
	if product == nil {
		return errors.New("insert product: nil product")
	}
	if len(product.cat) < 3 {
		return fmt.Errorf("insert product %q: %w", product.skuid, errTooFewCategories)
	}
	_, err := db.Exec(insertProductSQL,
		product.skuid, product.name, product.skuidkey, product.href, product.src,
		product.cat[0], product.cat[1], product.cat[2],
		product.brand, product.pType, product.venderId, product.shopId,
		product.specialAttrs, product.price)
	if err != nil {
		return fmt.Errorf("insert product %q: %w", product.skuid, err)
	}
	return nil
}

// dumpDatabase prints every row of the test table, one product per line.
func dumpDatabase(db *sql.DB) error {
	rows, err := db.Query("select * from test")
	if err != nil {
		return fmt.Errorf("query test table: %w", err)
	}
	defer rows.Close()

	for rows.Next() {
		p := JdProduct{cat: make([]int64, 3)}
		err := rows.Scan(&p.skuid, &p.name, &p.skuidkey, &p.href, &p.src,
			&p.cat[0], &p.cat[1], &p.cat[2],
			&p.brand, &p.pType, &p.venderId, &p.shopId, &p.specialAttrs, &p.price)
		if err != nil {
			return fmt.Errorf("scan test row: %w", err)
		}
		fmt.Printf("%+v\n", p)
	}
	if err := rows.Err(); err != nil {
		return fmt.Errorf("iterate test rows: %w", err)
	}
	return nil
}

// flushDatabase drops the test table.
func flushDatabase(db *sql.DB) {
	// Best effort: the result is deliberately ignored, presumably because
	// the table may not exist yet on a first run.
	_, _ = db.Exec("drop table test")
}

// fetch performs a GET request and returns the whole response body. A
// non-200 status is reported as an error.
func fetch(client *http.Client, url string) ([]byte, error) {
	resp, err := client.Get(url)
	if err != nil {
		return nil, fmt.Errorf("GET %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read body of %s: %w", url, err)
	}
	return body, nil
}

// scrapeField returns the first capture group of re in page and whether re
// matched. On a match it prints the group name followed by the value.
func scrapeField(re *regexp.Regexp, page string) (string, bool) {
	match := re.FindStringSubmatch(page)
	if len(match) < 2 {
		return "", false
	}
	fmt.Println(re.SubexpNames()[1])
	fmt.Println(match[1])
	return match[1], true
}

// parseCategoryIDs converts a comma-separated list such as "9987,653,655"
// into integers. Whitespace around each entry is ignored.
func parseCategoryIDs(raw string) ([]int64, error) {
	parts := strings.Split(raw, ",")
	ids := make([]int64, 0, len(parts))
	for _, part := range parts {
		id, err := strconv.ParseInt(strings.TrimSpace(part), 10, 32)
		if err != nil {
			return nil, fmt.Errorf("parse category id %q: %w", part, err)
		}
		ids = append(ids, id)
	}
	return ids, nil
}

// parseProduct extracts every attribute it can find in page. Attributes
// whose pattern does not match are left at their zero value.
func parseProduct(page string) (JdProduct, error) {
	var p JdProduct
	p.skuid, _ = scrapeField(productSkuidFetch, page)
	p.name, _ = scrapeField(productNameFetch, page)
	p.skuidkey, _ = scrapeField(productSkuidkeyFetch, page)
	p.href, _ = scrapeField(productHrefFetch, page)
	p.src, _ = scrapeField(productSrcFetch, page)

	if raw, ok := scrapeField(productCatFetch, page); ok {
		ids, err := parseCategoryIDs(raw)
		if err != nil {
			return JdProduct{}, err
		}
		p.cat = ids
	}
	if raw, ok := scrapeField(productCatNameFetch, page); ok {
		p.catName = strings.Split(raw, ",")
	}

	p.brand, _ = scrapeField(productBrandFetch, page)
	p.pType, _ = scrapeField(productPTypeFetch, page)
	p.venderId, _ = scrapeField(productVenderIdFetch, page)
	p.shopId, _ = scrapeField(productShopIdFetch, page)
	p.specialAttrs, _ = scrapeField(productSpecialAttrsFetch, page)
	return p, nil
}

// parseUnicodeEscape decodes a leading `\uXXXX` sequence in s. It returns
// the code unit, the number of bytes consumed and whether s started with a
// well-formed escape.
func parseUnicodeEscape(s string) (rune, int, bool) {
	const escapeLen = 6 // backslash, 'u', four hex digits
	if len(s) < escapeLen || s[0] != '\\' || s[1] != 'u' {
		return 0, 0, false
	}
	unit, err := strconv.ParseUint(s[2:escapeLen], 16, 16)
	if err != nil {
		return 0, 0, false
	}
	return rune(unit), escapeLen, true
}

// decodeUnicodeEscapes replaces every `\uXXXX` escape in s with the
// character it denotes and copies all other bytes through unchanged.
// Surrogate pairs written as two consecutive escapes are combined.
func decodeUnicodeEscapes(s string) string {
	var out strings.Builder
	out.Grow(len(s))
	for i := 0; i < len(s); {
		r, n, ok := parseUnicodeEscape(s[i:])
		if !ok {
			out.WriteByte(s[i])
			i++
			continue
		}
		i += n
		if utf16.IsSurrogate(r) {
			if low, m, ok := parseUnicodeEscape(s[i:]); ok {
				// DecodeRune yields U+FFFD for an invalid pair; in
				// that case the second escape is handled on its own.
				if full := utf16.DecodeRune(r, low); full != '\uFFFD' {
					r = full
					i += m
				}
			}
		}
		out.WriteRune(r)
	}
	return out.String()
}

// parsePrice extracts the "p" field of the first entry of the JSON array in
// payload. A missing or non-string "p" yields an empty price without error.
func parsePrice(payload []byte) (string, error) {
	var entries []map[string]interface{}
	if err := json.Unmarshal(payload, &entries); err != nil {
		return "", fmt.Errorf("decode price response: %w", err)
	}
	if len(entries) == 0 {
		return "", errEmptyPriceResult
	}
	price, _ := entries[0]["p"].(string)
	return price, nil
}

// fetchPrice queries the price endpoint for the given sku and product type,
// printing the request URL and the raw response along the way.
func fetchPrice(client *http.Client, skuid, pType string) (string, error) {
	url := priceURLPrefix + skuid + "&type=" + pType
	fmt.Println(url)
	payload, err := fetch(client, url)
	if err != nil {
		return "", err
	}
	fmt.Println(string(payload))
	return parsePrice(payload)
}

// run executes the whole scrape-and-store pipeline and returns the first
// error encountered.
func run() error {
	db, err := connectDB()
	if err != nil {
		return err
	}
	defer db.Close()

	flushDatabase(db)
	if err := createDatabase(db, databaseName); err != nil {
		return err
	}
	if err := createTable(db, tableName); err != nil {
		return err
	}

	client := &http.Client{Timeout: httpTimeout}
	body, err := fetch(client, productURL)
	if err != nil {
		return err
	}

	// Convert once; every pattern is matched against the same string.
	product, err := parseProduct(string(body))
	if err != nil {
		return err
	}
	if product.skuid == "" {
		return errMissingSkuid
	}
	if strings.Contains(product.specialAttrs, offTheShelf) {
		// Off-the-shelf products are only marked by a blank line in
		// the output; processing continues.
		fmt.Println()
	}

	product.price, err = fetchPrice(client, product.skuid, product.pType)
	if err != nil {
		return err
	}

	product.name = decodeUnicodeEscapes(product.name)
	fmt.Println(product.name)

	if err := insertIntoDB(db, &product); err != nil {
		return err
	}
	if err := dumpDatabase(db); err != nil {
		return err
	}

	if err := ioutil.WriteFile(htmlDumpPath, body, 0666); err != nil {
		return fmt.Errorf("write %s: %w", htmlDumpPath, err)
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "jd:", err)
		os.Exit(1)
	}
}
// If you have questions, contact the site administrator on WeChat (not the author of this post).