通过xpath
尚未加入 goroutine 和 channel 并发机制,如有需要可自行添加
使用sqlite
package main
import (
"database/sql"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"strings"
_ "github.com/mattn/go-sqlite3"
"github.com/moovweb/gokogiri"
"github.com/moovweb/gokogiri/xpath"
)
// Doub is one row of the douban table: the auto-assigned row id followed by
// the show's name, its detail-page URL and the scraped info text.
type Doub struct {
	id              int
	name, url, info string
}
func main() {
// 获取并读取网页
resp, _ := http.Get("http://movie.douban.com/tv/")
page, _ := ioutil.ReadAll(resp.Body)
// 解析web
doc, err := gokogiri.ParseHtml(page)
if err != nil {
fmt.Println("Parsing err")
return
}
// 提供xpath
xps1 := xpath.Compile("//div/table/tr[@class='item']/td[2]/div[@class='pl2']/a")
// 按xpath搜索
ss1, err := doc.Root().Search(xps1)
// sqlite 3
os.Remove("./foo.db")
db, err := sql.Open("sqlite3", "./foo.db")
if err != nil {
log.Fatal(err)
}
defer db.Close()
// 建表
sql := `create table douban(id integer primary key autoincrement , name text, url text, info text);`
db.Exec(sql)
tx, err := db.Begin()
if err != nil {
log.Fatal(err)
}
// 插入准备
stmt, err := tx.Prepare("insert into douban(id, name, url, info) values(?,?,?,?);")
if err != nil {
log.Fatal(err)
}
defer stmt.Close()
// 执行插入
for _, s1 := range ss1 {
resp, _ := http.Get(s1.Attributes()["href"].String())
page, _ := ioutil.ReadAll(resp.Body)
doc, _ := gokogiri.ParseHtml(page)
xps2 := xpath.Compile("//div[@id='info']")
ss2, _ := doc.Root().Search(xps2)
for _, s2 := range ss2 {
_, err = stmt.Exec(nil, strings.Split(s1.Content(), "/")[0], s1.Attributes()["href"].String(), s2.Content())
if err != nil {
log.Fatal(err)
}
}
}
tx.Commit()
// 查询
var douban []Doub = make([]Doub, 0)
rows, err := db.Query("select id, name, url, info from douban")
if err != nil {
log.Fatal(err)
}
defer rows.Close()
for rows.Next() {
var doub Doub
rows.Scan(&doub.id, &doub.name, &doub.url, &doub.info)
douban = append(douban, doub)
}
fmt.Println(douban)
rows.Close()
defer doc.Free()
}
使用mysql
package main
import (
"database/sql"
"fmt"
"io/ioutil"
"log"
"net/http"
"strings"
_ "github.com/go-sql-driver/mysql"
"github.com/moovweb/gokogiri"
"github.com/moovweb/gokogiri/xpath"
)
// Doub is one row of the douban table: the row id followed by the show's
// name, its detail-page URL and the scraped info text.
type Doub struct {
	id              int
	name, url, info string
}
func main() {
// 获取并读取网页
resp, _ := http.Get("http://movie.douban.com/tv/")
page, _ := ioutil.ReadAll(resp.Body)
// 解析web
doc, err := gokogiri.ParseHtml(page)
if err != nil {
fmt.Println("Parsing err")
return
}
// 提供xpath
xps1 := xpath.Compile("//div/table/tr[@class='item']/td[2]/div[@class='pl2']/a")
// 按xpath搜索
ss1, err := doc.Root().Search(xps1)
// mysql
db, err := sql.Open("mysql", "root:root@/test")
if err != nil {
log.Fatalf("Open database error: %s\n", err)
}
defer db.Close()
tx, err := db.Begin()
if err != nil {
log.Fatal(err)
}
// 插入准备
stmt, err := db.Prepare("insert into douban(name, url, info) values(?,?,?);")
defer stmt.Close()
if err != nil {
log.Println(err)
return
}
// 执行插入
for _, s1 := range ss1 {
resp, _ := http.Get(s1.Attributes()["href"].String())
page, _ := ioutil.ReadAll(resp.Body)
doc, _ := gokogiri.ParseHtml(page)
xps2 := xpath.Compile("//div[@id='info']")
ss2, _ := doc.Root().Search(xps2)
for _, s2 := range ss2 {
_, err = stmt.Exec(strings.Split(s1.Content(), "/")[0], s1.Attributes()["href"].String(), s2.Content())
if err != nil {
log.Fatal(err)
}
}
}
tx.Commit()
// 查询
var douban []Doub = make([]Doub, 0)
rows, err := db.Query("select id, name, url, info from douban")
if err != nil {
log.Fatal(err)
}
defer rows.Close()
for rows.Next() {
var doub Doub
rows.Scan(&doub.id, &doub.name, &doub.url, &doub.info)
douban = append(douban, doub)
}
fmt.Println(douban)
rows.Close()
defer doc.Free()
}
有疑问加站长微信联系(非本文作者)