用 Golang 爬取拉勾的数据,并生成 CSV 文件

momaek · 2017-11-23 06:15:57 · 1511 次点击 · 预计阅读时间 7 分钟 · 大约8小时之前 开始浏览    
这是一个创建于 2017-11-23 06:15:57 的文章,其中的信息可能已经有所发展或是发生改变。

package main

import (
    "encoding/json"
    "encoding/csv"
    "fmt"
    "net/http"
    "net/url"
    "strconv"
    "strings"
    "time"
    "os"
    "bytes"
)

// call POSTs param, form-encoded, to Lagou's positionAjax.json search endpoint
// through http.DefaultClient and, when res is non-nil, decodes the JSON
// response body into res.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a status other than 200 OK, or when the body cannot be
// decoded into res.
func call(param url.Values, res interface{}) error {
    const endpoint = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=0"

    req, err := http.NewRequest(http.MethodPost, endpoint, strings.NewReader(param.Encode()))
    if err != nil {
        return fmt.Errorf("building request: %w", err)
    }

    // Browser-like headers sent with every search request.
    req.Header.Set("Origin", "https://www.lagou.com")
    req.Header.Set("X-Anit-Forge-Code", "0")
    req.Header.Set("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4")
    req.Header.Set("X-Requested-With", "XMLHttpRequest")
    // Accept-Encoding is intentionally not set. net/http only decompresses a
    // gzip response transparently when it added the header itself; asking for
    // "gzip, deflate, br" by hand would hand compressed bytes to the JSON
    // decoder below.
    req.Header.Set("Connection", "keep-alive")
    req.Header.Set("Pragma", "no-cache")
    req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")
    req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
    req.Header.Set("Accept", "application/json, text/javascript, */*; q=0.01")
    req.Header.Set("Cache-Control", "no-cache")
    req.Header.Set("Referer", "https://www.lagou.com/jobs/list_go?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=")
    req.Header.Set("X-Anit-Forge-Token", "None")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return fmt.Errorf("unexpected response status %d", resp.StatusCode)
    }

    // Callers that do not care about the payload pass a nil res.
    if res == nil {
        return nil
    }

    if err := json.NewDecoder(resp.Body).Decode(res); err != nil {
        return fmt.Errorf("decoding response: %w", err)
    }

    return nil
}

// pagesize is the upper bound of the page loop in Lagou. Lagou only has 30
// pages, so the value is simply hard-coded to 30.
const pagesize = 30

// Lagou searches Lagou for postings matching the keyword language, requesting
// pages 1 through pagesize one at a time, and returns everything collected as
// CSV rows produced by ConvertWorkInfo2Csv.
//
// If a request fails, or a response carries a non-zero code, the problem is
// printed and nil is returned.
func Lagou(language string) (res [][]string) {
    param := url.Values{}
    param.Set("first", "true")
    param.Set("kd", language)

    workInfos := make([]WorkInfo, 0, 400)

    for i := 1; i <= pagesize; i++ {
        fmt.Printf("now is %d page \n", i)
        param.Set("pn", strconv.Itoa(i))

        ret := LagouRet{}
        if err := call(param, &ret); err != nil {
            fmt.Println("err: ", err)
            return nil
        }

        // A non-zero code is an API-level failure; report its message instead
        // of dropping it silently.
        if ret.Code != 0 {
            fmt.Println("err: ", ret.Msg)
            return nil
        }

        workInfos = append(workInfos, ret.Content.PositionResult.Result...)

        // Only the first request is sent with first=true.
        param.Set("first", "false")

        // Pause between requests, but not after the final page.
        if i < pagesize {
            time.Sleep(10 * time.Second)
        }
    }

    return ConvertWorkInfo2Csv(workInfos)
}

// ConvertWorkInfo2Csv flattens infos into CSV rows. The first row is the
// column header; each following row holds one posting in the same column
// order. List-valued fields are joined with ":", and the location column is
// the city followed by a space and the joined business zones.
func ConvertWorkInfo2Csv(infos []WorkInfo) (res [][]string) {
    header := []string{"工作年限", "教育水平", "工资", "职位名称", "职位标签", "地点", "公司名称", "公司类型", "融资情况", "公司标签"}

    res = make([][]string, 0, len(infos)+1)
    res = append(res, header)

    for i := range infos {
        info := &infos[i]
        location := info.City + " " + strings.Join(info.BusinessZones, ":")

        res = append(res, []string{
            info.WorkYear,
            info.Education,
            info.Salary,
            info.PositionName,
            strings.Join(info.PositionLables, ":"),
            location,
            info.CompanyFullName,
            info.IndustryField,
            info.FinanceStage,
            strings.Join(info.CompanyLabelList, ":"),
        })
    }

    return res
}

// Error holds the "code" and "msg" fields of a JSON response. It is embedded
// in LagouRet, and the Lagou function treats a non-zero Code as a failure.
type Error struct {
    Code int    `json:"code"`
    Msg  string `json:"msg"`
}

// LagouRet is the structure a search response is decoded into: the embedded
// Error fields plus a "content" object. The postings themselves are found at
// content.positionResult.result.
type LagouRet struct {
    Error
    Content struct {
        HrMap          map[string]HrInfo `json:"hrInfoMap"`
        PositionResult struct {
            Result []WorkInfo `json:"result"`
        } `json:"positionResult"`
    } `json:"content"`
}

// HrInfo is the value type of the "hrInfoMap" object inside a response's
// content (see LagouRet). Nothing in this file reads these values.
// NOTE(review): what the map keys identify is not visible here — confirm
// against a real response before relying on them.
type HrInfo struct {
    CanTalk      bool   `json:"canTalk"`
    Phone        string `json:"phone"`
    Portrait     string `json:"portrait"`
    PositionName string `json:"positionName"`
    RealName     string `json:"realName"`
    ReceiveEmail string `json:"receiveEmail"`
    UserLevel    string `json:"userLevel"`
}

// WorkInfo is one element of the "result" array in a search response (see
// LagouRet). ConvertWorkInfo2Csv exports a subset of these fields: WorkYear,
// Education, Salary, PositionName, PositionLables, City, BusinessZones,
// CompanyFullName, IndustryField, FinanceStage and CompanyLabelList.
//
// Field spellings such as PositionLables follow the JSON keys in the tags.
// Fields typed interface{} accept whatever JSON value is present.
type WorkInfo struct {
    WorkYear              string      `json:"workYear"`
    Education             string      `json:"education"`
    JobNature             string      `json:"jobNature"`
    CompanyID             int         `json:"companyId"`
    PositionName          string      `json:"positionName"`
    PositionID            int         `json:"positionId"`
    CreateTime            string      `json:"createTime"`
    City                  string      `json:"city"`
    CompanyLogo           string      `json:"companyLogo"`
    IndustryField         string      `json:"industryField"`
    PositionAdvantage     string      `json:"positionAdvantage"`
    Salary                string      `json:"salary"`
    CompanySize           string      `json:"companySize"`
    Approve               int         `json:"approve"`
    Score                 int         `json:"score"`
    CompanyShortName      string      `json:"companyShortName"`
    PositionLables        []string    `json:"positionLables"`
    IndustryLables        []string    `json:"industryLables"`
    PublisherID           int         `json:"publisherId"`
    FinanceStage          string      `json:"financeStage"`
    CompanyLabelList      []string    `json:"companyLabelList"`
    District              string      `json:"district"`
    BusinessZones         []string    `json:"businessZones"`
    FormatCreateTime      string      `json:"formatCreateTime"`
    AdWord                int         `json:"adWord"`
    CompanyFullName       string      `json:"companyFullName"`
    ImState               string      `json:"imState"`
    LastLogin             int64       `json:"lastLogin"`
    Explain               interface{} `json:"explain"`
    Plus                  interface{} `json:"plus"`
    PcShow                int         `json:"pcShow"`
    AppShow               int         `json:"appShow"`
    Deliver               int         `json:"deliver"`
    GradeDescription      interface{} `json:"gradeDescription"`
    PromotionScoreExplain interface{} `json:"promotionScoreExplain"`
    FirstType             string      `json:"firstType"`
    SecondType            string      `json:"secondType"`
    IsSchoolJob           int         `json:"isSchoolJob"`
}

// writeCsv2File encodes result as CSV and writes it to the file fileName,
// which is created with os.Create.
//
// The rows are encoded into an in-memory buffer first, so the file is only
// created once encoding has succeeded. The function has no return value;
// any encoding, create, write or close failure is printed and the function
// returns.
func writeCsv2File(result [][]string, fileName string) {
    csvBuf := new(bytes.Buffer)
    writer := csv.NewWriter(csvBuf)

    // WriteAll flushes the writer itself, so no separate Flush call is needed.
    if err := writer.WriteAll(result); err != nil {
        fmt.Println(err)
        return
    }

    f, err := os.Create(fileName)
    if err != nil {
        fmt.Println(err)
        return
    }

    _, err = f.Write(csvBuf.Bytes())
    // Always close the file; report the close error only when the write
    // itself succeeded, so the first failure is the one printed.
    if cerr := f.Close(); err == nil {
        err = cerr
    }
    if err != nil {
        fmt.Println(err)
    }
}

func main() {
    // 这个地方的参数是你想要查的职位相关的东西
    writeCsv2File(Lagou("go"), "lagou.csv")
}

有疑问加站长微信联系(非本文作者)

入群交流(和以上内容无关):加入Go大咖交流群,或添加微信:liuxiaoyan-s 备注:入群;或加QQ群:692541889

1511 次点击  
加入收藏 微博
暂无回复
添加一条新回复 (您需要 登录 后才能回复 没有账号 ?)
  • 请尽量让自己的回复能够对别人有帮助
  • 支持 Markdown 格式, **粗体**、~~删除线~~、`单行代码`
  • 支持 @ 本站用户;支持表情(输入 : 提示),见 Emoji cheat sheet
  • 图片支持拖拽、截图粘贴等方式上传