自动检测 HTML 页面的编码格式，并将内容转码为 UTF-8:
需要依赖 golang.org/x/text 和 golang.org/x/net 这两个外部库。
// Command main downloads an HTML page, detects its character encoding
// (for example GBK, GB2312 or GB18030) and prints the body decoded to UTF-8.
package main

import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"

	"golang.org/x/net/html/charset"
	"golang.org/x/text/encoding"
	"golang.org/x/text/transform"
)

// sniffLen is how many leading bytes of the document are handed to
// charset.DetermineEncoding.
const sniffLen = 1024

// determineEncoding inspects up to the first sniffLen bytes of r and returns
// the encoding reported by charset.DetermineEncoding for them.
//
// Callers should pass a *bufio.Reader and keep reading from that same reader
// afterwards: bufio.NewReader returns an existing *bufio.Reader unchanged when
// its buffer is large enough, so the peeked bytes stay available. If a plain
// io.Reader is passed, the bytes buffered here are consumed from r and are
// lost to any later read on r.
//
// A document shorter than sniffLen bytes is not an error; whatever bytes were
// available are used. Any other read error causes a panic.
func determineEncoding(r io.Reader) encoding.Encoding {
	head, err := bufio.NewReader(r).Peek(sniffLen)
	// Peek reports io.EOF together with the partial data when the input is
	// shorter than requested; that is still enough to sniff the encoding.
	if err != nil && err != io.EOF {
		panic(err)
	}
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}

func main() {
	resp, err := http.Get("http://www.zhenai.com/zhenghun")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Check the status before touching the body so a failed request is
	// reported instead of silently producing no output.
	if resp.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr, "unexpected status: %s\n", resp.Status)
		return
	}

	// One buffered reader is shared by detection and decoding so that the
	// bytes peeked during detection are not dropped from the decoded output.
	body := bufio.NewReader(resp.Body)
	e := determineEncoding(body)
	reader := transform.NewReader(body, e.NewDecoder())

	bodyBytes, err := ioutil.ReadAll(reader)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", bodyBytes)
}
有疑问加站长微信联系(非本文作者)