golang HTTP ReadRequest

golang_yh · · 7553 次点击 · · 开始浏览

这是一个创建于的文章，其中的信息可能已经有所发展或是发生改变。

服务端收到client发送的Http数据后怎么解析成具体的Request对象呢？下面来看看golang是如何处理的

首先看一个具体应用实例：仅仅包含HTTP里面的Method,URL和Proto

package main

import (
	"bufio"
	"fmt"
	_ "io"
	"net/http"
	"net/textproto"
	"strings"
)

func main() {
	request()

}

//当然这个实例比较简单，仅仅是通过\n来解析出几个字段而已
func request() {
	paths := "/xxx"
	br := bufio.NewReader(strings.NewReader("GET " + paths + " HTTP/1.1\r\nHost: test\r\n\r\n"))
	tp := textproto.NewReader(br)
	var s string
	var err error
	if s, err = tp.ReadLine(); err != nil {

		fmt.Printf("prase err: %v \n", err)
	}
	fmt.Printf("readLine restult: %v \n", s)

	req := new(http.Request)

	req.Method, req.RequestURI, req.Proto, _ = parseRequestLine(s)

	fmt.Printf("prase result %v \n", req)

}

func parseRequestLine(line string) (method, requestURI, proto string, ok bool) {
	s1 := strings.Index(line, " ")
	s2 := strings.Index(line[s1+1:], " ")
	if s1 < 0 || s2 < 0 {
		return
	}
	s2 += s1 + 1
	return line[:s1], line[s1+1 : s2], line[s2+1:], true
}

//--------------------------------输出结果为:
//readLine restult: GET /xxx HTTP/1.1 
//prase result &{GET <nil> HTTP/1.1 0 0 map[] <nil> 0 [] false  map[] map[] <nil> map[]  /xxx <nil>}

再来深入看看golang的http源码

func ReadRequest(b *bufio.Reader) (req *Request, err error) {

	tp := newTextprotoReader(b) //封装一个Reader结构
	req = new(Request)

	// First line: GET /index.html HTTP/1.0  //最开始的一行信息的解析
	var s string
	if s, err = tp.ReadLine(); err != nil {
		return nil, err
	}
	defer func() {
		putTextprotoReader(tp)
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
	}()

	var ok bool //最开始的一行信息的解析
	req.Method, req.RequestURI, req.Proto, ok = parseRequestLine(s)
	if !ok {
		return nil, &badStringError{"malformed HTTP request", s}
	}
	rawurl := req.RequestURI //client请求的URL
	//HTTP对应的ProtoMajor和ProtoMinor
	if req.ProtoMajor, req.ProtoMinor, ok = ParseHTTPVersion(req.Proto); !ok {
		return nil, &badStringError{"malformed HTTP version", req.Proto}
	}

	// CONNECT requests are used two different ways, and neither uses a full URL:
	// The standard use is to tunnel HTTPS through an HTTP proxy.
	// It looks like "CONNECT www.google.com:443 HTTP/1.1", and the parameter is
	// just the authority section of a URL. This information should go in req.URL.Host.
	//
	// The net/rpc package also uses CONNECT, but there the parameter is a path
	// that starts with a slash. It can be parsed with the regular URL parser,
	// and the path will end up in req.URL.Path, where it needs to be in order for
	// RPC to work.
	
	//处理Method == "CONNECT" 的具体情况
	justAuthority := req.Method == "CONNECT" && !strings.HasPrefix(rawurl, "/")
	if justAuthority {
		rawurl = "http://" + rawurl
	}
        
        //HTTP URL解析
	if req.URL, err = url.ParseRequestURI(rawurl); err != nil {
		return nil, err
	}

	if justAuthority {
		// Strip the bogus "http://" back off.
		req.URL.Scheme = ""
	}

	// Subsequent lines: Key: value.
	mimeHeader, err := tp.ReadMIMEHeader() //HTTP Header中的key-value数据
	if err != nil {
		return nil, err
	}
	req.Header = Header(mimeHeader)

	// RFC2616: Must treat
	//	GET /index.html HTTP/1.1
	//	Host: www.google.com
	// and
	//	GET http://www.google.com/index.html HTTP/1.1
	//	Host: doesntmatter
	// the same.  In the second case, any Host line is ignored.
	req.Host = req.URL.Host
	if req.Host == "" {
		req.Host = req.Header.get("Host")
	}
	delete(req.Header, "Host") //把Header里面的map中的Host去掉

	fixPragmaCacheControl(req.Header) //设置Cache-Control字段

	err = readTransfer(req, b)
	if err != nil {
		return nil, err
	}

	req.Close = shouldClose(req.ProtoMajor, req.ProtoMinor, req.Header, false) //是否需要关闭
	return req, nil
}

ParseHTTPVersion（解析HTTP的ProtoMajor 和 ProtoMinor）

func ParseHTTPVersion(vers string) (major, minor int, ok bool) {
	const Big = 1000000 // arbitrary upper bound 
	switch vers {
	case "HTTP/1.1":
		return 1, 1, true
	case "HTTP/1.0":
		return 1, 0, true
	}
	if !strings.HasPrefix(vers, "HTTP/") {
		return 0, 0, false
	}
	dot := strings.Index(vers, ".")
	if dot < 0 {
		return 0, 0, false
	}
	major, err := strconv.Atoi(vers[5:dot])
	if err != nil || major < 0 || major > Big {
		return 0, 0, false
	}
	minor, err = strconv.Atoi(vers[dot+1:])
	if err != nil || minor < 0 || minor > Big {
		return 0, 0, false
	}
	return major, minor, true
}

设置 Cache-Control字段

func fixPragmaCacheControl(header Header) {
	if hp, ok := header["Pragma"]; ok && len(hp) > 0 && hp[0] == "no-cache" {
		if _, presentcc := header["Cache-Control"]; !presentcc {
			header["Cache-Control"] = []string{"no-cache"}
		}
	}
}

是否需要关闭

func shouldClose(major, minor int, header Header, removeCloseHeader bool) bool {
	if major < 1 {
		return true
	} else if major == 1 && minor == 0 { //这种情况判断长连接
		if !strings.Contains(strings.ToLower(header.get("Connection")), "keep-alive") {
			return true
		}
		return false
	} else {
		// TODO: Should split on commas, toss surrounding white space,
		// and check each field. //判断Connection字段
		if strings.ToLower(header.get("Connection")) == "close" {
			if removeCloseHeader {
				header.Del("Connection")
			}
			return true
		}
	}
	return false
}

比较长的区分Request和Response的readTransfer方法

// msg is *Request or *Response.
func readTransfer(msg interface{}, r *bufio.Reader) (err error) {
	t := &transferReader{RequestMethod: "GET"}

	// Unify input
	isResponse := false
	switch rr := msg.(type) {
	case *Response:
		t.Header = rr.Header 
		t.StatusCode = rr.StatusCode
		t.ProtoMajor = rr.ProtoMajor
		t.ProtoMinor = rr.ProtoMinor
		t.Close = shouldClose(t.ProtoMajor, t.ProtoMinor, t.Header, true)
		isResponse = true
		if rr.Request != nil {
			t.RequestMethod = rr.Request.Method
		}
	case *Request:
		t.Header = rr.Header
		t.ProtoMajor = rr.ProtoMajor
		t.ProtoMinor = rr.ProtoMinor
		// Transfer semantics for Requests are exactly like those for
		// Responses with status code 200, responding to a GET method
		t.StatusCode = 200
	default:
		panic("unexpected type")
	}

	// Default to HTTP/1.1
	if t.ProtoMajor == 0 && t.ProtoMinor == 0 {
		t.ProtoMajor, t.ProtoMinor = 1, 1
	}

	// Transfer encoding, content length
	t.TransferEncoding, err = fixTransferEncoding(t.RequestMethod, t.Header)
	if err != nil {
		return err
	}

	realLength, err := fixLength(isResponse, t.StatusCode, t.RequestMethod, t.Header, t.TransferEncoding)
	if err != nil {
		return err
	}
	if isResponse && t.RequestMethod == "HEAD" {
		if n, err := parseContentLength(t.Header.get("Content-Length")); err != nil {
			return err
		} else {
			t.ContentLength = n
		}
	} else {
		t.ContentLength = realLength
	}

	// Trailer
	t.Trailer, err = fixTrailer(t.Header, t.TransferEncoding)
	if err != nil {
		return err
	}

	// If there is no Content-Length or chunked Transfer-Encoding on a *Response
	// and the status is not 1xx, 204 or 304, then the body is unbounded.
	// See RFC2616, section 4.4.
	switch msg.(type) {
	case *Response:
		if realLength == -1 &&
			!chunked(t.TransferEncoding) &&
			bodyAllowedForStatus(t.StatusCode) {
			// Unbounded body.
			t.Close = true
		}
	}

	// Prepare body reader.  ContentLength < 0 means chunked encoding
	// or close connection when finished, since multipart is not supported yet
	//注意这里ContentLength < 0 表示 chunked encoding 或者已经接受完数据，关闭连接
	
	switch {
	case chunked(t.TransferEncoding):
		if noBodyExpected(t.RequestMethod) {
			t.Body = eofReader
		} else {
			t.Body = &body{src: internal.NewChunkedReader(r), hdr: msg, r: r, closing: t.Close}
		}
	case realLength == 0:
		t.Body = eofReader
	case realLength > 0:
		t.Body = &body{src: io.LimitReader(r, realLength), closing: t.Close}
	default:
		// realLength < 0, i.e. "Content-Length" not mentioned in header
		if t.Close {
			// Close semantics (i.e. HTTP/1.0)
			t.Body = &body{src: r, closing: t.Close}
		} else {
			// Persistent connection (i.e. HTTP/1.1)
			t.Body = eofReader
		}
	}

	// Unify output
	switch rr := msg.(type) {
	case *Request:
		rr.Body = t.Body
		rr.ContentLength = t.ContentLength
		rr.TransferEncoding = t.TransferEncoding
		rr.Close = t.Close
		rr.Trailer = t.Trailer
	case *Response:
		rr.Body = t.Body
		rr.ContentLength = t.ContentLength
		rr.TransferEncoding = t.TransferEncoding
		rr.Close = t.Close
		rr.Trailer = t.Trailer
	}

	return nil
}

总结：如果自己想动手写HTTP服务器，这些一些基础性的处理工作可以借鉴一下

有疑问加站长微信联系（非本文作者）