Pratt Parser 是一种非常容易手写,代码可读性很好,而且性能不错的 Parser 编写方式(Pratt Parsers: Expression Parsing Made Easy)。实现了一个框架来支持各种手写 Pratt Parser 的需求。比如,要对下面这个表达式进行求值。
4/(1+1)+2
求值的结果应该是 4。下面这个手写 Parser 就实现了这样一个递归下降的解析过程。
import "github.com/modern-go/parse"
import "github.com/modern-go/parse/read"
const precedenceAssignment = 1
const precedenceConditional = 2
const precedenceSum = 3
const precedenceProduct = 4
const precedenceExponent = 5
const precedencePrefix = 6
const precedencePostfix = 7
const precedenceCall = 8
type exprLexer struct {
value *valueToken
plus *plusToken
minus *minusToken
multiply *multiplyToken
divide *divideToken
group *groupToken
}
var expr = &exprLexer{}
func (lexer *exprLexer) Parse(src *parse.Source, precedence int) interface{} {
return parse.Parse(src, lexer, precedence)
}
func (lexer *exprLexer) InfixToken(src *parse.Source) (parse.InfixToken, int) {
switch src.Peek1() {
case '+':
return lexer.plus, precedenceSum
case '-':
return lexer.minus, precedenceSum
case '*':
return lexer.multiply, precedenceProduct
case '/':
return lexer.divide, precedenceProduct
default:
return nil, 0
}
}
func (lexer *exprLexer) PrefixToken(src *parse.Source) parse.PrefixToken {
switch src.Peek1() {
case '(':
return lexer.group
case '-':
return lexer.minus
default:
return lexer.value
}
}
type valueToken struct {
}
func (token *valueToken) PrefixParse(src *parse.Source) interface{} {
return read.Int(src)
}
type plusToken struct {
}
func (token *plusToken) InfixParse(src *parse.Source, left interface{}) interface{} {
leftValue := left.(int)
src.Consume1('+')
rightValue := expr.Parse(src, precedenceSum).(int)
return leftValue + rightValue
}
type minusToken struct {
}
func (token *minusToken) PrefixParse(src *parse.Source) interface{} {
src.Consume1('-')
expr := expr.Parse(src, precedencePrefix).(int)
return -expr
}
func (token *minusToken) InfixParse(src *parse.Source, left interface{}) interface{} {
leftValue := left.(int)
src.Consume1('-')
rightValue := expr.Parse(src, precedenceSum).(int)
return leftValue - rightValue
}
type multiplyToken struct {
}
func (token *multiplyToken) InfixParse(src *parse.Source, left interface{}) interface{} {
leftValue := left.(int)
src.Consume1('*')
rightValue := expr.Parse(src, precedenceProduct).(int)
return leftValue * rightValue
}
type divideToken struct {
}
func (token *divideToken) InfixParse(src *parse.Source, left interface{}) interface{} {
leftValue := left.(int)
src.Consume1('/')
rightValue := expr.Parse(src, precedenceProduct).(int)
return leftValue / rightValue
}
type groupToken struct {
}
func (token *groupToken) PrefixParse(src *parse.Source) interface{} {
src.Consume1('(')
expr := expr.Parse(src, 0)
src.Consume1(')')
return expr
}
主要提供的可复用的东西有三个。
- parse.Parse:主循环代码
- parse.Source:支持 look ahead,支持 byte by byte,支持 rune by rune,支持 savepoint 回溯
- read /discard:一些经过优化的,常见的匹配字符串读取代码,比如 discard 当前连续的空格等等,比如 http://read.Int(src) 读取10进制的字符串。
首先是 "parse.Parse" 这个 main loop:
func Parse(src *Source, lexer Lexer, precedence int) interface{} {
token := lexer.PrefixToken(src)
if token == nil {
src.ReportError(errors.New("can not parse"))
return nil
}
InfoLogger.Println("prefix", ">>>", reflect.TypeOf(token))
left := token.PrefixParse(src)
InfoLogger.Println("prefix", "<<<", reflect.TypeOf(token))
for {
if src.Error() != nil {
return left
}
token, infixPrecedence := lexer.InfixToken(src)
if token == nil {
return left
}
if precedence >= infixPrecedence {
concurrent.InfoLogger.Println("precedence skip ", reflect.TypeOf(token), precedence, infixPrecedence)
return left
}
concurrent.InfoLogger.Println("infix ", ">>>", reflect.TypeOf(token))
left = token.InfixParse(src, left)
concurrent.InfoLogger.Println("infix ", "<<<", reflect.TypeOf(token))
}
return left
}
其次是 parse.Source。可以看一下回溯的能力
// UnicodeRange read unicode until one not in table,
// the bytes will be appended to the space passed in.
// If space is nil, new space will be allocated from heap
func UnicodeRange(src *parse.Source, space []byte, table *unicode.RangeTable) []byte {
src.Savepoint("UnicodeRange")
length := 0
for src.Error() == nil {
r, n := src.PeekRune()
if !unicode.Is(table, r) {
break
}
length += n
src.ConsumeN(n)
}
if src.FatalError() != nil {
return nil
}
src.RollbackTo("UnicodeRange")
return src.CopyN(space, length)
}
库的代码在这里:
modern-go/parse有疑问加站长微信联系(非本文作者)