go无法编译这个正则表达式吗?

bluealbin · · 2825 次点击
// LemmatizerRule package main import ( "bufio" "fmt" "os" "regexp" "strconv" "strings" ) const ( RULE_DIRECT_LEFT = 0 RULE_DIRECT_RIGHT = 1 ) func main() { fmt.Println("Start") rules, err := LoadLemmatizerRules("englishrules.txt") if err != nil { panic("Bad Error!") } s := "cutting" for k, v := range rules { fmt.Println(k) for _, vv := range v { l, _ := vv.Apply(s, nil) //fmt.Println(vv.source) fmt.Println(l) } } } /** A Lemmatizer Rule which uses regular expressions. * * <p> * A lemmarizer rule specifies a string substitution pattern * used as part of the process of reducing an elaborated * morphological form to its base form (lemma). * </p> */ type LemmatizerRule struct { /** Original rule text. */ ruleText string /** Source pattern string to match. */ source string /** Compiled source pattern matcher. */ compiledSource *regexp.Regexp /** Replacement string. */ replacement string /** Compiled VCR string matcher. */ vcrMatcher *regexp.Regexp //=compile( "([VCRA]+)" ) /** Match direction (LEFT or RIGHT). */ direction int /** Minimum match length. */ matchLength int /** Must match dictionary entry. */ mustMatchDictionaryEntry bool } func (t *LemmatizerRule) Apply(spelling string, lexicon map[string]string) (string, error) { if t.compiledSource.MatchString(spelling) { lemma := t.compiledSource.ReplaceAllString(spelling, t.replacement) if t.mustMatchDictionaryEntry && lexicon != nil { _, ok := lexicon[lemma] if !ok { return "", nil } } return lemma, nil } return "", nil } func SplitByWhiteSpace(line string) []string { list := []string{} tok := "" for _, ch := range line { if ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t' { if len(tok) > 0 { list = append(list, tok) tok = "" } } else { tok += string(ch) } } if len(tok) > 0 { list = append(list, tok) } if len(list) == 0 { return nil } return list } func MakeDefaultLemmatizerRule(ruleText string) (*LemmatizerRule, error) { var rule LemmatizerRule rule.ruleText = ruleText rule.direction = RULE_DIRECT_RIGHT rule.source = "" rule.replacement = "" rule.mustMatchDictionaryEntry = false rule.vcrMatcher, _ = regexp.Compile("([VCRA]+)") tokens := SplitByWhiteSpace(ruleText) if tokens == nil { return nil, nil } idx := 0 ch := tokens[0][0] if ch == '<' || ch == '>' { if ch == '<' { rule.direction = RULE_DIRECT_LEFT } d, err := strconv.Atoi(string(tokens[0][1])) if err != nil { return nil, nil } rule.matchLength = d idx = 1 } else if ch == '+' { rule.mustMatchDictionaryEntry = true idx = 1 } rule.source = tokens[idx] if idx+1 < len(tokens) { rule.replacement = tokens[idx+1] } replacementCount := 1 if rule.matchLength > 0 { if rule.direction == RULE_DIRECT_RIGHT { rule.source = fmt.Sprintf("(..)%s", rule.source) rule.replacement = fmt.Sprintf("$%d%s", replacementCount, rule.replacement) replacementCount++ } else { rule.matchLength-- rule.source = fmt.Sprintf("^(.{1,%d})%s", rule.matchLength, rule.source) rule.replacement = fmt.Sprintf("$%d%s", replacementCount, rule.replacement) replacementCount++ } } if strings.Index(rule.source, "CC") >= 0 { rule.source = strings.Replace(rule.source, "CC", "([^aeiouy])\\$1", -1) //???? rule.replacement = strings.Replace(rule.replacement, "C", fmt.Sprintf("$%d", replacementCount), 1) } else { phonolog := rule.vcrMatcher.FindString(rule.source) if len(phonolog) > 0 { phonolog_orgi := phonolog phonolog = strings.Replace(phonolog, "V", "[aeiouy]", -1) phonolog = strings.Replace(phonolog, "C", "[^aeiouy]", -1) phonolog = strings.Replace(phonolog, "R", "r", -1) phonolog = strings.Replace(phonolog, "A", ".*", -1) rule.source = strings.Replace(rule.source, phonolog_orgi, fmt.Sprintf("(%s)", phonolog), 1) rule.replacement = strings.Replace(rule.replacement, phonolog_orgi, fmt.Sprintf("$%d", replacementCount), 1) replacementCount++ } } rule.source = rule.source + "$" cc, err := regexp.Compile(rule.source) if err != nil { return nil, nil } rule.compiledSource = cc return &rule, nil } func LoadLemmatizerRules(path string) (map[string][]*LemmatizerRule, error) { file, err := os.Open(path) if err != nil { panic("Bad Error!") } defer file.Close() reader := bufio.NewReader(file) result := make(map[string][]*LemmatizerRule) PosTag := "" cc := 0 for { line, err := reader.ReadString('\n') if err != nil { break } line = strings.Trim(line, " \t\r\n") if len(line) == 0 { continue } if strings.Contains(line, ":") { PosTag = strings.Trim(line, ":") PosTag = strings.ToLower(PosTag) continue } rule, err := MakeDefaultLemmatizerRule(line) if err != nil { continue } if len(PosTag) == 0 { continue } _, ok := result[PosTag] if !ok { result[PosTag] = []*LemmatizerRule{} } result[PosTag] = append(result[PosTag], rule) cc++ } if cc == 0 { return nil, nil } return result, nil }
#6
更多评论
改成: `([^aeiouy])$1` 试试 go 用的正则跟其他的有些不太一样,参见文档:http://docs.studygolang.com/pkg/regexp/
#1
试过了,不行啊。
#2