From 19646015efe70e5decdceb659d5268430d91eff1 Mon Sep 17 00:00:00 2001 From: Mauri de Souza Meneguzzo Date: Thu, 27 Jul 2023 22:25:16 -0300 Subject: [PATCH] regexp/syntax: accept (?<name>...) as valid capture Currently the only named capture supported by regexp is (?P<name>a). The syntax (?<name>a) is also widely used and there is currently an effort from the Rust regex and RE2 teams to also accept this syntax. Fixes #58458 --- src/regexp/syntax/parse.go | 19 ++++++++++++++----- src/regexp/syntax/parse_test.go | 6 ++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go index accee9ab08..a4ccfe3bdb 100644 --- a/src/regexp/syntax/parse.go +++ b/src/regexp/syntax/parse.go @@ -1159,9 +1159,18 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the - // Google source tree, (?Pname) is the dominant form, - // so that's the one we implement. One is enough. - if len(t) > 4 && t[2] == 'P' && t[3] == '<' { + // Google source tree, (?Pname) and (?name) are the + // dominant forms of named captures and both are supported. + startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<' + startsWithName := len(t) > 3 && t[2] == '<' + + if startsWithP || startsWithName { + // position of expr start + exprStartPos := 4 + if startsWithName { + exprStartPos = 3 + } + // Pull out name. end := strings.IndexRune(t, '>') if end < 0 { @@ -1171,8 +1180,8 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { return "", &Error{ErrInvalidNamedCapture, s} } - capture := t[:end+1] // "(?P" - name := t[4:end] // "name" + capture := t[:end+1] // "(?P" or "(?" 
+ name := t[exprStartPos:end] // "name" if err = checkUTF8(name); err != nil { return "", err } diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go index 67e3c5622a..d7999046e0 100644 --- a/src/regexp/syntax/parse_test.go +++ b/src/regexp/syntax/parse_test.go @@ -160,6 +160,7 @@ var parseTests = []parseTest{ // Test named captures {`(?Pa)`, `cap{name:lit{a}}`}, + {`(?a)`, `cap{name:lit{a}}`}, // Case-folded literals {`[Aa]`, `litfold{A}`}, @@ -482,6 +483,11 @@ var invalidRegexps = []string{ `(?Pa)`, `(?P<>a)`, + `(?a`, + `(?`, + `(?a)`, + `(?<>a)`, `[a-Z]`, `(?i)[a-Z]`, `\Q\E*`,