regexp/syntax: accept (?<name>...) as valid capture

Currently the only named capture syntax supported by regexp is (?P<name>a).

The syntax (?<name>a) is also widely used, and there is currently an effort from
the Rust regex and RE2 teams to also accept this syntax.

Fixes #58458
This commit is contained in:
Mauri de Souza Meneguzzo 2023-07-27 22:25:16 -03:00
parent a8a6f90a23
commit 19646015ef
2 changed files with 20 additions and 5 deletions

View File

@ -1159,9 +1159,18 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
// support all three as well. EcmaScript 4 uses only the Python form.
//
// In both the open source world (via Code Search) and the
// Google source tree, (?P<expr>name) is the dominant form,
// so that's the one we implement. One is enough.
if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
// Google source tree, (?P<name>expr) and (?<name>expr) are the
// dominant forms of named captures and both are supported.
startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<'
startsWithName := len(t) > 3 && t[2] == '<'
if startsWithP || startsWithName {
// Index in t where the capture name starts, just past "(?P<" or "(?<".
exprStartPos := 4
if startsWithName {
exprStartPos = 3
}
// Pull out name.
end := strings.IndexRune(t, '>')
if end < 0 {
@ -1171,8 +1180,8 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
return "", &Error{ErrInvalidNamedCapture, s}
}
capture := t[:end+1] // "(?P<name>"
name := t[4:end] // "name"
capture := t[:end+1] // "(?P<name>" or "(?<name>"
name := t[exprStartPos:end] // "name"
if err = checkUTF8(name); err != nil {
return "", err
}

View File

@ -160,6 +160,7 @@ var parseTests = []parseTest{
// Test named captures
{`(?P<name>a)`, `cap{name:lit{a}}`},
{`(?<name>a)`, `cap{name:lit{a}}`},
// Case-folded literals
{`[Aa]`, `litfold{A}`},
@ -482,6 +483,11 @@ var invalidRegexps = []string{
`(?P<name`,
`(?P<x y>a)`,
`(?P<>a)`,
`(?<name>a`,
`(?<name>`,
`(?<name`,
`(?<x y>a)`,
`(?<>a)`,
`[a-Z]`,
`(?i)[a-Z]`,
`\Q\E*`,