diff --git a/src/regexp/backtrack.go b/src/regexp/backtrack.go index 239abc3a57..9fb7d1e493 100644 --- a/src/regexp/backtrack.go +++ b/src/regexp/backtrack.go @@ -257,7 +257,8 @@ func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } case syntax.InstEmptyWidth: - if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 { + flag := i.context(pos) + if !flag.match(syntax.EmptyOp(inst.Arg)) { continue } pc = inst.Out diff --git a/src/regexp/exec.go b/src/regexp/exec.go index e1870021f2..efe764e2dc 100644 --- a/src/regexp/exec.go +++ b/src/regexp/exec.go @@ -114,6 +114,61 @@ func (m *machine) alloc(i *syntax.Inst) *thread { return t } +// A lazyFlag is a lazily-evaluated syntax.EmptyOp, +// for checking zero-width flags like ^ $ \A \z \B \b. +// It records the pair of relevant runes and does not +// determine the implied flags until absolutely necessary +// (most of the time, that means never). +type lazyFlag uint64 + +func newLazyFlag(r1, r2 rune) lazyFlag { + return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2))) +} + +func (f lazyFlag) match(op syntax.EmptyOp) bool { + if op == 0 { + return true + } + r1 := rune(f >> 32) + if op&syntax.EmptyBeginLine != 0 { + if r1 != '\n' && r1 >= 0 { + return false + } + op &^= syntax.EmptyBeginLine + } + if op&syntax.EmptyBeginText != 0 { + if r1 >= 0 { + return false + } + op &^= syntax.EmptyBeginText + } + if op == 0 { + return true + } + r2 := rune(f) + if op&syntax.EmptyEndLine != 0 { + if r2 != '\n' && r2 >= 0 { + return false + } + op &^= syntax.EmptyEndLine + } + if op&syntax.EmptyEndText != 0 { + if r2 >= 0 { + return false + } + op &^= syntax.EmptyEndText + } + if op == 0 { + return true + } + if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) { + op &^= syntax.EmptyWordBoundary + } else { + op &^= syntax.EmptyNoWordBoundary + } + return op == 0 +} + // match runs the machine over the input starting at pos. // It reports whether a match was found. // If so, m.matchcap holds the submatch information. @@ -133,9 +188,9 @@ func (m *machine) match(i input, pos int) bool { if r != endOfText { r1, width1 = i.step(pos + width) } - var flag syntax.EmptyOp + var flag lazyFlag if pos == 0 { - flag = syntax.EmptyOpContext(-1, r) + flag = newLazyFlag(-1, r) } else { flag = i.context(pos) } @@ -164,10 +219,10 @@ func (m *machine) match(i input, pos int) bool { if len(m.matchcap) > 0 { m.matchcap[0] = pos } - m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil) + m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil) } - flag = syntax.EmptyOpContext(r, r1) - m.step(runq, nextq, pos, pos+width, r, flag) + flag = newLazyFlag(r, r1) + m.step(runq, nextq, pos, pos+width, r, &flag) if width == 0 { break } @@ -202,7 +257,7 @@ func (m *machine) clear(q *queue) { // The step processes the rune c (which may be endOfText), // which starts at position pos and ends at nextPos. // nextCond gives the setting for the empty-width flags after c. -func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) { +func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) { longest := m.re.longest for j := 0; j < len(runq.dense); j++ { d := &runq.dense[j] @@ -259,7 +314,7 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy // It also recursively adds an entry for all instructions reachable from pc by following // empty-width conditions satisfied by cond. pos gives the current position // in the input. -func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread { +func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread { Again: if pc == 0 { return t @@ -286,7 +341,7 @@ Again: pc = i.Arg goto Again case syntax.InstEmptyWidth: - if syntax.EmptyOp(i.Arg)&^cond == 0 { + if cond.match(syntax.EmptyOp(i.Arg)) { pc = i.Out goto Again } @@ -365,16 +420,16 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in if r != endOfText { r1, width1 = i.step(pos + width) } - var flag syntax.EmptyOp + var flag lazyFlag if pos == 0 { - flag = syntax.EmptyOpContext(-1, r) + flag = newLazyFlag(-1, r) } else { flag = i.context(pos) } pc := re.onepass.Start inst := re.onepass.Inst[pc] // If there is a simple literal prefix, skip over it. - if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 && + if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) && len(re.prefix) > 0 && i.canCheckPrefix() { // Match requires literal prefix; fast search for it. if !i.hasPrefix(re) { @@ -422,7 +477,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in case syntax.InstNop: continue case syntax.InstEmptyWidth: - if syntax.EmptyOp(inst.Arg)&^flag != 0 { + if !flag.match(syntax.EmptyOp(inst.Arg)) { goto Return } continue @@ -435,7 +490,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in if width == 0 { break } - flag = syntax.EmptyOpContext(r, r1) + flag = newLazyFlag(r, r1) pos += width r, width = r1, width1 if r != endOfText { diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go index 98146031c0..3586029555 100644 --- a/src/regexp/regexp.go +++ b/src/regexp/regexp.go @@ -311,7 +311,7 @@ type input interface { canCheckPrefix() bool // can we look ahead without losing info? hasPrefix(re *Regexp) bool index(re *Regexp, pos int) int - context(pos int) syntax.EmptyOp + context(pos int) lazyFlag } // inputString scans a string. @@ -342,7 +342,7 @@ func (i *inputString) index(re *Regexp, pos int) int { return strings.Index(i.str[pos:], re.prefix) } -func (i *inputString) context(pos int) syntax.EmptyOp { +func (i *inputString) context(pos int) lazyFlag { r1, r2 := endOfText, endOfText // 0 < pos && pos <= len(i.str) if uint(pos-1) < uint(len(i.str)) { @@ -358,7 +358,7 @@ func (i *inputString) context(pos int) syntax.EmptyOp { r2, _ = utf8.DecodeRuneInString(i.str[pos:]) } } - return syntax.EmptyOpContext(r1, r2) + return newLazyFlag(r1, r2) } // inputBytes scans a byte slice. @@ -389,7 +389,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int { return bytes.Index(i.str[pos:], re.prefixBytes) } -func (i *inputBytes) context(pos int) syntax.EmptyOp { +func (i *inputBytes) context(pos int) lazyFlag { r1, r2 := endOfText, endOfText // 0 < pos && pos <= len(i.str) if uint(pos-1) < uint(len(i.str)) { @@ -405,7 +405,7 @@ func (i *inputBytes) context(pos int) syntax.EmptyOp { r2, _ = utf8.DecodeRune(i.str[pos:]) } } - return syntax.EmptyOpContext(r1, r2) + return newLazyFlag(r1, r2) } // inputReader scans a RuneReader. @@ -441,8 +441,8 @@ func (i *inputReader) index(re *Regexp, pos int) int { return -1 } -func (i *inputReader) context(pos int) syntax.EmptyOp { - return 0 +func (i *inputReader) context(pos int) lazyFlag { + return 0 // not used } // LiteralPrefix returns a literal string that must begin any match