regexp: add ASCII fast path for context methods

The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Martin Möhrmann 2017-03-04 07:18:26 +01:00 committed by Brad Fitzpatrick
parent 8a16d7d40a
commit e74c6cd3c0
1 changed file with 24 additions and 8 deletions

View File

@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 func (i *inputString) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRune(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRune(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRune(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRune(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }