time: optimize Parse for RFC3339 and RFC3339Nano

RFC 3339 is the most common time representation,
being used in an overwhelming 57.3% of all specified formats,
while the next competitor only holds 7.5% usage.
Specially optimize parsing to handle the RFC 3339 format.
To reduce the complexity of error checking,
parseRFC3339 simply returns a bool indicating parsing success.
It leaves error handling to the general parse path.

To assist in fuzzing, the internal parse function was left unmodified
so that we could test that parseRFC3339 and parse agree with each other.

Performance:

	name             old time/op  new time/op  delta
	ParseRFC3339UTC  112ns ± 1%   37ns ± 1%    -67.37%  (p=0.000 n=9+9)
	ParseRFC3339TZ   259ns ± 2%   67ns ± 1%    -73.92%  (p=0.000 n=10+9)

Credit goes to Amarjeet Anand for a prior CL attempting to optimize this.
See CL 425014.

Fixes #54093

Change-Id: I14f4e8c52b092d44ceef6863f261842ed7e83f4c
Reviewed-on: https://go-review.googlesource.com/c/go/+/425197
Reviewed-by: Rob Pike <r@golang.org>
Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Jenny Rakoczy <jenny@golang.org>
This commit is contained in:
Joe Tsai 2022-08-22 11:29:03 -07:00 committed by Gopher Robot
parent 7ffbcd1987
commit f7396aaea0
4 changed files with 189 additions and 31 deletions

View File

@ -135,3 +135,5 @@ var Quote = quote
// The variables below re-export unexported functions under exported names.
// ParseAny and ParseRFC3339 are the two parsers that FuzzParseRFC3339
// checks against each other.
var AppendFormatAny = Time.appendFormat
var AppendFormatRFC3339 = Time.appendFormatRFC3339
var ParseAny = parse
var ParseRFC3339 = parseRFC3339

View File

@ -618,6 +618,7 @@ func (t Time) Format(layout string) string {
// AppendFormat is like Format but appends the textual
// representation to b and returns the extended buffer.
func (t Time) AppendFormat(b []byte, layout string) []byte {
// Optimize for RFC3339 as it accounts for over half of all representations.
switch layout {
case RFC3339:
return t.appendFormatRFC3339(b, false)
@ -1018,6 +1019,12 @@ func skip(value, prefix string) (string, error) {
// differ by the actual zone offset. To avoid such problems, prefer time layouts
// that use a numeric zone offset, or use ParseInLocation.
func Parse(layout, value string) (Time, error) {
	// The two RFC 3339 layouts account for over half of all representations,
	// so they get a dedicated fast path. That path only reports success or
	// failure; when it fails, fall through to the general parser, which
	// produces the error.
	switch layout {
	case RFC3339, RFC3339Nano:
		fast, ok := parseRFC3339(value, Local)
		if ok {
			return fast, nil
		}
	}
	return parse(layout, value, UTC, Local)
}
@ -1027,9 +1034,88 @@ func Parse(layout, value string) (Time, error) {
// Second, when given a zone offset or abbreviation, Parse tries to match it
// against the Local location; ParseInLocation uses the given location.
func ParseInLocation(layout, value string, loc *Location) (Time, error) {
	// Layouts other than the two RFC 3339 forms go straight to the
	// general parser.
	if layout != RFC3339 && layout != RFC3339Nano {
		return parse(layout, value, loc, loc)
	}

	// RFC 3339 accounts for over half of all representations, so try the
	// specialized parser first. It reports only success or failure; on
	// failure the general parser is run to produce the error.
	if fast, ok := parseRFC3339(value, loc); ok {
		return fast, nil
	}
	return parse(layout, value, loc, loc)
}
// parseRFC3339 is a specialized parser for timestamps of the form
// "2006-01-02T15:04:05", optionally followed by a fractional second,
// and terminated by either "Z" or a numeric "+07:00"/"-07:00" offset.
//
// It reports only whether parsing succeeded: on any failure it returns the
// zero Time and false without describing what went wrong. When the input
// carries a numeric offset, the result uses local if local has that same
// offset at the parsed instant, and an unnamed fixed zone otherwise.
func parseRFC3339(s string, local *Location) (Time, bool) {
	// valid is cleared by field the first time a numeric field is malformed
	// or out of range. It is inspected once per group of fields.
	valid := true

	// field decodes digits as an unsigned decimal number that must lie
	// within [lo, hi]. On failure it clears valid and yields lo, so that a
	// later field whose bounds depend on this one still sees a usable value.
	field := func(digits string, lo, hi int) int {
		v := 0
		for i := 0; i < len(digits); i++ {
			c := digits[i]
			if c < '0' || c > '9' {
				valid = false
				return lo
			}
			v = v*10 + int(c) - '0'
		}
		if v < lo || v > hi {
			valid = false
			return lo
		}
		return v
	}

	// Date and time: fixed-width fields at fixed positions.
	if len(s) < len("2006-01-02T15:04:05") {
		return Time{}, false
	}
	year := field(s[0:4], 0, 9999)                       // e.g., 2006
	month := field(s[5:7], 1, 12)                        // e.g., 01
	day := field(s[8:10], 1, daysIn(Month(month), year)) // e.g., 02
	hour := field(s[11:13], 0, 23)                       // e.g., 15
	minute := field(s[14:16], 0, 59)                     // e.g., 04
	sec := field(s[17:19], 0, 59)                        // e.g., 05
	if !valid || s[4] != '-' || s[7] != '-' || s[10] != 'T' || s[13] != ':' || s[16] != ':' {
		return Time{}, false
	}
	s = s[19:]

	// Optional fractional second: a '.' followed by at least one digit.
	nsec := 0
	if len(s) >= 2 && s[0] == '.' && isDigit(s, 1) {
		end := 2
		for end < len(s) && isDigit(s, end) {
			end++
		}
		// Only the nanosecond value is kept; the other results are discarded.
		nsec, _, _ = parseNanoseconds(s, end)
		s = s[end:]
	}

	// Time zone: either the literal "Z" or a six-byte numeric offset.
	stamp := Date(year, Month(month), day, hour, minute, sec, nsec, UTC)
	if s == "Z" {
		return stamp, true
	}
	if len(s) != len("-07:00") {
		return Time{}, false
	}
	zoneHour := field(s[1:3], 0, 23) // e.g., 07
	zoneMin := field(s[4:6], 0, 59)  // e.g., 00
	sign := s[0]
	if !valid || (sign != '-' && sign != '+') || s[3] != ':' {
		return Time{}, false
	}
	offset := (zoneHour*60 + zoneMin) * 60
	if sign == '-' {
		offset = -offset
	}
	// The fields were interpreted as UTC above; shift by the offset.
	stamp.addSec(-int64(offset))

	// Use local zone with the given offset if possible.
	zone := local
	if _, localOffset, _, _, _ := local.lookup(stamp.unixSec()); localOffset != offset {
		zone = FixedZone("", offset)
	}
	stamp.setLoc(zone)
	return stamp, true
}
func parse(layout, value string, defaultLocation, local *Location) (Time, error) {
alayout, avalue := layout, value
rangeErrString := "" // set if a value is out of range

View File

@ -871,44 +871,44 @@ func TestFormatFractionalSecondSeparators(t *testing.T) {
}
}
// longFractionalDigitsTests lists RFC 3339 timestamps whose fractional
// second has nine or more digits, paired with the nanosecond value that
// parsing is expected to produce: digits past the ninth are dropped.
// The table drives TestParseFractionalSecondsLongerThanNineDigits and also
// seeds the FuzzParseRFC3339 corpus.
var longFractionalDigitsTests = []struct {
value string
want int
}{
// 9 digits
{"2021-09-29T16:04:33.000000000Z", 0},
{"2021-09-29T16:04:33.000000001Z", 1},
{"2021-09-29T16:04:33.100000000Z", 100_000_000},
{"2021-09-29T16:04:33.100000001Z", 100_000_001},
{"2021-09-29T16:04:33.999999999Z", 999_999_999},
{"2021-09-29T16:04:33.012345678Z", 12_345_678},
// 10 digits, truncates
{"2021-09-29T16:04:33.0000000000Z", 0},
{"2021-09-29T16:04:33.0000000001Z", 0},
{"2021-09-29T16:04:33.1000000000Z", 100_000_000},
{"2021-09-29T16:04:33.1000000009Z", 100_000_000},
{"2021-09-29T16:04:33.9999999999Z", 999_999_999},
{"2021-09-29T16:04:33.0123456789Z", 12_345_678},
// 11 digits, truncates
{"2021-09-29T16:04:33.10000000000Z", 100_000_000},
{"2021-09-29T16:04:33.00123456789Z", 1_234_567},
// 12 digits, truncates
{"2021-09-29T16:04:33.000123456789Z", 123_456},
// 15 digits, truncates
{"2021-09-29T16:04:33.9999999999999999Z", 999_999_999},
}
// Issue 48685 and 54567.
//
// TestParseFractionalSecondsLongerThanNineDigits parses every table entry
// with both the RFC3339 and RFC3339Nano layouts and checks that the result's
// Nanosecond value equals the expected one. A parse error is reported as a
// failure, so inputs with more than nine fractional digits must be accepted
// and truncated.
func TestParseFractionalSecondsLongerThanNineDigits(t *testing.T) {
tests := []struct {
s string
want int
}{
// 9 digits
{"2021-09-29T16:04:33.000000000Z", 0},
{"2021-09-29T16:04:33.000000001Z", 1},
{"2021-09-29T16:04:33.100000000Z", 100_000_000},
{"2021-09-29T16:04:33.100000001Z", 100_000_001},
{"2021-09-29T16:04:33.999999999Z", 999_999_999},
{"2021-09-29T16:04:33.012345678Z", 12_345_678},
// 10 digits, truncates
{"2021-09-29T16:04:33.0000000000Z", 0},
{"2021-09-29T16:04:33.0000000001Z", 0},
{"2021-09-29T16:04:33.1000000000Z", 100_000_000},
{"2021-09-29T16:04:33.1000000009Z", 100_000_000},
{"2021-09-29T16:04:33.9999999999Z", 999_999_999},
{"2021-09-29T16:04:33.0123456789Z", 12_345_678},
// 11 digits, truncates
{"2021-09-29T16:04:33.10000000000Z", 100_000_000},
{"2021-09-29T16:04:33.00123456789Z", 1_234_567},
// 12 digits, truncates
{"2021-09-29T16:04:33.000123456789Z", 123_456},
// 15 digits, truncates
{"2021-09-29T16:04:33.9999999999999999Z", 999_999_999},
}
for _, tt := range tests {
for _, tt := range longFractionalDigitsTests {
for _, format := range []string{RFC3339, RFC3339Nano} {
tm, err := Parse(format, tt.s)
tm, err := Parse(format, tt.value)
if err != nil {
t.Errorf("Parse(%q, %q) error: %v", format, tt.s, err)
t.Errorf("Parse(%q, %q) error: %v", format, tt.value, err)
continue
}
if got := tm.Nanosecond(); got != tt.want {
t.Errorf("Parse(%q, %q) = got %d, want %d", format, tt.s, got, tt.want)
t.Errorf("Parse(%q, %q) = got %d, want %d", format, tt.value, got, tt.want)
}
}
}
@ -955,3 +955,61 @@ func FuzzFormatRFC3339(f *testing.F) {
}
})
}
// FuzzParseRFC3339 checks that the specialized ParseRFC3339 agrees with the
// general-purpose ParseAny on arbitrary input, using both UTC and Local as
// the local zone. The corpus is seeded from the format, parse, parse-error,
// and long-fraction test tables.
func FuzzParseRFC3339(f *testing.F) {
	for _, tt := range formatTests {
		f.Add(tt.result)
	}
	for _, tt := range parseTests {
		f.Add(tt.value)
	}
	for _, tt := range parseErrorTests {
		f.Add(tt.value)
	}
	for _, tt := range longFractionalDigitsTests {
		f.Add(tt.value)
	}

	f.Fuzz(func(t *testing.T, s string) {
		// equalTime is like time.Time.Equal, but also compares the time zone.
		equalTime := func(t1, t2 Time) bool {
			name1, offset1 := t1.Zone()
			name2, offset2 := t2.Zone()
			return t1.Equal(t2) && name1 == name2 && offset1 == offset2
		}

		for _, tz := range []*Location{UTC, Local} {
			// Parsing as RFC3339 or RFC3339Nano should be identical.
			t1, err1 := ParseAny(RFC3339, s, UTC, tz)
			t2, err2 := ParseAny(RFC3339Nano, s, UTC, tz)
			switch {
			case (err1 == nil) != (err2 == nil):
				t.Fatalf("ParseAny(%q) error mismatch:\n\tgot: %v\n\twant: %v", s, err1, err2)
			case !equalTime(t1, t2):
				t.Fatalf("ParseAny(%q) value mismatch:\n\tgot: %v\n\twant: %v", s, t1, t2)
			}

			// TODO(https://go.dev/issue/54580):
			// Remove these checks after ParseAny rejects all invalid RFC 3339.
			if err1 == nil {
				// num2 decodes the first two bytes of s as a two-digit number.
				num2 := func(s string) byte { return 10*(s[0]-'0') + (s[1] - '0') }
				switch {
				case len(s) > 12 && s[12] == ':':
					t.Skipf("ParseAny(%q) incorrectly allows single-digit hour fields", s)
				case len(s) > 19 && s[19] == ',':
					t.Skipf("ParseAny(%q) incorrectly allows comma as sub-second separator", s)
				case !strings.HasSuffix(s, "Z") && len(s) > 4 && (num2(s[len(s)-5:]) >= 24 || num2(s[len(s)-2:]) >= 60):
					t.Skipf("ParseAny(%q) incorrectly allows out-of-range zone offset", s)
				}
			}

			// Customized parser should be identical to general parser.
			switch got, ok := ParseRFC3339(s, tz); {
			case ok != (err1 == nil):
				t.Fatalf("ParseRFC3339(%q) error mismatch:\n\tgot: %v\n\twant: %v", s, ok, err1 == nil)
			case !equalTime(got, t1):
				// Report t1, the value got was actually compared against.
				t.Fatalf("ParseRFC3339(%q) value mismatch:\n\tgot: %v\n\twant: %v", s, got, t1)
			}
		}
	})
}

View File

@ -1445,6 +1445,18 @@ func BenchmarkParse(b *testing.B) {
}
}
// BenchmarkParseRFC3339UTC measures Parse with the RFC3339 layout on a
// timestamp that has nine fractional digits and the "Z" zone designator.
func BenchmarkParseRFC3339UTC(b *testing.B) {
	const input = "2020-08-22T11:27:43.123456789Z"
	for remaining := b.N; remaining > 0; remaining-- {
		Parse(RFC3339, input)
	}
}
// BenchmarkParseRFC3339TZ measures Parse with the RFC3339 layout on a
// timestamp that has nine fractional digits and a numeric "-02:00" offset.
func BenchmarkParseRFC3339TZ(b *testing.B) {
	const input = "2020-08-22T11:27:43.123456789-02:00"
	for remaining := b.N; remaining > 0; remaining-- {
		Parse(RFC3339, input)
	}
}
func BenchmarkParseDuration(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseDuration("9007199254.740993ms")