diff --git a/src/pkg/encoding/json/decode.go b/src/pkg/encoding/json/decode.go
index 458fb39ec0..4db566726e 100644
--- a/src/pkg/encoding/json/decode.go
+++ b/src/pkg/encoding/json/decode.go
@@ -8,6 +8,7 @@
 package json
 
 import (
+	"bytes"
 	"encoding"
 	"encoding/base64"
 	"errors"
@@ -15,7 +16,6 @@ import (
 	"reflect"
 	"runtime"
 	"strconv"
-	"strings"
 	"unicode"
 	"unicode/utf16"
 	"unicode/utf8"
@@ -500,11 +500,11 @@ func (d *decodeState) object(v reflect.Value) {
 			d.error(errPhase)
 		}
 
-		// Read string key.
+		// Read key.
 		start := d.off - 1
 		op = d.scanWhile(scanContinue)
 		item := d.data[start : d.off-1]
-		key, ok := unquote(item)
+		key, ok := unquoteBytes(item)
 		if !ok {
 			d.error(errPhase)
 		}
@@ -526,11 +526,14 @@ func (d *decodeState) object(v reflect.Value) {
 			fields := cachedTypeFields(v.Type())
 			for i := range fields {
 				ff := &fields[i]
-				if ff.name == key {
+				// A byte-for-byte name match wins and ends the search.
+				if bytes.Equal(ff.nameBytes, key) {
 					f = ff
 					break
 				}
-				if f == nil && strings.EqualFold(ff.name, key) {
+				// Otherwise remember the first field accepted by its own
+				// equalFold function and keep scanning for an exact match.
+				if f == nil && ff.equalFold(ff.nameBytes, key) {
 					f = ff
 				}
 			}
diff --git a/src/pkg/encoding/json/encode.go b/src/pkg/encoding/json/encode.go
index 7d6c71d7a9..8c71770ca4 100644
--- a/src/pkg/encoding/json/encode.go
+++ b/src/pkg/encoding/json/encode.go
@@ -936,6 +936,9 @@ func (e *encodeState) stringBytes(s []byte) (int, error) {
 // A field represents a single field found in a struct.
 type field struct {
 	name      string
+	nameBytes []byte                 // []byte(name)
+	equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
+
 	tag       bool
 	index     []int
 	typ       reflect.Type
@@ -943,6 +946,14 @@ type field struct {
 	quoted    bool
 }
 
+// fillField returns a copy of f whose nameBytes and equalFold fields
+// have been derived from f.name.
+func fillField(f field) field {
+	f.nameBytes = []byte(f.name)
+	f.equalFold = foldFunc(f.nameBytes)
+	return f
+}
+
 // byName sorts field by name, breaking ties with depth,
 // then breaking ties with "name came from json tag", then
 // breaking ties with index sequence.
@@ -1042,8 +1053,14 @@ func typeFields(t reflect.Type) []field {
 					if name == "" {
 						name = sf.Name
 					}
-					fields = append(fields, field{name, tagged, index, ft,
-						opts.Contains("omitempty"), opts.Contains("string")})
+					fields = append(fields, fillField(field{
+						name:      name,
+						tag:       tagged,
+						index:     index,
+						typ:       ft,
+						omitEmpty: opts.Contains("omitempty"),
+						quoted:    opts.Contains("string"),
+					}))
 					if count[f.typ] > 1 {
 						// If there were multiple instances, add a second,
 						// so that the annihilation code will see a duplicate.
@@ -1057,7 +1074,7 @@ func typeFields(t reflect.Type) []field {
 				// Record new anonymous struct to explore in next round.
 				nextCount[ft]++
 				if nextCount[ft] == 1 {
-					next = append(next, field{name: ft.Name(), index: index, typ: ft})
+					next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
 				}
 			}
 		}
diff --git a/src/pkg/encoding/json/fold.go b/src/pkg/encoding/json/fold.go
new file mode 100644
index 0000000000..d6f77c93e5
--- /dev/null
+++ b/src/pkg/encoding/json/fold.go
@@ -0,0 +1,146 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"unicode/utf8"
+)
+
+const (
+	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
+	kelvin       = '\u212a'    // Kelvin sign; folds together with 'k' and 'K'.
+	smallLongEss = '\u017f'    // Latin small letter long s; folds together with 's' and 'S'.
+)
+
+// foldFunc returns one of four different case folding equivalence
+// functions, from most general (and slow) to fastest:
+//
+// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
+// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
+// 3) asciiEqualFold, no special, but includes non-letters (including _)
+// 4) simpleLetterEqualFold, no specials, no non-letters.
+//
+// The letters S and K are special because they map to 3 runes, not just 2:
+//  * S maps to s and to U+017F 'ſ' Latin small letter long s
+//  * k maps to K and to U+212A 'K' Kelvin sign
+// See http://play.golang.org/p/tTxjOc0OGo
+//
+// The returned function is specialized for matching against s and
+// should only be given s. It's not curried for performance reasons.
+func foldFunc(s []byte) func(s, t []byte) bool {
+	nonLetter := false
+	special := false // special letter
+	for _, b := range s {
+		if b >= utf8.RuneSelf {
+			return bytes.EqualFold
+		}
+		upper := b & caseMask
+		if upper < 'A' || upper > 'Z' {
+			nonLetter = true
+		} else if upper == 'K' || upper == 'S' {
+			// See above for why these letters are special.
+			special = true
+		}
+	}
+	if special {
+		return equalFoldRight
+	}
+	if nonLetter {
+		return asciiEqualFold
+	}
+	return simpleLetterEqualFold
+}
+
+// equalFoldRight is a specialization of bytes.EqualFold when s is
+// known to be all ASCII (including punctuation), but contains an 's',
+// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
+// See comments on foldFunc.
+func equalFoldRight(s, t []byte) bool {
+	for _, sb := range s {
+		if len(t) == 0 {
+			return false
+		}
+		tb := t[0]
+		if tb < utf8.RuneSelf {
+			// tb is ASCII: accept an exact match, or a letter sb that equals tb once the case bit is masked.
+			if sb != tb {
+				sbUpper := sb & caseMask
+				if 'A' <= sbUpper && sbUpper <= 'Z' {
+					if sbUpper != tb&caseMask {
+						return false
+					}
+				} else {
+					return false
+				}
+			}
+			t = t[1:]
+			continue
+		}
+		// sb is ASCII and t is not. t must be either kelvin
+		// sign or long s; sb must be s, S, k, or K.
+		tr, size := utf8.DecodeRune(t)
+		switch sb {
+		case 's', 'S':
+			if tr != smallLongEss {
+				return false
+			}
+		case 'k', 'K':
+			if tr != kelvin {
+				return false
+			}
+		default:
+			return false
+		}
+		t = t[size:]
+
+	}
+	if len(t) > 0 {
+		return false
+	}
+	return true
+}
+
+// asciiEqualFold is a specialization of bytes.EqualFold for use when
+// s is all ASCII (but may contain non-letters) and contains no
+// special-folding letters.
+// See comments on foldFunc.
+func asciiEqualFold(s, t []byte) bool {
+	if len(s) != len(t) {
+		return false
+	}
+	for i, sb := range s {
+		tb := t[i]
+		if sb == tb {
+			continue
+		}
+		// The bytes differ: only a letter in s may still match, and only ignoring the case bit.
+		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
+			if sb&caseMask != tb&caseMask {
+				return false
+			}
+		} else {
+			return false
+		}
+	}
+	return true
+}
+
+// simpleLetterEqualFold is a specialization of bytes.EqualFold for
+// use when s is all ASCII letters (no underscores, etc) and also
+// doesn't contain 'k', 'K', 's', or 'S'.
+// See comments on foldFunc.
+func simpleLetterEqualFold(s, t []byte) bool {
+	if len(s) != len(t) {
+		return false
+	}
+	for i, b := range s {
+		// s is documented to hold only letters, so the bytes are compared with the case bit cleared.
+		if b&caseMask != t[i]&caseMask {
+			return false
+		}
+	}
+	return true
+}
diff --git a/src/pkg/encoding/json/fold_test.go b/src/pkg/encoding/json/fold_test.go
new file mode 100644
index 0000000000..9fb94646a8
--- /dev/null
+++ b/src/pkg/encoding/json/fold_test.go
@@ -0,0 +1,125 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+// foldTests lists hand-written cases: fn is the fold function under test,
+// s and t are its two arguments, and want is the expected result.
+var foldTests = []struct {
+	fn   func(s, t []byte) bool
+	s, t string
+	want bool
+}{
+	{equalFoldRight, "", "", true},
+	{equalFoldRight, "a", "a", true},
+	{equalFoldRight, "", "a", false},
+	{equalFoldRight, "a", "", false},
+	{equalFoldRight, "a", "A", true},
+	{equalFoldRight, "AB", "ab", true},
+	{equalFoldRight, "AB", "ac", false},
+	{equalFoldRight, "sbkKc", "ſbKKc", true},
+	{equalFoldRight, "SbKkc", "ſbKKc", true},
+	{equalFoldRight, "SbKkc", "ſbKK", false},
+	{equalFoldRight, "e", "é", false},
+	{equalFoldRight, "s", "S", true},
+
+	{simpleLetterEqualFold, "", "", true},
+	{simpleLetterEqualFold, "abc", "abc", true},
+	{simpleLetterEqualFold, "abc", "ABC", true},
+	{simpleLetterEqualFold, "abc", "ABCD", false},
+	{simpleLetterEqualFold, "abc", "xxx", false},
+
+	{asciiEqualFold, "a_B", "A_b", true},
+	{asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent
+}
+
+// TestFold runs every foldTests case and additionally checks that
+// strings.EqualFold agrees with the case's expected result.
+func TestFold(t *testing.T) {
+	for i, tt := range foldTests {
+		if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want {
+			t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want)
+		}
+		truth := strings.EqualFold(tt.s, tt.t)
+		if truth != tt.want {
+			t.Errorf("strings.EqualFold doesn't agree with case %d", i)
+		}
+	}
+}
+
+// TestFoldAgainstUnicode compares each specialized fold function with
+// bytes.EqualFold on inputs of the form 'x' + rune + 'x'. The runes are
+// 0x20 through 0x7f plus the Kelvin sign and long s; the first argument
+// only uses ASCII runes allowed by the entry's letter and simple flags.
+func TestFoldAgainstUnicode(t *testing.T) {
+	const bufSize = 5
+	buf1 := make([]byte, 0, bufSize)
+	buf2 := make([]byte, 0, bufSize)
+	var runes []rune
+	for i := 0x20; i <= 0x7f; i++ {
+		runes = append(runes, rune(i))
+	}
+	runes = append(runes, kelvin, smallLongEss)
+
+	funcs := []struct {
+		name   string
+		fold   func(s, t []byte) bool
+		letter bool // must be ASCII letter
+		simple bool // must be simple ASCII letter (not 'S' or 'K')
+	}{
+		{
+			name: "equalFoldRight",
+			fold: equalFoldRight,
+		},
+		{
+			name:   "asciiEqualFold",
+			fold:   asciiEqualFold,
+			simple: true,
+		},
+		{
+			name:   "simpleLetterEqualFold",
+			fold:   simpleLetterEqualFold,
+			simple: true,
+			letter: true,
+		},
+	}
+
+	for _, ff := range funcs {
+		for _, r := range runes {
+			if r >= utf8.RuneSelf {
+				continue
+			}
+			if ff.letter && !isASCIILetter(byte(r)) {
+				continue
+			}
+			if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') {
+				continue
+			}
+			for _, r2 := range runes {
+				buf1 := append(buf1[:0], 'x')
+				buf2 := append(buf2[:0], 'x')
+				buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)]
+				buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)]
+				buf1 = append(buf1, 'x')
+				buf2 = append(buf2, 'x')
+				want := bytes.EqualFold(buf1, buf2)
+				if got := ff.fold(buf1, buf2); got != want {
+					t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want)
+				}
+			}
+		}
+	}
+}
+
+// isASCIILetter reports whether b is an ASCII letter.
+func isASCIILetter(b byte) bool {
+	return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z')
+}