mirror of https://github.com/golang/go.git
unicode: upgrade to 8.0.0
Not sure if I'm on time for 1.5; Unicode 8 just got released. Straightforward upgrade. Only changed maketables.go to prevent it from adding the Cherokee upper and lower case mappings. This change causes the caseOrbit table to NOT change. Added tests to verify that the relevant functions still produce the correct result, even for Cherokee. Fixes #11309 Change-Id: I42850f5b3399bde125b002efc78eff96dbd86a08 Reviewed-on: https://go-review.googlesource.com/11286 Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
834fef80ae
commit
fe15da62f7
|
|
@ -328,3 +328,4 @@ pkg syscall (netbsd-arm), type IfMsghdr struct, Pad_cgo_1 [4]uint8
|
|||
pkg syscall (netbsd-arm-cgo), const SizeofIfData = 132
|
||||
pkg syscall (netbsd-arm-cgo), type IfMsghdr struct, Pad_cgo_1 [4]uint8
|
||||
pkg unicode, const Version = "6.3.0"
|
||||
pkg unicode, const Version = "7.0.0"
|
||||
|
|
|
|||
|
|
@ -948,3 +948,10 @@ pkg syscall (openbsd-amd64-cgo), type SysProcAttr struct, Pgid int
|
|||
pkg text/template, method (*Template) DefinedTemplates() string
|
||||
pkg text/template, method (*Template) Option(...string) *Template
|
||||
pkg time, method (Time) AppendFormat([]uint8, string) []uint8
|
||||
pkg unicode, const Version = "8.0.0"
|
||||
pkg unicode, var Ahom *RangeTable
|
||||
pkg unicode, var Anatolian_Hieroglyphs *RangeTable
|
||||
pkg unicode, var Hatran *RangeTable
|
||||
pkg unicode, var Multani *RangeTable
|
||||
pkg unicode, var Old_Hungarian *RangeTable
|
||||
pkg unicode, var SignWriting *RangeTable
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
package strconv
|
||||
|
||||
// (468+138+67)*2 + (326)*4 = 2650 bytes
|
||||
// (470+136+73)*2 + (342)*4 = 2726 bytes
|
||||
|
||||
var isPrint16 = []uint16{
|
||||
0x0020, 0x007e,
|
||||
|
|
@ -26,8 +26,8 @@ var isPrint16 = []uint16{
|
|||
0x0800, 0x082d,
|
||||
0x0830, 0x085b,
|
||||
0x085e, 0x085e,
|
||||
0x08a0, 0x08b2,
|
||||
0x08e4, 0x098c,
|
||||
0x08a0, 0x08b4,
|
||||
0x08e3, 0x098c,
|
||||
0x098f, 0x0990,
|
||||
0x0993, 0x09b2,
|
||||
0x09b6, 0x09b9,
|
||||
|
|
@ -51,6 +51,7 @@ var isPrint16 = []uint16{
|
|||
0x0ad0, 0x0ad0,
|
||||
0x0ae0, 0x0ae3,
|
||||
0x0ae6, 0x0af1,
|
||||
0x0af9, 0x0af9,
|
||||
0x0b01, 0x0b0c,
|
||||
0x0b0f, 0x0b10,
|
||||
0x0b13, 0x0b39,
|
||||
|
|
@ -73,7 +74,7 @@ var isPrint16 = []uint16{
|
|||
0x0be6, 0x0bfa,
|
||||
0x0c00, 0x0c39,
|
||||
0x0c3d, 0x0c4d,
|
||||
0x0c55, 0x0c59,
|
||||
0x0c55, 0x0c5a,
|
||||
0x0c60, 0x0c63,
|
||||
0x0c66, 0x0c6f,
|
||||
0x0c78, 0x0cb9,
|
||||
|
|
@ -84,7 +85,7 @@ var isPrint16 = []uint16{
|
|||
0x0d01, 0x0d3a,
|
||||
0x0d3d, 0x0d4e,
|
||||
0x0d57, 0x0d57,
|
||||
0x0d60, 0x0d63,
|
||||
0x0d5f, 0x0d63,
|
||||
0x0d66, 0x0d75,
|
||||
0x0d79, 0x0d7f,
|
||||
0x0d82, 0x0d96,
|
||||
|
|
@ -117,7 +118,8 @@ var isPrint16 = []uint16{
|
|||
0x1318, 0x135a,
|
||||
0x135d, 0x137c,
|
||||
0x1380, 0x1399,
|
||||
0x13a0, 0x13f4,
|
||||
0x13a0, 0x13f5,
|
||||
0x13f8, 0x13fd,
|
||||
0x1400, 0x169c,
|
||||
0x16a0, 0x16f8,
|
||||
0x1700, 0x1714,
|
||||
|
|
@ -167,9 +169,9 @@ var isPrint16 = []uint16{
|
|||
0x2030, 0x205e,
|
||||
0x2070, 0x2071,
|
||||
0x2074, 0x209c,
|
||||
0x20a0, 0x20bd,
|
||||
0x20a0, 0x20be,
|
||||
0x20d0, 0x20f0,
|
||||
0x2100, 0x2189,
|
||||
0x2100, 0x218b,
|
||||
0x2190, 0x23fa,
|
||||
0x2400, 0x2426,
|
||||
0x2440, 0x244a,
|
||||
|
|
@ -177,6 +179,7 @@ var isPrint16 = []uint16{
|
|||
0x2b76, 0x2b95,
|
||||
0x2b98, 0x2bb9,
|
||||
0x2bbd, 0x2bd1,
|
||||
0x2bec, 0x2bef,
|
||||
0x2c00, 0x2cf3,
|
||||
0x2cf9, 0x2d27,
|
||||
0x2d2d, 0x2d2d,
|
||||
|
|
@ -193,19 +196,19 @@ var isPrint16 = []uint16{
|
|||
0x3131, 0x31ba,
|
||||
0x31c0, 0x31e3,
|
||||
0x31f0, 0x4db5,
|
||||
0x4dc0, 0x9fcc,
|
||||
0x4dc0, 0x9fd5,
|
||||
0xa000, 0xa48c,
|
||||
0xa490, 0xa4c6,
|
||||
0xa4d0, 0xa62b,
|
||||
0xa640, 0xa6f7,
|
||||
0xa700, 0xa7ad,
|
||||
0xa7b0, 0xa7b1,
|
||||
0xa7b0, 0xa7b7,
|
||||
0xa7f7, 0xa82b,
|
||||
0xa830, 0xa839,
|
||||
0xa840, 0xa877,
|
||||
0xa880, 0xa8c4,
|
||||
0xa8ce, 0xa8d9,
|
||||
0xa8e0, 0xa8fb,
|
||||
0xa8e0, 0xa8fd,
|
||||
0xa900, 0xa953,
|
||||
0xa95f, 0xa97c,
|
||||
0xa980, 0xa9d9,
|
||||
|
|
@ -217,9 +220,8 @@ var isPrint16 = []uint16{
|
|||
0xab01, 0xab06,
|
||||
0xab09, 0xab0e,
|
||||
0xab11, 0xab16,
|
||||
0xab20, 0xab5f,
|
||||
0xab64, 0xab65,
|
||||
0xabc0, 0xabed,
|
||||
0xab20, 0xab65,
|
||||
0xab70, 0xabed,
|
||||
0xabf0, 0xabf9,
|
||||
0xac00, 0xd7a3,
|
||||
0xd7b0, 0xd7c6,
|
||||
|
|
@ -234,8 +236,7 @@ var isPrint16 = []uint16{
|
|||
0xfd92, 0xfdc7,
|
||||
0xfdf0, 0xfdfd,
|
||||
0xfe00, 0xfe19,
|
||||
0xfe20, 0xfe2d,
|
||||
0xfe30, 0xfe6b,
|
||||
0xfe20, 0xfe6b,
|
||||
0xfe70, 0xfefc,
|
||||
0xff01, 0xffbe,
|
||||
0xffc2, 0xffc7,
|
||||
|
|
@ -370,8 +371,6 @@ var isNotPrint16 = []uint16{
|
|||
0x318f,
|
||||
0x321f,
|
||||
0x32ff,
|
||||
0xa69e,
|
||||
0xa78f,
|
||||
0xa9ce,
|
||||
0xa9ff,
|
||||
0xab27,
|
||||
|
|
@ -418,12 +417,13 @@ var isPrint32 = []uint32{
|
|||
0x01083c, 0x01083c,
|
||||
0x01083f, 0x01089e,
|
||||
0x0108a7, 0x0108af,
|
||||
0x010900, 0x01091b,
|
||||
0x0108e0, 0x0108f5,
|
||||
0x0108fb, 0x01091b,
|
||||
0x01091f, 0x010939,
|
||||
0x01093f, 0x01093f,
|
||||
0x010980, 0x0109b7,
|
||||
0x0109be, 0x0109bf,
|
||||
0x010a00, 0x010a06,
|
||||
0x0109bc, 0x0109cf,
|
||||
0x0109d2, 0x010a06,
|
||||
0x010a0c, 0x010a33,
|
||||
0x010a38, 0x010a3a,
|
||||
0x010a3f, 0x010a47,
|
||||
|
|
@ -438,6 +438,9 @@ var isPrint32 = []uint32{
|
|||
0x010b99, 0x010b9c,
|
||||
0x010ba9, 0x010baf,
|
||||
0x010c00, 0x010c48,
|
||||
0x010c80, 0x010cb2,
|
||||
0x010cc0, 0x010cf2,
|
||||
0x010cfa, 0x010cff,
|
||||
0x010e60, 0x010e7e,
|
||||
0x011000, 0x01104d,
|
||||
0x011052, 0x01106f,
|
||||
|
|
@ -446,19 +449,19 @@ var isPrint32 = []uint32{
|
|||
0x0110f0, 0x0110f9,
|
||||
0x011100, 0x011143,
|
||||
0x011150, 0x011176,
|
||||
0x011180, 0x0111c8,
|
||||
0x0111cd, 0x0111cd,
|
||||
0x0111d0, 0x0111da,
|
||||
0x0111e1, 0x0111f4,
|
||||
0x011180, 0x0111cd,
|
||||
0x0111d0, 0x0111f4,
|
||||
0x011200, 0x01123d,
|
||||
0x011280, 0x0112a9,
|
||||
0x0112b0, 0x0112ea,
|
||||
0x0112f0, 0x0112f9,
|
||||
0x011301, 0x01130c,
|
||||
0x011300, 0x01130c,
|
||||
0x01130f, 0x011310,
|
||||
0x011313, 0x011339,
|
||||
0x01133c, 0x011344,
|
||||
0x011347, 0x011348,
|
||||
0x01134b, 0x01134d,
|
||||
0x011350, 0x011350,
|
||||
0x011357, 0x011357,
|
||||
0x01135d, 0x011363,
|
||||
0x011366, 0x01136c,
|
||||
|
|
@ -466,17 +469,22 @@ var isPrint32 = []uint32{
|
|||
0x011480, 0x0114c7,
|
||||
0x0114d0, 0x0114d9,
|
||||
0x011580, 0x0115b5,
|
||||
0x0115b8, 0x0115c9,
|
||||
0x0115b8, 0x0115dd,
|
||||
0x011600, 0x011644,
|
||||
0x011650, 0x011659,
|
||||
0x011680, 0x0116b7,
|
||||
0x0116c0, 0x0116c9,
|
||||
0x011700, 0x011719,
|
||||
0x01171d, 0x01172b,
|
||||
0x011730, 0x01173f,
|
||||
0x0118a0, 0x0118f2,
|
||||
0x0118ff, 0x0118ff,
|
||||
0x011ac0, 0x011af8,
|
||||
0x012000, 0x012398,
|
||||
0x012000, 0x012399,
|
||||
0x012400, 0x012474,
|
||||
0x012480, 0x012543,
|
||||
0x013000, 0x01342e,
|
||||
0x014400, 0x014646,
|
||||
0x016800, 0x016a38,
|
||||
0x016a40, 0x016a69,
|
||||
0x016a6e, 0x016a6f,
|
||||
|
|
@ -497,7 +505,7 @@ var isPrint32 = []uint32{
|
|||
0x01d000, 0x01d0f5,
|
||||
0x01d100, 0x01d126,
|
||||
0x01d129, 0x01d172,
|
||||
0x01d17b, 0x01d1dd,
|
||||
0x01d17b, 0x01d1e8,
|
||||
0x01d200, 0x01d245,
|
||||
0x01d300, 0x01d356,
|
||||
0x01d360, 0x01d371,
|
||||
|
|
@ -508,7 +516,8 @@ var isPrint32 = []uint32{
|
|||
0x01d50d, 0x01d546,
|
||||
0x01d54a, 0x01d6a5,
|
||||
0x01d6a8, 0x01d7cb,
|
||||
0x01d7ce, 0x01d7ff,
|
||||
0x01d7ce, 0x01da8b,
|
||||
0x01da9b, 0x01daaf,
|
||||
0x01e800, 0x01e8c4,
|
||||
0x01e8c7, 0x01e8d6,
|
||||
0x01ee00, 0x01ee24,
|
||||
|
|
@ -530,13 +539,7 @@ var isPrint32 = []uint32{
|
|||
0x01f210, 0x01f23a,
|
||||
0x01f240, 0x01f248,
|
||||
0x01f250, 0x01f251,
|
||||
0x01f300, 0x01f32c,
|
||||
0x01f330, 0x01f37d,
|
||||
0x01f380, 0x01f3ce,
|
||||
0x01f3d4, 0x01f3f7,
|
||||
0x01f400, 0x01f54a,
|
||||
0x01f550, 0x01f642,
|
||||
0x01f645, 0x01f6cf,
|
||||
0x01f300, 0x01f6d0,
|
||||
0x01f6e0, 0x01f6ec,
|
||||
0x01f6f0, 0x01f6f3,
|
||||
0x01f700, 0x01f773,
|
||||
|
|
@ -546,9 +549,13 @@ var isPrint32 = []uint32{
|
|||
0x01f850, 0x01f859,
|
||||
0x01f860, 0x01f887,
|
||||
0x01f890, 0x01f8ad,
|
||||
0x01f910, 0x01f918,
|
||||
0x01f980, 0x01f984,
|
||||
0x01f9c0, 0x01f9c0,
|
||||
0x020000, 0x02a6d6,
|
||||
0x02a700, 0x02b734,
|
||||
0x02b740, 0x02b81d,
|
||||
0x02b820, 0x02cea1,
|
||||
0x02f800, 0x02fa1d,
|
||||
0x0e0100, 0x0e01ef,
|
||||
}
|
||||
|
|
@ -562,12 +569,18 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
|
|||
0x0809,
|
||||
0x0836,
|
||||
0x0856,
|
||||
0x08f3,
|
||||
0x0a04,
|
||||
0x0a14,
|
||||
0x0a18,
|
||||
0x10bd,
|
||||
0x1135,
|
||||
0x11e0,
|
||||
0x1212,
|
||||
0x1287,
|
||||
0x1289,
|
||||
0x128e,
|
||||
0x129e,
|
||||
0x1304,
|
||||
0x1329,
|
||||
0x1331,
|
||||
|
|
@ -589,6 +602,7 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
|
|||
0xd53f,
|
||||
0xd545,
|
||||
0xd551,
|
||||
0xdaa0,
|
||||
0xee04,
|
||||
0xee20,
|
||||
0xee23,
|
||||
|
|
@ -618,7 +632,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
|
|||
0xf0c0,
|
||||
0xf0d0,
|
||||
0xf12f,
|
||||
0xf4ff,
|
||||
0xf57a,
|
||||
0xf5a4,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ var upperTest = []rune{
|
|||
0x181,
|
||||
0x376,
|
||||
0x3cf,
|
||||
0x13bd,
|
||||
0x1f2a,
|
||||
0x2102,
|
||||
0x2c00,
|
||||
|
|
@ -46,6 +47,7 @@ var notupperTest = []rune{
|
|||
0x377,
|
||||
0x387,
|
||||
0x2150,
|
||||
0xab7d,
|
||||
0xffff,
|
||||
0x10000,
|
||||
}
|
||||
|
|
@ -194,6 +196,15 @@ var caseTest = []caseT{
|
|||
{LowerCase, 0x0148, 0x0148},
|
||||
{TitleCase, 0x0148, 0x0147},
|
||||
|
||||
// Lowercase lower than uppercase.
|
||||
// AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8
|
||||
{UpperCase, 0xab78, 0x13a8},
|
||||
{LowerCase, 0xab78, 0xab78},
|
||||
{TitleCase, 0xab78, 0x13a8},
|
||||
{UpperCase, 0x13a8, 0x13a8},
|
||||
{LowerCase, 0x13a8, 0xab78},
|
||||
{TitleCase, 0x13a8, 0x13a8},
|
||||
|
||||
// Last block in the 5.1.0 table
|
||||
// 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
|
||||
{UpperCase, 0x10400, 0x10400},
|
||||
|
|
@ -405,6 +416,9 @@ var simpleFoldTests = []string{
|
|||
// Extra special cases: has lower/upper but no case fold.
|
||||
"İ",
|
||||
"ı",
|
||||
|
||||
// Upper comes before lower (Cherokee).
|
||||
"\u13b0\uab80",
|
||||
}
|
||||
|
||||
func TestSimpleFold(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ func main() {
|
|||
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
|
||||
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
|
||||
var url = flag.String("url",
|
||||
"http://www.unicode.org/Public/7.0.0/ucd/",
|
||||
"http://www.unicode.org/Public/8.0.0/ucd/",
|
||||
"URL of Unicode database directory")
|
||||
var tablelist = flag.String("tables",
|
||||
"all",
|
||||
|
|
@ -1152,11 +1152,14 @@ func printCasefold() {
|
|||
}
|
||||
}
|
||||
|
||||
// Delete the groups for which assuming [lower, upper] is right.
|
||||
// Delete the groups for which assuming [lower, upper] or [upper, lower] is right.
|
||||
for i, orb := range caseOrbit {
|
||||
if len(orb) == 2 && chars[orb[0]].upperCase == orb[1] && chars[orb[1]].lowerCase == orb[0] {
|
||||
caseOrbit[i] = nil
|
||||
}
|
||||
if len(orb) == 2 && chars[orb[1]].upperCase == orb[0] && chars[orb[0]].lowerCase == orb[1] {
|
||||
caseOrbit[i] = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Record orbit information in chars.
|
||||
|
|
|
|||
|
|
@ -14,9 +14,11 @@ type T struct {
|
|||
script string
|
||||
}
|
||||
|
||||
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0 and 7.0.0 mostly to
|
||||
// discover when new scripts and categories arise.
|
||||
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0
|
||||
// mostly to discover when new scripts and categories arise.
|
||||
var inTest = []T{
|
||||
{0x11711, "Ahom"},
|
||||
{0x14646, "Anatolian_Hieroglyphs"},
|
||||
{0x06e2, "Arabic"},
|
||||
{0x0567, "Armenian"},
|
||||
{0x10b20, "Avestan"},
|
||||
|
|
@ -58,6 +60,7 @@ var inTest = []T{
|
|||
{0x3028, "Han"},
|
||||
{0x11b8, "Hangul"},
|
||||
{0x1727, "Hanunoo"},
|
||||
{0x108FF, "Hatran"},
|
||||
{0x05a0, "Hebrew"},
|
||||
{0x3058, "Hiragana"},
|
||||
{0x10841, "Imperial_Aramaic"},
|
||||
|
|
@ -94,12 +97,14 @@ var inTest = []T{
|
|||
{0x11611, "Modi"},
|
||||
{0x1822, "Mongolian"},
|
||||
{0x16a60, "Mro"},
|
||||
{0x11293, "Multani"},
|
||||
{0x104c, "Myanmar"},
|
||||
{0x10880, "Nabataean"},
|
||||
{0x19c3, "New_Tai_Lue"},
|
||||
{0x07f8, "Nko"},
|
||||
{0x169b, "Ogham"},
|
||||
{0x1c6a, "Ol_Chiki"},
|
||||
{0x10C80, "Old_Hungarian"},
|
||||
{0x10310, "Old_Italic"},
|
||||
{0x10a80, "Old_North_Arabian"},
|
||||
{0x10350, "Old_Permic"},
|
||||
|
|
@ -121,6 +126,7 @@ var inTest = []T{
|
|||
{0x111a0, "Sharada"},
|
||||
{0x10463, "Shavian"},
|
||||
{0x115c1, "Siddham"},
|
||||
{0x1D920, "SignWriting"},
|
||||
{0x0dbd, "Sinhala"},
|
||||
{0x110d0, "Sora_Sompeng"},
|
||||
{0x1ba3, "Sundanese"},
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue