exp/locale/collate: fixed two bugs uncovered by regression tests.

The first bug was that tertiary ignorables had the same colElem as
implicit colElems, yielding unexpected results. The current encoding
ensures that a non-implicit colElem is never 0.  This fix uncovered
another bug in the trie, which indexed incorrectly into the null block.
This was caused by an unfinished optimization that would avoid the
need to max out the most-significant bits of continuation bytes.
This bug was also present in the trie used in exp/norm and has been
fixed there as well. The appearance of the bug was rare, as the lower
blocks happened to be nearly nil.

R=r
CC=golang-dev
https://golang.org/cl/6127070
This commit is contained in:
Marcel van Lohuizen 2012-05-02 17:01:41 +02:00
parent 81d9621534
commit 10838165d8
13 changed files with 201 additions and 201 deletions

View File

@ -25,11 +25,11 @@ const (
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 010ppppp pppppppp pppppppp tttttttt, where
// 000ppppp pppppppp pppppppp tttttttt, where
// - p* is primary collation value
// - t* is the tertiary collation value
// Collation elements with a secondary value are of the form
// 00000000 ssssssss ssssssss tttttttt, where
// 01000000 ssssssss ssssssss tttttttt, where
// - s* is the secondary collation value
// - t* is the tertiary collation value
const (
@ -37,7 +37,7 @@ const (
maxSecondaryBits = 16
maxTertiaryBits = 8
isPrimary = 0x40000000
isSecondary = 0x40000000
)
func makeCE(weights []int) (uint32, error) {
@ -57,10 +57,10 @@ func makeCE(weights []int) (uint32, error) {
return 0, fmt.Errorf("makeCE: non-default secondary weight for non-zero primary: %X", weights)
}
ce = uint32(weights[0]<<maxTertiaryBits + weights[2])
ce |= isPrimary
} else {
// secondary weight form
ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
ce |= isSecondary
}
return ce, nil
}
@ -162,7 +162,6 @@ const (
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset

View File

@ -29,9 +29,9 @@ func decompCE(in []int) (ce uint32, err error) {
}
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0}, 000},
{normalCE, []int{0, 30, 3}, 0x1E03},
{normalCE, []int{100, defaultSecondary, 3}, 0x40006403},
{normalCE, []int{0, 0, 0}, 0x40000000},
{normalCE, []int{0, 30, 3}, 0x40001E03},
{normalCE, []int{100, defaultSecondary, 3}, 0x6403},
{normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-default secondary
{normalCE, []int{100, 1, 3}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},

View File

@ -19,7 +19,10 @@ import (
"reflect"
)
const blockSize = 64
const (
blockSize = 64
blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
)
type trie struct {
index []uint16
@ -102,7 +105,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int64 {
if n.isInternal() {
v, ok := index.lookupBlockIdx[h]
if !ok {
v = int64(len(index.lookupBlocks))
v = int64(len(index.lookupBlocks)) - blockOffset
index.lookupBlocks = append(index.lookupBlocks, n)
index.lookupBlockIdx[h] = v
}
@ -110,7 +113,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int64 {
} else {
v, ok := index.valueBlockIdx[h]
if !ok {
v = int64(len(index.valueBlocks))
v = int64(len(index.valueBlocks)) - blockOffset
index.valueBlocks = append(index.valueBlocks, n)
index.valueBlockIdx[h] = v
}

View File

@ -79,24 +79,24 @@ var testLookup = [640]uint16 {
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2:0x03, 0x0c4:0x04,
0x0c8:0x05,
0x0df:0x06,
0x0e0:0x04,
0x0ef:0x05,
0x0f0:0x07, 0x0f4:0x09,
0x0c2:0x01, 0x0c4:0x02,
0x0c8:0x03,
0x0df:0x04,
0x0e0:0x02,
0x0ef:0x03,
0x0f0:0x05, 0x0f4:0x07,
// Block 0x4, offset 0x100
0x120:0x07, 0x126:0x08,
0x120:0x05, 0x126:0x06,
// Block 0x5, offset 0x140
0x17f:0x09,
0x17f:0x07,
// Block 0x6, offset 0x180
0x180:0x0a, 0x184:0x0b,
0x180:0x08, 0x184:0x09,
// Block 0x7, offset 0x1c0
0x1d0:0x06,
0x1d0:0x04,
// Block 0x8, offset 0x200
0x23f:0x0c,
0x23f:0x0a,
// Block 0x9, offset 0x240
0x24f:0x08,
0x24f:0x06,
}
var testTrie = trie{ testLookup[:], testValues[:]}

View File

@ -68,17 +68,18 @@ func (ce colElem) ctype() ceType {
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 010ppppp pppppppp pppppppp tttttttt, where
// 000ppppp pppppppp pppppppp tttttttt, where
// - p* is primary collation value
// - t* is the tertiary collation value
// Collation elements with a secondary value are of the form
// 00000000 ssssssss ssssssss tttttttt, where
// 01000000 ssssssss ssssssss tttttttt, where
// - s* is the secondary collation value
// - t* is the tertiary collation value
func splitCE(ce colElem) weights {
const secondaryMask = 0x40000000
w := weights{}
w.tertiary = uint8(ce)
if ce&0x40000000 != 0 {
if ce&secondaryMask == 0 {
// primary weight form
w.primary = uint32((ce >> 8) & 0x1FFFFF)
w.secondary = defaultSecondary

View File

@ -20,14 +20,14 @@ func makeCE(weights []int) colElem {
maxPrimaryBits = 21
maxSecondaryBits = 16
maxTertiaryBits = 8
isPrimary = 0x40000000
isSecondary = 0x40000000
)
var ce colElem
if weights[0] != 0 {
ce = colElem(weights[0]<<maxTertiaryBits + weights[2])
ce |= isPrimary
} else {
ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
ce |= isSecondary
}
return ce
}

View File

@ -27,15 +27,10 @@ const (
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
maskx = 0x3F // 0011 1111
mask2 = 0x1F // 0001 1111
mask3 = 0x0F // 0000 1111
mask4 = 0x07 // 0000 0111
)
func (t *trie) lookupValue(n uint16, b byte) colElem {
return colElem(t.values[int(n)<<6+int(b&maskx)])
return colElem(t.values[int(n)<<6+int(b)])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
@ -67,7 +62,7 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)&maskx
o := int(i)<<6 + int(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
@ -83,13 +78,13 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)&maskx
o := int(i)<<6 + int(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)&maskx
o = int(i)<<6 + int(c2)
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {

View File

@ -89,18 +89,18 @@ var testValues = [832]uint32{
}
var testLookup = [640]uint16{
0x0c2: 0x03, 0x0c4: 0x04,
0x0c8: 0x05,
0x0df: 0x06,
0x0e0: 0x04,
0x0ef: 0x05,
0x0f0: 0x07, 0x0f4: 0x09,
0x120: 0x07, 0x126: 0x08,
0x17f: 0x09,
0x180: 0x0a, 0x184: 0x0b,
0x1d0: 0x06,
0x23f: 0x0c,
0x24f: 0x08,
0x0c2: 0x01, 0x0c4: 0x02,
0x0c8: 0x03,
0x0df: 0x04,
0x0e0: 0x02,
0x0ef: 0x03,
0x0f0: 0x05, 0x0f4: 0x07,
0x120: 0x05, 0x126: 0x06,
0x17f: 0x07,
0x180: 0x08, 0x184: 0x09,
0x1d0: 0x04,
0x23f: 0x0a,
0x24f: 0x06,
}
var testTrie = trie{testLookup[:], testValues[:]}

View File

@ -3746,75 +3746,75 @@ var nfcLookup = [1088]uint8{
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2: 0x2e, 0x0c3: 0x03, 0x0c4: 0x04, 0x0c5: 0x05, 0x0c6: 0x2f, 0x0c7: 0x06,
0x0c8: 0x07, 0x0ca: 0x30, 0x0cc: 0x08, 0x0cd: 0x09, 0x0ce: 0x0a, 0x0cf: 0x31,
0x0d0: 0x0b, 0x0d1: 0x32, 0x0d2: 0x33, 0x0d3: 0x0c, 0x0d6: 0x0d, 0x0d7: 0x34,
0x0d8: 0x35, 0x0d9: 0x0e, 0x0db: 0x36, 0x0dc: 0x37, 0x0dd: 0x38, 0x0df: 0x39,
0x0e0: 0x04, 0x0e1: 0x05, 0x0e2: 0x06, 0x0e3: 0x07,
0x0ea: 0x08, 0x0eb: 0x09, 0x0ec: 0x09, 0x0ed: 0x0a, 0x0ef: 0x0b,
0x0f0: 0x10,
0x0c2: 0x2c, 0x0c3: 0x01, 0x0c4: 0x02, 0x0c5: 0x03, 0x0c6: 0x2d, 0x0c7: 0x04,
0x0c8: 0x05, 0x0ca: 0x2e, 0x0cc: 0x06, 0x0cd: 0x07, 0x0ce: 0x08, 0x0cf: 0x2f,
0x0d0: 0x09, 0x0d1: 0x30, 0x0d2: 0x31, 0x0d3: 0x0a, 0x0d6: 0x0b, 0x0d7: 0x32,
0x0d8: 0x33, 0x0d9: 0x0c, 0x0db: 0x34, 0x0dc: 0x35, 0x0dd: 0x36, 0x0df: 0x37,
0x0e0: 0x02, 0x0e1: 0x03, 0x0e2: 0x04, 0x0e3: 0x05,
0x0ea: 0x06, 0x0eb: 0x07, 0x0ec: 0x07, 0x0ed: 0x08, 0x0ef: 0x09,
0x0f0: 0x0e,
// Block 0x4, offset 0x100
0x120: 0x3a, 0x121: 0x3b, 0x124: 0x3c, 0x125: 0x3d, 0x126: 0x3e, 0x127: 0x3f,
0x128: 0x40, 0x129: 0x41, 0x12a: 0x42, 0x12b: 0x43, 0x12c: 0x3e, 0x12d: 0x44, 0x12e: 0x45, 0x12f: 0x46,
0x131: 0x47, 0x132: 0x48, 0x133: 0x49, 0x134: 0x4a, 0x135: 0x4b, 0x137: 0x4c,
0x138: 0x4d, 0x139: 0x4e, 0x13a: 0x4f, 0x13b: 0x50, 0x13c: 0x51, 0x13d: 0x52, 0x13e: 0x53, 0x13f: 0x54,
0x120: 0x38, 0x121: 0x39, 0x124: 0x3a, 0x125: 0x3b, 0x126: 0x3c, 0x127: 0x3d,
0x128: 0x3e, 0x129: 0x3f, 0x12a: 0x40, 0x12b: 0x41, 0x12c: 0x3c, 0x12d: 0x42, 0x12e: 0x43, 0x12f: 0x44,
0x131: 0x45, 0x132: 0x46, 0x133: 0x47, 0x134: 0x48, 0x135: 0x49, 0x137: 0x4a,
0x138: 0x4b, 0x139: 0x4c, 0x13a: 0x4d, 0x13b: 0x4e, 0x13c: 0x4f, 0x13d: 0x50, 0x13e: 0x51, 0x13f: 0x52,
// Block 0x5, offset 0x140
0x140: 0x55, 0x142: 0x56, 0x144: 0x57, 0x145: 0x58, 0x146: 0x59, 0x147: 0x5a,
0x14d: 0x5b,
0x15c: 0x5c, 0x15f: 0x5d,
0x162: 0x5e, 0x164: 0x5f,
0x168: 0x60, 0x169: 0x61, 0x16c: 0x0f, 0x16d: 0x62, 0x16e: 0x63, 0x16f: 0x64,
0x170: 0x65, 0x173: 0x66, 0x177: 0x67,
0x178: 0x10, 0x179: 0x11, 0x17a: 0x12, 0x17b: 0x13, 0x17c: 0x14, 0x17d: 0x15, 0x17e: 0x16, 0x17f: 0x17,
0x140: 0x53, 0x142: 0x54, 0x144: 0x55, 0x145: 0x56, 0x146: 0x57, 0x147: 0x58,
0x14d: 0x59,
0x15c: 0x5a, 0x15f: 0x5b,
0x162: 0x5c, 0x164: 0x5d,
0x168: 0x5e, 0x169: 0x5f, 0x16c: 0x0d, 0x16d: 0x60, 0x16e: 0x61, 0x16f: 0x62,
0x170: 0x63, 0x173: 0x64, 0x177: 0x65,
0x178: 0x0e, 0x179: 0x0f, 0x17a: 0x10, 0x17b: 0x11, 0x17c: 0x12, 0x17d: 0x13, 0x17e: 0x14, 0x17f: 0x15,
// Block 0x6, offset 0x180
0x180: 0x68, 0x183: 0x69, 0x184: 0x6a, 0x186: 0x6b, 0x187: 0x6c,
0x188: 0x6d, 0x189: 0x18, 0x18a: 0x19, 0x18b: 0x6e, 0x18c: 0x6f,
0x1ab: 0x70,
0x1b3: 0x71, 0x1b5: 0x72, 0x1b7: 0x73,
0x180: 0x66, 0x183: 0x67, 0x184: 0x68, 0x186: 0x69, 0x187: 0x6a,
0x188: 0x6b, 0x189: 0x16, 0x18a: 0x17, 0x18b: 0x6c, 0x18c: 0x6d,
0x1ab: 0x6e,
0x1b3: 0x6f, 0x1b5: 0x70, 0x1b7: 0x71,
// Block 0x7, offset 0x1c0
0x1c0: 0x74, 0x1c1: 0x1a, 0x1c2: 0x1b, 0x1c3: 0x1c,
0x1c0: 0x72, 0x1c1: 0x18, 0x1c2: 0x19, 0x1c3: 0x1a,
// Block 0x8, offset 0x200
0x219: 0x75, 0x21b: 0x76,
0x220: 0x77, 0x223: 0x78, 0x224: 0x79, 0x225: 0x7a, 0x226: 0x7b, 0x227: 0x7c,
0x22a: 0x7d, 0x22b: 0x7e, 0x22f: 0x7f,
0x230: 0x80, 0x231: 0x80, 0x232: 0x80, 0x233: 0x80, 0x234: 0x80, 0x235: 0x80, 0x236: 0x80, 0x237: 0x80,
0x238: 0x80, 0x239: 0x80, 0x23a: 0x80, 0x23b: 0x80, 0x23c: 0x80, 0x23d: 0x80, 0x23e: 0x80, 0x23f: 0x80,
0x219: 0x73, 0x21b: 0x74,
0x220: 0x75, 0x223: 0x76, 0x224: 0x77, 0x225: 0x78, 0x226: 0x79, 0x227: 0x7a,
0x22a: 0x7b, 0x22b: 0x7c, 0x22f: 0x7d,
0x230: 0x7e, 0x231: 0x7e, 0x232: 0x7e, 0x233: 0x7e, 0x234: 0x7e, 0x235: 0x7e, 0x236: 0x7e, 0x237: 0x7e,
0x238: 0x7e, 0x239: 0x7e, 0x23a: 0x7e, 0x23b: 0x7e, 0x23c: 0x7e, 0x23d: 0x7e, 0x23e: 0x7e, 0x23f: 0x7e,
// Block 0x9, offset 0x240
0x240: 0x80, 0x241: 0x80, 0x242: 0x80, 0x243: 0x80, 0x244: 0x80, 0x245: 0x80, 0x246: 0x80, 0x247: 0x80,
0x248: 0x80, 0x249: 0x80, 0x24a: 0x80, 0x24b: 0x80, 0x24c: 0x80, 0x24d: 0x80, 0x24e: 0x80, 0x24f: 0x80,
0x250: 0x80, 0x251: 0x80, 0x252: 0x80, 0x253: 0x80, 0x254: 0x80, 0x255: 0x80, 0x256: 0x80, 0x257: 0x80,
0x258: 0x80, 0x259: 0x80, 0x25a: 0x80, 0x25b: 0x80, 0x25c: 0x80, 0x25d: 0x80, 0x25e: 0x80, 0x25f: 0x80,
0x260: 0x80, 0x261: 0x80, 0x262: 0x80, 0x263: 0x80, 0x264: 0x80, 0x265: 0x80, 0x266: 0x80, 0x267: 0x80,
0x268: 0x80, 0x269: 0x80, 0x26a: 0x80, 0x26b: 0x80, 0x26c: 0x80, 0x26d: 0x80, 0x26e: 0x80, 0x26f: 0x80,
0x270: 0x80, 0x271: 0x80, 0x272: 0x80, 0x273: 0x80, 0x274: 0x80, 0x275: 0x80, 0x276: 0x80, 0x277: 0x80,
0x278: 0x80, 0x279: 0x80, 0x27a: 0x80, 0x27b: 0x80, 0x27c: 0x80, 0x27d: 0x80, 0x27e: 0x80, 0x27f: 0x80,
0x240: 0x7e, 0x241: 0x7e, 0x242: 0x7e, 0x243: 0x7e, 0x244: 0x7e, 0x245: 0x7e, 0x246: 0x7e, 0x247: 0x7e,
0x248: 0x7e, 0x249: 0x7e, 0x24a: 0x7e, 0x24b: 0x7e, 0x24c: 0x7e, 0x24d: 0x7e, 0x24e: 0x7e, 0x24f: 0x7e,
0x250: 0x7e, 0x251: 0x7e, 0x252: 0x7e, 0x253: 0x7e, 0x254: 0x7e, 0x255: 0x7e, 0x256: 0x7e, 0x257: 0x7e,
0x258: 0x7e, 0x259: 0x7e, 0x25a: 0x7e, 0x25b: 0x7e, 0x25c: 0x7e, 0x25d: 0x7e, 0x25e: 0x7e, 0x25f: 0x7e,
0x260: 0x7e, 0x261: 0x7e, 0x262: 0x7e, 0x263: 0x7e, 0x264: 0x7e, 0x265: 0x7e, 0x266: 0x7e, 0x267: 0x7e,
0x268: 0x7e, 0x269: 0x7e, 0x26a: 0x7e, 0x26b: 0x7e, 0x26c: 0x7e, 0x26d: 0x7e, 0x26e: 0x7e, 0x26f: 0x7e,
0x270: 0x7e, 0x271: 0x7e, 0x272: 0x7e, 0x273: 0x7e, 0x274: 0x7e, 0x275: 0x7e, 0x276: 0x7e, 0x277: 0x7e,
0x278: 0x7e, 0x279: 0x7e, 0x27a: 0x7e, 0x27b: 0x7e, 0x27c: 0x7e, 0x27d: 0x7e, 0x27e: 0x7e, 0x27f: 0x7e,
// Block 0xa, offset 0x280
0x280: 0x80, 0x281: 0x80, 0x282: 0x80, 0x283: 0x80, 0x284: 0x80, 0x285: 0x80, 0x286: 0x80, 0x287: 0x80,
0x288: 0x80, 0x289: 0x80, 0x28a: 0x80, 0x28b: 0x80, 0x28c: 0x80, 0x28d: 0x80, 0x28e: 0x80, 0x28f: 0x80,
0x290: 0x80, 0x291: 0x80, 0x292: 0x80, 0x293: 0x80, 0x294: 0x80, 0x295: 0x80, 0x296: 0x80, 0x297: 0x80,
0x298: 0x80, 0x299: 0x80, 0x29a: 0x80, 0x29b: 0x80, 0x29c: 0x80, 0x29d: 0x80, 0x29e: 0x81,
0x280: 0x7e, 0x281: 0x7e, 0x282: 0x7e, 0x283: 0x7e, 0x284: 0x7e, 0x285: 0x7e, 0x286: 0x7e, 0x287: 0x7e,
0x288: 0x7e, 0x289: 0x7e, 0x28a: 0x7e, 0x28b: 0x7e, 0x28c: 0x7e, 0x28d: 0x7e, 0x28e: 0x7e, 0x28f: 0x7e,
0x290: 0x7e, 0x291: 0x7e, 0x292: 0x7e, 0x293: 0x7e, 0x294: 0x7e, 0x295: 0x7e, 0x296: 0x7e, 0x297: 0x7e,
0x298: 0x7e, 0x299: 0x7e, 0x29a: 0x7e, 0x29b: 0x7e, 0x29c: 0x7e, 0x29d: 0x7e, 0x29e: 0x7f,
// Block 0xb, offset 0x2c0
0x2e4: 0x1d, 0x2e5: 0x1e, 0x2e6: 0x1f, 0x2e7: 0x20,
0x2e8: 0x21, 0x2e9: 0x22, 0x2ea: 0x23, 0x2eb: 0x24, 0x2ec: 0x82, 0x2ed: 0x83,
0x2f8: 0x84,
0x2e4: 0x1b, 0x2e5: 0x1c, 0x2e6: 0x1d, 0x2e7: 0x1e,
0x2e8: 0x1f, 0x2e9: 0x20, 0x2ea: 0x21, 0x2eb: 0x22, 0x2ec: 0x80, 0x2ed: 0x81,
0x2f8: 0x82,
// Block 0xc, offset 0x300
0x307: 0x85,
0x328: 0x86,
0x307: 0x83,
0x328: 0x84,
// Block 0xd, offset 0x340
0x341: 0x77, 0x342: 0x87,
0x341: 0x75, 0x342: 0x85,
// Block 0xe, offset 0x380
0x385: 0x88, 0x386: 0x89, 0x387: 0x8a,
0x389: 0x8b,
0x385: 0x86, 0x386: 0x87, 0x387: 0x88,
0x389: 0x89,
// Block 0xf, offset 0x3c0
0x3e0: 0x25, 0x3e1: 0x26, 0x3e2: 0x27, 0x3e3: 0x28, 0x3e4: 0x29, 0x3e5: 0x2a, 0x3e6: 0x2b, 0x3e7: 0x2c,
0x3e8: 0x2d,
0x3e0: 0x23, 0x3e1: 0x24, 0x3e2: 0x25, 0x3e3: 0x26, 0x3e4: 0x27, 0x3e5: 0x28, 0x3e6: 0x29, 0x3e7: 0x2a,
0x3e8: 0x2b,
// Block 0x10, offset 0x400
0x410: 0x0c, 0x411: 0x0d,
0x41d: 0x0e,
0x42f: 0x0f,
0x410: 0x0a, 0x411: 0x0b,
0x41d: 0x0c,
0x42f: 0x0d,
}
var nfcTrie = trie{nfcLookup[:], nfcValues[:], nfcSparseValues[:], nfcSparseOffset[:], 46}
var nfcTrie = trie{nfcLookup[:], nfcValues[:], nfcSparseValues[:], nfcSparseOffset[:], 44}
// nfkcValues: 5568 entries, 11136 bytes
// Block 2 is the null block.
@ -5642,84 +5642,84 @@ var nfkcLookup = [1152]uint8{
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2: 0x57, 0x0c3: 0x03, 0x0c4: 0x04, 0x0c5: 0x05, 0x0c6: 0x58, 0x0c7: 0x06,
0x0c8: 0x07, 0x0ca: 0x59, 0x0cb: 0x5a, 0x0cc: 0x08, 0x0cd: 0x09, 0x0ce: 0x0a, 0x0cf: 0x0b,
0x0d0: 0x0c, 0x0d1: 0x5b, 0x0d2: 0x5c, 0x0d3: 0x0d, 0x0d6: 0x0e, 0x0d7: 0x5d,
0x0d8: 0x5e, 0x0d9: 0x0f, 0x0db: 0x5f, 0x0dc: 0x60, 0x0dd: 0x61, 0x0df: 0x62,
0x0e0: 0x04, 0x0e1: 0x05, 0x0e2: 0x06, 0x0e3: 0x07,
0x0ea: 0x08, 0x0eb: 0x09, 0x0ec: 0x09, 0x0ed: 0x0a, 0x0ef: 0x0b,
0x0f0: 0x11,
0x0c2: 0x55, 0x0c3: 0x01, 0x0c4: 0x02, 0x0c5: 0x03, 0x0c6: 0x56, 0x0c7: 0x04,
0x0c8: 0x05, 0x0ca: 0x57, 0x0cb: 0x58, 0x0cc: 0x06, 0x0cd: 0x07, 0x0ce: 0x08, 0x0cf: 0x09,
0x0d0: 0x0a, 0x0d1: 0x59, 0x0d2: 0x5a, 0x0d3: 0x0b, 0x0d6: 0x0c, 0x0d7: 0x5b,
0x0d8: 0x5c, 0x0d9: 0x0d, 0x0db: 0x5d, 0x0dc: 0x5e, 0x0dd: 0x5f, 0x0df: 0x60,
0x0e0: 0x02, 0x0e1: 0x03, 0x0e2: 0x04, 0x0e3: 0x05,
0x0ea: 0x06, 0x0eb: 0x07, 0x0ec: 0x07, 0x0ed: 0x08, 0x0ef: 0x09,
0x0f0: 0x0f,
// Block 0x4, offset 0x100
0x120: 0x63, 0x121: 0x64, 0x124: 0x65, 0x125: 0x66, 0x126: 0x67, 0x127: 0x68,
0x128: 0x69, 0x129: 0x6a, 0x12a: 0x6b, 0x12b: 0x6c, 0x12c: 0x67, 0x12d: 0x6d, 0x12e: 0x6e, 0x12f: 0x6f,
0x131: 0x70, 0x132: 0x71, 0x133: 0x72, 0x134: 0x73, 0x135: 0x74, 0x137: 0x75,
0x138: 0x76, 0x139: 0x77, 0x13a: 0x78, 0x13b: 0x79, 0x13c: 0x7a, 0x13d: 0x7b, 0x13e: 0x7c, 0x13f: 0x7d,
0x120: 0x61, 0x121: 0x62, 0x124: 0x63, 0x125: 0x64, 0x126: 0x65, 0x127: 0x66,
0x128: 0x67, 0x129: 0x68, 0x12a: 0x69, 0x12b: 0x6a, 0x12c: 0x65, 0x12d: 0x6b, 0x12e: 0x6c, 0x12f: 0x6d,
0x131: 0x6e, 0x132: 0x6f, 0x133: 0x70, 0x134: 0x71, 0x135: 0x72, 0x137: 0x73,
0x138: 0x74, 0x139: 0x75, 0x13a: 0x76, 0x13b: 0x77, 0x13c: 0x78, 0x13d: 0x79, 0x13e: 0x7a, 0x13f: 0x7b,
// Block 0x5, offset 0x140
0x140: 0x7e, 0x142: 0x7f, 0x143: 0x80, 0x144: 0x81, 0x145: 0x82, 0x146: 0x83, 0x147: 0x84,
0x14d: 0x85,
0x15c: 0x86, 0x15f: 0x87,
0x162: 0x88, 0x164: 0x89,
0x168: 0x8a, 0x169: 0x8b, 0x16c: 0x10, 0x16d: 0x8c, 0x16e: 0x8d, 0x16f: 0x8e,
0x170: 0x8f, 0x173: 0x90, 0x174: 0x91, 0x175: 0x11, 0x176: 0x12, 0x177: 0x92,
0x178: 0x13, 0x179: 0x14, 0x17a: 0x15, 0x17b: 0x16, 0x17c: 0x17, 0x17d: 0x18, 0x17e: 0x19, 0x17f: 0x1a,
0x140: 0x7c, 0x142: 0x7d, 0x143: 0x7e, 0x144: 0x7f, 0x145: 0x80, 0x146: 0x81, 0x147: 0x82,
0x14d: 0x83,
0x15c: 0x84, 0x15f: 0x85,
0x162: 0x86, 0x164: 0x87,
0x168: 0x88, 0x169: 0x89, 0x16c: 0x0e, 0x16d: 0x8a, 0x16e: 0x8b, 0x16f: 0x8c,
0x170: 0x8d, 0x173: 0x8e, 0x174: 0x8f, 0x175: 0x0f, 0x176: 0x10, 0x177: 0x90,
0x178: 0x11, 0x179: 0x12, 0x17a: 0x13, 0x17b: 0x14, 0x17c: 0x15, 0x17d: 0x16, 0x17e: 0x17, 0x17f: 0x18,
// Block 0x6, offset 0x180
0x180: 0x93, 0x181: 0x94, 0x182: 0x95, 0x183: 0x96, 0x184: 0x1b, 0x185: 0x1c, 0x186: 0x97, 0x187: 0x98,
0x188: 0x99, 0x189: 0x1d, 0x18a: 0x1e, 0x18b: 0x9a, 0x18c: 0x9b,
0x191: 0x1f, 0x192: 0x20, 0x193: 0x9c,
0x1a8: 0x9d, 0x1a9: 0x9e, 0x1ab: 0x9f,
0x1b1: 0xa0, 0x1b3: 0xa1, 0x1b5: 0xa2, 0x1b7: 0xa3,
0x1ba: 0xa4, 0x1bb: 0xa5, 0x1bc: 0x21, 0x1bd: 0x22, 0x1be: 0x23, 0x1bf: 0xa6,
0x180: 0x91, 0x181: 0x92, 0x182: 0x93, 0x183: 0x94, 0x184: 0x19, 0x185: 0x1a, 0x186: 0x95, 0x187: 0x96,
0x188: 0x97, 0x189: 0x1b, 0x18a: 0x1c, 0x18b: 0x98, 0x18c: 0x99,
0x191: 0x1d, 0x192: 0x1e, 0x193: 0x9a,
0x1a8: 0x9b, 0x1a9: 0x9c, 0x1ab: 0x9d,
0x1b1: 0x9e, 0x1b3: 0x9f, 0x1b5: 0xa0, 0x1b7: 0xa1,
0x1ba: 0xa2, 0x1bb: 0xa3, 0x1bc: 0x1f, 0x1bd: 0x20, 0x1be: 0x21, 0x1bf: 0xa4,
// Block 0x7, offset 0x1c0
0x1c0: 0xa7, 0x1c1: 0x24, 0x1c2: 0x25, 0x1c3: 0x26, 0x1c4: 0xa8, 0x1c5: 0xa9, 0x1c6: 0x27,
0x1c8: 0x28, 0x1c9: 0x29, 0x1ca: 0x2a, 0x1cb: 0x2b, 0x1cc: 0x2c, 0x1cd: 0x2d, 0x1ce: 0x2e, 0x1cf: 0x2f,
0x1c0: 0xa5, 0x1c1: 0x22, 0x1c2: 0x23, 0x1c3: 0x24, 0x1c4: 0xa6, 0x1c5: 0xa7, 0x1c6: 0x25,
0x1c8: 0x26, 0x1c9: 0x27, 0x1ca: 0x28, 0x1cb: 0x29, 0x1cc: 0x2a, 0x1cd: 0x2b, 0x1ce: 0x2c, 0x1cf: 0x2d,
// Block 0x8, offset 0x200
0x219: 0xaa, 0x21b: 0xab, 0x21d: 0xac,
0x220: 0xad, 0x223: 0xae, 0x224: 0xaf, 0x225: 0xb0, 0x226: 0xb1, 0x227: 0xb2,
0x22a: 0xb3, 0x22b: 0xb4, 0x22f: 0xb5,
0x230: 0xb6, 0x231: 0xb6, 0x232: 0xb6, 0x233: 0xb6, 0x234: 0xb6, 0x235: 0xb6, 0x236: 0xb6, 0x237: 0xb6,
0x238: 0xb6, 0x239: 0xb6, 0x23a: 0xb6, 0x23b: 0xb6, 0x23c: 0xb6, 0x23d: 0xb6, 0x23e: 0xb6, 0x23f: 0xb6,
0x219: 0xa8, 0x21b: 0xa9, 0x21d: 0xaa,
0x220: 0xab, 0x223: 0xac, 0x224: 0xad, 0x225: 0xae, 0x226: 0xaf, 0x227: 0xb0,
0x22a: 0xb1, 0x22b: 0xb2, 0x22f: 0xb3,
0x230: 0xb4, 0x231: 0xb4, 0x232: 0xb4, 0x233: 0xb4, 0x234: 0xb4, 0x235: 0xb4, 0x236: 0xb4, 0x237: 0xb4,
0x238: 0xb4, 0x239: 0xb4, 0x23a: 0xb4, 0x23b: 0xb4, 0x23c: 0xb4, 0x23d: 0xb4, 0x23e: 0xb4, 0x23f: 0xb4,
// Block 0x9, offset 0x240
0x240: 0xb6, 0x241: 0xb6, 0x242: 0xb6, 0x243: 0xb6, 0x244: 0xb6, 0x245: 0xb6, 0x246: 0xb6, 0x247: 0xb6,
0x248: 0xb6, 0x249: 0xb6, 0x24a: 0xb6, 0x24b: 0xb6, 0x24c: 0xb6, 0x24d: 0xb6, 0x24e: 0xb6, 0x24f: 0xb6,
0x250: 0xb6, 0x251: 0xb6, 0x252: 0xb6, 0x253: 0xb6, 0x254: 0xb6, 0x255: 0xb6, 0x256: 0xb6, 0x257: 0xb6,
0x258: 0xb6, 0x259: 0xb6, 0x25a: 0xb6, 0x25b: 0xb6, 0x25c: 0xb6, 0x25d: 0xb6, 0x25e: 0xb6, 0x25f: 0xb6,
0x260: 0xb6, 0x261: 0xb6, 0x262: 0xb6, 0x263: 0xb6, 0x264: 0xb6, 0x265: 0xb6, 0x266: 0xb6, 0x267: 0xb6,
0x268: 0xb6, 0x269: 0xb6, 0x26a: 0xb6, 0x26b: 0xb6, 0x26c: 0xb6, 0x26d: 0xb6, 0x26e: 0xb6, 0x26f: 0xb6,
0x270: 0xb6, 0x271: 0xb6, 0x272: 0xb6, 0x273: 0xb6, 0x274: 0xb6, 0x275: 0xb6, 0x276: 0xb6, 0x277: 0xb6,
0x278: 0xb6, 0x279: 0xb6, 0x27a: 0xb6, 0x27b: 0xb6, 0x27c: 0xb6, 0x27d: 0xb6, 0x27e: 0xb6, 0x27f: 0xb6,
0x240: 0xb4, 0x241: 0xb4, 0x242: 0xb4, 0x243: 0xb4, 0x244: 0xb4, 0x245: 0xb4, 0x246: 0xb4, 0x247: 0xb4,
0x248: 0xb4, 0x249: 0xb4, 0x24a: 0xb4, 0x24b: 0xb4, 0x24c: 0xb4, 0x24d: 0xb4, 0x24e: 0xb4, 0x24f: 0xb4,
0x250: 0xb4, 0x251: 0xb4, 0x252: 0xb4, 0x253: 0xb4, 0x254: 0xb4, 0x255: 0xb4, 0x256: 0xb4, 0x257: 0xb4,
0x258: 0xb4, 0x259: 0xb4, 0x25a: 0xb4, 0x25b: 0xb4, 0x25c: 0xb4, 0x25d: 0xb4, 0x25e: 0xb4, 0x25f: 0xb4,
0x260: 0xb4, 0x261: 0xb4, 0x262: 0xb4, 0x263: 0xb4, 0x264: 0xb4, 0x265: 0xb4, 0x266: 0xb4, 0x267: 0xb4,
0x268: 0xb4, 0x269: 0xb4, 0x26a: 0xb4, 0x26b: 0xb4, 0x26c: 0xb4, 0x26d: 0xb4, 0x26e: 0xb4, 0x26f: 0xb4,
0x270: 0xb4, 0x271: 0xb4, 0x272: 0xb4, 0x273: 0xb4, 0x274: 0xb4, 0x275: 0xb4, 0x276: 0xb4, 0x277: 0xb4,
0x278: 0xb4, 0x279: 0xb4, 0x27a: 0xb4, 0x27b: 0xb4, 0x27c: 0xb4, 0x27d: 0xb4, 0x27e: 0xb4, 0x27f: 0xb4,
// Block 0xa, offset 0x280
0x280: 0xb6, 0x281: 0xb6, 0x282: 0xb6, 0x283: 0xb6, 0x284: 0xb6, 0x285: 0xb6, 0x286: 0xb6, 0x287: 0xb6,
0x288: 0xb6, 0x289: 0xb6, 0x28a: 0xb6, 0x28b: 0xb6, 0x28c: 0xb6, 0x28d: 0xb6, 0x28e: 0xb6, 0x28f: 0xb6,
0x290: 0xb6, 0x291: 0xb6, 0x292: 0xb6, 0x293: 0xb6, 0x294: 0xb6, 0x295: 0xb6, 0x296: 0xb6, 0x297: 0xb6,
0x298: 0xb6, 0x299: 0xb6, 0x29a: 0xb6, 0x29b: 0xb6, 0x29c: 0xb6, 0x29d: 0xb6, 0x29e: 0xb7,
0x280: 0xb4, 0x281: 0xb4, 0x282: 0xb4, 0x283: 0xb4, 0x284: 0xb4, 0x285: 0xb4, 0x286: 0xb4, 0x287: 0xb4,
0x288: 0xb4, 0x289: 0xb4, 0x28a: 0xb4, 0x28b: 0xb4, 0x28c: 0xb4, 0x28d: 0xb4, 0x28e: 0xb4, 0x28f: 0xb4,
0x290: 0xb4, 0x291: 0xb4, 0x292: 0xb4, 0x293: 0xb4, 0x294: 0xb4, 0x295: 0xb4, 0x296: 0xb4, 0x297: 0xb4,
0x298: 0xb4, 0x299: 0xb4, 0x29a: 0xb4, 0x29b: 0xb4, 0x29c: 0xb4, 0x29d: 0xb4, 0x29e: 0xb5,
// Block 0xb, offset 0x2c0
0x2e4: 0x30, 0x2e5: 0x31, 0x2e6: 0x32, 0x2e7: 0x33,
0x2e8: 0x34, 0x2e9: 0x35, 0x2ea: 0x36, 0x2eb: 0x37, 0x2ec: 0x38, 0x2ed: 0x39, 0x2ee: 0x3a, 0x2ef: 0x3b,
0x2f0: 0x3c, 0x2f1: 0x3d, 0x2f2: 0x3e, 0x2f3: 0x3f, 0x2f4: 0x40, 0x2f5: 0x41, 0x2f6: 0x42, 0x2f7: 0x43,
0x2f8: 0x44, 0x2f9: 0x45, 0x2fa: 0x46, 0x2fb: 0x47, 0x2fc: 0xb8, 0x2fd: 0x48, 0x2fe: 0x49, 0x2ff: 0xb9,
0x2e4: 0x2e, 0x2e5: 0x2f, 0x2e6: 0x30, 0x2e7: 0x31,
0x2e8: 0x32, 0x2e9: 0x33, 0x2ea: 0x34, 0x2eb: 0x35, 0x2ec: 0x36, 0x2ed: 0x37, 0x2ee: 0x38, 0x2ef: 0x39,
0x2f0: 0x3a, 0x2f1: 0x3b, 0x2f2: 0x3c, 0x2f3: 0x3d, 0x2f4: 0x3e, 0x2f5: 0x3f, 0x2f6: 0x40, 0x2f7: 0x41,
0x2f8: 0x42, 0x2f9: 0x43, 0x2fa: 0x44, 0x2fb: 0x45, 0x2fc: 0xb6, 0x2fd: 0x46, 0x2fe: 0x47, 0x2ff: 0xb7,
// Block 0xc, offset 0x300
0x307: 0xba,
0x328: 0xbb,
0x307: 0xb8,
0x328: 0xb9,
// Block 0xd, offset 0x340
0x341: 0xad, 0x342: 0xbc,
0x341: 0xab, 0x342: 0xba,
// Block 0xe, offset 0x380
0x385: 0xbd, 0x386: 0xbe, 0x387: 0xbf,
0x389: 0xc0,
0x390: 0xc1, 0x391: 0xc2, 0x392: 0xc3, 0x393: 0xc4, 0x394: 0xc5, 0x395: 0xc6, 0x396: 0xc7, 0x397: 0xc8,
0x398: 0xc9, 0x399: 0xca, 0x39a: 0x4a, 0x39b: 0xcb, 0x39c: 0xcc, 0x39d: 0xcd, 0x39e: 0xce, 0x39f: 0x4b,
0x385: 0xbb, 0x386: 0xbc, 0x387: 0xbd,
0x389: 0xbe,
0x390: 0xbf, 0x391: 0xc0, 0x392: 0xc1, 0x393: 0xc2, 0x394: 0xc3, 0x395: 0xc4, 0x396: 0xc5, 0x397: 0xc6,
0x398: 0xc7, 0x399: 0xc8, 0x39a: 0x48, 0x39b: 0xc9, 0x39c: 0xca, 0x39d: 0xcb, 0x39e: 0xcc, 0x39f: 0x49,
// Block 0xf, offset 0x3c0
0x3c4: 0x4c, 0x3c5: 0xcf, 0x3c6: 0xd0,
0x3c8: 0x4d, 0x3c9: 0xd1,
0x3c4: 0x4a, 0x3c5: 0xcd, 0x3c6: 0xce,
0x3c8: 0x4b, 0x3c9: 0xcf,
// Block 0x10, offset 0x400
0x420: 0x4e, 0x421: 0x4f, 0x422: 0x50, 0x423: 0x51, 0x424: 0x52, 0x425: 0x53, 0x426: 0x54, 0x427: 0x55,
0x428: 0x56,
0x420: 0x4c, 0x421: 0x4d, 0x422: 0x4e, 0x423: 0x4f, 0x424: 0x50, 0x425: 0x51, 0x426: 0x52, 0x427: 0x53,
0x428: 0x54,
// Block 0x11, offset 0x440
0x450: 0x0c, 0x451: 0x0d,
0x45d: 0x0e, 0x45f: 0x0f,
0x46f: 0x10,
0x450: 0x0a, 0x451: 0x0b,
0x45d: 0x0c, 0x45f: 0x0d,
0x46f: 0x0e,
}
var nfkcTrie = trie{nfkcLookup[:], nfkcValues[:], nfkcSparseValues[:], nfkcSparseOffset[:], 87}
var nfkcTrie = trie{nfkcLookup[:], nfkcValues[:], nfkcSparseValues[:], nfkcSparseOffset[:], 85}
// recompMap: 7448 bytes (entries only)
var recompMap = map[uint32]rune{

View File

@ -23,7 +23,7 @@ type trie struct {
// the value for b is by r.value + (b - r.lo) * stride.
func (t *trie) lookupValue(n uint8, b byte) uint16 {
if n < t.cutoff {
return t.values[uint16(n)<<6+uint16(b&maskx)]
return t.values[uint16(n)<<6+uint16(b)]
}
offset := t.sparseOffset[n-t.cutoff]
header := t.sparse[offset]
@ -53,11 +53,6 @@ const (
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
maskx = 0x3F // 0011 1111
mask2 = 0x1F // 0001 1111
mask3 = 0x0F // 0000 1111
mask4 = 0x07 // 0000 0111
)
// lookup returns the trie value for the first UTF-8 encoding in s and
@ -89,7 +84,7 @@ func (t *trie) lookup(s []byte) (v uint16, sz int) {
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
@ -105,13 +100,13 @@ func (t *trie) lookup(s []byte) (v uint16, sz int) {
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
o = uint16(i)<<6 + uint16(c2)
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
@ -152,7 +147,7 @@ func (t *trie) lookupString(s string) (v uint16, sz int) {
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
@ -168,13 +163,13 @@ func (t *trie) lookupString(s string) (v uint16, sz int) {
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
o = uint16(i)<<6 + uint16(c2)
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
@ -200,11 +195,11 @@ func (t *trie) lookupUnsafe(s []byte) uint16 {
if c0 < t3 {
return t.lookupValue(i, s[1])
}
i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
i = t.index[uint16(i)<<6+uint16(s[1])]
if c0 < t4 {
return t.lookupValue(i, s[2])
}
i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
i = t.index[uint16(i)<<6+uint16(s[2])]
if c0 < t5 {
return t.lookupValue(i, s[3])
}
@ -225,11 +220,11 @@ func (t *trie) lookupStringUnsafe(s string) uint16 {
if c0 < t3 {
return t.lookupValue(i, s[1])
}
i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
i = t.index[uint16(i)<<6+uint16(s[1])]
if c0 < t4 {
return t.lookupValue(i, s[2])
}
i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
i = t.index[uint16(i)<<6+uint16(s[2])]
if c0 < t5 {
return t.lookupValue(i, s[3])
}

View File

@ -96,13 +96,17 @@ func TestLookup(t *testing.T) {
}
for i, tt := range tests {
v, sz := testdata.lookup(tt.bytes)
if int(v) != 0 {
if v != 0 {
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
}
if sz != tt.size {
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
}
}
// Verify defaults.
if v, _ := testdata.lookup([]byte{0xC1, 0x8C}); v != 0 {
t.Errorf("lookup of non-existing rune should be 0; found %X", v)
}
}
func TestLookupUnsafe(t *testing.T) {

View File

@ -4,7 +4,7 @@
package norm
var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
var testRunes = []int32{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
// testdataValues: 192 entries, 384 bytes
// Block 2 is the null block.
@ -62,24 +62,24 @@ var testdataLookup = [640]uint8{
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2: 0x03, 0x0c4: 0x04,
0x0c8: 0x05,
0x0df: 0x06,
0x0e0: 0x04,
0x0ef: 0x05,
0x0f0: 0x07, 0x0f4: 0x09,
0x0c2: 0x01, 0x0c4: 0x02,
0x0c8: 0x03,
0x0df: 0x04,
0x0e0: 0x02,
0x0ef: 0x03,
0x0f0: 0x05, 0x0f4: 0x07,
// Block 0x4, offset 0x100
0x120: 0x07, 0x126: 0x08,
0x120: 0x05, 0x126: 0x06,
// Block 0x5, offset 0x140
0x17f: 0x09,
0x17f: 0x07,
// Block 0x6, offset 0x180
0x180: 0x0a, 0x184: 0x0b,
0x180: 0x08, 0x184: 0x09,
// Block 0x7, offset 0x1c0
0x1d0: 0x06,
0x1d0: 0x04,
// Block 0x8, offset 0x200
0x23f: 0x0c,
0x23f: 0x0a,
// Block 0x9, offset 0x240
0x24f: 0x08,
0x24f: 0x06,
}
var testdataTrie = trie{testdataLookup[:], testdataValues[:], testdataSparseValues[:], testdataSparseOffset[:], 3}
var testdataTrie = trie{testdataLookup[:], testdataValues[:], testdataSparseValues[:], testdataSparseOffset[:], 1}

View File

@ -19,8 +19,11 @@ import (
"unicode/utf8"
)
const blockSize = 64
const maxSparseEntries = 16
const (
blockSize = 64
blockOffset = 2 // Subtract two blocks to compensate for the 0x80 added to continuation bytes.
maxSparseEntries = 16
)
// Intermediate trie structure
type trieNode struct {
@ -157,7 +160,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int {
if n.isInternal() {
v, ok := index.lookupBlockIdx[h]
if !ok {
v = len(index.lookupBlocks)
v = len(index.lookupBlocks) - blockOffset
index.lookupBlocks = append(index.lookupBlocks, n)
index.lookupBlockIdx[h] = v
}
@ -166,7 +169,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int {
v, ok := index.valueBlockIdx[h]
if !ok {
if c := n.countSparseEntries(); c > maxSparseEntries {
v = len(index.valueBlocks)
v = len(index.valueBlocks) - blockOffset
index.valueBlocks = append(index.valueBlocks, n)
index.valueBlockIdx[h] = v
} else {
@ -295,7 +298,7 @@ func (t *trieNode) printTables(name string) int {
}
fmt.Print("\n}\n\n")
cutoff := len(index.valueBlocks)
cutoff := len(index.valueBlocks) - blockOffset
ni := len(index.lookupBlocks) * blockSize
fmt.Printf("// %sLookup: %d bytes\n", name, ni)
fmt.Printf("// Block 0 is the null block.\n")