mirror of https://github.com/golang/go.git
encoding: require unique alphabet for base32 and base64
In order for decoding to faithfully reproduce the encoded input, the symbols must be unique (i.e., provide a bijective mapping). Thus, reject duplicate symbols in NewEncoding. As a minor optimization, modify WithPadding to use the decodeMap to quickly check whether the padding character is used in O(1) instead of O(32) or O(64). Change-Id: I5631f6ff9335c35d59d020dc0e307e3520786fbc Reviewed-on: https://go-review.googlesource.com/c/go/+/520335 Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Auto-Submit: Joseph Tsai <joetsai@digital-static.net> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@google.com> Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
This commit is contained in:
parent
9070f27039
commit
243c8c0eec
|
|
@ -20,8 +20,8 @@ import (
|
||||||
// introduced for SASL GSSAPI and standardized in RFC 4648.
|
// introduced for SASL GSSAPI and standardized in RFC 4648.
|
||||||
// The alternate "base32hex" encoding is used in DNSSEC.
|
// The alternate "base32hex" encoding is used in DNSSEC.
|
||||||
type Encoding struct {
|
type Encoding struct {
|
||||||
encode [32]byte
|
encode [32]byte // mapping of symbol index to symbol byte value
|
||||||
decodeMap [256]byte
|
decodeMap [256]uint8 // mapping of symbol byte value to symbol index
|
||||||
padChar rune
|
padChar rune
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -45,14 +45,19 @@ const (
|
||||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
||||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
||||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||||
|
invalidIndex = '\xff'
|
||||||
)
|
)
|
||||||
|
|
||||||
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
|
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
|
||||||
const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
|
const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
|
||||||
|
|
||||||
// NewEncoding returns a new Encoding defined by the given alphabet,
|
// NewEncoding returns a new padded Encoding defined by the given alphabet,
|
||||||
// which must be a 32-byte string. The alphabet is treated as sequence
|
// which must be a 32-byte string that contains unique byte values and
|
||||||
// of byte values without any special treatment for multi-byte UTF-8.
|
// does not contain the padding character or CR / LF ('\r', '\n').
|
||||||
|
// The alphabet is treated as a sequence of byte values
|
||||||
|
// without any special treatment for multi-byte UTF-8.
|
||||||
|
// The resulting Encoding uses the default padding character ('='),
|
||||||
|
// which may be changed or disabled via WithPadding.
|
||||||
func NewEncoding(encoder string) *Encoding {
|
func NewEncoding(encoder string) *Encoding {
|
||||||
if len(encoder) != 32 {
|
if len(encoder) != 32 {
|
||||||
panic("encoding alphabet is not 32-bytes long")
|
panic("encoding alphabet is not 32-bytes long")
|
||||||
|
|
@ -64,7 +69,16 @@ func NewEncoding(encoder string) *Encoding {
|
||||||
copy(e.decodeMap[:], decodeMapInitialize)
|
copy(e.decodeMap[:], decodeMapInitialize)
|
||||||
|
|
||||||
for i := 0; i < len(encoder); i++ {
|
for i := 0; i < len(encoder); i++ {
|
||||||
e.decodeMap[encoder[i]] = byte(i)
|
// Note: While we document that the alphabet cannot contain
|
||||||
|
// the padding character, we do not enforce it since we do not know
|
||||||
|
// if the caller intends to switch the padding from StdPadding later.
|
||||||
|
switch {
|
||||||
|
case encoder[i] == '\n' || encoder[i] == '\r':
|
||||||
|
panic("encoding alphabet contains newline character")
|
||||||
|
case e.decodeMap[encoder[i]] != invalidIndex:
|
||||||
|
panic("encoding alphabet includes duplicate symbols")
|
||||||
|
}
|
||||||
|
e.decodeMap[encoder[i]] = uint8(i)
|
||||||
}
|
}
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|
@ -85,16 +99,12 @@ var HexEncoding = NewEncoding(encodeHex)
|
||||||
// Padding characters above '\x7f' are encoded as their exact byte value
|
// Padding characters above '\x7f' are encoded as their exact byte value
|
||||||
// rather than using the UTF-8 representation of the codepoint.
|
// rather than using the UTF-8 representation of the codepoint.
|
||||||
func (enc Encoding) WithPadding(padding rune) *Encoding {
|
func (enc Encoding) WithPadding(padding rune) *Encoding {
|
||||||
if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
|
switch {
|
||||||
|
case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
|
||||||
panic("invalid padding")
|
panic("invalid padding")
|
||||||
|
case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
|
||||||
|
panic("padding contained in alphabet")
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < len(enc.encode); i++ {
|
|
||||||
if rune(enc.encode[i]) == padding {
|
|
||||||
panic("padding contained in alphabet")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enc.padChar = padding
|
enc.padChar = padding
|
||||||
return &enc
|
return &enc
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@ import (
|
||||||
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
|
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
|
||||||
// the standard encoding with - and _ substituted for + and /.
|
// the standard encoding with - and _ substituted for + and /.
|
||||||
type Encoding struct {
|
type Encoding struct {
|
||||||
encode [64]byte
|
encode [64]byte // mapping of symbol index to symbol byte value
|
||||||
decodeMap [256]byte
|
decodeMap [256]uint8 // mapping of symbol byte value to symbol index
|
||||||
padChar rune
|
padChar rune
|
||||||
strict bool
|
strict bool
|
||||||
}
|
}
|
||||||
|
|
@ -48,14 +48,16 @@ const (
|
||||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
||||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
|
||||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||||
|
invalidIndex = '\xff'
|
||||||
)
|
)
|
||||||
|
|
||||||
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
|
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
|
||||||
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
|
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
|
||||||
|
|
||||||
// NewEncoding returns a new padded Encoding defined by the given alphabet,
|
// NewEncoding returns a new padded Encoding defined by the given alphabet,
|
||||||
// which must be a 64-byte string that does not contain the padding character
|
// which must be a 64-byte string that contains unique byte values and
|
||||||
// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
|
// does not contain the padding character or CR / LF ('\r', '\n').
|
||||||
|
// The alphabet is treated as a sequence of byte values
|
||||||
// without any special treatment for multi-byte UTF-8.
|
// without any special treatment for multi-byte UTF-8.
|
||||||
// The resulting Encoding uses the default padding character ('='),
|
// The resulting Encoding uses the default padding character ('='),
|
||||||
// which may be changed or disabled via WithPadding.
|
// which may be changed or disabled via WithPadding.
|
||||||
|
|
@ -63,11 +65,6 @@ func NewEncoding(encoder string) *Encoding {
|
||||||
if len(encoder) != 64 {
|
if len(encoder) != 64 {
|
||||||
panic("encoding alphabet is not 64-bytes long")
|
panic("encoding alphabet is not 64-bytes long")
|
||||||
}
|
}
|
||||||
for i := 0; i < len(encoder); i++ {
|
|
||||||
if encoder[i] == '\n' || encoder[i] == '\r' {
|
|
||||||
panic("encoding alphabet contains newline character")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
e := new(Encoding)
|
e := new(Encoding)
|
||||||
e.padChar = StdPadding
|
e.padChar = StdPadding
|
||||||
|
|
@ -75,7 +72,16 @@ func NewEncoding(encoder string) *Encoding {
|
||||||
copy(e.decodeMap[:], decodeMapInitialize)
|
copy(e.decodeMap[:], decodeMapInitialize)
|
||||||
|
|
||||||
for i := 0; i < len(encoder); i++ {
|
for i := 0; i < len(encoder); i++ {
|
||||||
e.decodeMap[encoder[i]] = byte(i)
|
// Note: While we document that the alphabet cannot contain
|
||||||
|
// the padding character, we do not enforce it since we do not know
|
||||||
|
// if the caller intends to switch the padding from StdPadding later.
|
||||||
|
switch {
|
||||||
|
case encoder[i] == '\n' || encoder[i] == '\r':
|
||||||
|
panic("encoding alphabet contains newline character")
|
||||||
|
case e.decodeMap[encoder[i]] != invalidIndex:
|
||||||
|
panic("encoding alphabet includes duplicate symbols")
|
||||||
|
}
|
||||||
|
e.decodeMap[encoder[i]] = uint8(i)
|
||||||
}
|
}
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|
@ -88,16 +94,12 @@ func NewEncoding(encoder string) *Encoding {
|
||||||
// Padding characters above '\x7f' are encoded as their exact byte value
|
// Padding characters above '\x7f' are encoded as their exact byte value
|
||||||
// rather than using the UTF-8 representation of the codepoint.
|
// rather than using the UTF-8 representation of the codepoint.
|
||||||
func (enc Encoding) WithPadding(padding rune) *Encoding {
|
func (enc Encoding) WithPadding(padding rune) *Encoding {
|
||||||
if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
|
switch {
|
||||||
|
case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
|
||||||
panic("invalid padding")
|
panic("invalid padding")
|
||||||
|
case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
|
||||||
|
panic("padding contained in alphabet")
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < len(enc.encode); i++ {
|
|
||||||
if rune(enc.encode[i]) == padding {
|
|
||||||
panic("padding contained in alphabet")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enc.padChar = padding
|
enc.padChar = padding
|
||||||
return &enc
|
return &enc
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue