mirror of https://github.com/golang/go.git
strconv: accept underscores in ParseInt, ParseUint, ParseFloat
This CL modifies ParseInt, ParseUint, and ParseFloat to accept digit-separating underscores in their arguments. For ParseInt and ParseUint, the underscores are only allowed when base == 0. See golang.org/design/19308-number-literals for background. For #28493. Change-Id: I057ca2539d89314643f591ba8144c3ea7126651c Reviewed-on: https://go-review.googlesource.com/c/160243 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
e2d87f2ca5
commit
36a81d5ec3
|
|
@ -83,6 +83,9 @@ func (b *decimal) set(s string) (ok bool) {
|
||||||
sawdigits := false
|
sawdigits := false
|
||||||
for ; i < len(s); i++ {
|
for ; i < len(s); i++ {
|
||||||
switch {
|
switch {
|
||||||
|
case s[i] == '_':
|
||||||
|
// underscoreOK already called
|
||||||
|
continue
|
||||||
case s[i] == '.':
|
case s[i] == '.':
|
||||||
if sawdot {
|
if sawdot {
|
||||||
return
|
return
|
||||||
|
|
@ -135,7 +138,11 @@ func (b *decimal) set(s string) (ok bool) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
e := 0
|
e := 0
|
||||||
for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
|
for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
|
||||||
|
if s[i] == '_' {
|
||||||
|
// underscoreOK already called
|
||||||
|
continue
|
||||||
|
}
|
||||||
if e < 10000 {
|
if e < 10000 {
|
||||||
e = e*10 + int(s[i]) - '0'
|
e = e*10 + int(s[i]) - '0'
|
||||||
}
|
}
|
||||||
|
|
@ -187,6 +194,10 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
|
||||||
dp := 0
|
dp := 0
|
||||||
for ; i < len(s); i++ {
|
for ; i < len(s); i++ {
|
||||||
switch c := s[i]; true {
|
switch c := s[i]; true {
|
||||||
|
case c == '_':
|
||||||
|
// underscoreOK already called
|
||||||
|
continue
|
||||||
|
|
||||||
case c == '.':
|
case c == '.':
|
||||||
if sawdot {
|
if sawdot {
|
||||||
return
|
return
|
||||||
|
|
@ -258,7 +269,11 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
e := 0
|
e := 0
|
||||||
for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
|
for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
|
||||||
|
if s[i] == '_' {
|
||||||
|
// underscoreOK already called
|
||||||
|
continue
|
||||||
|
}
|
||||||
if e < 10000 {
|
if e < 10000 {
|
||||||
e = e*10 + int(s[i]) - '0'
|
e = e*10 + int(s[i]) - '0'
|
||||||
}
|
}
|
||||||
|
|
@ -640,6 +655,9 @@ func atof64(s string) (f float64, err error) {
|
||||||
// away from the largest floating point number of the given size,
|
// away from the largest floating point number of the given size,
|
||||||
// ParseFloat returns f = ±Inf, err.Err = ErrRange.
|
// ParseFloat returns f = ±Inf, err.Err = ErrRange.
|
||||||
func ParseFloat(s string, bitSize int) (float64, error) {
|
func ParseFloat(s string, bitSize int) (float64, error) {
|
||||||
|
if !underscoreOK(s) {
|
||||||
|
return 0, syntaxError(fnParseFloat, s)
|
||||||
|
}
|
||||||
if bitSize == 32 {
|
if bitSize == 32 {
|
||||||
f, err := atof32(s)
|
f, err := atof32(s)
|
||||||
return float64(f), err
|
return float64(f), err
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,9 @@ var atoftests = []atofTest{
|
||||||
{"0x1p0", "1", nil},
|
{"0x1p0", "1", nil},
|
||||||
{"0x1p1", "2", nil},
|
{"0x1p1", "2", nil},
|
||||||
{"0x1p-1", "0.5", nil},
|
{"0x1p-1", "0.5", nil},
|
||||||
|
{"0x1ep-1", "15", nil},
|
||||||
|
{"-0x1ep-1", "-15", nil},
|
||||||
|
{"-0x1_ep-1", "-15", nil},
|
||||||
{"0x1p-200", "6.223015277861142e-61", nil},
|
{"0x1p-200", "6.223015277861142e-61", nil},
|
||||||
{"0x1p200", "1.6069380442589903e+60", nil},
|
{"0x1p200", "1.6069380442589903e+60", nil},
|
||||||
{"0x1fFe2.p0", "131042", nil},
|
{"0x1fFe2.p0", "131042", nil},
|
||||||
|
|
@ -299,6 +302,40 @@ var atoftests = []atofTest{
|
||||||
// Round to even (up).
|
// Round to even (up).
|
||||||
{"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil},
|
{"1.00000000000000033306690738754696212708950042724609375", "1.0000000000000004", nil},
|
||||||
{"0x1.00000000000018p0", "1.0000000000000004", nil},
|
{"0x1.00000000000018p0", "1.0000000000000004", nil},
|
||||||
|
|
||||||
|
// Underscores.
|
||||||
|
{"1_23.50_0_0e+1_2", "1.235e+14", nil},
|
||||||
|
{"-_123.5e+12", "0", ErrSyntax},
|
||||||
|
{"+_123.5e+12", "0", ErrSyntax},
|
||||||
|
{"_123.5e+12", "0", ErrSyntax},
|
||||||
|
{"1__23.5e+12", "0", ErrSyntax},
|
||||||
|
{"123_.5e+12", "0", ErrSyntax},
|
||||||
|
{"123._5e+12", "0", ErrSyntax},
|
||||||
|
{"123.5_e+12", "0", ErrSyntax},
|
||||||
|
{"123.5__0e+12", "0", ErrSyntax},
|
||||||
|
{"123.5e_+12", "0", ErrSyntax},
|
||||||
|
{"123.5e+_12", "0", ErrSyntax},
|
||||||
|
{"123.5e_-12", "0", ErrSyntax},
|
||||||
|
{"123.5e-_12", "0", ErrSyntax},
|
||||||
|
{"123.5e+1__2", "0", ErrSyntax},
|
||||||
|
{"123.5e+12_", "0", ErrSyntax},
|
||||||
|
|
||||||
|
{"0x_1_2.3_4_5p+1_2", "74565", nil},
|
||||||
|
{"-_0x12.345p+12", "0", ErrSyntax},
|
||||||
|
{"+_0x12.345p+12", "0", ErrSyntax},
|
||||||
|
{"_0x12.345p+12", "0", ErrSyntax},
|
||||||
|
{"0x__12.345p+12", "0", ErrSyntax},
|
||||||
|
{"0x1__2.345p+12", "0", ErrSyntax},
|
||||||
|
{"0x12_.345p+12", "0", ErrSyntax},
|
||||||
|
{"0x12._345p+12", "0", ErrSyntax},
|
||||||
|
{"0x12.3__45p+12", "0", ErrSyntax},
|
||||||
|
{"0x12.345_p+12", "0", ErrSyntax},
|
||||||
|
{"0x12.345p_+12", "0", ErrSyntax},
|
||||||
|
{"0x12.345p+_12", "0", ErrSyntax},
|
||||||
|
{"0x12.345p_-12", "0", ErrSyntax},
|
||||||
|
{"0x12.345p-_12", "0", ErrSyntax},
|
||||||
|
{"0x12.345p+1__2", "0", ErrSyntax},
|
||||||
|
{"0x12.345p+12_", "0", ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
var atof32tests = []atofTest{
|
var atof32tests = []atofTest{
|
||||||
|
|
|
||||||
|
|
@ -58,10 +58,12 @@ const maxUint64 = 1<<64 - 1
|
||||||
func ParseUint(s string, base int, bitSize int) (uint64, error) {
|
func ParseUint(s string, base int, bitSize int) (uint64, error) {
|
||||||
const fnParseUint = "ParseUint"
|
const fnParseUint = "ParseUint"
|
||||||
|
|
||||||
if len(s) == 0 {
|
if s == "" || !underscoreOK(s) {
|
||||||
return 0, syntaxError(fnParseUint, s)
|
return 0, syntaxError(fnParseUint, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
base0 := base == 0
|
||||||
|
|
||||||
s0 := s
|
s0 := s
|
||||||
switch {
|
switch {
|
||||||
case 2 <= base && base <= 36:
|
case 2 <= base && base <= 36:
|
||||||
|
|
@ -70,7 +72,7 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) {
|
||||||
case base == 0:
|
case base == 0:
|
||||||
// Look for octal, hex prefix.
|
// Look for octal, hex prefix.
|
||||||
switch {
|
switch {
|
||||||
case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
|
case s[0] == '0' && len(s) >= 3 && lower(s[1]) == 'x':
|
||||||
if len(s) < 3 {
|
if len(s) < 3 {
|
||||||
return 0, syntaxError(fnParseUint, s0)
|
return 0, syntaxError(fnParseUint, s0)
|
||||||
}
|
}
|
||||||
|
|
@ -111,12 +113,13 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) {
|
||||||
for _, c := range []byte(s) {
|
for _, c := range []byte(s) {
|
||||||
var d byte
|
var d byte
|
||||||
switch {
|
switch {
|
||||||
|
case c == '_' && base0:
|
||||||
|
// underscoreOK already called
|
||||||
|
continue
|
||||||
case '0' <= c && c <= '9':
|
case '0' <= c && c <= '9':
|
||||||
d = c - '0'
|
d = c - '0'
|
||||||
case 'a' <= c && c <= 'z':
|
case 'a' <= lower(c) && lower(c) <= 'z':
|
||||||
d = c - 'a' + 10
|
d = lower(c) - 'a' + 10
|
||||||
case 'A' <= c && c <= 'Z':
|
|
||||||
d = c - 'A' + 10
|
|
||||||
default:
|
default:
|
||||||
return 0, syntaxError(fnParseUint, s0)
|
return 0, syntaxError(fnParseUint, s0)
|
||||||
}
|
}
|
||||||
|
|
@ -164,8 +167,7 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) {
|
||||||
func ParseInt(s string, base int, bitSize int) (i int64, err error) {
|
func ParseInt(s string, base int, bitSize int) (i int64, err error) {
|
||||||
const fnParseInt = "ParseInt"
|
const fnParseInt = "ParseInt"
|
||||||
|
|
||||||
// Empty string bad.
|
if s == "" {
|
||||||
if len(s) == 0 {
|
|
||||||
return 0, syntaxError(fnParseInt, s)
|
return 0, syntaxError(fnParseInt, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -236,10 +238,60 @@ func Atoi(s string) (int, error) {
|
||||||
return n, nil
|
return n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow path for invalid or big integers.
|
// Slow path for invalid, big, or underscored integers.
|
||||||
i64, err := ParseInt(s, 10, 0)
|
i64, err := ParseInt(s, 10, 0)
|
||||||
if nerr, ok := err.(*NumError); ok {
|
if nerr, ok := err.(*NumError); ok {
|
||||||
nerr.Func = fnAtoi
|
nerr.Func = fnAtoi
|
||||||
}
|
}
|
||||||
return int(i64), err
|
return int(i64), err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// underscoreOK reports whether the underscores in s are allowed.
|
||||||
|
// Checking them in this one function lets all the parsers skip over them simply.
|
||||||
|
// Underscore must appear only between digits or between a base prefix and a digit.
|
||||||
|
func underscoreOK(s string) bool {
|
||||||
|
// saw tracks the last character (class) we saw:
|
||||||
|
// ^ for beginning of number,
|
||||||
|
// 0 for a digit or base prefix,
|
||||||
|
// _ for an underscore,
|
||||||
|
// ! for none of the above.
|
||||||
|
saw := '^'
|
||||||
|
i := 0
|
||||||
|
|
||||||
|
// Optional sign.
|
||||||
|
if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
|
||||||
|
s = s[1:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optional base prefix.
|
||||||
|
hex := false
|
||||||
|
if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
|
||||||
|
i = 2
|
||||||
|
saw = '0' // base prefix counts as a digit for "underscore as digit separator"
|
||||||
|
hex = lower(s[1]) == 'x'
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number proper.
|
||||||
|
for ; i < len(s); i++ {
|
||||||
|
// Digits are always okay.
|
||||||
|
if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
|
||||||
|
saw = '0'
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Underscore must follow digit.
|
||||||
|
if s[i] == '_' {
|
||||||
|
if saw != '0' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
saw = '_'
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Underscore must also be followed by digit.
|
||||||
|
if saw == '_' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Saw non-digit, non-underscore.
|
||||||
|
saw = '!'
|
||||||
|
}
|
||||||
|
return saw != '_'
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,10 @@ var parseUint64Tests = []parseUint64Test{
|
||||||
{"18446744073709551615", 1<<64 - 1, nil},
|
{"18446744073709551615", 1<<64 - 1, nil},
|
||||||
{"18446744073709551616", 1<<64 - 1, ErrRange},
|
{"18446744073709551616", 1<<64 - 1, ErrRange},
|
||||||
{"18446744073709551620", 1<<64 - 1, ErrRange},
|
{"18446744073709551620", 1<<64 - 1, ErrRange},
|
||||||
|
{"1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed
|
||||||
|
{"_12345", 0, ErrSyntax},
|
||||||
|
{"1__2345", 0, ErrSyntax},
|
||||||
|
{"12345_", 0, ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
type parseUint64BaseTest struct {
|
type parseUint64BaseTest struct {
|
||||||
|
|
@ -61,6 +65,42 @@ var parseUint64BaseTests = []parseUint64BaseTest{
|
||||||
{"01777777777777777777778", 0, 0, ErrSyntax},
|
{"01777777777777777777778", 0, 0, ErrSyntax},
|
||||||
{"02000000000000000000000", 0, 1<<64 - 1, ErrRange},
|
{"02000000000000000000000", 0, 1<<64 - 1, ErrRange},
|
||||||
{"0200000000000000000000", 0, 1 << 61, nil},
|
{"0200000000000000000000", 0, 1 << 61, nil},
|
||||||
|
|
||||||
|
// underscores allowed with base == 0 only
|
||||||
|
{"1_2_3_4_5", 0, 12345, nil},
|
||||||
|
{"_12345", 0, 0, ErrSyntax},
|
||||||
|
{"1__2345", 0, 0, ErrSyntax},
|
||||||
|
{"12345_", 0, 0, ErrSyntax},
|
||||||
|
|
||||||
|
{"1_2_3_4_5", 10, 0, ErrSyntax},
|
||||||
|
{"_12345", 10, 0, ErrSyntax},
|
||||||
|
{"1__2345", 10, 0, ErrSyntax},
|
||||||
|
{"12345_", 10, 0, ErrSyntax},
|
||||||
|
|
||||||
|
{"0x_1_2_3_4_5", 0, 0x12345, nil},
|
||||||
|
{"_0x12345", 0, 0, ErrSyntax},
|
||||||
|
{"0x__12345", 0, 0, ErrSyntax},
|
||||||
|
{"0x1__2345", 0, 0, ErrSyntax},
|
||||||
|
{"0x1234__5", 0, 0, ErrSyntax},
|
||||||
|
{"0x12345_", 0, 0, ErrSyntax},
|
||||||
|
|
||||||
|
{"1_2_3_4_5", 16, 0, ErrSyntax},
|
||||||
|
{"_12345", 16, 0, ErrSyntax},
|
||||||
|
{"1__2345", 16, 0, ErrSyntax},
|
||||||
|
{"1234__5", 16, 0, ErrSyntax},
|
||||||
|
{"12345_", 16, 0, ErrSyntax},
|
||||||
|
|
||||||
|
{"0_1_2_3_4_5", 0, 012345, nil},
|
||||||
|
{"_012345", 0, 0, ErrSyntax},
|
||||||
|
{"0__12345", 0, 0, ErrSyntax},
|
||||||
|
{"01234__5", 0, 0, ErrSyntax},
|
||||||
|
{"012345_", 0, 0, ErrSyntax},
|
||||||
|
|
||||||
|
{"0_1_2_3_4_5", 8, 0, ErrSyntax},
|
||||||
|
{"_012345", 8, 0, ErrSyntax},
|
||||||
|
{"0__12345", 8, 0, ErrSyntax},
|
||||||
|
{"01234__5", 8, 0, ErrSyntax},
|
||||||
|
{"012345_", 8, 0, ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
type parseInt64Test struct {
|
type parseInt64Test struct {
|
||||||
|
|
@ -87,6 +127,11 @@ var parseInt64Tests = []parseInt64Test{
|
||||||
{"-9223372036854775808", -1 << 63, nil},
|
{"-9223372036854775808", -1 << 63, nil},
|
||||||
{"9223372036854775809", 1<<63 - 1, ErrRange},
|
{"9223372036854775809", 1<<63 - 1, ErrRange},
|
||||||
{"-9223372036854775809", -1 << 63, ErrRange},
|
{"-9223372036854775809", -1 << 63, ErrRange},
|
||||||
|
{"-1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed
|
||||||
|
{"-_12345", 0, ErrSyntax},
|
||||||
|
{"_12345", 0, ErrSyntax},
|
||||||
|
{"1__2345", 0, ErrSyntax},
|
||||||
|
{"12345_", 0, ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
type parseInt64BaseTest struct {
|
type parseInt64BaseTest struct {
|
||||||
|
|
@ -144,6 +189,26 @@ var parseInt64BaseTests = []parseInt64BaseTest{
|
||||||
{"10", 16, 16, nil},
|
{"10", 16, 16, nil},
|
||||||
{"-123456789abcdef", 16, -0x123456789abcdef, nil},
|
{"-123456789abcdef", 16, -0x123456789abcdef, nil},
|
||||||
{"7fffffffffffffff", 16, 1<<63 - 1, nil},
|
{"7fffffffffffffff", 16, 1<<63 - 1, nil},
|
||||||
|
|
||||||
|
// underscores
|
||||||
|
{"-0x_1_2_3_4_5", 0, -0x12345, nil},
|
||||||
|
{"0x_1_2_3_4_5", 0, 0x12345, nil},
|
||||||
|
{"-_0x12345", 0, 0, ErrSyntax},
|
||||||
|
{"_-0x12345", 0, 0, ErrSyntax},
|
||||||
|
{"_0x12345", 0, 0, ErrSyntax},
|
||||||
|
{"0x__12345", 0, 0, ErrSyntax},
|
||||||
|
{"0x1__2345", 0, 0, ErrSyntax},
|
||||||
|
{"0x1234__5", 0, 0, ErrSyntax},
|
||||||
|
{"0x12345_", 0, 0, ErrSyntax},
|
||||||
|
|
||||||
|
{"-0_1_2_3_4_5", 0, -012345, nil}, // octal
|
||||||
|
{"0_1_2_3_4_5", 0, 012345, nil}, // octal
|
||||||
|
{"-_012345", 0, 0, ErrSyntax},
|
||||||
|
{"_-012345", 0, 0, ErrSyntax},
|
||||||
|
{"_012345", 0, 0, ErrSyntax},
|
||||||
|
{"0__12345", 0, 0, ErrSyntax},
|
||||||
|
{"01234__5", 0, 0, ErrSyntax},
|
||||||
|
{"012345_", 0, 0, ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
type parseUint32Test struct {
|
type parseUint32Test struct {
|
||||||
|
|
@ -162,6 +227,11 @@ var parseUint32Tests = []parseUint32Test{
|
||||||
{"987654321", 987654321, nil},
|
{"987654321", 987654321, nil},
|
||||||
{"4294967295", 1<<32 - 1, nil},
|
{"4294967295", 1<<32 - 1, nil},
|
||||||
{"4294967296", 1<<32 - 1, ErrRange},
|
{"4294967296", 1<<32 - 1, ErrRange},
|
||||||
|
{"1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed
|
||||||
|
{"_12345", 0, ErrSyntax},
|
||||||
|
{"_12345", 0, ErrSyntax},
|
||||||
|
{"1__2345", 0, ErrSyntax},
|
||||||
|
{"12345_", 0, ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
type parseInt32Test struct {
|
type parseInt32Test struct {
|
||||||
|
|
@ -190,6 +260,11 @@ var parseInt32Tests = []parseInt32Test{
|
||||||
{"-2147483648", -1 << 31, nil},
|
{"-2147483648", -1 << 31, nil},
|
||||||
{"2147483649", 1<<31 - 1, ErrRange},
|
{"2147483649", 1<<31 - 1, ErrRange},
|
||||||
{"-2147483649", -1 << 31, ErrRange},
|
{"-2147483649", -1 << 31, ErrRange},
|
||||||
|
{"-1_2_3_4_5", 0, ErrSyntax}, // base=10 so no underscores allowed
|
||||||
|
{"-_12345", 0, ErrSyntax},
|
||||||
|
{"_12345", 0, ErrSyntax},
|
||||||
|
{"1__2345", 0, ErrSyntax},
|
||||||
|
{"12345_", 0, ErrSyntax},
|
||||||
}
|
}
|
||||||
|
|
||||||
type numErrorTest struct {
|
type numErrorTest struct {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue