diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index 6fcebe6593..08fc14d837 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -759,7 +759,42 @@ func TrimRight(s []byte, cutset string) []byte {
 // TrimSpace returns a subslice of s by slicing off all leading and
 // trailing white space, as defined by Unicode.
 func TrimSpace(s []byte) []byte {
-	return TrimFunc(s, unicode.IsSpace)
+	// Fast path for ASCII: look for the first ASCII non-space byte
+	start := 0
+	for ; start < len(s); start++ {
+		c := s[start]
+		if c >= utf8.RuneSelf {
+			// If we run into a non-ASCII byte, fall back to the
+			// slower unicode-aware method on the remaining bytes
+			return TrimFunc(s[start:], unicode.IsSpace)
+		}
+		if asciiSpace[c] == 0 {
+			break
+		}
+	}
+
+	// Now look for the first ASCII non-space byte from the end
+	stop := len(s)
+	for ; stop > start; stop-- {
+		c := s[stop-1]
+		if c >= utf8.RuneSelf {
+			return TrimFunc(s[start:stop], unicode.IsSpace)
+		}
+		if asciiSpace[c] == 0 {
+			break
+		}
+	}
+
+	// At this point s[start:stop] is either empty or starts and ends
+	// with an ASCII non-space byte. Non-ASCII cases have already been
+	// handled above.
+	if start == stop {
+		// Preserve the result of the TrimFunc-based implementation
+		// this fast path replaces: nil, not an empty non-nil slice,
+		// when s is empty or consists only of white space.
+		return nil
+	}
+	return s[start:stop]
 }
 
 // Runes interprets s as a sequence of UTF-8-encoded code points.
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index 80a54f6118..98ba95009d 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -1617,9 +1617,21 @@ func BenchmarkFieldsFunc(b *testing.B) {
 }
 
 func BenchmarkTrimSpace(b *testing.B) {
-	s := []byte(" Some text. \n")
-	for i := 0; i < b.N; i++ {
-		TrimSpace(s)
+	tests := []struct {
+		name  string
+		input []byte
+	}{
+		{"NoTrim", []byte("typical")},
+		{"ASCII", []byte(" foo bar ")},
+		{"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")},
+		{"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
+	}
+	for _, test := range tests {
+		b.Run(test.name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				TrimSpace(test.input)
+			}
+		})
 	}
 }
 
diff --git a/src/strings/strings.go b/src/strings/strings.go
index a98f5d8ff1..e14fffb2b8 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -818,7 +818,36 @@ func TrimRight(s string, cutset string) string {
 // TrimSpace returns a slice of the string s, with all leading
 // and trailing white space removed, as defined by Unicode.
 func TrimSpace(s string) string {
-	return TrimFunc(s, unicode.IsSpace)
+	// Fast path for ASCII: look for the first ASCII non-space byte
+	start := 0
+	for ; start < len(s); start++ {
+		c := s[start]
+		if c >= utf8.RuneSelf {
+			// If we run into a non-ASCII byte, fall back to the
+			// slower unicode-aware method on the remaining bytes
+			return TrimFunc(s[start:], unicode.IsSpace)
+		}
+		if asciiSpace[c] == 0 {
+			break
+		}
+	}
+
+	// Now look for the first ASCII non-space byte from the end
+	stop := len(s)
+	for ; stop > start; stop-- {
+		c := s[stop-1]
+		if c >= utf8.RuneSelf {
+			return TrimFunc(s[start:stop], unicode.IsSpace)
+		}
+		if asciiSpace[c] == 0 {
+			break
+		}
+	}
+
+	// At this point s[start:stop] is either empty or starts and ends
+	// with an ASCII non-space byte. Non-ASCII cases have already been
+	// handled above.
+	return s[start:stop]
 }
 
 // TrimPrefix returns s without the provided leading prefix string.
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
index eee2dd55df..500671aca4 100644
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -1731,3 +1731,27 @@ func BenchmarkJoin(b *testing.B) {
 		})
 	}
 }
+
+// BenchmarkTrimSpace runs TrimSpace as one sub-benchmark per input:
+// a string with nothing to trim, an ASCII-only string, and two
+// strings that contain non-ASCII characters.
+func BenchmarkTrimSpace(b *testing.B) {
+	type benchCase struct {
+		name  string
+		input string
+	}
+	cases := []benchCase{
+		{name: "NoTrim", input: "typical"},
+		{name: "ASCII", input: " foo bar "},
+		{name: "SomeNonASCII", input: " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
+		{name: "JustNonASCII", input: "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
+	}
+	for _, bc := range cases {
+		in := bc.input
+		b.Run(bc.name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				TrimSpace(in)
+			}
+		})
+	}
+}