mirror of https://github.com/golang/go.git
html/template: fix string iteration in replacement operations
In css, js, and html, the replacement operations are implemented
by iterating on strings (rune by rune). The for/range
statement is used. The length of the rune is required
and added to the index to properly slice the string.
This is potentially wrong because there is a discrepancy between
the result of utf8.RuneLen and the increment of the index
(set by the for/range statement). For invalid strings,
utf8.RuneLen('\ufffd') == 3, while the index is incremented
only by 1 byte.
htmlReplacer triggers a panic at slicing time for some
invalid strings.
Use a more robust iteration mechanism based on
utf8.DecodeRuneInString, and make sure the same
pattern is used for all similar functions in this
package.
Fixes #10799
Change-Id: Ibad3857b2819435d9fa564f06fc2ca8774102841
Reviewed-on: https://go-review.googlesource.com/10105
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
parent
d6bbcea22a
commit
a1c1a763bc
|
|
@ -157,56 +157,20 @@ func isCSSSpace(b byte) bool {
|
|||
func cssEscaper(args ...interface{}) string {
|
||||
s, _ := stringify(args...)
|
||||
var b bytes.Buffer
|
||||
written := 0
|
||||
for i, r := range s {
|
||||
r, w, written := rune(0), 0, 0
|
||||
for i := 0; i < len(s); i += w {
|
||||
// See comment in htmlEscaper.
|
||||
r, w = utf8.DecodeRuneInString(s[i:])
|
||||
var repl string
|
||||
switch r {
|
||||
case 0:
|
||||
repl = `\0`
|
||||
case '\t':
|
||||
repl = `\9`
|
||||
case '\n':
|
||||
repl = `\a`
|
||||
case '\f':
|
||||
repl = `\c`
|
||||
case '\r':
|
||||
repl = `\d`
|
||||
// Encode HTML specials as hex so the output can be embedded
|
||||
// in HTML attributes without further encoding.
|
||||
case '"':
|
||||
repl = `\22`
|
||||
case '&':
|
||||
repl = `\26`
|
||||
case '\'':
|
||||
repl = `\27`
|
||||
case '(':
|
||||
repl = `\28`
|
||||
case ')':
|
||||
repl = `\29`
|
||||
case '+':
|
||||
repl = `\2b`
|
||||
case '/':
|
||||
repl = `\2f`
|
||||
case ':':
|
||||
repl = `\3a`
|
||||
case ';':
|
||||
repl = `\3b`
|
||||
case '<':
|
||||
repl = `\3c`
|
||||
case '>':
|
||||
repl = `\3e`
|
||||
case '\\':
|
||||
repl = `\\`
|
||||
case '{':
|
||||
repl = `\7b`
|
||||
case '}':
|
||||
repl = `\7d`
|
||||
switch {
|
||||
case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
|
||||
repl = cssReplacementTable[r]
|
||||
default:
|
||||
continue
|
||||
}
|
||||
b.WriteString(s[written:i])
|
||||
b.WriteString(repl)
|
||||
written = i + utf8.RuneLen(r)
|
||||
written = i + w
|
||||
if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
|
||||
b.WriteByte(' ')
|
||||
}
|
||||
|
|
@ -218,6 +182,30 @@ func cssEscaper(args ...interface{}) string {
|
|||
return b.String()
|
||||
}
|
||||
|
||||
var cssReplacementTable = []string{
|
||||
0: `\0`,
|
||||
'\t': `\9`,
|
||||
'\n': `\a`,
|
||||
'\f': `\c`,
|
||||
'\r': `\d`,
|
||||
// Encode HTML specials as hex so the output can be embedded
|
||||
// in HTML attributes without further encoding.
|
||||
'"': `\22`,
|
||||
'&': `\26`,
|
||||
'\'': `\27`,
|
||||
'(': `\28`,
|
||||
')': `\29`,
|
||||
'+': `\2b`,
|
||||
'/': `\2f`,
|
||||
':': `\3a`,
|
||||
';': `\3b`,
|
||||
'<': `\3c`,
|
||||
'>': `\3e`,
|
||||
'\\': `\\`,
|
||||
'{': `\7b`,
|
||||
'}': `\7d`,
|
||||
}
|
||||
|
||||
var expressionBytes = []byte("expression")
|
||||
var mozBindingBytes = []byte("mozbinding")
|
||||
|
||||
|
|
|
|||
|
|
@ -138,21 +138,24 @@ var htmlNospaceNormReplacementTable = []string{
|
|||
// and when badRunes is true, certain bad runes are allowed through unescaped.
|
||||
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
|
||||
written, b := 0, new(bytes.Buffer)
|
||||
for i, r := range s {
|
||||
r, w := rune(0), 0
|
||||
for i := 0; i < len(s); i += w {
|
||||
// Cannot use 'for range s' because we need to preserve the width
|
||||
// of the runes in the input. If we see a decoding error, the input
|
||||
// width will not be utf8.Runelen(r) and we will overrun the buffer.
|
||||
r, w = utf8.DecodeRuneInString(s[i:])
|
||||
if int(r) < len(replacementTable) {
|
||||
if repl := replacementTable[r]; len(repl) != 0 {
|
||||
b.WriteString(s[written:i])
|
||||
b.WriteString(repl)
|
||||
// Valid as long as replacementTable doesn't
|
||||
// include anything above 0x7f.
|
||||
written = i + utf8.RuneLen(r)
|
||||
written = i + w
|
||||
}
|
||||
} else if badRunes {
|
||||
// No-op.
|
||||
// IE does not allow these ranges in unquoted attrs.
|
||||
} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
|
||||
fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
|
||||
written = i + utf8.RuneLen(r)
|
||||
written = i + w
|
||||
}
|
||||
}
|
||||
if written == 0 {
|
||||
|
|
|
|||
|
|
@ -19,7 +19,8 @@ func TestHTMLNospaceEscaper(t *testing.T) {
|
|||
`PQRSTUVWXYZ[\]^_` +
|
||||
"`abcdefghijklmno" +
|
||||
"pqrstuvwxyz{|}~\x7f" +
|
||||
"\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E")
|
||||
"\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E" +
|
||||
"erroneous\x960") // keep at the end
|
||||
|
||||
want := ("�\x01\x02\x03\x04\x05\x06\x07" +
|
||||
"\x08	  \x0E\x0F" +
|
||||
|
|
@ -31,14 +32,16 @@ func TestHTMLNospaceEscaper(t *testing.T) {
|
|||
`PQRSTUVWXYZ[\]^_` +
|
||||
``abcdefghijklmno` +
|
||||
`pqrstuvwxyz{|}~` + "\u007f" +
|
||||
"\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
|
||||
"\u00A0\u0100\u2028\u2029\ufeff\U0001D11E" +
|
||||
"erroneous�0") // keep at the end
|
||||
|
||||
got := htmlNospaceEscaper(input)
|
||||
if got != want {
|
||||
t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
|
||||
}
|
||||
|
||||
got, want = html.UnescapeString(got), strings.Replace(input, "\x00", "\ufffd", 1)
|
||||
r := strings.NewReplacer("\x00", "\ufffd", "\x96", "\ufffd")
|
||||
got, want = html.UnescapeString(got), r.Replace(input)
|
||||
if want != got {
|
||||
t.Errorf("decode: want\n\t%q\nbut got\n\t%q", want, got)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -246,8 +246,10 @@ func jsRegexpEscaper(args ...interface{}) string {
|
|||
// `\u2029`.
|
||||
func replace(s string, replacementTable []string) string {
|
||||
var b bytes.Buffer
|
||||
written := 0
|
||||
for i, r := range s {
|
||||
r, w, written := rune(0), 0, 0
|
||||
for i := 0; i < len(s); i += w {
|
||||
// See comment in htmlEscaper.
|
||||
r, w = utf8.DecodeRuneInString(s[i:])
|
||||
var repl string
|
||||
switch {
|
||||
case int(r) < len(replacementTable) && replacementTable[r] != "":
|
||||
|
|
@ -261,7 +263,7 @@ func replace(s string, replacementTable []string) string {
|
|||
}
|
||||
b.WriteString(s[written:i])
|
||||
b.WriteString(repl)
|
||||
written = i + utf8.RuneLen(r)
|
||||
written = i + w
|
||||
}
|
||||
if written == 0 {
|
||||
return s
|
||||
|
|
|
|||
Loading…
Reference in New Issue