mirror of https://github.com/golang/go.git
exp/html: special handling for entities in attributes
Don't unescape entities in attributes when they don't end with a semicolon and they are followed by '=', a letter, or a digit. Pass 6 more tests from the WebKit test suite, plus one that was commented out in token_test.go. R=nigeltao CC=golang-dev https://golang.org/cl/6405073
This commit is contained in:
parent
4087c1b842
commit
f979528ce6
|
|
@ -163,14 +163,15 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
|
|||
}
|
||||
|
||||
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
|
||||
func unescape(b []byte) []byte {
|
||||
// attribute should be true if parsing an attribute value.
|
||||
func unescape(b []byte, attribute bool) []byte {
|
||||
for i, c := range b {
|
||||
if c == '&' {
|
||||
dst, src := unescapeEntity(b, i, i, false)
|
||||
dst, src := unescapeEntity(b, i, i, attribute)
|
||||
for src < len(b) {
|
||||
c := b[src]
|
||||
if c == '&' {
|
||||
dst, src = unescapeEntity(b, dst, src, false)
|
||||
dst, src = unescapeEntity(b, dst, src, attribute)
|
||||
} else {
|
||||
b[dst] = c
|
||||
dst, src = dst+1, src+1
|
||||
|
|
@ -250,7 +251,7 @@ func EscapeString(s string) string {
|
|||
func UnescapeString(s string) string {
|
||||
for _, c := range s {
|
||||
if c == '&' {
|
||||
return string(unescape([]byte(s)))
|
||||
return string(unescape([]byte(s), false))
|
||||
}
|
||||
}
|
||||
return s
|
||||
|
|
|
|||
|
|
@ -2,11 +2,11 @@ PASS "<div bar=\"ZZ&gt;YY\"></div>"
|
|||
PASS "<div bar=\"ZZ&\"></div>"
|
||||
PASS "<div bar='ZZ&'></div>"
|
||||
PASS "<div bar=ZZ&></div>"
|
||||
FAIL "<div bar=\"ZZ&gt=YY\"></div>"
|
||||
FAIL "<div bar=\"ZZ&gt0YY\"></div>"
|
||||
FAIL "<div bar=\"ZZ&gt9YY\"></div>"
|
||||
FAIL "<div bar=\"ZZ&gtaYY\"></div>"
|
||||
FAIL "<div bar=\"ZZ&gtZYY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gt=YY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gt0YY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gt9YY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gtaYY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gtZYY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gt YY\"></div>"
|
||||
PASS "<div bar=\"ZZ&gt\"></div>"
|
||||
PASS "<div bar='ZZ&gt'></div>"
|
||||
|
|
@ -15,7 +15,7 @@ PASS "<div bar=\"ZZ&pound_id=23\"></div>"
|
|||
PASS "<div bar=\"ZZ&prod_id=23\"></div>"
|
||||
PASS "<div bar=\"ZZ&pound;_id=23\"></div>"
|
||||
PASS "<div bar=\"ZZ&prod;_id=23\"></div>"
|
||||
FAIL "<div bar=\"ZZ&pound=23\"></div>"
|
||||
PASS "<div bar=\"ZZ&pound=23\"></div>"
|
||||
PASS "<div bar=\"ZZ&prod=23\"></div>"
|
||||
PASS "<div>ZZ&pound_id=23</div>"
|
||||
PASS "<div>ZZ&prod_id=23</div>"
|
||||
|
|
|
|||
|
|
@ -741,7 +741,7 @@ func (z *Tokenizer) Text() []byte {
|
|||
z.data.end = z.raw.end
|
||||
s = convertNewlines(s)
|
||||
if !z.textIsRaw {
|
||||
s = unescape(s)
|
||||
s = unescape(s, false)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
|
@ -775,7 +775,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
|||
z.nAttrReturned++
|
||||
key = z.buf[x[0].start:x[0].end]
|
||||
val = z.buf[x[1].start:x[1].end]
|
||||
return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr)
|
||||
return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
|
||||
}
|
||||
}
|
||||
return nil, nil, false
|
||||
|
|
|
|||
|
|
@ -370,14 +370,11 @@ var tokenTests = []tokenTest{
|
|||
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
|
||||
`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
|
||||
},
|
||||
/*
|
||||
// TODO: re-enable this test when it works. This input/output matches html5lib's behavior.
|
||||
{
|
||||
"entity without semicolon",
|
||||
`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
|
||||
`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
|
||||
},
|
||||
*/
|
||||
{
|
||||
"entity without semicolon",
|
||||
`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
|
||||
`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
|
||||
},
|
||||
{
|
||||
"entity with digits",
|
||||
"&frac12;",
|
||||
|
|
|
|||
Loading…
Reference in New Issue