mirror of https://github.com/golang/go.git
exp/html: tokenize attributes of end tags
If an end tag has an attribute that is a quoted string containing '>',
the tokenizer would end the tag prematurely. Now it reads the attributes
on end tags just as it does on start tags, but the high-level interface
still doesn't return them, because their presence is a parse error.

Pass 1 additional test.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6457060
parent 695024b8fa
commit 9f3b00579e
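To see the behavior this change fixes, here is a minimal sketch that tokenizes the test input flipped from FAIL to PASS below, using the tokenizer's high-level interface. It assumes the package's present-day import path golang.org/x/net/html (the successor to exp/html). With this fix the quoted '>' inside the end tag's attribute no longer terminates the tag early, so the expected output is Text "FOO", StartTag "script", EndTag "script" (with no attributes), Text "BAR".

package main

import (
	"fmt"
	"strings"

	"golang.org/x/net/html" // successor to the exp/html package
)

func main() {
	// The end tag carries a quoted attribute value containing '>'.
	const input = `FOO<script></script foo=">" dd>BAR`
	z := html.NewTokenizer(strings.NewReader(input))
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break // end of input
		}
		t := z.Token()
		// Expected sequence: Text "FOO", StartTag "script",
		// EndTag "script" (no attributes), Text "BAR".
		fmt.Printf("%-10v %q attrs=%d\n", tt, t.Data, len(t.Attr))
	}
}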
@@ -4,7 +4,7 @@ PASS "FOO<script></script >BAR"
 PASS "FOO<script></script/>BAR"
 PASS "FOO<script></script/ >BAR"
 PASS "FOO<script type=\"text/plain\"></scriptx>BAR"
-FAIL "FOO<script></script foo=\">\" dd>BAR"
+PASS "FOO<script></script foo=\">\" dd>BAR"
 PASS "FOO<script>'<'</script>BAR"
 PASS "FOO<script>'<!'</script>BAR"
 PASS "FOO<script>'<!-'</script>BAR"
@@ -468,29 +468,10 @@ loop:
 // readStartTag reads the next start tag token. The opening "<a" has already
 // been consumed, where 'a' means anything in [A-Za-z].
 func (z *Tokenizer) readStartTag() TokenType {
-	z.attr = z.attr[:0]
-	z.nAttrReturned = 0
-	// Read the tag name and attribute key/value pairs.
-	z.readTagName()
-	if z.skipWhiteSpace(); z.err != nil {
+	z.readTag()
+	if z.err != nil && len(z.attr) == 0 {
 		return ErrorToken
 	}
-	for {
-		c := z.readByte()
-		if z.err != nil || c == '>' {
-			break
-		}
-		z.raw.end--
-		z.readTagAttrKey()
-		z.readTagAttrVal()
-		// Save pendingAttr if it has a non-empty key.
-		if z.pendingAttr[0].start != z.pendingAttr[0].end {
-			z.attr = append(z.attr, z.pendingAttr)
-		}
-		if z.skipWhiteSpace(); z.err != nil {
-			break
-		}
-	}
 	// Several tags flag the tokenizer's next token as raw.
 	c, raw := z.buf[z.data.start], false
 	if 'A' <= c && c <= 'Z' {
@@ -520,16 +501,30 @@ func (z *Tokenizer) readStartTag() TokenType {
 	return StartTagToken
 }
 
-// readEndTag reads the next end tag token. The opening "</a" has already
-// been consumed, where 'a' means anything in [A-Za-z].
-func (z *Tokenizer) readEndTag() {
+// readTag reads the next tag token. The opening "<a" or "</a" has already been
+// consumed, where 'a' means anything in [A-Za-z].
+func (z *Tokenizer) readTag() {
 	z.attr = z.attr[:0]
 	z.nAttrReturned = 0
+	// Read the tag name and attribute key/value pairs.
 	z.readTagName()
+	if z.skipWhiteSpace(); z.err != nil {
+		return
+	}
 	for {
 		c := z.readByte()
 		if z.err != nil || c == '>' {
-			return
+			break
+		}
+		z.raw.end--
+		z.readTagAttrKey()
+		z.readTagAttrVal()
+		// Save pendingAttr if it has a non-empty key.
+		if z.pendingAttr[0].start != z.pendingAttr[0].end {
+			z.attr = append(z.attr, z.pendingAttr)
+		}
+		if z.skipWhiteSpace(); z.err != nil {
+			break
 		}
 	}
 }
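For illustration only (this is not the exp/html code), a self-contained sketch of the idea readTag implements: once attribute values are read with quoting rules, a '>' inside a quoted value is consumed as part of the value and cannot end the tag prematurely. The scanTag helper below is hypothetical and greatly simplified (no entity handling, single-quoted or unquoted values, or error recovery).

package main

import "fmt"

// scanTag is a hypothetical, simplified scanner in the spirit of readTag: it
// reads a tag name followed by key="value" pairs, so a '>' inside a quoted
// value is treated as attribute data rather than as the end of the tag.
func scanTag(s string) (name string, attrs [][2]string, rest string) {
	i := 0
	readName := func() string {
		start := i
		for i < len(s) && s[i] != ' ' && s[i] != '=' && s[i] != '>' {
			i++
		}
		return s[start:i]
	}
	name = readName()
	for {
		for i < len(s) && s[i] == ' ' {
			i++ // skip whitespace between attributes
		}
		if i >= len(s) || s[i] == '>' {
			if i < len(s) {
				i++ // consume the closing '>'
			}
			return name, attrs, s[i:]
		}
		key := readName()
		val := ""
		if i < len(s) && s[i] == '=' && i+1 < len(s) && s[i+1] == '"' {
			i += 2 // skip `="`
			start := i
			for i < len(s) && s[i] != '"' {
				i++ // a '>' here is ordinary attribute data
			}
			val = s[start:i]
			if i < len(s) {
				i++ // consume the closing quote
			}
		}
		for i < len(s) && s[i] != ' ' && s[i] != '>' {
			i++ // skip anything this sketch does not model (e.g. unquoted values)
		}
		attrs = append(attrs, [2]string{key, val})
	}
}

func main() {
	// The same shape as the newly passing test case: </script foo=">" dd>
	name, attrs, rest := scanTag(`script foo=">" dd>BAR`)
	fmt.Println(name, attrs, rest) // script [[foo >] [dd ]] BAR
}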
@@ -727,7 +722,7 @@ loop:
 				continue loop
 			}
 			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
-				z.readEndTag()
+				z.readTag()
 				z.tt = EndTagToken
 				return z.tt
 			}
@@ -858,27 +853,23 @@ func (z *Tokenizer) Token() Token {
 	switch z.tt {
 	case TextToken, CommentToken, DoctypeToken:
 		t.Data = string(z.Text())
-	case StartTagToken, SelfClosingTagToken:
-		var attr []Attribute
+	case StartTagToken, SelfClosingTagToken, EndTagToken:
 		name, moreAttr := z.TagName()
-		for moreAttr {
-			var key, val []byte
-			key, val, moreAttr = z.TagAttr()
-			attr = append(attr, Attribute{"", atom.String(key), string(val)})
+		// Since end tags should not have attributes, the high-level tokenizer
+		// interface will not return attributes for an end tag token even if
+		// it looks like </br foo="bar">.
+		if z.tt != EndTagToken {
+			for moreAttr {
+				var key, val []byte
+				key, val, moreAttr = z.TagAttr()
+				t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
+			}
 		}
 		if a := atom.Lookup(name); a != 0 {
 			t.DataAtom, t.Data = a, a.String()
 		} else {
 			t.DataAtom, t.Data = 0, string(name)
 		}
-		t.Attr = attr
-	case EndTagToken:
-		name, _ := z.TagName()
-		if a := atom.Lookup(name); a != 0 {
-			t.DataAtom, t.Data = a, a.String()
-		} else {
-			t.DataAtom, t.Data = 0, string(name)
-		}
 	}
 	return t
 }
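And a small sketch of the high-level behavior described by the new comment in Token: an end tag written with attributes, such as </br foo="bar">, is still reported as an end tag, but Token leaves Attr empty because attributes on an end tag are a parse error. Again this assumes the package's present-day import path golang.org/x/net/html.

package main

import (
	"fmt"
	"strings"

	"golang.org/x/net/html" // successor to the exp/html package
)

func main() {
	z := html.NewTokenizer(strings.NewReader(`</br foo="bar">after`))
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break // end of input
		}
		t := z.Token()
		// Prints: EndTag "br" len(Attr)=0, then Text "after" len(Attr)=0.
		fmt.Printf("%v %q len(Attr)=%d\n", tt, t.Data, len(t.Attr))
	}
}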