diff --git a/src/pkg/encoding/xml/xml.go b/src/pkg/encoding/xml/xml.go
index 5066f5c010..623f417801 100644
--- a/src/pkg/encoding/xml/xml.go
+++ b/src/pkg/encoding/xml/xml.go
@@ -850,6 +850,8 @@ Input:
// Parsers are required to recognize lt, gt, amp, apos, and quot
// even if they have not been declared. That's all we allow.
var i int
+ var semicolon bool
+ var valid bool
for i = 0; i < len(d.tmp); i++ {
var ok bool
d.tmp[i], ok = d.getc()
@@ -861,6 +863,8 @@ Input:
}
c := d.tmp[i]
if c == ';' {
+ semicolon = true
+ valid = i > 0
break
}
if 'a' <= c && c <= 'z' ||
@@ -873,14 +877,25 @@ Input:
break
}
s := string(d.tmp[0:i])
- if i >= len(d.tmp) {
+ if !valid {
if !d.Strict {
b0, b1 = 0, 0
d.buf.WriteByte('&')
d.buf.Write(d.tmp[0:i])
+ if semicolon {
+ d.buf.WriteByte(';')
+ }
continue Input
}
- d.err = d.syntaxError("character entity expression &" + s + "... too long")
+ semi := ";"
+ if !semicolon {
+ semi = " (no semicolon)"
+ }
+ if i < len(d.tmp) {
+ d.err = d.syntaxError("invalid character entity &" + s + semi)
+ } else {
+ d.err = d.syntaxError("invalid character entity &" + s + "... too long")
+ }
return nil
}
var haveText bool
@@ -910,6 +925,7 @@ Input:
b0, b1 = 0, 0
d.buf.WriteByte('&')
d.buf.Write(d.tmp[0:i])
+ d.buf.WriteByte(';')
continue Input
}
d.err = d.syntaxError("invalid character entity &" + s + ";")
diff --git a/src/pkg/encoding/xml/xml_test.go b/src/pkg/encoding/xml/xml_test.go
index 1d0696ce08..d556789fdd 100644
--- a/src/pkg/encoding/xml/xml_test.go
+++ b/src/pkg/encoding/xml/xml_test.go
@@ -5,6 +5,7 @@
package xml
import (
+ "fmt"
"io"
"reflect"
"strings"
@@ -158,6 +159,39 @@ func TestRawToken(t *testing.T) {
testRawToken(t, d, rawTokens)
}
+const nonStrictInput = `
+<tag>non&entity</tag>
+<tag>&unknown;entity</tag>
+<tag>{</tag>
+<tag>zzz;</tag>
+` // one <tag> element per line; each must yield the StartElement/CharData/EndElement run listed in nonStrictTokens
+
+var nonStrictTokens = []Token{ // raw tokens expected from nonStrictInput when the decoder's Strict flag is false
+	CharData("\n"),
+	StartElement{Name: Name{Local: "tag"}, Attr: []Attr{}},
+	CharData("non&entity"),
+	EndElement{Name: Name{Local: "tag"}},
+	CharData("\n"),
+	StartElement{Name: Name{Local: "tag"}, Attr: []Attr{}},
+	CharData("&unknown;entity"),
+	EndElement{Name: Name{Local: "tag"}},
+	CharData("\n"),
+	StartElement{Name: Name{Local: "tag"}, Attr: []Attr{}},
+	CharData("{"),
+	EndElement{Name: Name{Local: "tag"}},
+	CharData("\n"),
+	StartElement{Name: Name{Local: "tag"}, Attr: []Attr{}},
+	CharData("zzz;"),
+	EndElement{Name: Name{Local: "tag"}},
+	CharData("\n"),
+}
+
+func TestNonStrictRawToken(t *testing.T) {
+	dec := NewDecoder(strings.NewReader(nonStrictInput))
+	dec.Strict = false // non-strict: invalid entities are copied through as text instead of raising a syntax error
+	testRawToken(t, dec, nonStrictTokens)
+}
+
type downCaser struct {
t *testing.T
r io.ByteReader
@@ -219,7 +253,18 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
t.Fatalf("token %d: unexpected error: %s", i, err)
}
if !reflect.DeepEqual(have, want) {
- t.Errorf("token %d = %#v want %#v", i, have, want)
+ var shave, swant string
+ if _, ok := have.(CharData); ok {
+ shave = fmt.Sprintf("CharData(%q)", have)
+ } else {
+ shave = fmt.Sprintf("%#v", have)
+ }
+ if _, ok := want.(CharData); ok {
+ swant = fmt.Sprintf("CharData(%q)", want)
+ } else {
+ swant = fmt.Sprintf("%#v", want)
+ }
+ t.Errorf("token %d = %s, want %s", i, shave, swant)
}
}
}
@@ -531,8 +576,8 @@ var characterTests = []struct {
{"\xef\xbf\xbe", "illegal character code U+FFFE"},
{"\r\n\x07", "illegal character code U+0007"},
{"what's up", "expected attribute name in element"},
- {"&\x01;", "invalid character entity &;"},
- {"&\xef\xbf\xbe;", "invalid character entity &;"},
+ {"&\x01;", "invalid character entity & (no semicolon)"},
+ {"&\xef\xbf\xbe;", "invalid character entity & (no semicolon)"},
}
func TestDisallowedCharacters(t *testing.T) {