mirror of https://github.com/golang/go.git
net/url: accept non-ASCII bytes in URL per RFC 3986
Fixes #7991. Fixes #12719. Change-Id: I5650fa35ec5d49addeda6cc6e7fa93cfbe1cdfc0 Reviewed-on: https://go-review.googlesource.com/17385 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
84a875caa6
commit
a6869d1c5c
|
|
@ -71,6 +71,7 @@ type encoding int
|
|||
const (
|
||||
encodePath encoding = 1 + iota
|
||||
encodeHost
|
||||
encodeZone
|
||||
encodeUserPassword
|
||||
encodeQueryComponent
|
||||
encodeFragment
|
||||
|
|
@ -93,7 +94,7 @@ func shouldEscape(c byte, mode encoding) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
if mode == encodeHost {
|
||||
if mode == encodeHost || mode == encodeZone {
|
||||
// §3.2.2 Host allows
|
||||
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
||||
// as part of reg-name.
|
||||
|
|
@ -166,6 +167,27 @@ func unescape(s string, mode encoding) (string, error) {
|
|||
}
|
||||
return "", EscapeError(s)
|
||||
}
|
||||
// Per https://tools.ietf.org/html/rfc3986#page-21
|
||||
// in the host component %-encoding can only be used
|
||||
// for non-ASCII bytes.
|
||||
// But https://tools.ietf.org/html/rfc6874#section-2
|
||||
// introduces %25 being allowed to escape a percent sign
|
||||
// in IPv6 scoped-address literals. Yay.
|
||||
if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
|
||||
return "", EscapeError(s[i : i+3])
|
||||
}
|
||||
if mode == encodeZone {
|
||||
// RFC 6874 says basically "anything goes" for zone identifiers
|
||||
// and that even non-ASCII can be redundantly escaped,
|
||||
// but it seems prudent to restrict %-escaped bytes here to those
|
||||
// that are valid host name bytes in their unescaped form.
|
||||
// That is, you can use escaping in the zone identifier but not
|
||||
// to introduce bytes you couldn't just write directly.
|
||||
v := unhex(s[i+1])<<4 | unhex(s[i+2])
|
||||
if s[i:i+3] != "%25" && shouldEscape(v, encodeHost) {
|
||||
return "", EscapeError(s[i : i+3])
|
||||
}
|
||||
}
|
||||
i += 3
|
||||
case '+':
|
||||
hasPlus = mode == encodeQueryComponent
|
||||
|
|
@ -496,14 +518,9 @@ func parseAuthority(authority string) (user *Userinfo, host string, err error) {
|
|||
// parseHost parses host as an authority without user
|
||||
// information. That is, as host[:port].
|
||||
func parseHost(host string) (string, error) {
|
||||
litOrName := host
|
||||
if strings.HasPrefix(host, "[") {
|
||||
// Parse an IP-Literal in RFC 3986 and RFC 6874.
|
||||
// E.g., "[fe80::1], "[fe80::1%25en0]"
|
||||
//
|
||||
// RFC 4007 defines "%" as a delimiter character in
|
||||
// the textual representation of IPv6 addresses.
|
||||
// Per RFC 6874, in URIs that "%" is encoded as "%25".
|
||||
// E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80".
|
||||
i := strings.LastIndex(host, "]")
|
||||
if i < 0 {
|
||||
return "", errors.New("missing ']' in host")
|
||||
|
|
@ -512,29 +529,31 @@ func parseHost(host string) (string, error) {
|
|||
if !validOptionalPort(colonPort) {
|
||||
return "", fmt.Errorf("invalid port %q after host", colonPort)
|
||||
}
|
||||
// Parse a host subcomponent without a ZoneID in RFC
|
||||
// 6874 because the ZoneID is allowed to use the
|
||||
// percent encoded form.
|
||||
j := strings.Index(host[:i], "%25")
|
||||
if j < 0 {
|
||||
litOrName = host[1:i]
|
||||
} else {
|
||||
litOrName = host[1:j]
|
||||
|
||||
// RFC 6874 defines that %25 (%-encoded percent) introduces
|
||||
// the zone identifier, and the zone identifier can use basically
|
||||
// any %-encoding it likes. That's different from the host, which
|
||||
// can only %-encode non-ASCII bytes.
|
||||
// We do impose some restrictions on the zone, to avoid stupidity
|
||||
// like newlines.
|
||||
zone := strings.Index(host[:i], "%25")
|
||||
if zone >= 0 {
|
||||
host1, err := unescape(host[:zone], encodeHost)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
host2, err := unescape(host[zone:i], encodeZone)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
host3, err := unescape(host[i:], encodeHost)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return host1 + host2 + host3, nil
|
||||
}
|
||||
}
|
||||
|
||||
// A URI containing an IP-Literal without a ZoneID or
|
||||
// IPv4address in RFC 3986 and RFC 6847 must not be
|
||||
// percent-encoded.
|
||||
//
|
||||
// A URI containing a DNS registered name in RFC 3986 is
|
||||
// allowed to be percent-encoded, though we don't use it for
|
||||
// now to avoid messing up with the gap between allowed
|
||||
// characters in URI and allowed characters in DNS.
|
||||
// See golang.org/issue/7991.
|
||||
if strings.Contains(litOrName, "%") {
|
||||
return "", errors.New("percent-encoded characters in host")
|
||||
}
|
||||
var err error
|
||||
if host, err = unescape(host, encodeHost); err != nil {
|
||||
return "", err
|
||||
|
|
|
|||
|
|
@ -483,6 +483,34 @@ var urltests = []URLTest{
|
|||
},
|
||||
"",
|
||||
},
|
||||
// golang.org/issue/7991 and golang.org/issue/12719 (non-ascii %-encoded in host)
|
||||
{
|
||||
"http://hello.世界.com/foo",
|
||||
&URL{
|
||||
Scheme: "http",
|
||||
Host: "hello.世界.com",
|
||||
Path: "/foo",
|
||||
},
|
||||
"http://hello.%E4%B8%96%E7%95%8C.com/foo",
|
||||
},
|
||||
{
|
||||
"http://hello.%e4%b8%96%e7%95%8c.com/foo",
|
||||
&URL{
|
||||
Scheme: "http",
|
||||
Host: "hello.世界.com",
|
||||
Path: "/foo",
|
||||
},
|
||||
"http://hello.%E4%B8%96%E7%95%8C.com/foo",
|
||||
},
|
||||
{
|
||||
"http://hello.%E4%B8%96%E7%95%8C.com/foo",
|
||||
&URL{
|
||||
Scheme: "http",
|
||||
Host: "hello.世界.com",
|
||||
Path: "/foo",
|
||||
},
|
||||
"",
|
||||
},
|
||||
}
|
||||
|
||||
// more useful string for debugging than fmt's struct printer
|
||||
|
|
@ -1184,11 +1212,11 @@ func TestParseAuthority(t *testing.T) {
|
|||
{"http://[::1]%23", true},
|
||||
{"http://[::1%25en0]", false}, // valid zone id
|
||||
{"http://[::1]:", false}, // colon, but no port OK
|
||||
{"http://[::1]:%38%30", true}, // no hex in port
|
||||
{"http://[::1%25%10]", false}, // TODO: reject the %10 after the valid zone %25 separator?
|
||||
{"http://[::1]:%38%30", true}, // not allowed: % encoding only for non-ASCII
|
||||
{"http://[::1%25%41]", false}, // RFC 6874 allows over-escaping in zone
|
||||
{"http://[%10::1]", true}, // no %xx escapes in IP address
|
||||
{"http://[::1]/%48", false}, // %xx in path is fine
|
||||
{"http://%41:8080/", true}, // TODO: arguably we should accept reg-name with %xx
|
||||
{"http://%41:8080/", true}, // not allowed: % encoding only for non-ASCII
|
||||
{"mysql://x@y(z:123)/foo", false}, // golang.org/issue/12023
|
||||
{"mysql://x@y(1.2.3.4:123)/foo", false},
|
||||
{"mysql://x@y([2001:db8::1]:123)/foo", false},
|
||||
|
|
|
|||
Loading…
Reference in New Issue