diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go index b1704a402d..795bc0a916 100644 --- a/src/archive/tar/common.go +++ b/src/archive/tar/common.go @@ -85,14 +85,13 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) { format = formatUSTAR | formatPAX | formatGNU paxHdrs = make(map[string]string) - verifyString := func(s string, size int, gnuLong bool, paxKey string) { - + verifyString := func(s string, size int, paxKey string) { // NUL-terminator is optional for path and linkpath. // Technically, it is required for uname and gname, // but neither GNU nor BSD tar checks for it. tooLong := len(s) > size - if !isASCII(s) || (tooLong && !gnuLong) { - // TODO(dsnet): GNU supports UTF-8 (without NUL) for strings. + allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath + if hasNUL(s) || (tooLong && !allowLongGNU) { format &^= formatGNU // No GNU } if !isASCII(s) || tooLong { @@ -120,15 +119,16 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) { } } } - verifyTime := func(ts time.Time, size int, ustarField bool, paxKey string) { + verifyTime := func(ts time.Time, size int, paxKey string) { if ts.IsZero() { return // Always okay } needsNano := ts.Nanosecond() != 0 + hasFieldUSTAR := paxKey == paxMtime if !fitsInBase256(size, ts.Unix()) || needsNano { format &^= formatGNU // No GNU } - if !fitsInOctal(size, ts.Unix()) || needsNano || !ustarField { + if !fitsInOctal(size, ts.Unix()) || needsNano || !hasFieldUSTAR { format &^= formatUSTAR // No USTAR if paxKey == paxNone { format &^= formatPAX // No PAX @@ -138,26 +138,23 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) { } } - // TODO(dsnet): Add GNU long name support. - const supportGNULong = false - var blk block v7 := blk.V7() ustar := blk.USTAR() gnu := blk.GNU() - verifyString(h.Name, len(v7.Name()), supportGNULong, paxPath) - verifyString(h.Linkname, len(v7.LinkName()), supportGNULong, paxLinkpath) - verifyString(h.Uname, len(ustar.UserName()), false, paxUname) - verifyString(h.Gname, len(ustar.GroupName()), false, paxGname) + verifyString(h.Name, len(v7.Name()), paxPath) + verifyString(h.Linkname, len(v7.LinkName()), paxLinkpath) + verifyString(h.Uname, len(ustar.UserName()), paxUname) + verifyString(h.Gname, len(ustar.GroupName()), paxGname) verifyNumeric(h.Mode, len(v7.Mode()), paxNone) verifyNumeric(int64(h.Uid), len(v7.UID()), paxUid) verifyNumeric(int64(h.Gid), len(v7.GID()), paxGid) verifyNumeric(h.Size, len(v7.Size()), paxSize) verifyNumeric(h.Devmajor, len(ustar.DevMajor()), paxNone) verifyNumeric(h.Devminor, len(ustar.DevMinor()), paxNone) - verifyTime(h.ModTime, len(v7.ModTime()), true, paxMtime) - verifyTime(h.AccessTime, len(gnu.AccessTime()), false, paxAtime) - verifyTime(h.ChangeTime, len(gnu.ChangeTime()), false, paxCtime) + verifyTime(h.ModTime, len(v7.ModTime()), paxMtime) + verifyTime(h.AccessTime, len(gnu.AccessTime()), paxAtime) + verifyTime(h.ChangeTime, len(gnu.ChangeTime()), paxCtime) if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { return formatUnknown, nil diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index 79d271717c..42d4ab7e14 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -350,6 +350,41 @@ func TestReader(t *testing.T) { Uname: "rawr", Gname: "dsnet", }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-utf8.tar", + headers: []*Header{{ + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "☺", + Gname: "⚹", + Devminor: -1, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-not-utf8.tar", + headers: []*Header{{ + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Devminor: -1, + }}, }, { // BSD tar v3.1.2 and GNU tar v1.27.1 both rejects PAX records // with NULs in the key. diff --git a/src/archive/tar/strconv.go b/src/archive/tar/strconv.go index 89ac8112e5..e02963b74b 100644 --- a/src/archive/tar/strconv.go +++ b/src/archive/tar/strconv.go @@ -12,6 +12,11 @@ import ( "time" ) +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.IndexByte(s, 0) >= 0 +} + // isASCII reports whether the input is an ASCII C-style string. func isASCII(s string) bool { for _, c := range s { @@ -59,7 +64,6 @@ func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { f.err = ErrFieldTooLong } - s = toASCII(s) // TODO(dsnet): Remove this for UTF-8 support in GNU format copy(b, s) if len(s) < len(b) { b[len(s)] = 0 @@ -307,8 +311,8 @@ func validPAXRecord(k, v string) bool { } switch k { case paxPath, paxLinkpath, paxUname, paxGname: - return strings.IndexByte(v, 0) < 0 + return !hasNUL(v) default: - return strings.IndexByte(k, 0) < 0 + return !hasNUL(k) } } diff --git a/src/archive/tar/tar_test.go b/src/archive/tar/tar_test.go index 22e23a6bbd..1a38ecb446 100644 --- a/src/archive/tar/tar_test.go +++ b/src/archive/tar/tar_test.go @@ -386,7 +386,7 @@ func TestHeaderAllowedFormats(t *testing.T) { formats: formatUnknown, }, { header: &Header{Name: "用戶名", Devmajor: -1 << 56}, - formats: formatUnknown, + formats: formatGNU, }, { header: &Header{Size: math.MaxInt64}, paxHdrs: map[string]string{paxSize: "9223372036854775807"}, @@ -408,10 +408,14 @@ func TestHeaderAllowedFormats(t *testing.T) { }, { header: &Header{Name: strings.Repeat("a", nameSize)}, formats: formatUSTAR | formatPAX | formatGNU, + }, { + header: &Header{Name: strings.Repeat("a", nameSize+1)}, + paxHdrs: map[string]string{paxPath: strings.Repeat("a", nameSize+1)}, + formats: formatPAX | formatGNU, }, { header: &Header{Linkname: "用戶名"}, paxHdrs: map[string]string{paxLinkpath: "用戶名"}, - formats: formatPAX, + formats: formatPAX | formatGNU, }, { header: &Header{Linkname: strings.Repeat("用戶名\x00", nameSize)}, paxHdrs: map[string]string{paxLinkpath: strings.Repeat("用戶名\x00", nameSize)}, diff --git a/src/archive/tar/testdata/gnu-not-utf8.tar b/src/archive/tar/testdata/gnu-not-utf8.tar new file mode 100644 index 0000000000..34b4c57771 Binary files /dev/null and b/src/archive/tar/testdata/gnu-not-utf8.tar differ diff --git a/src/archive/tar/testdata/gnu-utf8.tar b/src/archive/tar/testdata/gnu-utf8.tar new file mode 100644 index 0000000000..dde941c3ff Binary files /dev/null and b/src/archive/tar/testdata/gnu-utf8.tar differ diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go index a918ff3eef..ffef29af10 100644 --- a/src/archive/tar/writer.go +++ b/src/archive/tar/writer.go @@ -121,12 +121,10 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { } // Pack the main header. - var f formatter - blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) blk.SetFormat(formatPAX) - if f.err != nil && len(paxHdrs) == 0 { - return f.err // Should never happen, otherwise PAX headers would be used - } return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) } @@ -134,10 +132,23 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error { // TODO(dsnet): Support writing sparse files. // See https://golang.org/issue/13548 - // TODO(dsnet): Support long filenames (with UTF-8) support. + // Use long-link files if Name or Linkname exceeds the field size. + const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, formatGNU); err != nil { + return err + } + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, formatGNU); err != nil { + return err + } + } // Pack the main header. - var f formatter + var f formatter // Ignore errors since they are expected blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) if !hdr.AccessTime.IsZero() { f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) @@ -146,9 +157,6 @@ func (tw *Writer) writeGNUHeader(hdr *Header) error { f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) } blk.SetFormat(formatGNU) - if f.err != nil { - return f.err // Should never happen since header is validated - } return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) } diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go index a6eec5a9da..9d92ab89a6 100644 --- a/src/archive/tar/writer_test.go +++ b/src/archive/tar/writer_test.go @@ -219,6 +219,35 @@ func TestWriter(t *testing.T) { }, }}, err: ErrHeader, + }, { + file: "testdata/gnu-utf8.tar", + entries: []*entry{{ + header: &Header{ + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "☺", + Gname: "⚹", + Devminor: -1, // Force use of GNU format + }, + }}, + }, { + file: "testdata/gnu-not-utf8.tar", + entries: []*entry{{ + header: &Header{ + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Devminor: -1, // Force use of GNU format + }, + }}, }} for _, v := range vectors {