diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go index 795bc0a916..22f3206243 100644 --- a/src/archive/tar/common.go +++ b/src/archive/tar/common.go @@ -15,6 +15,7 @@ package tar import ( "errors" "fmt" + "math" "os" "path" "strconv" @@ -30,6 +31,8 @@ var ( ErrWriteTooLong = errors.New("tar: write too long") ErrFieldTooLong = errors.New("tar: header field too long") ErrWriteAfterClose = errors.New("tar: write after close") + errMissData = errors.New("tar: sparse file references non-existent data") + errUnrefData = errors.New("tar: sparse file contains unreferenced data") ) // Header type flags. @@ -68,6 +71,131 @@ type Header struct { AccessTime time.Time // access time ChangeTime time.Time // status change time Xattrs map[string]string + + // SparseHoles represents a sequence of holes in a sparse file. + // + // The regions must be sorted in ascending order, not overlap with + // each other, and not extend past the specified Size. + // The file is sparse if either len(SparseHoles) > 0 or + // the Typeflag is set to TypeGNUSparse. + SparseHoles []SparseEntry +} + +// SparseEntry represents a Length-sized fragment at Offset in the file. +type SparseEntry struct{ Offset, Length int64 } + +func (s SparseEntry) endOffset() int64 { return s.Offset + s.Length } + +// A sparse file can be represented as either a sparseDatas or a sparseHoles. +// As long as the total size is known, they are equivalent and one can be +// converted to the other form and back. The various tar formats with sparse +// file support represent sparse files in the sparseDatas form. That is, they +// specify the fragments in the file that has data, and treat everything else as +// having zero bytes. As such, the encoding and decoding logic in this package +// deals with sparseDatas. +// +// However, the external API uses sparseHoles instead of sparseDatas because the +// zero value of sparseHoles logically represents a normal file (i.e., there are +// no holes in it). On the other hand, the zero value of sparseDatas implies +// that the file has no data in it, which is rather odd. +// +// As an example, if the underlying raw file contains the 10-byte data: +// var compactFile = "abcdefgh" +// +// And the sparse map has the following entries: +// var spd sparseDatas = []sparseEntry{ +// {Offset: 2, Length: 5}, // Data fragment for 2..6 +// {Offset: 18, Length: 3}, // Data fragment for 18..20 +// } +// var sph sparseHoles = []SparseEntry{ +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 +// } +// +// Then the content of the resulting sparse file with a Header.Size of 25 is: +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type ( + sparseDatas []SparseEntry + sparseHoles []SparseEntry +) + +// validateSparseEntries reports whether sp is a valid sparse map. +// It does not matter whether sp represents data fragments or hole fragments. +func validateSparseEntries(sp []SparseEntry, size int64) bool { + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + if size < 0 { + return false + } + var pre SparseEntry + for _, cur := range sp { + switch { + case cur.Offset < 0 || cur.Length < 0: + return false // Negative values are never okay + case cur.Offset > math.MaxInt64-cur.Length: + return false // Integer overflow with large length + case cur.endOffset() > size: + return false // Region extends beyond the actual size + case pre.endOffset() > cur.Offset: + return false // Regions cannot overlap and must be in order + } + pre = cur + } + return true +} + +// alignSparseEntries mutates src and returns dst where each fragment's +// starting offset is aligned up to the nearest block edge, and each +// ending offset is aligned down to the nearest block edge. +// +// Even though the Go tar Reader and the BSD tar utility can handle entries +// with arbitrary offsets and lengths, the GNU tar utility can only handle +// offsets and lengths that are multiples of blockSize. +func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry { + dst := src[:0] + for _, s := range src { + pos, end := s.Offset, s.endOffset() + pos += blockPadding(+pos) // Round-up to nearest blockSize + if end != size { + end -= blockPadding(-end) // Round-down to nearest blockSize + } + if pos < end { + dst = append(dst, SparseEntry{Offset: pos, Length: end - pos}) + } + } + return dst +} + +// invertSparseEntries converts a sparse map from one form to the other. +// If the input is sparseHoles, then it will output sparseDatas and vice-versa. +// The input must have been already validated. +// +// This function mutates src and returns a normalized map where: +// * adjacent fragments are coalesced together +// * only the last fragment may be empty +// * the endOffset of the last fragment is the total size +func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry { + dst := src[:0] + var pre SparseEntry + for _, cur := range src { + if cur.Length == 0 { + continue // Skip empty fragments + } + pre.Length = cur.Offset - pre.Offset + if pre.Length > 0 { + dst = append(dst, pre) // Only add non-empty fragments + } + pre.Offset = cur.endOffset() + } + pre.Length = size - pre.Offset // Possibly the only empty fragment + return append(dst, pre) +} + +type fileState interface { + // Remaining reports the number of remaining bytes in the current file. + // This count includes any sparse holes that may exist. + Remaining() int64 } // FileInfo returns an os.FileInfo for the Header. @@ -300,6 +428,17 @@ const ( paxUname = "uname" paxXattr = "SCHILY.xattr." paxNone = "" + + // Keywords for GNU sparse files in a PAX extended header. + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" ) // FileInfoHeader creates a partially-populated Header from fi. @@ -373,6 +512,9 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { h.Size = 0 h.Linkname = sys.Linkname } + if sys.SparseHoles != nil { + h.SparseHoles = append([]SparseEntry{}, sys.SparseHoles...) + } } if sysStat != nil { return h, sysStat(fi, h) @@ -390,3 +532,10 @@ func isHeaderOnlyType(flag byte) bool { return false } } + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} diff --git a/src/archive/tar/format.go b/src/archive/tar/format.go index 1e9a006a6f..72ff9c59a0 100644 --- a/src/archive/tar/format.go +++ b/src/archive/tar/format.go @@ -50,6 +50,12 @@ const ( prefixSize = 155 // Max length of the prefix field in USTAR format ) +// blockPadding computes the number of bytes needed to pad offset up to the +// nearest block edge where 0 <= n < blockSize. +func blockPadding(offset int64) (n int64) { + return -offset & (blockSize - 1) +} + var zeroBlock block type block [blockSize]byte @@ -192,11 +198,11 @@ func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } type sparseArray []byte -func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) } +func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } func (s sparseArray) MaxEntries() int { return len(s) / 24 } -type sparseNode []byte +type sparseElem []byte -func (s sparseNode) Offset() []byte { return s[00:][:12] } -func (s sparseNode) NumBytes() []byte { return s[12:][:12] } +func (s sparseElem) Offset() []byte { return s[00:][:12] } +func (s sparseElem) Length() []byte { return s[12:][:12] } diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go index fb7cb8891d..f33e2f526c 100644 --- a/src/archive/tar/reader.go +++ b/src/archive/tar/reader.go @@ -11,7 +11,6 @@ import ( "bytes" "io" "io/ioutil" - "math" "strconv" "strings" "time" @@ -23,9 +22,9 @@ import ( // and then it can be treated as an io.Reader to access the file's data. type Reader struct { r io.Reader - pad int64 // amount of padding (ignored) after current file entry - curr numBytesReader // reader for current file entry - blk block // buffer to use as temporary local storage + pad int64 // Amount of padding (ignored) after current file entry + curr fileReader // Reader for current file entry + blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Reader to @@ -33,66 +32,17 @@ type Reader struct { err error } -// A numBytesReader is an io.Reader with a numBytes method, returning the number -// of bytes remaining in the underlying encoded data. -type numBytesReader interface { +type fileReader interface { io.Reader - numBytes() int64 -} + fileState -// A regFileReader is a numBytesReader for reading file data from a tar archive. -type regFileReader struct { - r io.Reader // underlying reader - nb int64 // number of unread bytes for current file entry + Discard(n int64) (int64, error) } -// A sparseFileReader is a numBytesReader for reading sparse file data from a -// tar archive. -type sparseFileReader struct { - rfr numBytesReader // Reads the sparse-encoded file data - sp []sparseEntry // The sparse map for the file - pos int64 // Keeps track of file position - total int64 // Total size of the file -} - -// A sparseEntry holds a single entry in a sparse file's sparse map. -// -// Sparse files are represented using a series of sparseEntrys. -// Despite the name, a sparseEntry represents an actual data fragment that -// references data found in the underlying archive stream. All regions not -// covered by a sparseEntry are logically filled with zeros. -// -// For example, if the underlying raw file contains the 10-byte data: -// var compactData = "abcdefgh" -// -// And the sparse map has the following entries: -// var sp = []sparseEntry{ -// {offset: 2, numBytes: 5} // Data fragment for [2..7] -// {offset: 18, numBytes: 3} // Data fragment for [18..21] -// } -// -// Then the content of the resulting sparse file with a "real" size of 25 is: -// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 -type sparseEntry struct { - offset int64 // Starting position of the fragment - numBytes int64 // Length of the fragment -} - -// Keywords for GNU sparse files in a PAX extended header -const ( - paxGNUSparseNumBlocks = "GNU.sparse.numblocks" - paxGNUSparseOffset = "GNU.sparse.offset" - paxGNUSparseNumBytes = "GNU.sparse.numbytes" - paxGNUSparseMap = "GNU.sparse.map" - paxGNUSparseName = "GNU.sparse.name" - paxGNUSparseMajor = "GNU.sparse.major" - paxGNUSparseMinor = "GNU.sparse.minor" - paxGNUSparseSize = "GNU.sparse.size" - paxGNUSparseRealSize = "GNU.sparse.realsize" -) - // NewReader creates a new Reader reading from r. -func NewReader(r io.Reader) *Reader { return &Reader{r: r} } +func NewReader(r io.Reader) *Reader { + return &Reader{r: r, curr: ®FileReader{r, 0}} +} // Next advances to the next entry in the tar archive. // @@ -116,9 +66,15 @@ func (tr *Reader) next() (*Header, error) { // one or more "header files" until it finds a "normal file". loop: for { - if err := tr.skipUnread(); err != nil { + // Discard the remainder of the file and any padding. + if _, err := tr.curr.Discard(tr.curr.Remaining()); err != nil { return nil, err } + if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + return nil, err + } + tr.pad = 0 + hdr, rawHdr, err := tr.readHeader() if err != nil { return nil, err @@ -192,7 +148,7 @@ func (tr *Reader) handleRegularFile(hdr *Header) error { return ErrHeader } - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + tr.pad = blockPadding(nb) tr.curr = ®FileReader{r: tr.r, nb: nb} return nil } @@ -200,87 +156,70 @@ func (tr *Reader) handleRegularFile(hdr *Header) error { // handleSparseFile checks if the current file is a sparse format of any type // and sets the curr reader appropriately. func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { - var sp []sparseEntry + var spd sparseDatas var err error if hdr.Typeflag == TypeGNUSparse { - sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) - if err != nil { - return err - } + spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) } else { - sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - return err - } + spd, err = tr.readGNUSparsePAXHeaders(hdr, extHdrs) } // If sp is non-nil, then this is a sparse file. - // Note that it is possible for len(sp) to be zero. - if sp != nil { - tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) + // Note that it is possible for len(sp) == 0. + if err == nil && spd != nil { + if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { + return ErrHeader + } + sph := invertSparseEntries(spd, hdr.Size) + tr.curr = &sparseFileReader{tr.curr, sph, 0} + hdr.SparseHoles = append([]SparseEntry{}, sph...) } return err } -// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then -// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to -// be treated as a regular file. -func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { - var sparseFormat string +// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. +// If they are found, then this function reads the sparse map and returns it. +// This assumes that 0.0 headers have already been converted to 0.1 headers +// by the the PAX header parsing logic. +func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header, extHdrs map[string]string) (sparseDatas, error) { + // Identify the version of GNU headers. + var is1x0 bool + major, minor := extHdrs[paxGNUSparseMajor], extHdrs[paxGNUSparseMinor] + switch { + case major == "0" && (minor == "0" || minor == "1"): + is1x0 = false + case major == "1" && minor == "0": + is1x0 = true + case major != "" || minor != "": + return nil, nil // Unknown GNU sparse PAX version + case extHdrs[paxGNUSparseMap] != "": + is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess + default: + return nil, nil // Not a PAX format GNU sparse file. + } - // Check for sparse format indicators - major, majorOk := headers[paxGNUSparseMajor] - minor, minorOk := headers[paxGNUSparseMinor] - sparseName, sparseNameOk := headers[paxGNUSparseName] - _, sparseMapOk := headers[paxGNUSparseMap] - sparseSize, sparseSizeOk := headers[paxGNUSparseSize] - sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] + // Update hdr from GNU sparse PAX headers. + if name := extHdrs[paxGNUSparseName]; name != "" { + hdr.Name = name + } + size := extHdrs[paxGNUSparseSize] + if size == "" { + size = extHdrs[paxGNUSparseRealSize] + } + if size != "" { + n, err := strconv.ParseInt(size, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = n + } - // Identify which, if any, sparse format applies from which PAX headers are set - if majorOk && minorOk { - sparseFormat = major + "." + minor - } else if sparseNameOk && sparseMapOk { - sparseFormat = "0.1" - } else if sparseSizeOk { - sparseFormat = "0.0" + // Read the sparse map according to the appropriate format. + if is1x0 { + return readGNUSparseMap1x0(tr.curr) } else { - // Not a PAX format GNU sparse file. - return nil, nil + return readGNUSparseMap0x1(extHdrs) } - - // Check for unknown sparse format - if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { - return nil, nil - } - - // Update hdr from GNU sparse PAX headers - if sparseNameOk { - hdr.Name = sparseName - } - if sparseSizeOk { - realSize, err := strconv.ParseInt(sparseSize, 10, 64) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } else if sparseRealSizeOk { - realSize, err := strconv.ParseInt(sparseRealSize, 10, 64) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } - - // Set up the sparse map, according to the particular sparse format in use - var sp []sparseEntry - var err error - switch sparseFormat { - case "0.0", "0.1": - sp, err = readGNUSparseMap0x1(headers) - case "1.0": - sp, err = readGNUSparseMap1x0(tr.curr) - } - return sp, err } // mergePAX merges well known headers according to PAX standard. @@ -376,45 +315,6 @@ func parsePAX(r io.Reader) (map[string]string, error) { return extHdrs, nil } -// skipUnread skips any unread bytes in the existing file entry, as well as any -// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is -// encountered in the data portion; it is okay to hit io.EOF in the padding. -// -// Note that this function still works properly even when sparse files are being -// used since numBytes returns the bytes remaining in the underlying io.Reader. -func (tr *Reader) skipUnread() error { - dataSkip := tr.numBytes() // Number of data bytes to skip - totalSkip := dataSkip + tr.pad // Total number of bytes to skip - tr.curr, tr.pad = nil, 0 - - // If possible, Seek to the last byte before the end of the data section. - // Do this because Seek is often lazy about reporting errors; this will mask - // the fact that the tar stream may be truncated. We can rely on the - // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek - if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { - // Not all io.Seeker can actually Seek. For example, os.Stdin implements - // io.Seeker, but calling Seek always returns an error and performs - // no action. Thus, we try an innocent seek to the current position - // to see if Seek is really supported. - pos1, err := sr.Seek(0, io.SeekCurrent) - if err == nil { - // Seek seems supported, so perform the real Seek. - pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent) - if err != nil { - return err - } - seekSkipped = pos2 - pos1 - } - } - - copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if err == io.EOF && seekSkipped+copySkipped < dataSkip { - err = io.ErrUnexpectedEOF - } - return err -} - // readHeader reads the next block header and assumes that the underlying reader // is already aligned to a block boundary. It returns the raw block of the // header in case further processing is required. @@ -530,7 +430,7 @@ func (tr *Reader) readHeader() (*Header, *block, error) { // The Header.Size does not reflect the size of any extended headers used. // Thus, this function will read from the raw io.Reader to fetch extra headers. // This method mutates blk in the process. -func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { // Make sure that the input format is GNU. // Unfortunately, the STAR format also has a sparse header format that uses // the same type flag but has a completely different layout. @@ -543,8 +443,8 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e if p.err != nil { return nil, p.err } - var s sparseArray = blk.GNU().Sparse() - var sp = make([]sparseEntry, 0, s.MaxEntries()) + s := blk.GNU().Sparse() + spd := make(sparseDatas, 0, s.MaxEntries()) for { for i := 0; i < s.MaxEntries(); i++ { // This termination condition is identical to GNU and BSD tar. @@ -552,25 +452,22 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e break // Don't return, need to process extended headers (even if empty) } offset := p.parseNumeric(s.Entry(i).Offset()) - numBytes := p.parseNumeric(s.Entry(i).NumBytes()) + length := p.parseNumeric(s.Entry(i).Length()) if p.err != nil { return nil, p.err } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, SparseEntry{Offset: offset, Length: length}) } if s.IsExtended()[0] > 0 { // There are more entries. Read an extension header and parse its entries. - if _, err := io.ReadFull(tr.r, blk[:]); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } + if _, err := mustReadFull(tr.r, blk[:]); err != nil { return nil, err } s = blk.Sparse() continue } - return sp, nil // Done + return spd, nil // Done } } @@ -578,28 +475,27 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e // version 1.0. The format of the sparse map consists of a series of // newline-terminated numeric fields. The first field is the number of entries // and is always present. Following this are the entries, consisting of two -// fields (offset, numBytes). This function must stop reading at the end +// fields (offset, length). This function must stop reading at the end // boundary of the block containing the last newline. // // Note that the GNU manual says that numeric values should be encoded in octal // format. However, the GNU tar utility itself outputs these values in decimal. // As such, this library treats values as being encoded in decimal. -func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - var cntNewline int64 - var buf bytes.Buffer - var blk = make([]byte, blockSize) +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { + var ( + cntNewline int64 + buf bytes.Buffer + blk block + ) - // feedTokens copies data in numBlock chunks from r into buf until there are + // feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. - var feedTokens = func(cnt int64) error { - for cntNewline < cnt { - if _, err := io.ReadFull(r, blk); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } + feedTokens := func(n int64) error { + for cntNewline < n { + if _, err := mustReadFull(r, blk[:]); err != nil { return err } - buf.Write(blk) + buf.Write(blk[:]) for _, c := range blk { if c == '\n' { cntNewline++ @@ -611,10 +507,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { // nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. - var nextToken = func() string { + nextToken := func() string { cntNewline-- tok, _ := buf.ReadString('\n') - return tok[:len(tok)-1] // Cut off newline + return strings.TrimRight(tok, "\n") } // Parse for the number of entries. @@ -633,24 +529,21 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { if err := feedTokens(2 * numEntries); err != nil { return nil, err } - sp := make([]sparseEntry, 0, numEntries) + spd := make(sparseDatas, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) + length, err2 := strconv.ParseInt(nextToken(), 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, SparseEntry{Offset: offset, Length: length}) } - return sp, nil + return spd, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // version 0.1. The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { +func readGNUSparseMap0x1(extHdrs map[string]string) (sparseDatas, error) { // Get number of entries. // Use integer overflow resistant math to check this. numEntriesStr := extHdrs[paxGNUSparseNumBlocks] @@ -661,52 +554,42 @@ func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { // There should be two numbers in sparseMap for each entry. sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") + if len(sparseMap) == 1 && sparseMap[0] == "" { + sparseMap = sparseMap[:0] + } if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map. // numEntries is trusted now. - sp := make([]sparseEntry, 0, numEntries) - for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) - if err != nil { + spd := make(sparseDatas, 0, numEntries) + for len(sparseMap) >= 2 { + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, SparseEntry{Offset: offset, Length: length}) + sparseMap = sparseMap[2:] } - return sp, nil -} - -// numBytes returns the number of bytes left to read in the current file's entry -// in the tar archive, or 0 if there is no current file. -func (tr *Reader) numBytes() int64 { - if tr.curr == nil { - // No current file, so no bytes - return 0 - } - return tr.curr.numBytes() + return spd, nil } // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. // +// If the current file is sparse, then the regions marked as a sparse hole +// will read back NUL-bytes. +// // Calling Read on special types like TypeLink, TypeSymLink, TypeChar, -// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // the Header.Size claims. func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } - if tr.curr == nil { - return 0, io.EOF - } - n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err @@ -714,116 +597,210 @@ func (tr *Reader) Read(b []byte) (int, error) { return n, err } -func (rfr *regFileReader) Read(b []byte) (n int, err error) { - if rfr.nb == 0 { - // file consumed - return 0, io.EOF - } - if int64(len(b)) > rfr.nb { - b = b[0:rfr.nb] - } - n, err = rfr.r.Read(b) - rfr.nb -= int64(n) +// TODO(dsnet): Export the Reader.Discard method to assist in quickly +// skipping over sections of a file. This is especially useful: +// * when skipping through an underlying io.Reader that is also an io.Seeker. +// * when skipping over large holes in a sparse file. - if err == io.EOF && rfr.nb > 0 { - err = io.ErrUnexpectedEOF +// discard skips the next n bytes in the current file, +// returning the number of bytes discarded. +// If fewer than n bytes are discarded, it returns an non-nil error, +// which may be io.EOF if there are no more remaining bytes in the current file. +func (tr *Reader) discard(n int64) (int64, error) { + if tr.err != nil { + return 0, tr.err } - return -} - -// numBytes returns the number of bytes left to read in the file's data in the tar archive. -func (rfr *regFileReader) numBytes() int64 { - return rfr.nb -} - -// newSparseFileReader creates a new sparseFileReader, but validates all of the -// sparse entries before doing so. -func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { - if total < 0 { - return nil, ErrHeader // Total size cannot be negative - } - - // Validate all sparse entries. These are the same checks as performed by - // the BSD tar utility. - for i, s := range sp { - switch { - case s.offset < 0 || s.numBytes < 0: - return nil, ErrHeader // Negative values are never okay - case s.offset > math.MaxInt64-s.numBytes: - return nil, ErrHeader // Integer overflow with large length - case s.offset+s.numBytes > total: - return nil, ErrHeader // Region extends beyond the "real" size - case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: - return nil, ErrHeader // Regions can't overlap and must be in order - } - } - return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil -} - -// readHole reads a sparse hole ending at endOffset. -func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { - n64 := endOffset - sfr.pos - if n64 > int64(len(b)) { - n64 = int64(len(b)) - } - n := int(n64) - for i := 0; i < n; i++ { - b[i] = 0 - } - sfr.pos += n64 - return n -} - -// Read reads the sparse file data in expanded form. -func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - // Skip past all empty fragments. - for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { - sfr.sp = sfr.sp[1:] - } - - // If there are no more fragments, then it is possible that there - // is one last sparse hole. - if len(sfr.sp) == 0 { - // This behavior matches the BSD tar utility. - // However, GNU tar stops returning data even if sfr.total is unmet. - if sfr.pos < sfr.total { - return sfr.readHole(b, sfr.total), nil - } - return 0, io.EOF - } - - // In front of a data fragment, so read a hole. - if sfr.pos < sfr.sp[0].offset { - return sfr.readHole(b, sfr.sp[0].offset), nil - } - - // In a data fragment, so read from it. - // This math is overflow free since we verify that offset and numBytes can - // be safely added when creating the sparseFileReader. - endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment - bytesLeft := endPos - sfr.pos // Bytes left in fragment - if int64(len(b)) > bytesLeft { - b = b[:bytesLeft] - } - - n, err = sfr.rfr.Read(b) - sfr.pos += int64(n) - if err == io.EOF { - if sfr.pos < endPos { - err = io.ErrUnexpectedEOF // There was supposed to be more data - } else if sfr.pos < sfr.total { - err = nil // There is still an implicit sparse hole at the end - } - } - - if sfr.pos == endPos { - sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it + n, err := tr.curr.Discard(n) + if err != nil && err != io.EOF { + tr.err = err } return n, err } -// numBytes returns the number of bytes left to read in the sparse file's -// sparse-encoded data in the tar archive. -func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.numBytes() +// regFileReader is a fileReader for reading data from a regular file entry. +type regFileReader struct { + r io.Reader // Underlying Reader + nb int64 // Number of remaining bytes to read +} + +func (fr *regFileReader) Read(b []byte) (int, error) { + if int64(len(b)) > fr.nb { + b = b[:fr.nb] + } + n, err := fr.r.Read(b) + fr.nb -= int64(n) + switch { + case err == io.EOF && fr.nb > 0: + return n, io.ErrUnexpectedEOF + case err == nil && fr.nb == 0: + return n, io.EOF + default: + return n, err + } +} + +func (fr *regFileReader) Discard(n int64) (int64, error) { + overread := n > fr.Remaining() + if overread { + n = fr.Remaining() + } + + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := fr.r.(io.Seeker); ok && n > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if pos1 >= 0 && err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(n-1, io.SeekCurrent) + if pos2 < 0 || err != nil { + return 0, err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err := io.CopyN(ioutil.Discard, fr.r, n-seekSkipped) + discarded := seekSkipped + copySkipped + fr.nb -= discarded + switch { + case err == io.EOF && discarded < n: + return discarded, io.ErrUnexpectedEOF + case err == nil && overread: + return discarded, io.EOF + default: + return discarded, err + } +} + +func (rf regFileReader) Remaining() int64 { + return rf.nb +} + +// sparseFileReader is a fileReader for reading data from a sparse file entry. +type sparseFileReader struct { + fr fileReader // Underlying fileReader + sp sparseHoles // Normalized list of sparse holes + pos int64 // Current position in sparse file +} + +func (sr *sparseFileReader) Read(b []byte) (n int, err error) { + finished := int64(len(b)) >= sr.Remaining() + if finished { + b = b[:sr.Remaining()] + } + + b0 := b + endPos := sr.pos + int64(len(b)) + for endPos > sr.pos && err == nil { + var nf int // Bytes read in fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + bf := b[:min(int64(len(b)), holeStart-sr.pos)] + nf, err = tryReadFull(sr.fr, bf) + } else { // In a hole fragment + bf := b[:min(int64(len(b)), holeEnd-sr.pos)] + nf, err = tryReadFull(zeroReader{}, bf) + } + b = b[nf:] + sr.pos += int64(nf) + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.Remaining() == 0 && sr.fr.Remaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case finished: + return n, io.EOF + default: + return n, nil + } +} + +func (sr *sparseFileReader) Discard(n int64) (int64, error) { + overread := n > sr.Remaining() + if overread { + n = sr.Remaining() + } + + var realDiscard int64 // Number of real data bytes to discard + endPos := sr.pos + n + for endPos > sr.pos { + var nf int64 // Size of fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + nf = min(endPos-sr.pos, holeStart-sr.pos) + realDiscard += nf + } else { // In a hole fragment + nf = min(endPos-sr.pos, holeEnd-sr.pos) + } + sr.pos += nf + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + _, err := sr.fr.Discard(realDiscard) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.Remaining() == 0 && sr.fr.Remaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case overread: + return n, io.EOF + default: + return n, nil + } +} + +func (sr sparseFileReader) Remaining() int64 { + return sr.sp[len(sr.sp)-1].endOffset() - sr.pos +} + +type zeroReader struct{} + +func (zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} + +// mustReadFull is like io.ReadFull except it returns +// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. +func mustReadFull(r io.Reader, b []byte) (int, error) { + n, err := tryReadFull(r, b) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err +} + +// tryReadFull is like io.ReadFull except it returns +// io.EOF when it is hit before len(b) bytes are read. +func tryReadFull(r io.Reader, b []byte) (n int, err error) { + for len(b) > n && err == nil { + var nn int + nn, err = r.Read(b[n:]) + n += nn + } + if len(b) == n && err == io.EOF { + err = nil + } + return n, err } diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index 42d4ab7e14..9b7896132a 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -14,6 +14,7 @@ import ( "os" "path" "reflect" + "strconv" "strings" "testing" "time" @@ -67,6 +68,23 @@ func TestReader(t *testing.T) { Gname: "david", Devmajor: 0, Devminor: 0, + SparseHoles: []SparseEntry{ + {0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1}, + {16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1}, + {30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1}, + {44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1}, + {58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1}, + {72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1}, + {86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1}, + {100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1}, + {112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1}, + {124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1}, + {136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1}, + {148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1}, + {160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1}, + {172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1}, + {184, 1}, {186, 1}, {188, 1}, {190, 10}, + }, }, { Name: "sparse-posix-0.0", Mode: 420, @@ -80,6 +98,23 @@ func TestReader(t *testing.T) { Gname: "david", Devmajor: 0, Devminor: 0, + SparseHoles: []SparseEntry{ + {0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1}, + {16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1}, + {30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1}, + {44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1}, + {58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1}, + {72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1}, + {86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1}, + {100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1}, + {112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1}, + {124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1}, + {136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1}, + {148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1}, + {160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1}, + {172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1}, + {184, 1}, {186, 1}, {188, 1}, {190, 10}, + }, }, { Name: "sparse-posix-0.1", Mode: 420, @@ -93,6 +128,23 @@ func TestReader(t *testing.T) { Gname: "david", Devmajor: 0, Devminor: 0, + SparseHoles: []SparseEntry{ + {0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1}, + {16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1}, + {30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1}, + {44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1}, + {58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1}, + {72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1}, + {86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1}, + {100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1}, + {112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1}, + {124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1}, + {136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1}, + {148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1}, + {160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1}, + {172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1}, + {184, 1}, {186, 1}, {188, 1}, {190, 10}, + }, }, { Name: "sparse-posix-1.0", Mode: 420, @@ -106,6 +158,23 @@ func TestReader(t *testing.T) { Gname: "david", Devmajor: 0, Devminor: 0, + SparseHoles: []SparseEntry{ + {0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1}, + {16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1}, + {30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1}, + {44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1}, + {58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1}, + {72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1}, + {86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1}, + {100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1}, + {112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1}, + {124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1}, + {136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1}, + {148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1}, + {160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1}, + {172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1}, + {184, 1}, {186, 1}, {188, 1}, {190, 10}, + }, }, { Name: "end", Mode: 420, @@ -314,17 +383,18 @@ func TestReader(t *testing.T) { AccessTime: time.Unix(1441974501, 0), ChangeTime: time.Unix(1441973436, 0), }, { - Name: "test2/sparse", - Mode: 33188, - Uid: 1000, - Gid: 1000, - Size: 536870912, - ModTime: time.Unix(1441973427, 0), - Typeflag: 'S', - Uname: "rawr", - Gname: "dsnet", - AccessTime: time.Unix(1441991948, 0), - ChangeTime: time.Unix(1441973436, 0), + Name: "test2/sparse", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 536870912, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'S', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441991948, 0), + ChangeTime: time.Unix(1441973436, 0), + SparseHoles: []SparseEntry{{0, 536870912}}, }}, }, { // Matches the behavior of GNU and BSD tar utilities. @@ -555,375 +625,6 @@ func TestPartialRead(t *testing.T) { } } -func TestSparseFileReader(t *testing.T) { - vectors := []struct { - realSize int64 // Real size of the output file - sparseMap []sparseEntry // Input sparse map - sparseData string // Input compact data - expected string // Expected output data - err error // Expected error outcome - }{{ - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - sparseData: "abcde", - expected: "ab\x00\x00\x00cde", - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 0, numBytes: 2}, - {offset: 5, numBytes: 3}, - }, - sparseData: "abcde", - expected: "ab\x00\x00\x00cde\x00\x00", - }, { - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de", - }, { - realSize: 8, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 0}, - {offset: 6, numBytes: 0}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de", - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de\x00\x00", - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - {offset: 8, numBytes: 0}, - }, - sparseData: "abcde", - expected: "\x00abc\x00\x00de\x00\x00", - }, { - realSize: 2, - sparseMap: []sparseEntry{}, - sparseData: "", - expected: "\x00\x00", - }, { - realSize: -2, - sparseMap: []sparseEntry{}, - err: ErrHeader, - }, { - realSize: -10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 2}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: 5}, - }, - sparseData: "abcde", - err: io.ErrUnexpectedEOF, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 6, numBytes: -5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 35, - sparseMap: []sparseEntry{ - {offset: math.MaxInt64, numBytes: 3}, - {offset: 6, numBytes: -5}, - }, - sparseData: "abcde", - err: ErrHeader, - }, { - realSize: 10, - sparseMap: []sparseEntry{ - {offset: 1, numBytes: 3}, - {offset: 2, numBytes: 2}, - }, - sparseData: "abcde", - err: ErrHeader, - }} - - for i, v := range vectors { - r := bytes.NewReader([]byte(v.sparseData)) - rfr := ®FileReader{r: r, nb: int64(len(v.sparseData))} - - var ( - sfr *sparseFileReader - err error - buf []byte - ) - - sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize) - if err != nil { - goto fail - } - if sfr.numBytes() != int64(len(v.sparseData)) { - t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData)) - } - buf, err = ioutil.ReadAll(sfr) - if err != nil { - goto fail - } - if string(buf) != v.expected { - t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected) - } - if sfr.numBytes() != 0 { - t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0) - } - - fail: - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } - } -} - -func TestReadOldGNUSparseMap(t *testing.T) { - const ( - t00 = "00000000000\x0000000000000\x00" - t11 = "00000000001\x0000000000001\x00" - t12 = "00000000001\x0000000000002\x00" - t21 = "00000000002\x0000000000001\x00" - ) - - mkBlk := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block { - var blk block - copy(blk.GNU().RealSize(), size) - copy(blk.GNU().Sparse().Entry(0), sp0) - copy(blk.GNU().Sparse().Entry(1), sp1) - copy(blk.GNU().Sparse().Entry(2), sp2) - copy(blk.GNU().Sparse().Entry(3), sp3) - copy(blk.GNU().Sparse().IsExtended(), ext) - if format != formatUnknown { - blk.SetFormat(format) - } - return &blk - } - - vectors := []struct { - data string // Input data - rawHdr *block // Input raw header - want []sparseEntry // Expected sparse entries to be outputted - err error // Expected error to be returned - }{ - {"", mkBlk("", "", "", "", "", "", formatUnknown), nil, ErrHeader}, - {"", mkBlk("1234", "fewa", "", "", "", "", formatGNU), nil, ErrHeader}, - {"", mkBlk("0031", "", "", "", "", "", formatGNU), nil, nil}, - {"", mkBlk("1234", t00, t11, "", "", "", formatGNU), - []sparseEntry{{0, 0}, {1, 1}}, nil}, - {"", mkBlk("1234", t11, t12, t21, t11, "", formatGNU), - []sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil}, - {"", mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), - []sparseEntry{}, io.ErrUnexpectedEOF}, - {t11 + t11, - mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), - []sparseEntry{}, io.ErrUnexpectedEOF}, - {t11 + t21 + strings.Repeat("\x00", 512), - mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU), - []sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil}, - } - - for i, v := range vectors { - tr := Reader{r: strings.NewReader(v.data)} - hdr := new(Header) - got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr) - if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { - t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } - } -} - -func TestReadGNUSparseMap0x1(t *testing.T) { - const ( - maxUint = ^uint(0) - maxInt = int(maxUint >> 1) - ) - var ( - big1 = fmt.Sprintf("%d", int64(maxInt)) - big2 = fmt.Sprintf("%d", (int64(maxInt)/2)+1) - big3 = fmt.Sprintf("%d", (int64(maxInt) / 3)) - ) - - vectors := []struct { - extHdrs map[string]string // Input data - sparseMap []sparseEntry // Expected sparse entries to be outputted - err error // Expected errors that may be raised - }{{ - extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"}, - err: ErrHeader, - }, { - extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "}, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big1, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big2, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: big3, - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0.5,5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5.5,10,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5", - }, - err: ErrHeader, - }, { - extHdrs: map[string]string{ - paxGNUSparseNumBlocks: "4", - paxGNUSparseMap: "0,5,10,5,20,5,30,5", - }, - sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, - }} - - for i, v := range vectors { - sp, err := readGNUSparseMap0x1(v.extHdrs) - if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { - t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } - } -} - -func TestReadGNUSparseMap1x0(t *testing.T) { - sp := []sparseEntry{{1, 2}, {3, 4}} - for i := 0; i < 98; i++ { - sp = append(sp, sparseEntry{54321, 12345}) - } - - vectors := []struct { - input string // Input data - sparseMap []sparseEntry // Expected sparse entries to be outputted - cnt int // Expected number of bytes read - err error // Expected errors that may be raised - }{{ - input: "", - cnt: 0, - err: io.ErrUnexpectedEOF, - }, { - input: "ab", - cnt: 2, - err: io.ErrUnexpectedEOF, - }, { - input: strings.Repeat("\x00", 512), - cnt: 512, - err: io.ErrUnexpectedEOF, - }, { - input: strings.Repeat("\x00", 511) + "\n", - cnt: 512, - err: ErrHeader, - }, { - input: strings.Repeat("\n", 512), - cnt: 512, - err: ErrHeader, - }, { - input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512), - sparseMap: []sparseEntry{}, - cnt: 512, - }, { - input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510), - sparseMap: []sparseEntry{}, - cnt: 1024, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506), - sparseMap: []sparseEntry{{2, 3}}, - cnt: 1536, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509), - cnt: 1536, - err: ErrHeader, - }, { - input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508), - cnt: 1536, - err: io.ErrUnexpectedEOF, - }, { - input: "-1\n2\n\n" + strings.Repeat("\x00", 506), - cnt: 512, - err: ErrHeader, - }, { - input: "1\nk\n2\n" + strings.Repeat("\x00", 506), - cnt: 512, - err: ErrHeader, - }, { - input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512), - cnt: 2560, - sparseMap: sp, - }} - - for i, v := range vectors { - r := strings.NewReader(v.input) - sp, err := readGNUSparseMap1x0(r) - if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) { - t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap) - } - if numBytes := len(v.input) - r.Len(); numBytes != v.cnt { - t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt) - } - if err != v.err { - t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err) - } - } -} - func TestUninitializedRead(t *testing.T) { f, err := os.Open("testdata/gnu.tar") if err != nil { @@ -1192,3 +893,539 @@ func TestParsePAX(t *testing.T) { } } } + +func TestReadOldGNUSparseMap(t *testing.T) { + populateSparseMap := func(sa sparseArray, sps []string) []string { + for i := 0; len(sps) > 0 && i < sa.MaxEntries(); i++ { + copy(sa.Entry(i), sps[0]) + sps = sps[1:] + } + if len(sps) > 0 { + copy(sa.IsExtended(), "\x80") + } + return sps + } + + makeInput := func(format int, size string, sps ...string) (out []byte) { + // Write the initial GNU header. + var blk block + gnu := blk.GNU() + sparse := gnu.Sparse() + copy(gnu.RealSize(), size) + sps = populateSparseMap(sparse, sps) + if format != formatUnknown { + blk.SetFormat(format) + } + out = append(out, blk[:]...) + + // Write extended sparse blocks. + for len(sps) > 0 { + var blk block + sps = populateSparseMap(blk.Sparse(), sps) + out = append(out, blk[:]...) + } + return out + } + + makeSparseStrings := func(sp []SparseEntry) (out []string) { + var f formatter + for _, s := range sp { + var b [24]byte + f.formatNumeric(b[:12], s.Offset) + f.formatNumeric(b[12:], s.Length) + out = append(out, string(b[:])) + } + return out + } + + vectors := []struct { + input []byte + wantMap sparseDatas + wantSize int64 + wantErr error + }{{ + input: makeInput(formatUnknown, ""), + wantErr: ErrHeader, + }, { + input: makeInput(formatGNU, "1234", "fewa"), + wantSize: 01234, + wantErr: ErrHeader, + }, { + input: makeInput(formatGNU, "0031"), + wantSize: 031, + }, { + input: makeInput(formatGNU, "80"), + wantErr: ErrHeader, + }, { + input: makeInput(formatGNU, "1234", + makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(formatGNU, "1234", + append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(formatGNU, "3333", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + wantSize: 03333, + }, { + input: makeInput(formatGNU, "", + append(append( + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}), + []string{"", ""}...), + makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + }, { + input: makeInput(formatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(formatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(formatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}}, + }, { + input: makeInput(formatGNU, "", + makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...), + wantMap: sparseDatas{{10 << 30, 512}, {20 << 30, 512}}, + }} + + for i, v := range vectors { + var blk block + var hdr Header + v.input = v.input[copy(blk[:], v.input):] + tr := Reader{r: bytes.NewReader(v.input)} + got, err := tr.readOldGNUSparseMap(&hdr, &blk) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + } +} + +func TestReadGNUSparsePAXHeaders(t *testing.T) { + padInput := func(s string) string { + return s + string(zeroBlock[:blockPadding(int64(len(s)))]) + } + + vectors := []struct { + inputData string + inputHdrs map[string]string + wantMap sparseDatas + wantSize int64 + wantName string + wantErr error + }{{ + inputHdrs: nil, + wantErr: nil, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10), + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4\x00", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0, 1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,02,3", + paxGNUSparseRealSize: "4321", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 4321, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,one1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + paxGNUSparseSize: "1234", + paxGNUSparseRealSize: "4321", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 1234, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "1", + paxGNUSparseMap: "10737418240,512", + paxGNUSparseSize: "10737418240", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{10737418240, 512}}, + wantSize: 10737418240, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "0", + paxGNUSparseMap: "", + }, + wantMap: sparseDatas{}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "1", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "1", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0\n")[:blockSize-1] + "#", + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("ab\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput("1\n2\n3\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{2, 3}}, + }, { + inputData: padInput("1\n2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("1\n2\n\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: string(zeroBlock[:]) + padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 1}}, + }, { + inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 2}}, + }, { + inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{10737418240, 512}, {21474836480, 512}}, + }, { + inputData: padInput("100\n" + func() string { + var ss []string + for i := 0; i < 100; i++ { + ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512)) + } + return strings.Join(ss, "") + }()), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: func() (spd sparseDatas) { + for i := 0; i < 100; i++ { + spd = append(spd, SparseEntry{int64(i) << 30, 512}) + } + return spd + }(), + }} + + for i, v := range vectors { + var hdr Header + r := strings.NewReader(v.inputData + "#") // Add canary byte + tr := Reader{curr: ®FileReader{r, int64(r.Len())}} + got, err := tr.readGNUSparsePAXHeaders(&hdr, v.inputHdrs) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + if hdr.Name != v.wantName { + t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName) + } + if v.wantErr == nil && r.Len() == 0 { + t.Errorf("test %d, canary byte unexpectedly consumed", i) + } + } +} + +func TestFileReader(t *testing.T) { + type ( + testRead struct { // ReadN(cnt) == (wantStr, wantErr) + cnt int + wantStr string + wantErr error + } + testDiscard struct { // Discard(cnt) == (wantCnt, wantErr) + cnt int64 + wantCnt int64 + wantErr error + } + testRemaining struct { // Remaining() == wantCnt + wantCnt int64 + } + testFnc interface{} // testRead | testDiscard | testRemaining + ) + + makeReg := func(s string, n int) fileReader { + return ®FileReader{strings.NewReader(s), int64(n)} + } + makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader { + if !validateSparseEntries(spd, size) { + t.Fatalf("invalid sparse map: %v", spd) + } + sph := invertSparseEntries(append([]SparseEntry{}, spd...), size) + return &sparseFileReader{fr, sph, 0} + } + + vectors := []struct { + fr fileReader + tests []testFnc + }{{ + fr: makeReg("", 0), + tests: []testFnc{ + testRemaining{0}, + testRead{0, "", io.EOF}, + testRead{1, "", io.EOF}, + testDiscard{0, 0, nil}, + testDiscard{1, 0, io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeReg("", 1), + tests: []testFnc{ + testRemaining{1}, + testRead{0, "", io.ErrUnexpectedEOF}, + testRead{5, "", io.ErrUnexpectedEOF}, + testDiscard{0, 0, nil}, + testDiscard{1, 0, io.ErrUnexpectedEOF}, + testRemaining{1}, + }, + }, { + fr: makeReg("hello", 5), + tests: []testFnc{ + testRemaining{5}, + testRead{5, "hello", io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeReg("hello, world", 50), + tests: []testFnc{ + testRemaining{50}, + testDiscard{7, 7, nil}, + testRemaining{43}, + testRead{5, "world", nil}, + testRemaining{38}, + testDiscard{1, 0, io.ErrUnexpectedEOF}, + testRead{1, "", io.ErrUnexpectedEOF}, + testRemaining{38}, + }, + }, { + fr: makeReg("hello, world", 5), + tests: []testFnc{ + testRemaining{5}, + testRead{0, "", nil}, + testRead{4, "hell", nil}, + testRemaining{1}, + testDiscard{5, 1, io.EOF}, + testRemaining{0}, + testDiscard{5, 0, io.EOF}, + testRead{0, "", io.EOF}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8), + tests: []testFnc{ + testRemaining{8}, + testRead{3, "ab\x00", nil}, + testRead{10, "\x00\x00cde", io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8), + tests: []testFnc{ + testRemaining{8}, + testDiscard{100, 8, io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10), + tests: []testFnc{ + testRemaining{10}, + testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 3}}, 10), + tests: []testFnc{ + testRemaining{10}, + testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, + testRemaining{4}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8), + tests: []testFnc{ + testRemaining{8}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8), + tests: []testFnc{ + testRemaining{8}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10), + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10), + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + fr: makeSparse(makeReg("", 0), sparseDatas{}, 2), + tests: []testFnc{ + testRead{100, "\x00\x00", io.EOF}, + }, + }, { + fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00", io.ErrUnexpectedEOF}, + }, + }, { + fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00ab", errMissData}, + }, + }, { + fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00ab", io.ErrUnexpectedEOF}, + }, + }, { + fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", errMissData}, + }, + }, { + fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, + }, + }, { + fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", errMissData}, + }, + }, { + fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, + }, + }, { + fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRemaining{15}, + testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, + testDiscard{100, 0, errUnrefData}, + testRemaining{0}, + }, + }, { + fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15), + tests: []testFnc{ + testRemaining{15}, + testDiscard{100, 15, errUnrefData}, + testRead{100, "", errUnrefData}, + testRemaining{0}, + }, + }} + + for i, v := range vectors { + for j, tf := range v.tests { + switch tf := tf.(type) { + case testRead: + b := make([]byte, tf.cnt) + n, err := v.fr.Read(b) + if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { + t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) + } + case testDiscard: + got, err := v.fr.Discard(tf.cnt) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr) + } + case testRemaining: + got := v.fr.Remaining() + if got != tf.wantCnt { + t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + } +} diff --git a/src/archive/tar/tar_test.go b/src/archive/tar/tar_test.go index c64314491d..736f523b00 100644 --- a/src/archive/tar/tar_test.go +++ b/src/archive/tar/tar_test.go @@ -19,6 +19,116 @@ import ( "time" ) +func equalSparseEntries(x, y []SparseEntry) bool { + return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y) +} + +func TestSparseEntries(t *testing.T) { + vectors := []struct { + in []SparseEntry + size int64 + + wantValid bool // Result of validateSparseEntries + wantAligned []SparseEntry // Result of alignSparseEntries + wantInverted []SparseEntry // Result of invertSparseEntries + }{{ + in: []SparseEntry{}, size: 0, + wantValid: true, + wantInverted: []SparseEntry{{0, 0}}, + }, { + in: []SparseEntry{}, size: 5000, + wantValid: true, + wantInverted: []SparseEntry{{0, 5000}}, + }, { + in: []SparseEntry{{0, 5000}}, size: 5000, + wantValid: true, + wantAligned: []SparseEntry{{0, 5000}}, + wantInverted: []SparseEntry{{5000, 0}}, + }, { + in: []SparseEntry{{1000, 4000}}, size: 5000, + wantValid: true, + wantAligned: []SparseEntry{{1024, 3976}}, + wantInverted: []SparseEntry{{0, 1000}, {5000, 0}}, + }, { + in: []SparseEntry{{0, 3000}}, size: 5000, + wantValid: true, + wantAligned: []SparseEntry{{0, 2560}}, + wantInverted: []SparseEntry{{3000, 2000}}, + }, { + in: []SparseEntry{{3000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []SparseEntry{{3072, 1928}}, + wantInverted: []SparseEntry{{0, 3000}, {5000, 0}}, + }, { + in: []SparseEntry{{2000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []SparseEntry{{2048, 1536}}, + wantInverted: []SparseEntry{{0, 2000}, {4000, 1000}}, + }, { + in: []SparseEntry{{0, 2000}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []SparseEntry{{0, 1536}, {8192, 1808}}, + wantInverted: []SparseEntry{{2000, 6000}, {10000, 0}}, + }, { + in: []SparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []SparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}}, + wantInverted: []SparseEntry{{10000, 0}}, + }, { + in: []SparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, {5000, 0}}, size: 5000, + wantValid: true, + wantInverted: []SparseEntry{{0, 5000}}, + }, { + in: []SparseEntry{{1, 0}}, size: 0, + wantValid: false, + }, { + in: []SparseEntry{{-1, 0}}, size: 100, + wantValid: false, + }, { + in: []SparseEntry{{0, -1}}, size: 100, + wantValid: false, + }, { + in: []SparseEntry{{0, 0}}, size: -100, + wantValid: false, + }, { + in: []SparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []SparseEntry{{1, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []SparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64, + wantValid: false, + }, { + in: []SparseEntry{{3, 3}}, size: 5, + wantValid: false, + }, { + in: []SparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3, + wantValid: false, + }, { + in: []SparseEntry{{1, 3}, {2, 2}}, size: 10, + wantValid: false, + }} + + for i, v := range vectors { + gotValid := validateSparseEntries(v.in, v.size) + if gotValid != v.wantValid { + t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid) + } + if !v.wantValid { + continue + } + gotAligned := alignSparseEntries(append([]SparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotAligned, v.wantAligned) { + t.Errorf("test %d, alignSparseEntries():\ngot %v\nwant %v", i, gotAligned, v.wantAligned) + } + gotInverted := invertSparseEntries(append([]SparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotInverted, v.wantInverted) { + t.Errorf("test %d, inverseSparseEntries():\ngot %v\nwant %v", i, gotInverted, v.wantInverted) + } + } +} + func TestFileInfoHeader(t *testing.T) { fi, err := os.Stat("testdata/small.txt") if err != nil { diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go index 3a8f102196..0d68d47129 100644 --- a/src/archive/tar/writer.go +++ b/src/archive/tar/writer.go @@ -250,7 +250,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { size = 0 } tw.nb = size - tw.pad = -size & (blockSize - 1) // blockSize is a power of two + tw.pad = blockPadding(size) return nil }