| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package tar implements access to tar archives. |
| // It aims to cover most of the variations, including those produced |
| // by GNU and BSD tars. |
| package tar |
| |
| import ( |
| "errors" |
| "fmt" |
| "math" |
| "os" |
| "path" |
| "strconv" |
| "time" |
| ) |
| |
| // BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit |
| // architectures. If a large value is encountered when decoding, the result |
| // stored in Header will be the truncated version. |
| |
// Sentinel errors exported by the package. The message of each one
// describes the condition it stands for.
var (
	// ErrHeader signals an invalid tar header.
	ErrHeader = errors.New("tar: invalid tar header")
	// ErrWriteTooLong signals a write that is too long.
	ErrWriteTooLong = errors.New("tar: write too long")
	// ErrFieldTooLong signals a header field that is too long.
	ErrFieldTooLong = errors.New("tar: header field too long")
	// ErrWriteAfterClose signals a write attempted after close.
	ErrWriteAfterClose = errors.New("tar: write after close")
)

// Package-internal errors concerning sparse files.
var (
	// errMissData: the sparse map refers to data that does not exist.
	errMissData = errors.New("tar: sparse file references non-existent data")
	// errUnrefData: the file holds data the sparse map never refers to.
	errUnrefData = errors.New("tar: sparse file contains unreferenced data")
	// errWriteHole: a non-NUL byte was written into a sparse hole.
	errWriteHole = errors.New("tar: write non-NUL byte in sparse hole")
)
| |
// Type flags, as stored in Header.Typeflag.
const (
	// Regular files.
	TypeReg  = '0'    // regular file
	TypeRegA = '\x00' // regular file

	// Links.
	TypeLink    = '1' // hard link
	TypeSymlink = '2' // symbolic link

	// Special nodes.
	TypeChar  = '3' // character device node
	TypeBlock = '4' // block device node
	TypeDir   = '5' // directory
	TypeFifo  = '6' // fifo node
	TypeCont  = '7' // reserved

	// PAX extended headers.
	TypeXHeader       = 'x' // extended header
	TypeXGlobalHeader = 'g' // global extended header

	// GNU extensions.
	TypeGNULongName = 'L' // Next file has a long name
	TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name
	TypeGNUSparse   = 'S' // sparse file
)
| |
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
type Header struct {
	Name       string    // name of header file entry
	Mode       int64     // permission and mode bits
	Uid        int       // user id of owner
	Gid        int       // group id of owner
	Size       int64     // length in bytes
	ModTime    time.Time // modified time
	Typeflag   byte      // type of header entry
	Linkname   string    // target name of link
	Uname      string    // user name of owner
	Gname      string    // group name of owner
	Devmajor   int64     // major number of character or block device
	Devminor   int64     // minor number of character or block device
	AccessTime time.Time // access time
	ChangeTime time.Time // status change time
	// Xattrs holds extended attributes. When it is non-empty, every entry is
	// emitted as a PAX record whose key carries the "SCHILY.xattr." prefix,
	// and the set of usable formats is limited to PAX.
	Xattrs map[string]string

	// SparseHoles represents a sequence of holes in a sparse file.
	//
	// A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
	// If TypeGNUSparse is set, then the format is GNU, otherwise
	// the PAX format with GNU-specific record is used.
	//
	// A sparse file consists of fragments of data, intermixed with holes
	// (described by this field). A hole is semantically a block of NUL-bytes,
	// but does not actually exist within the tar file.
	// The logical size of the file is stored in the Size field, while
	// the holes must be sorted in ascending order,
	// not overlap with each other, and not extend past the specified Size.
	SparseHoles []SparseEntry

	// Format specifies the format of the tar header.
	//
	// This is set by Reader.Next as a best-effort guess at the format.
	// Since the Reader liberally reads some non-compliant files,
	// it is possible for this to be FormatUnknown.
	//
	// When writing, if this is not FormatUnknown, then Writer.WriteHeader
	// uses this as the format to encode the header.
	Format Format
}
| |
// SparseEntry describes one fragment of a file: Length bytes that begin
// at byte position Offset.
type SparseEntry struct {
	Offset int64
	Length int64
}

// endOffset returns the position immediately following the fragment,
// that is Offset plus Length.
func (s SparseEntry) endOffset() int64 {
	end := s.Offset
	end += s.Length
	return end
}
| |
// A sparse file can be represented as either a sparseDatas or a sparseHoles.
// As long as the total size is known, they are equivalent and one can be
// converted to the other form and back. The various tar formats with sparse
// file support represent sparse files in the sparseDatas form. That is, they
// specify the fragments in the file that have data, and treat everything else
// as having zero bytes. As such, the encoding and decoding logic in this
// package deals with sparseDatas.
//
// However, the external API uses sparseHoles instead of sparseDatas because the
// zero value of sparseHoles logically represents a normal file (i.e., there are
// no holes in it). On the other hand, the zero value of sparseDatas implies
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 8-byte data:
//	var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
//	var spd sparseDatas = []SparseEntry{
//		{Offset: 2,  Length: 5},  // Data fragment for 2..6
//		{Offset: 18, Length: 3},  // Data fragment for 18..20
//	}
//	var sph sparseHoles = []SparseEntry{
//		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
//		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
//		{Offset: 21, Length: 4},  // Hole fragment for 21..24
//	}
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
//	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type (
	sparseDatas []SparseEntry
	sparseHoles []SparseEntry
)
| |
| // validateSparseEntries reports whether sp is a valid sparse map. |
| // It does not matter whether sp represents data fragments or hole fragments. |
| func validateSparseEntries(sp []SparseEntry, size int64) bool { |
| // Validate all sparse entries. These are the same checks as performed by |
| // the BSD tar utility. |
| if size < 0 { |
| return false |
| } |
| var pre SparseEntry |
| for _, cur := range sp { |
| switch { |
| case cur.Offset < 0 || cur.Length < 0: |
| return false // Negative values are never okay |
| case cur.Offset > math.MaxInt64-cur.Length: |
| return false // Integer overflow with large length |
| case cur.endOffset() > size: |
| return false // Region extends beyond the actual size |
| case pre.endOffset() > cur.Offset: |
| return false // Regions cannot overlap and must be in order |
| } |
| pre = cur |
| } |
| return true |
| } |
| |
| // alignSparseEntries mutates src and returns dst where each fragment's |
| // starting offset is aligned up to the nearest block edge, and each |
| // ending offset is aligned down to the nearest block edge. |
| // |
| // Even though the Go tar Reader and the BSD tar utility can handle entries |
| // with arbitrary offsets and lengths, the GNU tar utility can only handle |
| // offsets and lengths that are multiples of blockSize. |
| func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry { |
| dst := src[:0] |
| for _, s := range src { |
| pos, end := s.Offset, s.endOffset() |
| pos += blockPadding(+pos) // Round-up to nearest blockSize |
| if end != size { |
| end -= blockPadding(-end) // Round-down to nearest blockSize |
| } |
| if pos < end { |
| dst = append(dst, SparseEntry{Offset: pos, Length: end - pos}) |
| } |
| } |
| return dst |
| } |
| |
| // invertSparseEntries converts a sparse map from one form to the other. |
| // If the input is sparseHoles, then it will output sparseDatas and vice-versa. |
| // The input must have been already validated. |
| // |
| // This function mutates src and returns a normalized map where: |
| // * adjacent fragments are coalesced together |
| // * only the last fragment may be empty |
| // * the endOffset of the last fragment is the total size |
| func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry { |
| dst := src[:0] |
| var pre SparseEntry |
| for _, cur := range src { |
| if cur.Length == 0 { |
| continue // Skip empty fragments |
| } |
| pre.Length = cur.Offset - pre.Offset |
| if pre.Length > 0 { |
| dst = append(dst, pre) // Only add non-empty fragments |
| } |
| pre.Offset = cur.endOffset() |
| } |
| pre.Length = size - pre.Offset // Possibly the only empty fragment |
| return append(dst, pre) |
| } |
| |
// fileState exposes how much of the file currently being processed is left.
// Its implementations are not part of this section of the file.
type fileState interface {
	// Remaining reports the number of remaining bytes in the current file.
	// This count includes any sparse holes that may exist.
	Remaining() int64
}
| |
| // FileInfo returns an os.FileInfo for the Header. |
| func (h *Header) FileInfo() os.FileInfo { |
| return headerFileInfo{h} |
| } |
| |
| // allowedFormats determines which formats can be used. The value returned |
| // is the logical OR of multiple possible formats. If the value is |
| // FormatUnknown, then the input Header cannot be encoded. |
| // |
| // As a by-product of checking the fields, this function returns paxHdrs, which |
| // contain all fields that could not be directly encoded. |
| func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string) { |
| format = FormatUSTAR | FormatPAX | FormatGNU |
| paxHdrs = make(map[string]string) |
| |
| verifyString := func(s string, size int, paxKey string) { |
| // NUL-terminator is optional for path and linkpath. |
| // Technically, it is required for uname and gname, |
| // but neither GNU nor BSD tar checks for it. |
| tooLong := len(s) > size |
| allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath |
| if hasNUL(s) || (tooLong && !allowLongGNU) { |
| format.mustNotBe(FormatGNU) |
| } |
| if !isASCII(s) || tooLong { |
| canSplitUSTAR := paxKey == paxPath |
| if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { |
| format.mustNotBe(FormatUSTAR) |
| } |
| if paxKey == paxNone { |
| format.mustNotBe(FormatPAX) |
| } else { |
| paxHdrs[paxKey] = s |
| } |
| } |
| } |
| verifyNumeric := func(n int64, size int, paxKey string) { |
| if !fitsInBase256(size, n) { |
| format.mustNotBe(FormatGNU) |
| } |
| if !fitsInOctal(size, n) { |
| format.mustNotBe(FormatUSTAR) |
| if paxKey == paxNone { |
| format.mustNotBe(FormatPAX) |
| } else { |
| paxHdrs[paxKey] = strconv.FormatInt(n, 10) |
| } |
| } |
| } |
| verifyTime := func(ts time.Time, size int, paxKey string) { |
| if ts.IsZero() { |
| return // Always okay |
| } |
| needsNano := ts.Nanosecond() != 0 |
| hasFieldUSTAR := paxKey == paxMtime |
| if !fitsInBase256(size, ts.Unix()) || needsNano { |
| format.mustNotBe(FormatGNU) |
| } |
| if !fitsInOctal(size, ts.Unix()) || needsNano || !hasFieldUSTAR { |
| format.mustNotBe(FormatUSTAR) |
| if paxKey == paxNone { |
| format.mustNotBe(FormatPAX) |
| } else { |
| paxHdrs[paxKey] = formatPAXTime(ts) |
| } |
| } |
| } |
| |
| var blk block |
| v7 := blk.V7() |
| ustar := blk.USTAR() |
| gnu := blk.GNU() |
| verifyString(h.Name, len(v7.Name()), paxPath) |
| verifyString(h.Linkname, len(v7.LinkName()), paxLinkpath) |
| verifyString(h.Uname, len(ustar.UserName()), paxUname) |
| verifyString(h.Gname, len(ustar.GroupName()), paxGname) |
| verifyNumeric(h.Mode, len(v7.Mode()), paxNone) |
| verifyNumeric(int64(h.Uid), len(v7.UID()), paxUid) |
| verifyNumeric(int64(h.Gid), len(v7.GID()), paxGid) |
| verifyNumeric(h.Size, len(v7.Size()), paxSize) |
| verifyNumeric(h.Devmajor, len(ustar.DevMajor()), paxNone) |
| verifyNumeric(h.Devminor, len(ustar.DevMinor()), paxNone) |
| verifyTime(h.ModTime, len(v7.ModTime()), paxMtime) |
| verifyTime(h.AccessTime, len(gnu.AccessTime()), paxAtime) |
| verifyTime(h.ChangeTime, len(gnu.ChangeTime()), paxCtime) |
| |
| if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { |
| return FormatUnknown, nil |
| } |
| if len(h.Xattrs) > 0 { |
| for k, v := range h.Xattrs { |
| paxHdrs[paxXattr+k] = v |
| } |
| format.mayOnlyBe(FormatPAX) |
| } |
| for k, v := range paxHdrs { |
| // Forbid empty values (which represent deletion) since usage of |
| // them are non-sensible without global PAX record support. |
| if !validPAXRecord(k, v) || v == "" { |
| return FormatUnknown, nil // Invalid PAX key |
| } |
| } |
| if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { |
| if isHeaderOnlyType(h.Typeflag) { |
| return FormatUnknown, nil // Cannot have sparse data on header-only file |
| } |
| if !validateSparseEntries(h.SparseHoles, h.Size) { |
| return FormatUnknown, nil |
| } |
| if h.Typeflag == TypeGNUSparse { |
| format.mayOnlyBe(FormatGNU) |
| } else { |
| format.mustNotBe(FormatGNU) |
| } |
| format.mustNotBe(FormatUSTAR) |
| } |
| if wantFormat := h.Format; wantFormat != FormatUnknown { |
| if wantFormat.has(FormatPAX) { |
| wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too |
| } |
| format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted |
| } |
| return format, paxHdrs |
| } |
| |
| // headerFileInfo implements os.FileInfo. |
| type headerFileInfo struct { |
| h *Header |
| } |
| |
| func (fi headerFileInfo) Size() int64 { return fi.h.Size } |
| func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } |
| func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } |
| func (fi headerFileInfo) Sys() interface{} { return fi.h } |
| |
| // Name returns the base name of the file. |
| func (fi headerFileInfo) Name() string { |
| if fi.IsDir() { |
| return path.Base(path.Clean(fi.h.Name)) |
| } |
| return path.Base(fi.h.Name) |
| } |
| |
| // Mode returns the permission and mode bits for the headerFileInfo. |
| func (fi headerFileInfo) Mode() (mode os.FileMode) { |
| // Set file permission bits. |
| mode = os.FileMode(fi.h.Mode).Perm() |
| |
| // Set setuid, setgid and sticky bits. |
| if fi.h.Mode&c_ISUID != 0 { |
| // setuid |
| mode |= os.ModeSetuid |
| } |
| if fi.h.Mode&c_ISGID != 0 { |
| // setgid |
| mode |= os.ModeSetgid |
| } |
| if fi.h.Mode&c_ISVTX != 0 { |
| // sticky |
| mode |= os.ModeSticky |
| } |
| |
| // Set file mode bits. |
| // clear perm, setuid, setgid and sticky bits. |
| m := os.FileMode(fi.h.Mode) &^ 07777 |
| if m == c_ISDIR { |
| // directory |
| mode |= os.ModeDir |
| } |
| if m == c_ISFIFO { |
| // named pipe (FIFO) |
| mode |= os.ModeNamedPipe |
| } |
| if m == c_ISLNK { |
| // symbolic link |
| mode |= os.ModeSymlink |
| } |
| if m == c_ISBLK { |
| // device file |
| mode |= os.ModeDevice |
| } |
| if m == c_ISCHR { |
| // Unix character device |
| mode |= os.ModeDevice |
| mode |= os.ModeCharDevice |
| } |
| if m == c_ISSOCK { |
| // Unix domain socket |
| mode |= os.ModeSocket |
| } |
| |
| switch fi.h.Typeflag { |
| case TypeSymlink: |
| // symbolic link |
| mode |= os.ModeSymlink |
| case TypeChar: |
| // character device node |
| mode |= os.ModeDevice |
| mode |= os.ModeCharDevice |
| case TypeBlock: |
| // block device node |
| mode |= os.ModeDevice |
| case TypeDir: |
| // directory |
| mode |= os.ModeDir |
| case TypeFifo: |
| // fifo node |
| mode |= os.ModeNamedPipe |
| } |
| |
| return mode |
| } |
| |
// sysStat, if non-nil, populates h from system-dependent fields of fi.
//
// FileInfoHeader invokes it as its last step and returns the error it
// yields together with the header. Nothing in this section of the file
// assigns it; presumably a platform-specific file does (TODO confirm).
var sysStat func(fi os.FileInfo, h *Header) error
| |
// Bits recognized within Header.Mode.
const (
	// Mode constants from the USTAR spec:
	// See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	c_ISUID = 04000 // Set uid
	c_ISGID = 02000 // Set gid
	c_ISVTX = 01000 // Save text (sticky bit)

	// Common Unix mode constants; these are not defined in any common tar standard.
	// Header.FileInfo understands these, but FileInfoHeader will never produce these.
	c_ISFIFO = 010000  // FIFO
	c_ISCHR  = 020000  // Character special file
	c_ISDIR  = 040000  // Directory
	c_ISBLK  = 060000  // Block special file
	c_ISREG  = 0100000 // Regular file
	c_ISLNK  = 0120000 // Symbolic link
	c_ISSOCK = 0140000 // Socket
)
| |
// Keywords for the PAX Extended Header
const (
	paxNone = "" // no keyword: the field has no PAX record

	paxAtime    = "atime"
	paxCharset  = "charset"
	paxComment  = "comment"
	paxCtime    = "ctime" // please note that ctime is not a valid pax header.
	paxGid      = "gid"
	paxGname    = "gname"
	paxLinkpath = "linkpath"
	paxMtime    = "mtime"
	paxPath     = "path"
	paxSize     = "size"
	paxUid      = "uid"
	paxUname    = "uname"

	// Prefix put in front of every extended attribute name.
	paxXattr = "SCHILY.xattr."
)

// Keywords for GNU sparse files in a PAX extended header.
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
| |
| // FileInfoHeader creates a partially-populated Header from fi. |
| // If fi describes a symlink, FileInfoHeader records link as the link target. |
| // If fi describes a directory, a slash is appended to the name. |
| // Because os.FileInfo's Name method returns only the base name of |
| // the file it describes, it may be necessary to modify the Name field |
| // of the returned header to provide the full path name of the file. |
| func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { |
| if fi == nil { |
| return nil, errors.New("tar: FileInfo is nil") |
| } |
| fm := fi.Mode() |
| h := &Header{ |
| Name: fi.Name(), |
| ModTime: fi.ModTime(), |
| Mode: int64(fm.Perm()), // or'd with c_IS* constants later |
| } |
| switch { |
| case fm.IsRegular(): |
| h.Typeflag = TypeReg |
| h.Size = fi.Size() |
| case fi.IsDir(): |
| h.Typeflag = TypeDir |
| h.Name += "/" |
| case fm&os.ModeSymlink != 0: |
| h.Typeflag = TypeSymlink |
| h.Linkname = link |
| case fm&os.ModeDevice != 0: |
| if fm&os.ModeCharDevice != 0 { |
| h.Typeflag = TypeChar |
| } else { |
| h.Typeflag = TypeBlock |
| } |
| case fm&os.ModeNamedPipe != 0: |
| h.Typeflag = TypeFifo |
| case fm&os.ModeSocket != 0: |
| return nil, fmt.Errorf("tar: sockets not supported") |
| default: |
| return nil, fmt.Errorf("tar: unknown file mode %v", fm) |
| } |
| if fm&os.ModeSetuid != 0 { |
| h.Mode |= c_ISUID |
| } |
| if fm&os.ModeSetgid != 0 { |
| h.Mode |= c_ISGID |
| } |
| if fm&os.ModeSticky != 0 { |
| h.Mode |= c_ISVTX |
| } |
| // If possible, populate additional fields from OS-specific |
| // FileInfo fields. |
| if sys, ok := fi.Sys().(*Header); ok { |
| // This FileInfo came from a Header (not the OS). Use the |
| // original Header to populate all remaining fields. |
| h.Uid = sys.Uid |
| h.Gid = sys.Gid |
| h.Uname = sys.Uname |
| h.Gname = sys.Gname |
| h.AccessTime = sys.AccessTime |
| h.ChangeTime = sys.ChangeTime |
| if sys.Xattrs != nil { |
| h.Xattrs = make(map[string]string) |
| for k, v := range sys.Xattrs { |
| h.Xattrs[k] = v |
| } |
| } |
| if sys.Typeflag == TypeLink { |
| // hard link |
| h.Typeflag = TypeLink |
| h.Size = 0 |
| h.Linkname = sys.Linkname |
| } |
| if sys.SparseHoles != nil { |
| h.SparseHoles = append([]SparseEntry{}, sys.SparseHoles...) |
| } |
| } |
| if sysStat != nil { |
| return h, sysStat(fi, h) |
| } |
| return h, nil |
| } |
| |
| // isHeaderOnlyType checks if the given type flag is of the type that has no |
| // data section even if a size is specified. |
| func isHeaderOnlyType(flag byte) bool { |
| switch flag { |
| case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: |
| return true |
| default: |
| return false |
| } |
| } |
| |
// min returns the smaller of a and b.
func min(a, b int64) int64 {
	if b <= a {
		return b
	}
	return a
}