blob: e662971a4f0f2680e2670009c5fda3114281d494 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The tar package implements access to tar archives.
// It aims to cover most of the variations, including those produced
// by GNU and BSD tars (not yet started).
//
// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
package tar
// TODO(dsymonds):
// - Make it seekable.
// - Extensions.
import (
"bufio";
"bytes";
"io";
"os";
"strconv";
)
// HeaderError is the error recorded by readHeader when a header block is
// rejected: the checksum does not match, the end-of-archive marker is
// malformed, or a numeric field cannot be parsed.
var HeaderError os.Error = os.ErrorString("invalid tar header");
// A tar archive consists of a sequence of files.
// A Reader provides sequential access to the contents of a tar archive.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
//
// Example:
// tr := tar.NewReader(r);
// for {
// hdr, err := tr.Next();
// if err != nil {
// // handle error
// }
// if hdr == nil {
// // end of tar archive
// break
// }
// io.Copy(tr, somewhere);
// }
type Reader struct {
// r is the underlying stream that header blocks and entry data are read from.
r io.Reader;
// err holds the most recent error. Next performs no further work while it
// is non-nil, and Read overwrites it with the result of each underlying read.
err os.Error;
nb int64; // number of unread bytes for current file entry
pad int64; // amount of padding (ignored) after current file entry
}
// A Header represents a single header in a tar archive.
// Only some fields may be populated: readHeader always fills the fields
// from Name through Linkname, and fills the remaining ones only when it
// recognizes the header's magic value.
type Header struct {
// Name is the entry name. When a recognized header carries a non-empty
// prefix field, readHeader stores prefix + "/" + name here.
Name string;
// Mode, Uid, Gid and Mtime are parsed from octal text fields.
Mode int64;
Uid int64;
Gid int64;
// Size is parsed from an octal text field; readHeader uses it as the
// number of data bytes that belong to the entry.
Size int64;
Mtime int64;
// Typeflag is the raw type byte; compare it against the Type* constants.
Typeflag byte;
Linkname string;
// Uname and Gname are set only when the magic value is recognized.
Uname string;
Gname string;
// Devmajor and Devminor are parsed only when Typeflag is TypeChar or TypeBlock.
Devmajor int64;
Devminor int64;
// Atime and Ctime are parsed only for headers detected as the "star" format.
Atime int64;
Ctime int64;
}
// Forward declarations of methods that Next calls; their bodies appear
// further down in this file.
func (tr *Reader) skipUnread()
func (tr *Reader) readHeader() *Header
// NewReader returns a Reader that takes its archive data from r.
// The returned Reader has no current entry; call Next to advance to the first one.
func NewReader(r io.Reader) *Reader {
	tr := new(Reader);
	tr.r = r;
	return tr
}
// Next advances to the next entry in the tar archive.
// It first skips whatever is left of the current entry (data and padding)
// and then parses the following header block. Once an error has been
// recorded, Next returns a nil header together with that error and does
// no further work.
func (tr *Reader) Next() (*Header, os.Error) {
	if tr.err != nil {
		return nil, tr.err
	}
	tr.skipUnread();
	if tr.err != nil {
		return nil, tr.err
	}
	// Call readHeader before reading tr.err so the returned error reflects
	// the outcome of the header parse.
	hdr := tr.readHeader();
	return hdr, tr.err
}
const (
// blockSize is the size in bytes of a header block; entry data is padded
// to a multiple of it (see readHeader).
blockSize = 512;
// Values of Header.Typeflag, named after the entry kinds described in the
// tar format references listed at the top of this file.
// Regular file.
TypeReg = '0';
// Regular file, written with a NUL type byte.
TypeRegA = '\x00';
// Hard link.
TypeLink = '1';
// Symbolic link.
TypeSymlink = '2';
// Character device; readHeader parses Devmajor/Devminor for this type.
TypeChar = '3';
// Block device; readHeader parses Devmajor/Devminor for this type.
TypeBlock = '4';
// Directory.
TypeDir = '5';
// FIFO.
TypeFifo = '6';
// Contiguous file.
TypeCont = '7';
// Extended header.
TypeXHeader = 'x';
// Global extended header.
TypeXGlobalHeader = 'g';
)
// zeroBlock is a block of blockSize zero bytes; readHeader compares header
// blocks against it to recognize the end-of-archive marker.
var zeroBlock []byte = make([]byte, blockSize);
// cString interprets b as a NUL-terminated C-style string and returns the
// bytes before the first NUL. If b contains no NUL byte, the whole slice is
// returned as a string.
func cString(b []byte) string {
	for i := 0; i < len(b); i++ {
		if b[i] == 0 {
			return string(b[0:i])
		}
	}
	return string(b)
}
// octal parses b as an octal number stored in a fixed-width header field and
// returns its value. Fields may be padded with leading spaces and terminated
// by any mix of trailing spaces and NUL bytes (for example "000644 \x00" or
// "012345\x00 "), so that padding is stripped before parsing. Anything after
// an embedded NUL is ignored. If the remaining text is not a valid octal
// number, the parse error is stored in tr.err; the caller is expected to
// check tr.err.
func (tr *Reader) octal(b []byte) int64 {
	// Remove leading space padding.
	for len(b) > 0 && b[0] == ' ' {
		b = b[1:len(b)];
	}
	// Remove trailing terminators: spaces and NULs in any order.
	for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == 0) {
		b = b[0:len(b)-1];
	}
	x, err := strconv.Btoui64(cString(b), 8);
	if err != nil {
		tr.err = err;
	}
	return int64(x)
}
// ignoreWriter is a write sink that discards its input; skipUnread copies
// unwanted bytes into it.
type ignoreWriter struct {}

// Write reports all of b as written without storing any of it.
func (ignoreWriter) Write(b []byte) (n int, err os.Error) {
	n = len(b);
	return n, nil
}
// seeker is the method set skipUnread looks for on the underlying reader so
// that it can jump over unread data instead of reading it. skipUnread calls
// Seek with whence 1 (presumably "relative to the current offset" - confirm
// against the reader implementations in use).
type seeker interface {
Seek(offset int64, whence int) (ret int64, err os.Error);
}
// skipUnread skips any unread bytes in the existing file entry, as well as
// any alignment padding, leaving the underlying reader at the next header.
// If the underlying reader offers Seek, that is tried first; when the seek
// fails (for example a reader that has a Seek method but sits on a
// non-seekable stream) the bytes are read and discarded instead. The outcome
// is recorded in tr.err.
func (tr *Reader) skipUnread() {
	nr := tr.nb + tr.pad;	// number of bytes to skip
	tr.nb, tr.pad = 0, 0;
	// n only receives the count/offset results required by the calls below.
	var n int64;
	if sr, ok := tr.r.(seeker); ok {
		if n, tr.err = sr.Seek(nr, 1); tr.err == nil {
			return
		}
		// Seek is present but unusable; fall through to reading.
	}
	n, tr.err = io.Copyn(tr.r, ignoreWriter{}, nr);
}
// verifyChecksum reports whether the checksum stored in header[148:156]
// matches the sum of the header's bytes. It returns false if the stored
// value cannot be parsed (tr.err is then set by octal).
func (tr *Reader) verifyChecksum(header []byte) bool {
	given := tr.octal(header[148:156]);
	if tr.err != nil {
		return false
	}

	// The chksum field itself is summed as if it held eight space bytes.
	blanks := int64(' ' * 8);

	// POSIX specifies a sum of the unsigned byte values,
	// but the Sun tar uses signed byte values, so both sums are kept.
	var unsigned, signed int64;
	i := 0;
	for i < len(header) {
		if i == 148 {
			unsigned += blanks;
			signed += blanks;
			i += 8;
			continue
		}
		c := header[i];
		unsigned += int64(c);
		signed += int64(int8(c));
		i++;
	}

	switch given {
	case unsigned, signed:
		return true
	}
	return false
}
// slicer is a byte slice that is consumed from the front, one fixed-width
// field at a time.
type slicer []byte

// next returns the first n bytes of the slicer and advances the slicer past
// them. It panics if fewer than n bytes remain.
func (sp *slicer) next(n int) (b []byte) {
	buf := *sp;
	head := buf[0:n];
	tail := buf[n:len(buf)];
	*sp = tail;
	return head
}
// readHeader reads one blockSize-byte header block from the underlying
// reader and decodes it into a Header.
//
// It returns nil in three situations:
//   - the block could not be read (tr.err holds the read error);
//   - the block is all zero bytes: a second block is then read, and tr.err
//     is left nil if that one is all zero too (end of archive) or set to
//     HeaderError if it is not;
//   - the checksum does not match or a numeric field fails to parse
//     (tr.err is HeaderError).
//
// On success it also records how many data bytes (tr.nb) and padding bytes
// (tr.pad) follow the header.
func (tr *Reader) readHeader() *Header {
header := make([]byte, blockSize);
// n only receives the byte count from ReadFull; its value is not used.
var n int;
if n, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}
// Two blocks of zero bytes mark the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) {
if n, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}
if !bytes.Equal(header, zeroBlock[0:blockSize]) {
tr.err = HeaderError;
}
return nil
}
if !tr.verifyChecksum(header) {
tr.err = HeaderError;
return nil
}
// Unpack. The fields are laid out back to back, so each s.next(k) call
// consumes the next k bytes; the order of the calls below is significant.
hdr := new(Header);
s := slicer(header);
// TODO(dsymonds): The format of the header depends on the value of magic (hdr[257:262]),
// so use that value to do the correct parsing below.
hdr.Name = cString(s.next(100));
hdr.Mode = tr.octal(s.next(8));
hdr.Uid = tr.octal(s.next(8));
hdr.Gid = tr.octal(s.next(8));
hdr.Size = tr.octal(s.next(12));
hdr.Mtime = tr.octal(s.next(12));
s.next(8); // chksum
hdr.Typeflag = s.next(1)[0];
hdr.Linkname = cString(s.next(100));
// The remainder of the header depends on the value of magic.
magic := string(s.next(8)); // contains version field as well.
// format stays empty for an unrecognized magic, in which case none of the
// remaining fields are decoded.
var format string;
switch magic {
case "ustar\x0000": // POSIX tar (1003.1-1988)
// A "tar\x00" trailer in the last four bytes selects the star layout.
if string(header[508:512]) == "tar\x00" {
format = "star";
} else {
format = "posix";
}
case "ustar \x00": // old GNU tar
format = "gnu";
}
switch format {
case "posix", "gnu", "star":
hdr.Uname = cString(s.next(32));
hdr.Gname = cString(s.next(32));
// The device fields are always consumed to keep the slicer aligned, but
// only parsed for character and block devices.
devmajor := s.next(8);
devminor := s.next(8);
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
hdr.Devmajor = tr.octal(devmajor);
hdr.Devminor = tr.octal(devminor);
}
var prefix string;
switch format {
case "posix", "gnu":
prefix = cString(s.next(155));
case "star":
// The star layout has a shorter prefix followed by atime and ctime.
prefix = cString(s.next(131));
hdr.Atime = tr.octal(s.next(12));
hdr.Ctime = tr.octal(s.next(12));
}
if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name;
}
}
// Any octal parse failure above left its error in tr.err; report all of
// them uniformly as HeaderError.
if tr.err != nil {
tr.err = HeaderError;
return nil
}
// Maximum value of hdr.Size is 64 GB (12 octal digits),
// so there's no risk of int64 overflowing.
tr.nb = int64(hdr.Size);
// pad is the number of bytes needed to round nb up to a multiple of blockSize.
tr.pad = -tr.nb & (blockSize - 1); // blockSize is a power of two
return hdr
}
// Read reads from the current entry in the tar archive.
// It returns 0, nil when it reaches the end of that entry,
// until Next is called to advance to the next entry.
//
// At most the remaining bytes of the entry are read into b, so a read never
// runs into the padding or the next header. The error from the underlying
// read is returned and also recorded in the Reader.
func (tr *Reader) Read(b []uint8) (n int, err os.Error) {
	if tr.nb == 0 {
		// Entry exhausted (or no entry selected yet). Return without
		// touching the underlying reader, so that a zero-length read
		// cannot leave a spurious error behind for the next call to Next.
		return 0, nil
	}
	if int64(len(b)) > tr.nb {
		b = b[0:tr.nb];
	}
	n, err = tr.r.Read(b);
	tr.nb -= int64(n);
	tr.err = err;
	return
}