// blob: 8ac50e202f7a8da2bafbab170d294ab9269acb6e [file] [log] [blame] (source-viewer residue, not part of the original file)
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package archive implements reading of archive files generated by the Go
// toolchain.
package archive
import (
"bufio"
"bytes"
"cmd/internal/bio"
"cmd/internal/goobj"
"errors"
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
"time"
"unicode/utf8"
)
/*
The archive format is:
First, on a line by itself
!<arch>
Then zero or more file records. Each file record has a fixed-size one-line header
followed by data bytes followed by an optional padding byte. The header is:
%-16s%-12d%-6d%-6d%-8o%-10d`
name mtime uid gid mode size
(note the trailing backquote). The %-16s here means at most 16 *bytes* of
the name, and if shorter, space padded on the right.
*/
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	// Offset is the position in the file at which the data begins.
	Offset int64
	// Size is the length of the data in bytes.
	Size int64
}
// An Archive is an archive file together with the entries known to be in it.
// Parse fills Entries from the file contents; AddEntry appends to it as
// new records are written.
type Archive struct {
	// f is the underlying file the archive is read from or written to.
	f *os.File
	// Entries lists the file records of the archive, in file order.
	Entries []Entry
}
// File returns the file underlying the archive.
func (a *Archive) File() *os.File {
	return a.f
}
// An Entry describes one file record of an archive.
type Entry struct {
	// Name is the name of the entry. For records read by Parse it is the
	// header's name field with the trailing space padding removed.
	Name string
	// Type classifies the contents of the entry.
	Type EntryType
	// Mtime, Uid, Gid and Mode mirror the corresponding header fields.
	// Parse decodes them only when called with verbose set; otherwise
	// they are left zero.
	Mtime int64
	Uid   int
	Gid   int
	Mode  os.FileMode
	// Data locates the entry's contents (excluding the record header) in the file.
	Data
	Obj *GoObj // nil if this entry is not a Go object file
}
// An EntryType identifies the kind of data stored in an archive entry.
type EntryType int

const (
	// EntryPkgDef marks the package definition entry (named "__.PKGDEF").
	EntryPkgDef EntryType = iota
	// EntryGoObj marks an entry whose data starts with the Go object header.
	EntryGoObj
	// EntryNativeObj marks any other entry; its contents are not parsed.
	EntryNativeObj
)
// String renders the entry as one listing line: permission bits,
// uid/gid, size, modification time and name, in that order.
func (e *Entry) String() string {
	perm := (e.Mode & 0777).String()
	modified := time.Unix(e.Mtime, 0).Format(timeFormat)
	return fmt.Sprintf("%s %6d/%-6d %12d %s %s", perm, e.Uid, e.Gid, e.Size, modified, e.Name)
}
// A GoObj describes a Go object file found by the parser.
type GoObj struct {
	// TextHeader is the textual header of the object file, up to and
	// including the terminating "\n!\n".
	TextHeader []byte
	// Arch is the fourth whitespace-separated field of TextHeader,
	// or empty if the header has fewer than four fields.
	Arch string
	// Data locates the part of the object file that follows TextHeader.
	Data
}
const (
	// entryHeader is the fmt format used to write a file record header:
	// name, mtime, uid, gid, mode (octal) and size, followed by "`\n".
	entryHeader = "%s%-12d%-6d%-6d%-8o%-10d`\n"
	// In entryHeader the first entry, the name, is always printed as 16 bytes right-padded.
	// entryLen is therefore the total length in bytes of a formatted header.
	entryLen = 16 + 12 + 6 + 6 + 8 + 10 + 1 + 1
	// timeFormat is the time layout used by Entry.String for modification times.
	timeFormat = "Jan _2 15:04 2006"
)
var (
	// archiveHeader is the line that begins every archive file.
	archiveHeader = []byte("!<arch>\n")
	// archiveMagic is the two-byte terminator of every file record header.
	archiveMagic = []byte("`\n")
	goobjHeader = []byte("go objec") // truncated to size of archiveHeader

	// errCorruptArchive reports a malformed file record header.
	errCorruptArchive = errors.New("corrupt archive")
	// errTruncatedArchive is the error for a file record header that is cut short.
	errTruncatedArchive = errors.New("truncated archive")
	// errCorruptObject reports a Go object file whose header or magic is invalid.
	errCorruptObject = errors.New("corrupt object file")
	// errNotObject reports input that is neither an archive nor a Go object file.
	errNotObject = errors.New("unrecognized object file format")
)
// ErrGoObjOtherVersion is the error reported for a Go object file whose
// magic string belongs to a different version of the object format.
// magic holds the offending magic string.
type ErrGoObjOtherVersion struct {
	magic []byte
}

// Error implements the error interface; the message quotes the magic
// string that was found.
func (e ErrGoObjOtherVersion) Error() string {
	msg := fmt.Sprintf("go object of a different version: %q", e.magic)
	return msg
}
// An objReader is an object file reader.
type objReader struct {
	// a is the archive being populated while parsing.
	a *Archive
	// b is the buffered reader over the input file.
	b *bio.Reader
	// err is the first error recorded through the error method, if any.
	err error
	// offset is the position in the file of the next byte to be read.
	offset int64
	// limit is the offset of the end of the file; reads never go past it.
	limit int64
	// tmp is scratch space for record headers and for discarding skipped bytes.
	tmp [256]byte
}
// init prepares r to read from f, starting at f's current position.
// It records the current offset and the offset of the end of the file
// (which bounds all later reads) and wraps f in a buffered reader.
//
// A failing Seek is recorded through r.error instead of being dropped, so
// the first read reports the real cause (for example a non-seekable file)
// rather than a misleading unexpected-EOF. The offsets keep the zero value
// Seek returns alongside an error.
func (r *objReader) init(f *os.File) {
	r.a = &Archive{f, nil}
	var err error
	if r.offset, err = f.Seek(0, io.SeekCurrent); err != nil {
		r.error(err)
	}
	if r.limit, err = f.Seek(0, io.SeekEnd); err != nil {
		r.error(err)
	}
	// Seeking to the end moved the file position; restore it.
	if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
		r.error(err)
	}
	r.b = bio.NewReader(f)
}
// error notes that err happened and returns the reader's sticky error.
// Only the first error is kept: once one is stored, later calls leave it
// untouched, so a follow-on failure never hides the original cause.
// An io.EOF is stored as io.ErrUnexpectedEOF.
func (r *objReader) error(err error) error {
	if r.err != nil {
		return r.err
	}
	if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	r.err = err
	// panic("corrupt") // useful for debugging
	return r.err
}
// peek returns the next n bytes without advancing the reader.
// If an error was recorded earlier, or the reader is already at the end
// of the file, it returns nil and the recorded error. An error from the
// underlying Peek is returned as is; it is also recorded unless it is
// bufio.ErrBufferFull.
func (r *objReader) peek(n int) ([]byte, error) {
	if r.err != nil {
		return nil, r.err
	}
	if r.offset >= r.limit {
		return nil, r.error(io.ErrUnexpectedEOF)
	}
	buf, err := r.b.Peek(n)
	if err != nil && err != bufio.ErrBufferFull {
		r.error(err)
	}
	return buf, err
}
// readByte reads and returns one byte from the input file.
// On I/O error or EOF it records the error and returns byte 0 instead.
// A run of 0 bytes eventually terminates any parsing state in the
// object file; in particular it ends the reading of a varint.
func (r *objReader) readByte() byte {
	if r.err != nil {
		return 0
	}
	if r.offset >= r.limit {
		r.error(io.ErrUnexpectedEOF)
		return 0
	}
	c, err := r.b.ReadByte()
	if err != nil {
		// r.error stores io.EOF as io.ErrUnexpectedEOF.
		r.error(err)
		return 0
	}
	r.offset++
	return c
}
// readFull reads exactly len(b) bytes from the input file into b.
// A failure is both returned and recorded in the reader, so callers may
// ignore the result as long as a delayed report is acceptable.
// A read that would extend past the end of the file fails with
// io.ErrUnexpectedEOF without consuming anything.
func (r *objReader) readFull(b []byte) error {
	if r.err != nil {
		return r.err
	}
	if end := r.offset + int64(len(b)); end > r.limit {
		return r.error(io.ErrUnexpectedEOF)
	}
	n, err := io.ReadFull(r.b, b)
	r.offset += int64(n)
	if err == nil {
		return nil
	}
	return r.error(err)
}
// skip skips n bytes in the input.
//
// A negative n is a caller bug: it is recorded as an error and nothing is
// skipped. Returning here matters, because slicing r.tmp with a negative
// length would otherwise panic.
func (r *objReader) skip(n int64) {
	if n < 0 {
		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
		return
	}
	switch {
	case n < int64(len(r.tmp)):
		// Since the data is so small, just reading from the buffered
		// reader is better than flushing the buffer and seeking.
		r.readFull(r.tmp[:n])
	case n <= int64(r.b.Buffered()):
		// Even though the data is not small, it has already been read.
		// Advance the buffer instead of seeking.
		for n > int64(len(r.tmp)) {
			r.readFull(r.tmp[:])
			n -= int64(len(r.tmp))
		}
		r.readFull(r.tmp[:n])
	default:
		// Seek, giving up buffered data.
		r.b.MustSeek(r.offset+n, io.SeekStart)
		r.offset += n
	}
}
// New starts a new archive in f by writing the archive header to it.
// It returns an Archive for f with no entries, or the write error.
func New(f *os.File) (*Archive, error) {
	if _, err := f.Write(archiveHeader); err != nil {
		return nil, err
	}
	arc := &Archive{f: f}
	return arc, nil
}
// Parse parses an object file or archive from f, starting at f's
// current position.
//
// An archive yields one Entry per file record. A bare Go object file
// yields a single Entry named after f that spans the rest of the file.
// Input of neither kind fails with errNotObject. The verbose flag is
// passed on to the archive parser, which decodes the mtime, uid, gid
// and mode header fields only when it is set.
func Parse(f *os.File, verbose bool) (*Archive, error) {
	var r objReader
	r.init(f)

	magic, err := r.peek(8)
	if err == io.EOF {
		return nil, io.ErrUnexpectedEOF
	}
	if err != nil {
		return nil, err
	}

	switch {
	case bytes.Equal(magic, archiveHeader):
		if err := r.parseArchive(verbose); err != nil {
			return nil, err
		}
	case bytes.Equal(magic, goobjHeader):
		start := r.offset
		length := r.limit - start
		obj := &GoObj{}
		if err := r.parseObject(obj, length); err != nil {
			return nil, err
		}
		r.a.Entries = []Entry{{
			Name: f.Name(),
			Type: EntryGoObj,
			Data: Data{start, length},
			Obj:  obj,
		}}
	default:
		return nil, errNotObject
	}
	return r.a, nil
}
// trimSpace returns b as a string with any run of trailing space
// characters dropped. Archive headers pad their fields on the right with
// spaces, so this recovers the field's value.
func trimSpace(b []byte) string {
	isPad := func(c rune) bool { return c == ' ' }
	trimmed := bytes.TrimRightFunc(b, isPad)
	return string(trimmed)
}
// parseArchive parses a Unix archive of Go object files.
//
// It consumes the 8-byte archive header (already verified by the caller)
// and then reads file records until the end of the file, appending one
// Entry to r.a.Entries per record. The mtime, uid, gid and mode header
// fields are decoded only when verbose is set. A malformed record header
// yields errCorruptArchive; read and object-parsing failures are returned
// as reported by the helpers.
func (r *objReader) parseArchive(verbose bool) error {
	r.readFull(r.tmp[:8]) // consume header (already checked)
	for r.offset < r.limit {
		if err := r.readFull(r.tmp[:60]); err != nil {
			return err
		}
		data := r.tmp[:60]

		// Each file is preceded by this text header (slice indices in first column):
		//	 0:16	name
		//	16:28	date
		//	28:34	uid
		//	34:40	gid
		//	40:48	mode
		//	48:58	size
		//	58:60	magic - `\n
		// We only care about name, size, and magic, unless in verbose mode.
		// The fields are space-padded on the right.
		// The size is in decimal.
		// The file data - size bytes - follows the header.
		// Headers are 2-byte aligned, so if size is odd, an extra padding
		// byte sits between the file data and the next header.
		if !bytes.Equal(data[58:60], archiveMagic) {
			return errCorruptArchive
		}
		name := trimSpace(data[0:16])

		// get decodes one numeric header field. The first parse failure is
		// kept in err and turns every later call into a no-op, so a single
		// check after all fields are read is enough.
		var err error
		get := func(start, end, base, bitsize int) int64 {
			if err != nil {
				return 0
			}
			var v int64
			v, err = strconv.ParseInt(trimSpace(data[start:end]), base, bitsize)
			return v
		}

		size := get(48, 58, 10, 64)
		var (
			mtime    int64
			uid, gid int
			mode     os.FileMode
		)
		if verbose {
			mtime = get(16, 28, 10, 64)
			uid = int(get(28, 34, 10, 32))
			gid = int(get(34, 40, 10, 32))
			mode = os.FileMode(get(40, 48, 8, 32))
		}
		if err != nil {
			return errCorruptArchive
		}

		// ParseInt accepts a leading minus sign, so reject negative sizes
		// explicitly: -1 would otherwise slip through, because rounding it
		// up to an even length gives 0. The second test catches overflow
		// when rounding up a huge odd size.
		fsize := size + size&1
		if size < 0 || fsize < size {
			return errCorruptArchive
		}

		switch name {
		case "__.PKGDEF":
			r.a.Entries = append(r.a.Entries, Entry{
				Name:  name,
				Type:  EntryPkgDef,
				Mtime: mtime,
				Uid:   uid,
				Gid:   gid,
				Mode:  mode,
				Data:  Data{r.offset, size},
			})
			r.skip(size)
		default:
			var typ EntryType
			var o *GoObj
			offset := r.offset
			p, err := r.peek(8)
			if err != nil {
				return err
			}
			if bytes.Equal(p, goobjHeader) {
				typ = EntryGoObj
				o = &GoObj{}
				if err := r.parseObject(o, size); err != nil {
					return err
				}
			} else {
				typ = EntryNativeObj
				r.skip(size)
			}
			r.a.Entries = append(r.a.Entries, Entry{
				Name:  name,
				Type:  typ,
				Mtime: mtime,
				Uid:   uid,
				Gid:   gid,
				Mode:  mode,
				Data:  Data{offset, size},
				Obj:   o,
			})
		}
		if size&1 != 0 {
			r.skip(1) // padding byte after odd-sized data
		}
	}
	return nil
}
// parseObject parses a single Go object file.
// The object file consists of a textual header ending in "\n!\n"
// and then the part we want to parse begins.
// The format of that part is defined in a comment at the top
// of cmd/internal/goobj/objfile.go.
//
// size is the total length of the object file including the textual
// header. On success o holds the header, the architecture named in it and
// the location of the remaining data, and the reader is positioned just
// past the object. A header longer than size, or a wrong magic string,
// is reported as an error.
func (r *objReader) parseObject(o *GoObj, size int64) error {
	h := make([]byte, 0, 256)
	var c1, c2, c3 byte
	for {
		c1, c2, c3 = c2, c3, r.readByte()
		h = append(h, c3)
		// The new export format can contain 0 bytes.
		// Don't consider them errors, only look for r.err != nil.
		if r.err != nil {
			return errCorruptObject
		}
		if c1 == '\n' && c2 == '!' && c3 == '\n' {
			break
		}
	}
	o.TextHeader = h
	hs := strings.Fields(string(h))
	if len(hs) >= 4 {
		o.Arch = hs[3]
	}
	o.Offset = r.offset
	o.Size = size - int64(len(h))

	p, err := r.peek(8)
	if err != nil {
		return err
	}
	if !bytes.Equal(p, []byte(goobj.Magic)) {
		if bytes.HasPrefix(p, []byte("\x00go1")) && bytes.HasSuffix(p, []byte("ld")) {
			// Strip the \x00 byte. Copy the rest, since p is a view into the
			// reader's buffer and the error may outlive it.
			magic := append([]byte(nil), p[1:]...)
			return r.error(ErrGoObjOtherVersion{magic})
		}
		return r.error(errCorruptObject)
	}
	// The textual header ran past the declared size: the object (or the
	// archive entry describing it) is corrupt, and there is nothing valid
	// to skip.
	if o.Size < 0 {
		return r.error(errCorruptObject)
	}
	r.skip(o.Size)
	return nil
}
// AddEntry adds an entry to the end of a, with the content from r.
//
// It writes the record header, copies exactly size bytes from r, pads the
// file to an even length and appends the matching Entry to a.Entries.
// The name is truncated or padded to 16 bytes in the header, but stored
// unchanged in the Entry. Any I/O failure, including r supplying fewer
// than size bytes, terminates the program through log.Fatal.
func (a *Archive) AddEntry(typ EntryType, name string, mtime int64, uid, gid int, mode os.FileMode, size int64, r io.Reader) {
	off, err := a.f.Seek(0, io.SeekEnd)
	if err != nil {
		log.Fatal(err)
	}
	n, err := fmt.Fprintf(a.f, entryHeader, exactly16Bytes(name), mtime, uid, gid, mode, size)
	if err != nil || n != entryLen {
		log.Fatal("writing entry header: ", err)
	}
	// Keep the copy error: it is what explains a short copy.
	n1, err := io.CopyN(a.f, r, size)
	if err != nil || n1 != size {
		log.Fatal("writing entry data: ", err)
	}
	// The header has even length, so the parity of the end of the data
	// is the parity of off+size.
	if (off+size)&1 != 0 {
		if _, err := a.f.Write([]byte{0}); err != nil { // pad to even byte
			log.Fatal("writing entry padding: ", err)
		}
	}
	a.Entries = append(a.Entries, Entry{
		Name:  name,
		Type:  typ,
		Mtime: mtime,
		Uid:   uid,
		Gid:   gid,
		Mode:  mode,
		Data:  Data{off + entryLen, size},
	})
}
// exactly16Bytes truncates the string if necessary so it is at most 16 bytes long,
// then pads the result with spaces to be exactly 16 bytes.
// Fmt uses runes for its width calculation, but we need bytes in the entry header.
//
// Truncation removes whole runes from the end, so a multi-byte character
// is never split. The padding is generated rather than sliced from a
// literal, so it does not depend on a string constant holding exactly
// sixteen spaces.
func exactly16Bytes(s string) string {
	const width = 16
	for len(s) > width {
		_, wid := utf8.DecodeLastRuneInString(s)
		s = s[:len(s)-wid]
	}
	var field [width]byte
	n := copy(field[:], s)
	for i := n; i < width; i++ {
		field[i] = ' '
	}
	return string(field[:])
}
// architecture-independent object file output

// HeaderSize is the length in bytes of an archive file record header.
const HeaderSize = 60

// ReadHeader reads one archive file record header from b and returns the
// size recorded in it.
//
// It returns -1 if a full header cannot be read, if the entry's name
// (with space padding removed) does not start with name, or if the size
// field is not a valid decimal number. A malformed size is reported as a
// failure rather than as an empty entry.
func ReadHeader(b *bufio.Reader, name string) int {
	var buf [HeaderSize]byte
	if _, err := io.ReadFull(b, buf[:]); err != nil {
		return -1
	}
	aname := strings.Trim(string(buf[0:16]), " ")
	if !strings.HasPrefix(aname, name) {
		return -1
	}
	asize := strings.Trim(string(buf[48:58]), " ")
	size, err := strconv.Atoi(asize)
	if err != nil {
		return -1
	}
	return size
}
// FormatHeader writes an archive file record header for an entry with the
// given name and size into arhdr. The mtime, uid and gid fields are 0 and
// the mode is 0644.
//
// The name field is exactly 16 bytes: longer names are truncated on a
// rune boundary and shorter ones are padded with spaces. fmt's %-16s
// counts runes and never truncates, so it would shift every later field
// for long or non-ASCII names. If arhdr is shorter than the 60-byte
// header, only the leading part is copied.
func FormatHeader(arhdr []byte, name string, size int64) {
	const nameLen = 16
	for len(name) > nameLen {
		_, wid := utf8.DecodeLastRuneInString(name)
		name = name[:len(name)-wid]
	}
	var field [nameLen]byte
	n := copy(field[:], name)
	for i := n; i < nameLen; i++ {
		field[i] = ' '
	}
	copy(arhdr, fmt.Sprintf("%s%-12d%-6d%-6d%-8o%-10d`\n", field[:], 0, 0, 0, 0644, size))
}