| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package archive implements reading and writing of archive files generated |
| // by the Go toolchain. |
| package archive |
| |
| import ( |
| "bufio" |
| "bytes" |
| "cmd/internal/bio" |
| "cmd/internal/goobj" |
| "errors" |
| "fmt" |
| "io" |
| "log" |
| "os" |
| "strconv" |
| "strings" |
| "time" |
| "unicode/utf8" |
| ) |
| |
| /* |
| The archive format is: |
| |
| First, on a line by itself |
| !<arch> |
| |
| Then zero or more file records. Each file record has a fixed-size one-line header |
| followed by data bytes followed by an optional padding byte. The header is: |
| |
| %-16s%-12d%-6d%-6d%-8o%-10d` |
| name mtime uid gid mode size |
| |
| (note the trailing backquote). The %-16s here means at most 16 *bytes* of |
| the name, and if shorter, space padded on the right. |
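| |
| For illustration only (a hypothetical member, not taken from a real archive): |
| an entry named "hello.o" with mtime 1600000000, uid 0, gid 0, mode 0644, |
| and size 1234 would be preceded by this header (59 bytes shown, plus the |
| trailing newline for 60 in all): |
| |
| hello.o         1600000000  0     0     644     1234      ` |
| |
| The 1234 data bytes follow, and since 1234 is even there is no padding byte. |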
| */ |
| |
| // A Data is a reference to data stored in an object file. |
| // It records the offset and size of the data, so that a client can |
| // read the data only if necessary. |
| type Data struct { |
| Offset int64 |
| Size int64 |
| } |
| |
| type Archive struct { |
| f *os.File |
| Entries []Entry |
| } |
| |
| func (a *Archive) File() *os.File { return a.f } |
| |
| type Entry struct { |
| Name string |
| Type EntryType |
| Mtime int64 |
| Uid int |
| Gid int |
| Mode os.FileMode |
| Data |
| Obj *GoObj // nil if this entry is not a Go object file |
| } |
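| |
| // Data records only the offset and size of an entry's bytes, so a client can |
| // read them on demand. A minimal sketch, assuming a was returned by Parse |
| // below and has at least one entry (error handling elided): |
| // |
| // ent := a.Entries[0] |
| // sr := io.NewSectionReader(a.File(), ent.Offset, ent.Size) |
| // buf, err := io.ReadAll(sr) |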
| |
| type EntryType int |
| |
| const ( |
| EntryPkgDef EntryType = iota |
| EntryGoObj |
| EntryNativeObj |
| ) |
| |
| func (e *Entry) String() string { |
| return fmt.Sprintf("%s %6d/%-6d %12d %s %s", |
| (e.Mode & 0777).String(), |
| e.Uid, |
| e.Gid, |
| e.Size, |
| time.Unix(e.Mtime, 0).Format(timeFormat), |
| e.Name) |
| } |
| |
| type GoObj struct { |
| TextHeader []byte |
| Arch string |
| Data |
| } |
| |
| const ( |
| entryHeader = "%s%-12d%-6d%-6d%-8o%-10d`\n" |
| // In entryHeader the first entry, the name, is always printed as 16 bytes right-padded. |
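| // The final 1 + 1 count the closing backquote and newline written by entryHeader. |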
| entryLen = 16 + 12 + 6 + 6 + 8 + 10 + 1 + 1 |
| timeFormat = "Jan _2 15:04 2006" |
| ) |
| |
| var ( |
| archiveHeader = []byte("!<arch>\n") |
| archiveMagic = []byte("`\n") |
| goobjHeader = []byte("go objec") // truncated to size of archiveHeader |
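| // (truncated so the single 8-byte peek in Parse can match either format) |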
| |
| errCorruptArchive = errors.New("corrupt archive") |
| errTruncatedArchive = errors.New("truncated archive") |
| errCorruptObject = errors.New("corrupt object file") |
| errNotObject = errors.New("unrecognized object file format") |
| ) |
| |
| // An objReader is an object file reader. |
| type objReader struct { |
| a *Archive |
| b *bio.Reader |
| err error |
| offset int64 |
| limit int64 |
| tmp [256]byte |
| } |
| |
| func (r *objReader) init(f *os.File) { |
| r.a = &Archive{f, nil} |
| r.offset, _ = f.Seek(0, io.SeekCurrent) |
| r.limit, _ = f.Seek(0, io.SeekEnd) |
| f.Seek(r.offset, io.SeekStart) |
| r.b = bio.NewReader(f) |
| } |
| |
| // error records that an error occurred. |
| // It returns only the first error, so that an error |
| // caused by an earlier error does not discard information |
| // about the earlier error. |
| func (r *objReader) error(err error) error { |
| if r.err == nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| r.err = err |
| } |
| // panic("corrupt") // useful for debugging |
| return r.err |
| } |
| |
| // peek returns the next n bytes without advancing the reader. |
| func (r *objReader) peek(n int) ([]byte, error) { |
| if r.err != nil { |
| return nil, r.err |
| } |
| if r.offset >= r.limit { |
| r.error(io.ErrUnexpectedEOF) |
| return nil, r.err |
| } |
| b, err := r.b.Peek(n) |
| if err != nil { |
| if err != bufio.ErrBufferFull { |
| r.error(err) |
| } |
| } |
| return b, err |
| } |
| |
| // readByte reads and returns a byte from the input file. |
| // On I/O error or EOF, it records the error but returns byte 0. |
| // A sequence of 0 bytes will eventually terminate any |
| // parsing state in the object file. In particular, it ends the |
| // reading of a varint. |
| func (r *objReader) readByte() byte { |
| if r.err != nil { |
| return 0 |
| } |
| if r.offset >= r.limit { |
| r.error(io.ErrUnexpectedEOF) |
| return 0 |
| } |
| b, err := r.b.ReadByte() |
| if err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| r.error(err) |
| b = 0 |
| } else { |
| r.offset++ |
| } |
| return b |
| } |
| |
| // readFull reads exactly len(b) bytes from the input file. |
| // If an error occurs, readFull returns the error but also |
| // records it, so it is safe for callers to ignore the result |
| // as long as delaying the report is not a problem. |
| func (r *objReader) readFull(b []byte) error { |
| if r.err != nil { |
| return r.err |
| } |
| if r.offset+int64(len(b)) > r.limit { |
| return r.error(io.ErrUnexpectedEOF) |
| } |
| n, err := io.ReadFull(r.b, b) |
| r.offset += int64(n) |
| if err != nil { |
| return r.error(err) |
| } |
| return nil |
| } |
| |
| // skip skips n bytes in the input. |
| func (r *objReader) skip(n int64) { |
| if n < 0 { |
| r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) |
| } |
| if n < int64(len(r.tmp)) { |
| // Since the data is so small, just reading from the buffered |
| // reader is better than flushing the buffer and seeking. |
| r.readFull(r.tmp[:n]) |
| } else if n <= int64(r.b.Buffered()) { |
| // Even though the data is not small, it has already been read. |
| // Advance the buffer instead of seeking. |
| for n > int64(len(r.tmp)) { |
| r.readFull(r.tmp[:]) |
| n -= int64(len(r.tmp)) |
| } |
| r.readFull(r.tmp[:n]) |
| } else { |
| // Seek, giving up buffered data. |
| r.b.MustSeek(r.offset+n, io.SeekStart) |
| r.offset += n |
| } |
| } |
| |
| // New creates a new archive by writing the archive header to f. |
| func New(f *os.File) (*Archive, error) { |
| _, err := f.Write(archiveHeader) |
| if err != nil { |
| return nil, err |
| } |
| return &Archive{f: f}, nil |
| } |
| |
| // Parse parses an object file or archive from f. |
| func Parse(f *os.File, verbose bool) (*Archive, error) { |
| var r objReader |
| r.init(f) |
| t, err := r.peek(8) |
| if err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| return nil, err |
| } |
| |
| switch { |
| default: |
| return nil, errNotObject |
| |
| case bytes.Equal(t, archiveHeader): |
| if err := r.parseArchive(verbose); err != nil { |
| return nil, err |
| } |
| case bytes.Equal(t, goobjHeader): |
| off := r.offset |
| o := &GoObj{} |
| if err := r.parseObject(o, r.limit-off); err != nil { |
| return nil, err |
| } |
| r.a.Entries = []Entry{{ |
| Name: f.Name(), |
| Type: EntryGoObj, |
| Data: Data{off, r.limit - off}, |
| Obj: o, |
| }} |
| } |
| |
| return r.a, nil |
| } |
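| |
| // A minimal usage sketch from a client's point of view (hypothetical file |
| // name, error handling abbreviated): |
| // |
| // f, err := os.Open("pkg.a") |
| // if err != nil { /* handle error */ } |
| // a, err := archive.Parse(f, false) // false: skip mtime/uid/gid/mode parsing |
| // if err != nil { /* handle error */ } |
| // for _, e := range a.Entries { |
| // fmt.Println(e.Name, e.Size) |
| // } |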
| |
| // trimSpace removes trailing spaces from b and returns the corresponding string. |
| // This effectively parses the form used in archive headers. |
| func trimSpace(b []byte) string { |
| return string(bytes.TrimRight(b, " ")) |
| } |
| |
| // parseArchive parses a Unix archive of Go object files. |
| func (r *objReader) parseArchive(verbose bool) error { |
| r.readFull(r.tmp[:8]) // consume header (already checked) |
| for r.offset < r.limit { |
| if err := r.readFull(r.tmp[:60]); err != nil { |
| return err |
| } |
| data := r.tmp[:60] |
| |
| // Each file is preceded by this text header (slice indices in first column): |
| // 0:16 name |
| // 16:28 date |
| // 28:34 uid |
| // 34:40 gid |
| // 40:48 mode |
| // 48:58 size |
| // 58:60 magic - `\n |
| // We only care about name, size, and magic, unless in verbose mode. |
| // The fields are space-padded on the right. |
| // The size is in decimal. |
| // The file data - size bytes - follows the header. |
| // Headers are 2-byte aligned, so if size is odd, an extra padding |
| // byte sits between the file data and the next header. |
| if len(data) < 60 { |
| return errTruncatedArchive |
| } |
| if !bytes.Equal(data[58:60], archiveMagic) { |
| return errCorruptArchive |
| } |
| name := trimSpace(data[0:16]) |
| var err error |
| get := func(start, end, base, bitsize int) int64 { |
| if err != nil { |
| return 0 |
| } |
| var v int64 |
| v, err = strconv.ParseInt(trimSpace(data[start:end]), base, bitsize) |
| return v |
| } |
| size := get(48, 58, 10, 64) |
| var ( |
| mtime int64 |
| uid, gid int |
| mode os.FileMode |
| ) |
| if verbose { |
| mtime = get(16, 28, 10, 64) |
| uid = int(get(28, 34, 10, 32)) |
| gid = int(get(34, 40, 10, 32)) |
| mode = os.FileMode(get(40, 48, 8, 32)) |
| } |
| if err != nil { |
| return errCorruptArchive |
| } |
| data = data[60:] |
| fsize := size + size&1 |
| if fsize < 0 || fsize < size { |
| return errCorruptArchive |
| } |
| switch name { |
| case "__.PKGDEF": |
| r.a.Entries = append(r.a.Entries, Entry{ |
| Name: name, |
| Type: EntryPkgDef, |
| Mtime: mtime, |
| Uid: uid, |
| Gid: gid, |
| Mode: mode, |
| Data: Data{r.offset, size}, |
| }) |
| r.skip(size) |
| default: |
| var typ EntryType |
| var o *GoObj |
| offset := r.offset |
| p, err := r.peek(8) |
| if err != nil { |
| return err |
| } |
| if bytes.Equal(p, goobjHeader) { |
| typ = EntryGoObj |
| o = &GoObj{} |
| r.parseObject(o, size) |
| } else { |
| typ = EntryNativeObj |
| r.skip(size) |
| } |
| r.a.Entries = append(r.a.Entries, Entry{ |
| Name: name, |
| Type: typ, |
| Mtime: mtime, |
| Uid: uid, |
| Gid: gid, |
| Mode: mode, |
| Data: Data{offset, size}, |
| Obj: o, |
| }) |
| } |
| if size&1 != 0 { |
| r.skip(1) |
| } |
| } |
| return nil |
| } |
| |
| // parseObject parses a single Go object file. |
| // The object file consists of a textual header ending in "\n!\n" |
| // and then the part we want to parse begins. |
| // The format of that part is defined in a comment at the top |
| // of cmd/internal/goobj/objfile.go. |
| func (r *objReader) parseObject(o *GoObj, size int64) error { |
| h := make([]byte, 0, 256) |
| var c1, c2, c3 byte |
| for { |
| c1, c2, c3 = c2, c3, r.readByte() |
| h = append(h, c3) |
| // The new export format can contain 0 bytes. |
| // Don't consider them errors, only look for r.err != nil. |
| if r.err != nil { |
| return errCorruptObject |
| } |
| if c1 == '\n' && c2 == '!' && c3 == '\n' { |
| break |
| } |
| } |
| o.TextHeader = h |
| hs := strings.Fields(string(h)) |
| if len(hs) >= 4 { |
| o.Arch = hs[3] |
| } |
| o.Offset = r.offset |
| o.Size = size - int64(len(h)) |
| |
| p, err := r.peek(8) |
| if err != nil { |
| return err |
| } |
| if !bytes.Equal(p, []byte(goobj.Magic)) { |
| return r.error(errCorruptObject) |
| } |
| r.skip(o.Size) |
| return nil |
| } |
| |
| // AddEntry adds an entry to the end of a, with the content from r. |
| func (a *Archive) AddEntry(typ EntryType, name string, mtime int64, uid, gid int, mode os.FileMode, size int64, r io.Reader) { |
| off, err := a.f.Seek(0, io.SeekEnd) |
| if err != nil { |
| log.Fatal(err) |
| } |
| n, err := fmt.Fprintf(a.f, entryHeader, exactly16Bytes(name), mtime, uid, gid, mode, size) |
| if err != nil || n != entryLen { |
| log.Fatal("writing entry header: ", err) |
| } |
| n1, err := io.CopyN(a.f, r, size) |
| if n1 != size { |
| log.Fatal(err) |
| } |
| if (off+size)&1 != 0 { |
| a.f.Write([]byte{0}) // pad to even byte |
| } |
| a.Entries = append(a.Entries, Entry{ |
| Name: name, |
| Type: typ, |
| Mtime: mtime, |
| Uid: uid, |
| Gid: gid, |
| Mode: mode, |
| Data: Data{off + entryLen, size}, |
| }) |
| } |
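| |
| // A minimal sketch of writing an archive with New and AddEntry (hypothetical |
| // names and payload, client's point of view, error handling elided): |
| // |
| // payload := []byte("example contents") |
| // f, _ := os.Create("out.a") |
| // a, _ := archive.New(f) |
| // a.AddEntry(archive.EntryNativeObj, "data.o", time.Now().Unix(), 0, 0, 0644, |
| // int64(len(payload)), bytes.NewReader(payload)) |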
| |
| // exactly16Bytes truncates the string if necessary so it is at most 16 bytes long, |
| // then pads the result with spaces to be exactly 16 bytes. |
| // Fmt uses runes for its width calculation, but we need bytes in the entry header. |
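| // For example, a hypothetical 17-byte name "abcdefghijklmnopq" is cut to |
| // "abcdefghijklmnop"; a name ending in a multi-byte rune is shortened by whole |
| // runes so a rune is never split, then space-padded to exactly 16 bytes. |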
| func exactly16Bytes(s string) string { |
| for len(s) > 16 { |
| _, wid := utf8.DecodeLastRuneInString(s) |
| s = s[:len(s)-wid] |
| } |
| const sixteenSpaces = " " |
| s += sixteenSpaces[:16-len(s)] |
| return s |
| } |