blob: c1661d7711ac901ef1ae73539e38cd2f64c83785 [file] [log] [blame]
Brad Fitzpatrick51947442016-03-01 22:57:46 +00001// Copyright 2013 The Go Authors. All rights reserved.
Russ Cox08b846b2013-12-16 12:52:21 -05002// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Cherry Zhang95598772020-07-30 20:49:29 -04005// Package archive implements reading of archive files generated by the Go
6// toolchain.
7package archive
Russ Cox08b846b2013-12-16 12:52:21 -05008
9import (
10 "bufio"
11 "bytes"
Cherry Zhang95598772020-07-30 20:49:29 -040012 "cmd/internal/bio"
Cherry Zhang27e37782020-08-02 19:36:28 -040013 "cmd/internal/goobj"
Russ Cox08b846b2013-12-16 12:52:21 -050014 "errors"
15 "fmt"
16 "io"
Cherry Zhang3a185d72020-08-05 21:16:52 -040017 "log"
Hiroshi Ioka1053ae52017-09-10 09:45:49 +090018 "os"
Russ Cox08b846b2013-12-16 12:52:21 -050019 "strconv"
Tao Qingyun31da1d92020-08-19 01:38:43 +000020 "strings"
Cherry Zhang3a185d72020-08-05 21:16:52 -040021 "time"
22 "unicode/utf8"
Russ Cox08b846b2013-12-16 12:52:21 -050023)
24
/*
The archive format is:

First, on a line by itself
	!<arch>

Then zero or more file records. Each file record has a fixed-size one-line header
followed by data bytes followed by an optional padding byte. The header is:

	%-16s%-12d%-6d%-6d%-8o%-10d`
	name    mtime    uid    gid    mode    size

(note the trailing backquote). The %-16s here means at most 16 *bytes* of
the name, and if shorter, space padded on the right.
*/
40
// Data locates a run of bytes inside an object file or archive.
// Only the position and the length are stored, which lets a client
// postpone reading the bytes until they are actually needed.
type Data struct {
	Offset, Size int64 // position of the first byte, and number of bytes
}
48
Cherry Zhang95598772020-07-30 20:49:29 -040049type Archive struct {
50 f *os.File
51 Entries []Entry
Russ Cox08b846b2013-12-16 12:52:21 -050052}
53
Cherry Zhang3a185d72020-08-05 21:16:52 -040054func (a *Archive) File() *os.File { return a.f }
55
Cherry Zhang95598772020-07-30 20:49:29 -040056type Entry struct {
Cherry Zhang3a185d72020-08-05 21:16:52 -040057 Name string
58 Type EntryType
59 Mtime int64
60 Uid int
61 Gid int
62 Mode os.FileMode
Cherry Zhang95598772020-07-30 20:49:29 -040063 Data
64 Obj *GoObj // nil if this entry is not a Go object file
65}
66
// EntryType classifies an archive member.
type EntryType int

// The kinds of archive member distinguished by this package.
const (
	EntryPkgDef    EntryType = iota // package definition member
	EntryGoObj                      // Go object file
	EntryNativeObj                  // any other (non-Go) member
)
74
Cherry Zhang3a185d72020-08-05 21:16:52 -040075func (e *Entry) String() string {
76 return fmt.Sprintf("%s %6d/%-6d %12d %s %s",
77 (e.Mode & 0777).String(),
78 e.Uid,
79 e.Gid,
80 e.Size,
81 time.Unix(e.Mtime, 0).Format(timeFormat),
82 e.Name)
83}
84
Cherry Zhang95598772020-07-30 20:49:29 -040085type GoObj struct {
86 TextHeader []byte
Tao Qingyun31da1d92020-08-19 01:38:43 +000087 Arch string
Cherry Zhang95598772020-07-30 20:49:29 -040088 Data
Russ Cox08b846b2013-12-16 12:52:21 -050089}
90
const (
	// entryHeader is the format of a member header. The leading %s is the
	// name, which the caller must already have sized to exactly 16 bytes;
	// the remaining fields are left-justified in fixed-width columns.
	entryHeader = "%s%-12d%-6d%-6d%-8o%-10d`\n"
	// entryLen is the byte length of a formatted header:
	// name, mtime, uid, gid, mode, size, backquote, newline.
	entryLen = 16 + 12 + 6 + 6 + 8 + 10 + 1 + 1
	// timeFormat is the layout used when printing an entry's mtime.
	timeFormat = "Jan _2 15:04 2006"
)
97
// Byte sequences recognised while parsing.
var (
	archiveHeader = []byte("!<arch>\n") // first line of an archive
	archiveMagic  = []byte("`\n")       // last two bytes of every member header
	goobjHeader   = []byte("go objec")  // truncated to size of archiveHeader
)

// Errors reported while parsing.
var (
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
108
109// An objReader is an object file reader.
110type objReader struct {
Cherry Zhang95598772020-07-30 20:49:29 -0400111 a *Archive
112 b *bio.Reader
113 err error
114 offset int64
115 limit int64
116 tmp [256]byte
Russ Cox2404b7f2013-12-18 19:00:52 -0500117}
118
Cherry Zhang95598772020-07-30 20:49:29 -0400119func (r *objReader) init(f *os.File) {
120 r.a = &Archive{f, nil}
Joe Tsaiacc757f2016-04-05 11:22:53 -0700121 r.offset, _ = f.Seek(0, io.SeekCurrent)
122 r.limit, _ = f.Seek(0, io.SeekEnd)
123 f.Seek(r.offset, io.SeekStart)
Cherry Zhang95598772020-07-30 20:49:29 -0400124 r.b = bio.NewReader(f)
Russ Cox08b846b2013-12-16 12:52:21 -0500125}
126
127// error records that an error occurred.
128// It returns only the first error, so that an error
129// caused by an earlier error does not discard information
130// about the earlier error.
131func (r *objReader) error(err error) error {
132 if r.err == nil {
133 if err == io.EOF {
134 err = io.ErrUnexpectedEOF
135 }
136 r.err = err
137 }
138 // panic("corrupt") // useful for debugging
139 return r.err
140}
141
Hiroshi Ioka1053ae52017-09-10 09:45:49 +0900142// peek returns the next n bytes without advancing the reader.
143func (r *objReader) peek(n int) ([]byte, error) {
144 if r.err != nil {
145 return nil, r.err
146 }
147 if r.offset >= r.limit {
148 r.error(io.ErrUnexpectedEOF)
149 return nil, r.err
150 }
151 b, err := r.b.Peek(n)
152 if err != nil {
153 if err != bufio.ErrBufferFull {
154 r.error(err)
155 }
156 }
157 return b, err
158}
159
Russ Cox08b846b2013-12-16 12:52:21 -0500160// readByte reads and returns a byte from the input file.
161// On I/O error or EOF, it records the error but returns byte 0.
162// A sequence of 0 bytes will eventually terminate any
163// parsing state in the object file. In particular, it ends the
164// reading of a varint.
165func (r *objReader) readByte() byte {
166 if r.err != nil {
167 return 0
168 }
169 if r.offset >= r.limit {
170 r.error(io.ErrUnexpectedEOF)
171 return 0
172 }
173 b, err := r.b.ReadByte()
174 if err != nil {
175 if err == io.EOF {
176 err = io.ErrUnexpectedEOF
177 }
178 r.error(err)
179 b = 0
180 } else {
181 r.offset++
182 }
183 return b
184}
185
186// read reads exactly len(b) bytes from the input file.
187// If an error occurs, read returns the error but also
188// records it, so it is safe for callers to ignore the result
189// as long as delaying the report is not a problem.
190func (r *objReader) readFull(b []byte) error {
191 if r.err != nil {
192 return r.err
193 }
194 if r.offset+int64(len(b)) > r.limit {
195 return r.error(io.ErrUnexpectedEOF)
196 }
197 n, err := io.ReadFull(r.b, b)
198 r.offset += int64(n)
199 if err != nil {
200 return r.error(err)
201 }
202 return nil
203}
204
Russ Cox08b846b2013-12-16 12:52:21 -0500205// skip skips n bytes in the input.
206func (r *objReader) skip(n int64) {
207 if n < 0 {
208 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
209 }
210 if n < int64(len(r.tmp)) {
211 // Since the data is so small, a just reading from the buffered
212 // reader is better than flushing the buffer and seeking.
213 r.readFull(r.tmp[:n])
214 } else if n <= int64(r.b.Buffered()) {
215 // Even though the data is not small, it has already been read.
216 // Advance the buffer instead of seeking.
217 for n > int64(len(r.tmp)) {
218 r.readFull(r.tmp[:])
219 n -= int64(len(r.tmp))
220 }
221 r.readFull(r.tmp[:n])
222 } else {
223 // Seek, giving up buffered data.
Cherry Zhang95598772020-07-30 20:49:29 -0400224 r.b.MustSeek(r.offset+n, io.SeekStart)
Russ Cox08b846b2013-12-16 12:52:21 -0500225 r.offset += n
Russ Cox08b846b2013-12-16 12:52:21 -0500226 }
227}
228
Cherry Zhang3a185d72020-08-05 21:16:52 -0400229// New writes to f to make a new archive.
230func New(f *os.File) (*Archive, error) {
231 _, err := f.Write(archiveHeader)
232 if err != nil {
233 return nil, err
234 }
235 return &Archive{f: f}, nil
236}
237
Cherry Zhang95598772020-07-30 20:49:29 -0400238// Parse parses an object file or archive from f.
Cherry Zhang3a185d72020-08-05 21:16:52 -0400239func Parse(f *os.File, verbose bool) (*Archive, error) {
Cherry Zhang95598772020-07-30 20:49:29 -0400240 var r objReader
241 r.init(f)
242 t, err := r.peek(8)
Russ Cox08b846b2013-12-16 12:52:21 -0500243 if err != nil {
244 if err == io.EOF {
245 err = io.ErrUnexpectedEOF
246 }
247 return nil, err
248 }
249
250 switch {
251 default:
252 return nil, errNotObject
253
Cherry Zhang95598772020-07-30 20:49:29 -0400254 case bytes.Equal(t, archiveHeader):
Cherry Zhang3a185d72020-08-05 21:16:52 -0400255 if err := r.parseArchive(verbose); err != nil {
Russ Cox08b846b2013-12-16 12:52:21 -0500256 return nil, err
257 }
Cherry Zhang95598772020-07-30 20:49:29 -0400258 case bytes.Equal(t, goobjHeader):
259 off := r.offset
260 o := &GoObj{}
261 if err := r.parseObject(o, r.limit-off); err != nil {
Russ Cox08b846b2013-12-16 12:52:21 -0500262 return nil, err
263 }
Cherry Zhang3a185d72020-08-05 21:16:52 -0400264 r.a.Entries = []Entry{{
265 Name: f.Name(),
266 Type: EntryGoObj,
267 Data: Data{off, r.limit - off},
268 Obj: o,
269 }}
Russ Cox08b846b2013-12-16 12:52:21 -0500270 }
271
Cherry Zhang95598772020-07-30 20:49:29 -0400272 return r.a, nil
Russ Cox08b846b2013-12-16 12:52:21 -0500273}
274
// trimSpace converts b to a string after dropping any run of space
// characters at its end. Archive header fields are padded on the right
// with spaces, so this recovers the field's value.
func trimSpace(b []byte) string {
	trimmed := bytes.TrimRight(b, " ")
	return string(trimmed)
}
280
281// parseArchive parses a Unix archive of Go object files.
Cherry Zhang3a185d72020-08-05 21:16:52 -0400282func (r *objReader) parseArchive(verbose bool) error {
Cherry Zhang95598772020-07-30 20:49:29 -0400283 r.readFull(r.tmp[:8]) // consume header (already checked)
Russ Cox08b846b2013-12-16 12:52:21 -0500284 for r.offset < r.limit {
285 if err := r.readFull(r.tmp[:60]); err != nil {
286 return err
287 }
288 data := r.tmp[:60]
289
290 // Each file is preceded by this text header (slice indices in first column):
291 // 0:16 name
292 // 16:28 date
293 // 28:34 uid
294 // 34:40 gid
295 // 40:48 mode
296 // 48:58 size
297 // 58:60 magic - `\n
Cherry Zhang3a185d72020-08-05 21:16:52 -0400298 // We only care about name, size, and magic, unless in verbose mode.
Russ Cox08b846b2013-12-16 12:52:21 -0500299 // The fields are space-padded on the right.
300 // The size is in decimal.
301 // The file data - size bytes - follows the header.
302 // Headers are 2-byte aligned, so if size is odd, an extra padding
303 // byte sits between the file data and the next header.
304 // The file data that follows is padded to an even number of bytes:
305 // if size is odd, an extra padding byte is inserted betw the next header.
306 if len(data) < 60 {
307 return errTruncatedArchive
308 }
309 if !bytes.Equal(data[58:60], archiveMagic) {
310 return errCorruptArchive
311 }
312 name := trimSpace(data[0:16])
Cherry Zhang3a185d72020-08-05 21:16:52 -0400313 var err error
314 get := func(start, end, base, bitsize int) int64 {
315 if err != nil {
316 return 0
317 }
318 var v int64
319 v, err = strconv.ParseInt(trimSpace(data[start:end]), base, bitsize)
320 return v
321 }
322 size := get(48, 58, 10, 64)
323 var (
324 mtime int64
325 uid, gid int
326 mode os.FileMode
327 )
328 if verbose {
329 mtime = get(16, 28, 10, 64)
330 uid = int(get(28, 34, 10, 32))
331 gid = int(get(34, 40, 10, 32))
332 mode = os.FileMode(get(40, 48, 8, 32))
333 }
Russ Cox08b846b2013-12-16 12:52:21 -0500334 if err != nil {
335 return errCorruptArchive
336 }
337 data = data[60:]
338 fsize := size + size&1
339 if fsize < 0 || fsize < size {
340 return errCorruptArchive
341 }
342 switch name {
Matthew Dempsky67dbde02016-02-25 14:58:03 -0800343 case "__.PKGDEF":
Cherry Zhang3a185d72020-08-05 21:16:52 -0400344 r.a.Entries = append(r.a.Entries, Entry{
345 Name: name,
346 Type: EntryPkgDef,
347 Mtime: mtime,
348 Uid: uid,
349 Gid: gid,
350 Mode: mode,
351 Data: Data{r.offset, size},
352 })
Russ Cox08b846b2013-12-16 12:52:21 -0500353 r.skip(size)
354 default:
Cherry Zhang95598772020-07-30 20:49:29 -0400355 var typ EntryType
356 var o *GoObj
357 offset := r.offset
Hiroshi Ioka1053ae52017-09-10 09:45:49 +0900358 p, err := r.peek(8)
359 if err != nil {
360 return err
Russ Cox08b846b2013-12-16 12:52:21 -0500361 }
Hiroshi Ioka1053ae52017-09-10 09:45:49 +0900362 if bytes.Equal(p, goobjHeader) {
Cherry Zhang95598772020-07-30 20:49:29 -0400363 typ = EntryGoObj
364 o = &GoObj{}
365 r.parseObject(o, size)
Hiroshi Ioka1053ae52017-09-10 09:45:49 +0900366 } else {
Cherry Zhang95598772020-07-30 20:49:29 -0400367 typ = EntryNativeObj
368 r.skip(size)
Hiroshi Ioka1053ae52017-09-10 09:45:49 +0900369 }
Cherry Zhang3a185d72020-08-05 21:16:52 -0400370 r.a.Entries = append(r.a.Entries, Entry{
371 Name: name,
372 Type: typ,
373 Mtime: mtime,
374 Uid: uid,
375 Gid: gid,
376 Mode: mode,
377 Data: Data{offset, size},
378 Obj: o,
379 })
Russ Cox08b846b2013-12-16 12:52:21 -0500380 }
381 if size&1 != 0 {
382 r.skip(1)
383 }
384 }
385 return nil
386}
387
388// parseObject parses a single Go object file.
Russ Cox08b846b2013-12-16 12:52:21 -0500389// The object file consists of a textual header ending in "\n!\n"
390// and then the part we want to parse begins.
391// The format of that part is defined in a comment at the top
392// of src/liblink/objfile.c.
Cherry Zhang95598772020-07-30 20:49:29 -0400393func (r *objReader) parseObject(o *GoObj, size int64) error {
Keith Randalla99f8122016-07-02 17:19:25 -0700394 h := make([]byte, 0, 256)
Russ Cox08b846b2013-12-16 12:52:21 -0500395 var c1, c2, c3 byte
396 for {
397 c1, c2, c3 = c2, c3, r.readByte()
Keith Randalla99f8122016-07-02 17:19:25 -0700398 h = append(h, c3)
Robert Griesemer4e777c82015-10-22 10:08:45 -0700399 // The new export format can contain 0 bytes.
400 // Don't consider them errors, only look for r.err != nil.
401 if r.err != nil {
Russ Cox08b846b2013-12-16 12:52:21 -0500402 return errCorruptObject
403 }
404 if c1 == '\n' && c2 == '!' && c3 == '\n' {
405 break
406 }
407 }
Cherry Zhang95598772020-07-30 20:49:29 -0400408 o.TextHeader = h
Tao Qingyun31da1d92020-08-19 01:38:43 +0000409 hs := strings.Fields(string(h))
410 if len(hs) >= 4 {
411 o.Arch = hs[3]
412 }
Cherry Zhang95598772020-07-30 20:49:29 -0400413 o.Offset = r.offset
414 o.Size = size - int64(len(h))
Keith Randalla99f8122016-07-02 17:19:25 -0700415
Than McIntoshe5acb582019-10-16 12:31:33 -0400416 p, err := r.peek(8)
417 if err != nil {
418 return err
419 }
Cherry Zhang27e37782020-08-02 19:36:28 -0400420 if !bytes.Equal(p, []byte(goobj.Magic)) {
Russ Cox08b846b2013-12-16 12:52:21 -0500421 return r.error(errCorruptObject)
422 }
Cherry Zhang95598772020-07-30 20:49:29 -0400423 r.skip(o.Size)
Russ Cox08b846b2013-12-16 12:52:21 -0500424 return nil
425}
Cherry Zhang3a185d72020-08-05 21:16:52 -0400426
427// AddEntry adds an entry to the end of a, with the content from r.
428func (a *Archive) AddEntry(typ EntryType, name string, mtime int64, uid, gid int, mode os.FileMode, size int64, r io.Reader) {
429 off, err := a.f.Seek(0, io.SeekEnd)
430 if err != nil {
431 log.Fatal(err)
432 }
433 n, err := fmt.Fprintf(a.f, entryHeader, exactly16Bytes(name), mtime, uid, gid, mode, size)
434 if err != nil || n != entryLen {
435 log.Fatal("writing entry header: ", err)
436 }
437 n1, _ := io.CopyN(a.f, r, size)
438 if n1 != size {
439 log.Fatal(err)
440 }
441 if (off+size)&1 != 0 {
442 a.f.Write([]byte{0}) // pad to even byte
443 }
444 a.Entries = append(a.Entries, Entry{
445 Name: name,
446 Type: typ,
447 Mtime: mtime,
448 Uid: uid,
449 Gid: gid,
450 Mode: mode,
451 Data: Data{off + entryLen, size},
452 })
453}
454
// exactly16Bytes truncates the string if necessary so it is at most 16 bytes long,
// then pads the result with spaces to be exactly 16 bytes.
// Fmt uses runes for its width calculation, but we need bytes in the entry header.
//
// Truncation removes whole trailing runes, so a multi-byte character is
// never cut in half; the result may therefore hold fewer than 16 bytes of
// the name before the padding.
func exactly16Bytes(s string) string {
	const width = 16
	for len(s) > width {
		_, wid := utf8.DecodeLastRuneInString(s)
		s = s[:len(s)-wid]
	}
	// Build the padding byte by byte instead of slicing a literal run of
	// spaces, so the result does not depend on that literal's exact length.
	var buf [width]byte
	n := copy(buf[:], s)
	for i := n; i < width; i++ {
		buf[i] = ' '
	}
	return string(buf[:])
}