David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 1 | // Copyright 2009 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package tar |
| 6 | |
| 7 | // TODO(dsymonds): |
| 8 | // - catch more errors (no first header, write after close, etc.) |
| 9 | |
| 10 | import ( |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 11 | "io" |
| 12 | "os" |
| 13 | "strconv" |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 14 | ) |
| 15 | |
| 16 | var ( |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 17 | ErrWriteTooLong = os.NewError("write too long") |
| 18 | ErrFieldTooLong = os.NewError("header field too long") |
| 19 | ErrWriteAfterClose = os.NewError("write after close") |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 20 | ) |
| 21 | |
| 22 | // A Writer provides sequential writing of a tar archive in POSIX.1 format. |
| 23 | // A tar archive consists of a sequence of files. |
| 24 | // Call WriteHeader to begin a new file, and then call Write to supply that file's data, |
| 25 | // writing at most hdr.Size bytes in total. |
| 26 | // |
| 27 | // Example: |
Christopher Wedgwood | ae729a4 | 2010-04-11 10:29:07 -0700 | [diff] [blame] | 28 | // tw := tar.NewWriter(w) |
| 29 | // hdr := new(Header) |
| 30 | // hdr.Size = length of data in bytes |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 31 | // // populate other hdr fields as desired |
| 32 | // if err := tw.WriteHeader(hdr); err != nil { |
| 33 | // // handle error |
| 34 | // } |
Christopher Wedgwood | ae729a4 | 2010-04-11 10:29:07 -0700 | [diff] [blame] | 35 | // io.Copy(tw, data) |
| 36 | // tw.Close() |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 37 | type Writer struct { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 38 | w io.Writer |
| 39 | err os.Error |
| 40 | nb int64 // number of unwritten bytes for current file entry |
| 41 | pad int64 // amount of padding to write after current file entry |
| 42 | closed bool |
| 43 | usedBinary bool // whether the binary numeric field extension was used |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 44 | } |
| 45 | |
| 46 | // NewWriter creates a new Writer writing to w. |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 47 | func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 48 | |
| 49 | // Flush finishes writing the current file (optional). |
| 50 | func (tw *Writer) Flush() os.Error { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 51 | n := tw.nb + tw.pad |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 52 | for n > 0 && tw.err == nil { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 53 | nr := n |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 54 | if nr > blockSize { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 55 | nr = blockSize |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 56 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 57 | var nw int |
| 58 | nw, tw.err = tw.w.Write(zeroBlock[0:nr]) |
| 59 | n -= int64(nw) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 60 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 61 | tw.nb = 0 |
| 62 | tw.pad = 0 |
| 63 | return tw.err |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 64 | } |
| 65 | |
| 66 | // Write s into b, terminating it with a NUL if there is room. |
| 67 | func (tw *Writer) cString(b []byte, s string) { |
| 68 | if len(s) > len(b) { |
| 69 | if tw.err == nil { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 70 | tw.err = ErrFieldTooLong |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 71 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 72 | return |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 73 | } |
Russ Cox | d86ab01 | 2010-10-26 21:52:54 -0700 | [diff] [blame] | 74 | copy(b, s) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 75 | if len(s) < len(b) { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 76 | b[len(s)] = 0 |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 77 | } |
| 78 | } |
| 79 | |
| 80 | // Encode x as an octal ASCII string and write it into b with leading zeros. |
| 81 | func (tw *Writer) octal(b []byte, x int64) { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 82 | s := strconv.Itob64(x, 8) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 83 | // leading zeros, but leave room for a NUL. |
Russ Cox | 650bff6 | 2009-10-06 14:55:39 -0700 | [diff] [blame] | 84 | for len(s)+1 < len(b) { |
Robert Griesemer | 1698934 | 2009-11-09 21:09:34 -0800 | [diff] [blame] | 85 | s = "0" + s |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 86 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 87 | tw.cString(b, s) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 88 | } |
| 89 | |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 90 | // Write x into b, either as octal or as binary (GNUtar/star extension). |
| 91 | func (tw *Writer) numeric(b []byte, x int64) { |
| 92 | // Try octal first. |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 93 | s := strconv.Itob64(x, 8) |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 94 | if len(s) < len(b) { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 95 | tw.octal(b, x) |
| 96 | return |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 97 | } |
| 98 | // Too big: use binary (big-endian). |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 99 | tw.usedBinary = true |
Robert Griesemer | 1698934 | 2009-11-09 21:09:34 -0800 | [diff] [blame] | 100 | for i := len(b) - 1; x > 0 && i >= 0; i-- { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 101 | b[i] = byte(x) |
| 102 | x >>= 8 |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 103 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 104 | b[0] |= 0x80 // highest bit indicates binary format |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 105 | } |
| 106 | |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 107 | // WriteHeader writes hdr and prepares to accept the file's contents. |
| 108 | // WriteHeader calls Flush if it is not the first header. |
Christopher Wedgwood | fe0eb17 | 2009-12-14 11:35:02 -0800 | [diff] [blame] | 109 | // Calling after a Close will return ErrWriteAfterClose. |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 110 | func (tw *Writer) WriteHeader(hdr *Header) os.Error { |
Christopher Wedgwood | fe0eb17 | 2009-12-14 11:35:02 -0800 | [diff] [blame] | 111 | if tw.closed { |
| 112 | return ErrWriteAfterClose |
| 113 | } |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 114 | if tw.err == nil { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 115 | tw.Flush() |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 116 | } |
| 117 | if tw.err != nil { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 118 | return tw.err |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 119 | } |
| 120 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 121 | tw.nb = int64(hdr.Size) |
| 122 | tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 123 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 124 | header := make([]byte, blockSize) |
| 125 | s := slicer(header) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 126 | |
| 127 | // TODO(dsymonds): handle names longer than 100 chars |
Russ Cox | 9750adb | 2010-02-25 16:01:29 -0800 | [diff] [blame] | 128 | copy(s.next(100), []byte(hdr.Name)) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 129 | |
Russ Cox | 9750adb | 2010-02-25 16:01:29 -0800 | [diff] [blame] | 130 | tw.octal(s.next(8), hdr.Mode) // 100:108 |
Rob Pike | 5cd8c83 | 2010-04-22 14:01:33 -0700 | [diff] [blame] | 131 | tw.numeric(s.next(8), int64(hdr.Uid)) // 108:116 |
| 132 | tw.numeric(s.next(8), int64(hdr.Gid)) // 116:124 |
Russ Cox | 9750adb | 2010-02-25 16:01:29 -0800 | [diff] [blame] | 133 | tw.numeric(s.next(12), hdr.Size) // 124:136 |
| 134 | tw.numeric(s.next(12), hdr.Mtime) // 136:148 |
| 135 | s.next(8) // chksum (148:156) |
| 136 | s.next(1)[0] = hdr.Typeflag // 156:157 |
| 137 | s.next(100) // linkname (157:257) |
| 138 | copy(s.next(8), []byte("ustar\x0000")) // 257:265 |
| 139 | tw.cString(s.next(32), hdr.Uname) // 265:297 |
| 140 | tw.cString(s.next(32), hdr.Gname) // 297:329 |
| 141 | tw.numeric(s.next(8), hdr.Devmajor) // 329:337 |
| 142 | tw.numeric(s.next(8), hdr.Devminor) // 337:345 |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 143 | |
| 144 | // Use the GNU magic instead of POSIX magic if we used any GNU extensions. |
| 145 | if tw.usedBinary { |
Russ Cox | 9750adb | 2010-02-25 16:01:29 -0800 | [diff] [blame] | 146 | copy(header[257:265], []byte("ustar \x00")) |
David Symonds | c17dde2 | 2009-10-05 04:08:24 -0700 | [diff] [blame] | 147 | } |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 148 | |
| 149 | // The chksum field is terminated by a NUL and a space. |
| 150 | // This is different from the other octal fields. |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 151 | chksum, _ := checksum(header) |
| 152 | tw.octal(header[148:155], chksum) |
| 153 | header[155] = ' ' |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 154 | |
| 155 | if tw.err != nil { |
| 156 | // problem with header; probably integer too big for a field. |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 157 | return tw.err |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 158 | } |
| 159 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 160 | _, tw.err = tw.w.Write(header) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 161 | |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 162 | return tw.err |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 163 | } |
| 164 | |
| 165 | // Write writes to the current entry in the tar archive. |
| 166 | // Write returns the error ErrWriteTooLong if more than |
| 167 | // hdr.Size bytes are written after WriteHeader. |
Evan Shaw | 4db3a16 | 2009-11-19 20:43:30 -0800 | [diff] [blame] | 168 | func (tw *Writer) Write(b []byte) (n int, err os.Error) { |
Christopher Wedgwood | fe0eb17 | 2009-12-14 11:35:02 -0800 | [diff] [blame] | 169 | if tw.closed { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 170 | err = ErrWriteTooLong |
| 171 | return |
Christopher Wedgwood | fe0eb17 | 2009-12-14 11:35:02 -0800 | [diff] [blame] | 172 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 173 | overwrite := false |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 174 | if int64(len(b)) > tw.nb { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 175 | b = b[0:tw.nb] |
| 176 | overwrite = true |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 177 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 178 | n, err = tw.w.Write(b) |
| 179 | tw.nb -= int64(n) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 180 | if err == nil && overwrite { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 181 | err = ErrWriteTooLong |
| 182 | return |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 183 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 184 | tw.err = err |
| 185 | return |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 186 | } |
| 187 | |
Christopher Wedgwood | fe0eb17 | 2009-12-14 11:35:02 -0800 | [diff] [blame] | 188 | // Close closes the tar archive, flushing any unwritten |
| 189 | // data to the underlying writer. |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 190 | func (tw *Writer) Close() os.Error { |
| 191 | if tw.err != nil || tw.closed { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 192 | return tw.err |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 193 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 194 | tw.Flush() |
| 195 | tw.closed = true |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 196 | |
| 197 | // trailer: two zero blocks |
| 198 | for i := 0; i < 2; i++ { |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 199 | _, tw.err = tw.w.Write(zeroBlock) |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 200 | if tw.err != nil { |
Robert Griesemer | 40621d5 | 2009-11-09 12:07:39 -0800 | [diff] [blame] | 201 | break |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 202 | } |
| 203 | } |
Robert Griesemer | 5a1d332 | 2009-12-15 15:33:31 -0800 | [diff] [blame] | 204 | return tw.err |
David Symonds | d6d0a39 | 2009-07-08 17:15:18 -0700 | [diff] [blame] | 205 | } |