blob: 14b5bd154ff53a0fc0717779200f99d716ae140b [file] [log] [blame]
Dave Cheney7c8280c2014-02-25 09:47:42 -05001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Small in-memory unzip implementation.
6// A simplified copy of the pre-Go 1 compress/flate/inflate.go
7// and a modified copy of the zip reader in package time.
8// (The one in package time does not support decompression; this one does.)
9
10package syscall
11
12const (
13 maxCodeLen = 16 // max length of Huffman code
14 maxHist = 32768 // max history required
15 maxLit = 286
16 maxDist = 32
17 numCodes = 19 // number of codes in Huffman meta-code
18)
19
20type decompressor struct {
21 in string // compressed input
22 out []byte // uncompressed output
23 b uint32 // input bits, at top of b
24 nb uint
25 err bool // invalid input
26 eof bool // reached EOF
27
28 h1, h2 huffmanDecoder // decoders for literal/length, distance
29 bits [maxLit + maxDist]int // lengths defining Huffman codes
30 codebits [numCodes]int
31}
32
33func (f *decompressor) nextBlock() {
34 for f.nb < 1+2 {
35 if f.moreBits(); f.err {
36 return
37 }
38 }
39 f.eof = f.b&1 == 1
40 f.b >>= 1
41 typ := f.b & 3
42 f.b >>= 2
43 f.nb -= 1 + 2
44 switch typ {
45 case 0:
46 f.dataBlock()
47 case 1:
48 // compressed, fixed Huffman tables
49 f.huffmanBlock(&fixedHuffmanDecoder, nil)
50 case 2:
51 // compressed, dynamic Huffman tables
52 if f.readHuffman(); f.err {
53 break
54 }
55 f.huffmanBlock(&f.h1, &f.h2)
56 default:
57 // 3 is reserved.
58 f.err = true
59 }
60}
61
62// RFC 1951 section 3.2.7.
63// Compression with dynamic Huffman codes
64
65var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
66
67func (f *decompressor) readHuffman() {
68 // HLIT[5], HDIST[5], HCLEN[4].
69 for f.nb < 5+5+4 {
70 if f.moreBits(); f.err {
71 return
72 }
73 }
74 nlit := int(f.b&0x1F) + 257
75 f.b >>= 5
76 ndist := int(f.b&0x1F) + 1
77 f.b >>= 5
78 nclen := int(f.b&0xF) + 4
79 f.b >>= 4
80 f.nb -= 5 + 5 + 4
81
82 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
83 for i := 0; i < nclen; i++ {
84 for f.nb < 3 {
85 if f.moreBits(); f.err {
86 return
87 }
88 }
89 f.codebits[codeOrder[i]] = int(f.b & 0x7)
90 f.b >>= 3
91 f.nb -= 3
92 }
93 for i := nclen; i < len(codeOrder); i++ {
94 f.codebits[codeOrder[i]] = 0
95 }
96 if !f.h1.init(f.codebits[0:]) {
97 f.err = true
98 return
99 }
100
101 // HLIT + 257 code lengths, HDIST + 1 code lengths,
102 // using the code length Huffman code.
103 for i, n := 0, nlit+ndist; i < n; {
104 x := f.huffSym(&f.h1)
105 if f.err {
106 return
107 }
108 if x < 16 {
109 // Actual length.
110 f.bits[i] = x
111 i++
112 continue
113 }
114 // Repeat previous length or zero.
115 var rep int
116 var nb uint
117 var b int
118 switch x {
119 default:
120 f.err = true
121 return
122 case 16:
123 rep = 3
124 nb = 2
125 if i == 0 {
126 f.err = true
127 return
128 }
129 b = f.bits[i-1]
130 case 17:
131 rep = 3
132 nb = 3
133 b = 0
134 case 18:
135 rep = 11
136 nb = 7
137 b = 0
138 }
139 for f.nb < nb {
140 if f.moreBits(); f.err {
141 return
142 }
143 }
144 rep += int(f.b & uint32(1<<nb-1))
145 f.b >>= nb
146 f.nb -= nb
147 if i+rep > n {
148 f.err = true
149 return
150 }
151 for j := 0; j < rep; j++ {
152 f.bits[i] = b
153 i++
154 }
155 }
156
157 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
158 f.err = true
159 return
160 }
161}
162
163// Decode a single Huffman block from f.
164// hl and hd are the Huffman states for the lit/length values
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +0000165// and the distance values, respectively. If hd == nil, using the
Dave Cheney7c8280c2014-02-25 09:47:42 -0500166// fixed distance encoding associated with fixed Huffman blocks.
167func (f *decompressor) huffmanBlock(hl, hd *huffmanDecoder) {
168 for {
169 v := f.huffSym(hl)
170 if f.err {
171 return
172 }
173 var n uint // number of bits extra
174 var length int
175 switch {
176 case v < 256:
177 f.out = append(f.out, byte(v))
178 continue
179 case v == 256:
180 // Done with huffman block; read next block.
181 return
182 // otherwise, reference to older data
183 case v < 265:
184 length = v - (257 - 3)
185 n = 0
186 case v < 269:
187 length = v*2 - (265*2 - 11)
188 n = 1
189 case v < 273:
190 length = v*4 - (269*4 - 19)
191 n = 2
192 case v < 277:
193 length = v*8 - (273*8 - 35)
194 n = 3
195 case v < 281:
196 length = v*16 - (277*16 - 67)
197 n = 4
198 case v < 285:
199 length = v*32 - (281*32 - 131)
200 n = 5
201 default:
202 length = 258
203 n = 0
204 }
205 if n > 0 {
206 for f.nb < n {
207 if f.moreBits(); f.err {
208 return
209 }
210 }
211 length += int(f.b & uint32(1<<n-1))
212 f.b >>= n
213 f.nb -= n
214 }
215
216 var dist int
217 if hd == nil {
218 for f.nb < 5 {
219 if f.moreBits(); f.err {
220 return
221 }
222 }
223 dist = int(reverseByte[(f.b&0x1F)<<3])
224 f.b >>= 5
225 f.nb -= 5
226 } else {
227 if dist = f.huffSym(hd); f.err {
228 return
229 }
230 }
231
232 switch {
233 case dist < 4:
234 dist++
235 case dist >= 30:
236 f.err = true
237 return
238 default:
239 nb := uint(dist-2) >> 1
240 // have 1 bit in bottom of dist, need nb more.
241 extra := (dist & 1) << nb
242 for f.nb < nb {
243 if f.moreBits(); f.err {
244 return
245 }
246 }
247 extra |= int(f.b & uint32(1<<nb-1))
248 f.b >>= nb
249 f.nb -= nb
250 dist = 1<<(nb+1) + 1 + extra
251 }
252
253 // Copy [-dist:-dist+length] into output.
254 // Encoding can be prescient, so no check on length.
255 if dist > len(f.out) {
256 f.err = true
257 return
258 }
259
260 p := len(f.out) - dist
261 for i := 0; i < length; i++ {
262 f.out = append(f.out, f.out[p])
263 p++
264 }
265 }
266}
267
268// Copy a single uncompressed data block from input to output.
269func (f *decompressor) dataBlock() {
270 // Uncompressed.
271 // Discard current half-byte.
272 f.nb = 0
273 f.b = 0
274
275 if len(f.in) < 4 {
276 f.err = true
277 return
278 }
279
280 buf := f.in[:4]
281 f.in = f.in[4:]
282 n := int(buf[0]) | int(buf[1])<<8
283 nn := int(buf[2]) | int(buf[3])<<8
284 if uint16(nn) != uint16(^n) {
285 f.err = true
286 return
287 }
288
289 if len(f.in) < n {
290 f.err = true
291 return
292 }
293 f.out = append(f.out, f.in[:n]...)
294 f.in = f.in[n:]
295}
296
297func (f *decompressor) moreBits() {
298 if len(f.in) == 0 {
299 f.err = true
300 return
301 }
302 c := f.in[0]
303 f.in = f.in[1:]
304 f.b |= uint32(c) << f.nb
305 f.nb += 8
306}
307
308// Read the next Huffman-encoded symbol from f according to h.
309func (f *decompressor) huffSym(h *huffmanDecoder) int {
310 for n := uint(h.min); n <= uint(h.max); n++ {
311 lim := h.limit[n]
312 if lim == -1 {
313 continue
314 }
315 for f.nb < n {
316 if f.moreBits(); f.err {
317 return 0
318 }
319 }
320 v := int(f.b & uint32(1<<n-1))
321 v <<= 16 - n
322 v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
323 if v <= lim {
324 f.b >>= n
325 f.nb -= n
326 return h.codes[v-h.base[n]]
327 }
328 }
329 f.err = true
330 return 0
331}
332
333var reverseByte = [256]byte{
334 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
335 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
336 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
337 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
338 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
339 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
340 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
341 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
342 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
343 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
344 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
345 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
346 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
347 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
348 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
349 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
350 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
351 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
352 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
353 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
354 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
355 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
356 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
357 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
358 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
359 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
360 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
361 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
362 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
363 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
364 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
365 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
366}
367
368// Hard-coded Huffman tables for DEFLATE algorithm.
369// See RFC 1951, section 3.2.6.
370var fixedHuffmanDecoder = huffmanDecoder{
371 7, 9,
372 [maxCodeLen + 1]int{7: 23, 199, 511},
373 [maxCodeLen + 1]int{7: 0, 24, 224},
374 []int{
375 // length 7: 256-279
376 256, 257, 258, 259, 260, 261, 262,
377 263, 264, 265, 266, 267, 268, 269,
378 270, 271, 272, 273, 274, 275, 276,
379 277, 278, 279,
380
381 // length 8: 0-143
382 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
383 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
384 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
385 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
386 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
387 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
388 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
389 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
390 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
391 92, 93, 94, 95, 96, 97, 98, 99, 100,
392 101, 102, 103, 104, 105, 106, 107, 108,
393 109, 110, 111, 112, 113, 114, 115, 116,
394 117, 118, 119, 120, 121, 122, 123, 124,
395 125, 126, 127, 128, 129, 130, 131, 132,
396 133, 134, 135, 136, 137, 138, 139, 140,
397 141, 142, 143,
398
399 // length 8: 280-287
400 280, 281, 282, 283, 284, 285, 286, 287,
401
402 // length 9: 144-255
403 144, 145, 146, 147, 148, 149, 150, 151,
404 152, 153, 154, 155, 156, 157, 158, 159,
405 160, 161, 162, 163, 164, 165, 166, 167,
406 168, 169, 170, 171, 172, 173, 174, 175,
407 176, 177, 178, 179, 180, 181, 182, 183,
408 184, 185, 186, 187, 188, 189, 190, 191,
409 192, 193, 194, 195, 196, 197, 198, 199,
410 200, 201, 202, 203, 204, 205, 206, 207,
411 208, 209, 210, 211, 212, 213, 214, 215,
412 216, 217, 218, 219, 220, 221, 222, 223,
413 224, 225, 226, 227, 228, 229, 230, 231,
414 232, 233, 234, 235, 236, 237, 238, 239,
415 240, 241, 242, 243, 244, 245, 246, 247,
416 248, 249, 250, 251, 252, 253, 254, 255,
417 },
418}
419
420// Huffman decoder is based on
421// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
422// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
423type huffmanDecoder struct {
424 // min, max code length
425 min, max int
426
427 // limit[i] = largest code word of length i
428 // Given code v of length n,
429 // need more bits if v > limit[n].
430 limit [maxCodeLen + 1]int
431
432 // base[i] = smallest code word of length i - seq number
433 base [maxCodeLen + 1]int
434
435 // codes[seq number] = output code.
436 // Given code v of length n, value is
437 // codes[v - base[n]].
438 codes []int
439}
440
441// Initialize Huffman decoding tables from array of code lengths.
442func (h *huffmanDecoder) init(bits []int) bool {
443 // Count number of codes of each length,
444 // compute min and max length.
445 var count [maxCodeLen + 1]int
446 var min, max int
447 for _, n := range bits {
448 if n == 0 {
449 continue
450 }
451 if min == 0 || n < min {
452 min = n
453 }
454 if n > max {
455 max = n
456 }
457 count[n]++
458 }
459 if max == 0 {
460 return false
461 }
462
463 h.min = min
464 h.max = max
465
466 // For each code range, compute
467 // nextcode (first code of that length),
468 // limit (last code of that length), and
469 // base (offset from first code to sequence number).
470 code := 0
471 seq := 0
472 var nextcode [maxCodeLen]int
473 for i := min; i <= max; i++ {
474 n := count[i]
475 nextcode[i] = code
476 h.base[i] = code - seq
477 code += n
478 seq += n
479 h.limit[i] = code - 1
480 code <<= 1
481 }
482
483 // Make array mapping sequence numbers to codes.
484 if len(h.codes) < len(bits) {
485 h.codes = make([]int, len(bits))
486 }
487 for i, n := range bits {
488 if n == 0 {
489 continue
490 }
491 code := nextcode[n]
492 nextcode[n]++
493 seq := code - h.base[n]
494 h.codes[seq] = i
495 }
496 return true
497}
498
499func inflate(in string) (out []byte) {
500 var d decompressor
501 d.in = in
502 for !d.err && !d.eof {
503 d.nextBlock()
504 }
505 if len(d.in) != 0 {
506 println("fs unzip: junk at end of compressed data")
507 return nil
508 }
509 return d.out
510}
511
512// get4 returns the little-endian 32-bit value in b.
513func zget4(b string) int {
514 if len(b) < 4 {
515 return 0
516 }
517 return int(b[0]) | int(b[1])<<8 | int(b[2])<<16 | int(b[3])<<24
518}
519
520// get2 returns the little-endian 16-bit value in b.
521func zget2(b string) int {
522 if len(b) < 2 {
523 return 0
524 }
525 return int(b[0]) | int(b[1])<<8
526}
527
528func unzip(data string) {
529 const (
530 zecheader = 0x06054b50
531 zcheader = 0x02014b50
532 ztailsize = 22
533 zheadersize = 30
534 zheader = 0x04034b50
535 )
536
537 buf := data[len(data)-ztailsize:]
538 n := zget2(buf[10:])
539 size := zget4(buf[12:])
540 off := zget4(buf[16:])
541
542 hdr := data[off : off+size]
543 for i := 0; i < n; i++ {
544 // zip entry layout:
545 // 0 magic[4]
546 // 4 madevers[1]
547 // 5 madeos[1]
548 // 6 extvers[1]
549 // 7 extos[1]
550 // 8 flags[2]
551 // 10 meth[2]
552 // 12 modtime[2]
553 // 14 moddate[2]
554 // 16 crc[4]
555 // 20 csize[4]
556 // 24 uncsize[4]
557 // 28 namelen[2]
558 // 30 xlen[2]
559 // 32 fclen[2]
560 // 34 disknum[2]
561 // 36 iattr[2]
562 // 38 eattr[4]
563 // 42 off[4]
564 // 46 name[namelen]
565 // 46+namelen+xlen+fclen - next header
566 //
567 if zget4(hdr) != zcheader {
568 println("fs unzip: bad magic")
569 break
570 }
571 meth := zget2(hdr[10:])
572 mtime := zget2(hdr[12:])
573 mdate := zget2(hdr[14:])
574 csize := zget4(hdr[20:])
575 size := zget4(hdr[24:])
576 namelen := zget2(hdr[28:])
577 xlen := zget2(hdr[30:])
578 fclen := zget2(hdr[32:])
579 xattr := uint32(zget4(hdr[38:])) >> 16
580 off := zget4(hdr[42:])
581 name := hdr[46 : 46+namelen]
582 hdr = hdr[46+namelen+xlen+fclen:]
583
584 // zip per-file header layout:
585 // 0 magic[4]
586 // 4 extvers[1]
587 // 5 extos[1]
588 // 6 flags[2]
589 // 8 meth[2]
590 // 10 modtime[2]
591 // 12 moddate[2]
592 // 14 crc[4]
593 // 18 csize[4]
594 // 22 uncsize[4]
595 // 26 namelen[2]
596 // 28 xlen[2]
597 // 30 name[namelen]
598 // 30+namelen+xlen - file data
599 //
600 buf := data[off : off+zheadersize+namelen]
601 if zget4(buf) != zheader ||
602 zget2(buf[8:]) != meth ||
603 zget2(buf[26:]) != namelen ||
604 buf[30:30+namelen] != name {
605 println("fs unzip: inconsistent zip file")
606 return
607 }
608 xlen = zget2(buf[28:])
609
610 off += zheadersize + namelen + xlen
611
612 var fdata []byte
613 switch meth {
614 case 0:
615 // buf is uncompressed
616 buf = data[off : off+size]
617 fdata = []byte(buf)
618 case 8:
619 // buf is deflate-compressed
620 buf = data[off : off+csize]
621 fdata = inflate(buf)
622 if len(fdata) != size {
623 println("fs unzip: inconsistent size in zip file")
624 return
625 }
626 }
627
628 if xattr&S_IFMT == 0 {
629 if xattr&0777 == 0 {
630 xattr |= 0666
631 }
632 if len(name) > 0 && name[len(name)-1] == '/' {
633 xattr |= S_IFDIR
634 xattr |= 0111
635 } else {
636 xattr |= S_IFREG
637 }
638 }
639
640 if err := create(name, xattr, zipToTime(mdate, mtime), fdata); err != nil {
641 print("fs unzip: create ", name, ": ", err.Error(), "\n")
642 }
643 }
644
645 chdirEnv()
646}
647
648func zipToTime(date, time int) int64 {
649 dd := date & 0x1f
650 mm := date >> 5 & 0xf
651 yy := date >> 9 // since 1980
652
653 sec := int64(315532800) // jan 1 1980
654 sec += int64(yy) * 365 * 86400
655 sec += int64(yy) / 4 * 86400
656 if yy%4 > 0 || mm >= 3 {
657 sec += 86400
658 }
659 sec += int64(daysBeforeMonth[mm]) * 86400
660 sec += int64(dd-1) * 86400
661
662 h := time >> 11
663 m := time >> 5 & 0x3F
664 s := time & 0x1f * 2
665 sec += int64(h*3600 + m*60 + s)
666
667 return sec
668}
669
670var daysBeforeMonth = [...]int32{
671 0,
672 0,
673 31,
674 31 + 28,
675 31 + 28 + 31,
676 31 + 28 + 31 + 30,
677 31 + 28 + 31 + 30 + 31,
678 31 + 28 + 31 + 30 + 31 + 30,
679 31 + 28 + 31 + 30 + 31 + 30 + 31,
680 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31,
681 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30,
682 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31,
683 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30,
684 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31,
685}