archive/zip: handle pre-zip64 zip files containing 2³²-1-byte content

This corrects a regression from Go 1.5 introduced by CL 18317.

Fixes #14185.

Change-Id: Ic3215714846d9f28809cd04e3eb3664b599244f4
Reviewed-on: https://go-review.googlesource.com/19151
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
index 84a9d41..10e8172 100644
--- a/src/archive/zip/reader.go
+++ b/src/archive/zip/reader.go
@@ -330,7 +330,17 @@
 		}
 	}
 
-	if needUSize || needCSize || needHeaderOffset {
+	// Assume that uncompressed size 2³²-1 could plausibly happen in
+	// an old zip32 file that was sharding inputs into the largest chunks
+	// possible (or is just malicious; search the web for 42.zip).
+	// If needUSize is true still, it means we didn't see a zip64 extension.
+	// As long as the compressed size is not also 2³²-1 (implausible)
+	// and the header is not also 2³²-1 (equally implausible),
+	// accept the uncompressed size 2³²-1 as valid.
+	// If nothing else, this keeps archive/zip working with 42.zip.
+	_ = needUSize
+
+	if needCSize || needHeaderOffset {
 		return ErrFormat
 	}
 
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index 8f7e8bf..72cf5d9 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -27,12 +27,24 @@
 }
 
 type ZipTestFile struct {
-	Name       string
-	Content    []byte // if blank, will attempt to compare against File
+	Name  string
+	Mode  os.FileMode
+	Mtime string // optional, modified time in format "mm-dd-yy hh:mm:ss"
+
+	// Information describing expected zip file content.
+	// First, reading the entire content should produce the error ContentErr.
+	// Second, if ContentErr==nil, the content should match Content.
+	// If content is large, an alternative to setting Content is to set File,
+	// which names a file in the testdata/ directory containing the
+	// uncompressed expected content.
+	// If content is very large, an alternative to setting Content or File
+	// is to set Size, which will then be checked against the header-reported size
+	// but will bypass the decompressing of the actual data.
+	// This last option is used for testing very large (multi-GB) compressed files.
 	ContentErr error
-	File       string // name of file to compare to (relative to testdata/)
-	Mtime      string // modified time in format "mm-dd-yy hh:mm:ss"
-	Mode       os.FileMode
+	Content    []byte
+	File       string
+	Size       uint64
 }
 
 // Caution: The Mtime values found for the test files should correspond to
@@ -248,6 +260,19 @@
 			},
 		},
 	},
+	// Largest possible non-zip64 file, with no zip64 header.
+	{
+		Name:   "big.zip",
+		Source: returnBigZipBytes,
+		File: []ZipTestFile{
+			{
+				Name:    "big.file",
+				Content: nil,
+				Size:    1<<32 - 1,
+				Mode:    0666,
+			},
+		},
+	},
 }
 
 var crossPlatform = []ZipTestFile{
@@ -356,13 +381,31 @@
 
 	testFileMode(t, zt.Name, f, ft.Mode)
 
-	var b bytes.Buffer
+	size := uint64(f.UncompressedSize)
+	if size == uint32max {
+		size = f.UncompressedSize64
+	} else if size != f.UncompressedSize64 {
+		t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64)
+	}
+
 	r, err := f.Open()
 	if err != nil {
 		t.Errorf("%s: %v", zt.Name, err)
 		return
 	}
 
+	// For very large files, just check that the size is correct.
+	// The content is expected to be all zeros.
+	// Don't bother uncompressing: too big.
+	if ft.Content == nil && ft.File == "" && ft.Size > 0 {
+		if size != ft.Size {
+			t.Errorf("%v: uncompressed size %#x, want %#x", size, ft.Size)
+		}
+		r.Close()
+		return
+	}
+
+	var b bytes.Buffer
 	_, err = io.Copy(&b, r)
 	if err != ft.ContentErr {
 		t.Errorf("%s: copying contents: %v (want %v)", zt.Name, err, ft.ContentErr)
@@ -372,10 +415,6 @@
 	}
 	r.Close()
 
-	size := uint64(f.UncompressedSize)
-	if size == uint32max {
-		size = f.UncompressedSize64
-	}
 	if g := uint64(b.Len()); g != size {
 		t.Errorf("%v: read %v bytes but f.UncompressedSize == %v", f.Name, g, size)
 	}
@@ -510,6 +549,182 @@
 	return bytes.NewReader(b), int64(len(b))
 }
 
+// biggestZipBytes returns the bytes of a zip file biggest.zip
+// that contains a zip file bigger.zip that contains a zip file
+// big.zip that contains big.file, which contains 2³²-1 zeros.
+// The big.zip file is interesting because it has no zip64 header,
+// much like the innermost zip files in the well-known 42.zip.
+//
+// biggest.zip was generated by changing isZip64 to use > uint32max
+// instead of >= uint32max and then running this program:
+//
+//	package main
+//
+//	import (
+//		"archive/zip"
+//		"bytes"
+//		"io"
+//		"io/ioutil"
+//		"log"
+//	)
+//
+//	type zeros struct{}
+//
+//	func (zeros) Read(b []byte) (int, error) {
+//		for i := range b {
+//			b[i] = 0
+//		}
+//		return len(b), nil
+//	}
+//
+//	func main() {
+//		bigZip := makeZip("big.file", io.LimitReader(zeros{}, 1<<32-1))
+//		if err := ioutil.WriteFile("/tmp/big.zip", bigZip, 0666); err != nil {
+//			log.Fatal(err)
+//		}
+//
+//		biggerZip := makeZip("big.zip", bytes.NewReader(bigZip))
+//		if err := ioutil.WriteFile("/tmp/bigger.zip", biggerZip, 0666); err != nil {
+//			log.Fatal(err)
+//		}
+//
+//		biggestZip := makeZip("bigger.zip", bytes.NewReader(biggerZip))
+//		if err := ioutil.WriteFile("/tmp/biggest.zip", biggestZip, 0666); err != nil {
+//			log.Fatal(err)
+//		}
+//	}
+//
+//	func makeZip(name string, r io.Reader) []byte {
+//		var buf bytes.Buffer
+//		w := zip.NewWriter(&buf)
+//		wf, err := w.Create(name)
+//		if err != nil {
+//			log.Fatal(err)
+//		}
+//		if _, err = io.Copy(wf, r); err != nil {
+//			log.Fatal(err)
+//		}
+//		if err := w.Close(); err != nil {
+//			log.Fatal(err)
+//		}
+//		return buf.Bytes()
+//	}
+//
+// The 4 GB of zeros compresses to 4 MB, which compresses to 20 kB,
+// which compresses to 1252 bytes (in the hex dump below).
+//
+// It's here in hex for the same reason as rZipBytes above: to avoid
+// problems with on-disk virus scanners or other zip processors.
+//
+func biggestZipBytes() []byte {
+	s := `
+0000000 50 4b 03 04 14 00 08 00 08 00 00 00 00 00 00 00
+0000010 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 62 69
+0000020 67 67 65 72 2e 7a 69 70 ec dc 6b 4c 53 67 18 07
+0000030 f0 16 c5 ca 65 2e cb b8 94 20 61 1f 44 33 c7 cd
+0000040 c0 86 4a b5 c0 62 8a 61 05 c6 cd 91 b2 54 8c 1b
+0000050 63 8b 03 9c 1b 95 52 5a e3 a0 19 6c b2 05 59 44
+0000060 64 9d 73 83 71 11 46 61 14 b9 1d 14 09 4a c3 60
+0000070 2e 4c 6e a5 60 45 02 62 81 95 b6 94 9e 9e 77 e7
+0000080 d0 43 b6 f8 71 df 96 3c e7 a4 69 ce bf cf e9 79
+0000090 ce ef 79 3f bf f1 31 db b6 bb 31 76 92 e7 f3 07
+00000a0 8b fc 9c ca cc 08 cc cb cc 5e d2 1c 88 d9 7e bb
+00000b0 4f bb 3a 3f 75 f1 5d 7f 8f c2 68 67 77 8f 25 ff
+00000c0 84 e2 93 2d ef a4 95 3d 71 4e 2c b9 b0 87 c3 be
+00000d0 3d f8 a7 60 24 61 c5 ef ae 9e c8 6c 6d 4e 69 c8
+00000e0 67 65 34 f8 37 76 2d 76 5c 54 f3 95 65 49 c7 0f
+00000f0 18 71 4b 7e 5b 6a d1 79 47 61 41 b0 4e 2a 74 45
+0000100 43 58 12 b2 5a a5 c6 7d 68 55 88 d4 98 75 18 6d
+0000110 08 d1 1f 8f 5a 9e 96 ee 45 cf a4 84 4e 4b e8 50
+0000120 a7 13 d9 06 de 52 81 97 36 b2 d7 b8 fc 2b 5f 55
+0000130 23 1f 32 59 cf 30 27 fb e2 8a b9 de 45 dd 63 9c
+0000140 4b b5 8b 96 4c 7a 62 62 cc a1 a7 cf fa f1 fe dd
+0000150 54 62 11 bf 36 78 b3 c7 b1 b5 f2 61 4d 4e dd 66
+0000160 32 2e e6 70 34 5f f4 c9 e6 6c 43 6f da 6b c6 c3
+0000170 09 2c ce 09 57 7f d2 7e b4 23 ba 7c 1b 99 bc 22
+0000180 3e f1 de 91 2f e3 9c 1b 82 cc c2 84 39 aa e6 de
+0000190 b4 69 fc cc cb 72 a6 61 45 f0 d3 1d 26 19 7c 8d
+00001a0 29 c8 66 02 be 77 6a f9 3d 34 79 17 19 c8 96 24
+00001b0 a3 ac e4 dd 3b 1a 8e c6 fe 96 38 6b bf 67 5a 23
+00001c0 f4 16 f4 e6 8a b4 fc c2 cd bf 95 66 1d bb 35 aa
+00001d0 92 7d 66 d8 08 8d a5 1f 54 2a af 09 cf 61 ff d2
+00001e0 85 9d 8f b6 d7 88 07 4a 86 03 db 64 f3 d9 92 73
+00001f0 df ec a7 fc 23 4c 8d 83 79 63 2a d9 fd 8d b3 c8
+0000200 8f 7e d4 19 85 e6 8d 1c 76 f0 8b 58 32 fd 9a d6
+0000210 85 e2 48 ad c3 d5 60 6f 7e 22 dd ef 09 49 7c 7f
+0000220 3a 45 c3 71 b7 df f3 4c 63 fb b5 d9 31 5f 6e d6
+0000230 24 1d a4 4a fe 32 a7 5c 16 48 5c 3e 08 6b 8a d3
+0000240 25 1d a2 12 a5 59 24 ea 20 5f 52 6d ad 94 db 6b
+0000250 94 b9 5d eb 4b a7 5c 44 bb 1e f2 3c 6b cf 52 c9
+0000260 e9 e5 ba 06 b9 c4 e5 0a d0 00 0d d0 00 0d d0 00
+0000270 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d
+0000280 d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0
+0000290 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00
+00002a0 0d d0 00 cd ff 9e 46 86 fa a7 7d 3a 43 d7 8e 10
+00002b0 52 e9 be e6 6e cf eb 9e 85 4d 65 ce cc 30 c1 44
+00002c0 c0 4e af bc 9c 6c 4b a0 d7 54 ff 1d d5 5c 89 fb
+00002d0 b5 34 7e c4 c2 9e f5 a0 f6 5b 7e 6e ca 73 c7 ef
+00002e0 5d be de f9 e8 81 eb a5 0a a5 63 54 2c d7 1c d1
+00002f0 89 17 85 f8 16 94 f2 8a b2 a3 f5 b6 6d df 75 cd
+0000300 90 dd 64 bd 5d 55 4e f2 55 19 1b b7 cc ef 1b ea
+0000310 2e 05 9c f4 aa 1e a8 cd a6 82 c7 59 0f 5e 9d e0
+0000320 bb fc 6c d6 99 23 eb 36 ad c6 c5 e1 d8 e1 e2 3e
+0000330 d9 90 5a f7 91 5d 6f bc 33 6d 98 47 d2 7c 2e 2f
+0000340 99 a4 25 72 85 49 2c be 0b 5b af 8f e5 6e 81 a6
+0000350 a3 5a 6f 39 53 3a ab 7a 8b 1e 26 f7 46 6c 7d 26
+0000360 53 b3 22 31 94 d3 83 f2 18 4d f5 92 33 27 53 97
+0000370 0f d3 e6 55 9c a6 c5 31 87 6f d3 f3 ae 39 6f 56
+0000380 10 7b ab 7e d0 b4 ca f2 b8 05 be 3f 0e 6e 5a 75
+0000390 ab 0c f5 37 0e ba 8e 75 71 7a aa ed 7a dd 6a 63
+00003a0 be 9b a0 97 27 6a 6f e7 d3 8b c4 7c ec d3 91 56
+00003b0 d9 ac 5e bf 16 42 2f 00 1f 93 a2 23 87 bd e2 59
+00003c0 a0 de 1a 66 c8 62 eb 55 8f 91 17 b4 61 42 7a 50
+00003d0 40 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40
+00003e0 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 03
+00003f0 34 40 03 34 40 03 34 ff 85 86 90 8b ea 67 90 0d
+0000400 e1 42 1b d2 61 d6 79 ec fd 3e 44 28 a4 51 6c 5c
+0000410 fc d2 72 ca ba 82 18 46 16 61 cd 93 a9 0f d1 24
+0000420 17 99 e2 2c 71 16 84 0c c8 7a 13 0f 9a 5e c5 f0
+0000430 79 64 e2 12 4d c8 82 a1 81 19 2d aa 44 6d 87 54
+0000440 84 71 c1 f6 d4 ca 25 8c 77 b9 08 c7 c8 5e 10 8a
+0000450 8f 61 ed 8c ba 30 1f 79 9a c7 60 34 2b b9 8c f8
+0000460 18 a6 83 1b e3 9f ad 79 fe fd 1b 8b f1 fc 41 6f
+0000470 d4 13 1f e3 b8 83 ba 64 92 e7 eb e4 77 05 8f ba
+0000480 fa 3b 00 00 ff ff 50 4b 07 08 a6 18 b1 91 5e 04
+0000490 00 00 e4 47 00 00 50 4b 01 02 14 00 14 00 08 00
+00004a0 08 00 00 00 00 00 a6 18 b1 91 5e 04 00 00 e4 47
+00004b0 00 00 0a 00 00 00 00 00 00 00 00 00 00 00 00 00
+00004c0 00 00 00 00 62 69 67 67 65 72 2e 7a 69 70 50 4b
+00004d0 05 06 00 00 00 00 01 00 01 00 38 00 00 00 96 04
+00004e0 00 00 00 00`
+	s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "")
+	s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "")
+	b, err := hex.DecodeString(s)
+	if err != nil {
+		panic(err)
+	}
+	return b
+}
+
+func returnBigZipBytes() (r io.ReaderAt, size int64) {
+	b := biggestZipBytes()
+	for i := 0; i < 2; i++ {
+		r, err := NewReader(bytes.NewReader(b), int64(len(b)))
+		if err != nil {
+			panic(err)
+		}
+		f, err := r.File[0].Open()
+		if err != nil {
+			panic(err)
+		}
+		b, err = ioutil.ReadAll(f)
+		if err != nil {
+			panic(err)
+		}
+	}
+	return bytes.NewReader(b), int64(len(b))
+}
+
 func TestIssue8186(t *testing.T) {
 	// Directory headers & data found in the TOC of a JAR file.
 	dirEnts := []string{