archive/tar: automatically promote TypeRegA

Change Reader to promote TypeRegA to TypeReg in headers, unless their
name have a trailing slash which is already promoted to TypeDir. This
will allow client code to handle just TypeReg instead both TypeReg and
TypeRegA.

Change Writer to promote TypeRegA to TypeReg or TypeDir in the headers
depending on whether the name has a trailing slash. This normalization
is motivated by the specification (in pax(1)):

   0 represents a regular file. For backwards-compatibility, a
   typeflag value of binary zero ( '\0' ) should be recognized as
   meaning a regular file when extracting files from the
   archive. Archives written with this version of the archive file
   format create regular files with a typeflag value of the
   ISO/IEC 646:1991 standard IRV '0'.

Fixes #22768.

Change-Id: I149ec55824580d446cdde5a0d7a0457ad7b03466
Reviewed-on: https://go-review.googlesource.com/85656
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go
index 4a2c173..89d1f38 100644
--- a/src/archive/tar/common.go
+++ b/src/archive/tar/common.go
@@ -56,7 +56,7 @@
 const (
 	// Type '0' indicates a regular file.
 	TypeReg  = '0'
-	TypeRegA = '\x00' // For legacy support; use TypeReg instead
+	TypeRegA = '\x00' // Deprecated: Use TypeReg instead.
 
 	// Type '1' to '6' are header-only flags and may not have a data body.
 	TypeLink    = '1' // Hard link
@@ -138,7 +138,10 @@
 // should do so by creating a new Header and copying the fields
 // that they are interested in preserving.
 type Header struct {
-	Typeflag byte // Type of header entry (should be TypeReg for most files)
+	// Typeflag is the type of header entry.
+	// The zero value is automatically promoted to either TypeReg or TypeDir
+	// depending on the presence of a trailing slash in Name.
+	Typeflag byte
 
 	Name     string // Name of file entry
 	Linkname string // Target name of link (valid for TypeLink or TypeSymlink)
diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go
index f4eeb55..6025e82 100644
--- a/src/archive/tar/reader.go
+++ b/src/archive/tar/reader.go
@@ -131,8 +131,12 @@
 			if gnuLongLink != "" {
 				hdr.Linkname = gnuLongLink
 			}
-			if hdr.Typeflag == TypeRegA && strings.HasSuffix(hdr.Name, "/") {
-				hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
+			if hdr.Typeflag == TypeRegA {
+				if strings.HasSuffix(hdr.Name, "/") {
+					hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
+				} else {
+					hdr.Typeflag = TypeReg
+				}
 			}
 
 			// The extended headers may have updated the size.
diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go
index a6832d3..0fc29ea 100644
--- a/src/archive/tar/reader_test.go
+++ b/src/archive/tar/reader_test.go
@@ -189,7 +189,7 @@
 			Gid:      5000,
 			Size:     5,
 			ModTime:  time.Unix(1244593104, 0),
-			Typeflag: '\x00',
+			Typeflag: '0',
 		}, {
 			Name:     "small2.txt",
 			Mode:     0444,
@@ -197,7 +197,7 @@
 			Gid:      5000,
 			Size:     11,
 			ModTime:  time.Unix(1244593104, 0),
-			Typeflag: '\x00',
+			Typeflag: '0',
 		}},
 	}, {
 		file: "testdata/pax.tar",
@@ -534,9 +534,10 @@
 		// a buggy pre-Go1.8 tar.Writer.
 		file: "testdata/invalid-go17.tar",
 		headers: []*Header{{
-			Name:    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
-			Uid:     010000000,
-			ModTime: time.Unix(0, 0),
+			Name:     "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
+			Uid:      010000000,
+			ModTime:  time.Unix(0, 0),
+			Typeflag: '0',
 		}},
 	}, {
 		// USTAR archive with a regular entry with non-zero device numbers.
diff --git a/src/archive/tar/tar_test.go b/src/archive/tar/tar_test.go
index af80d6e..2676853 100644
--- a/src/archive/tar/tar_test.go
+++ b/src/archive/tar/tar_test.go
@@ -306,6 +306,7 @@
 		ModTime:    time.Now().Round(time.Second),
 		PAXRecords: map[string]string{"uid": "2097152"},
 		Format:     FormatPAX,
+		Typeflag:   TypeReg,
 	}
 	if err := tw.WriteHeader(hdr); err != nil {
 		t.Fatalf("tw.WriteHeader: %v", err)
diff --git a/src/archive/tar/testdata/file-and-dir.tar b/src/archive/tar/testdata/file-and-dir.tar
new file mode 100644
index 0000000..c18d428
--- /dev/null
+++ b/src/archive/tar/testdata/file-and-dir.tar
Binary files differ
diff --git a/src/archive/tar/testdata/trailing-slash.tar b/src/archive/tar/testdata/trailing-slash.tar
index bf1b2ec..93718b3 100644
--- a/src/archive/tar/testdata/trailing-slash.tar
+++ b/src/archive/tar/testdata/trailing-slash.tar
Binary files differ
diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go
index 97d23f8..d6f6931 100644
--- a/src/archive/tar/writer.go
+++ b/src/archive/tar/writer.go
@@ -71,6 +71,16 @@
 	}
 	tw.hdr = *hdr // Shallow copy of Header
 
+	// Avoid usage of the legacy TypeRegA flag, and automatically promote
+	// it to use TypeReg or TypeDir.
+	if tw.hdr.Typeflag == TypeRegA {
+		if strings.HasSuffix(tw.hdr.Name, "/") {
+			tw.hdr.Typeflag = TypeDir
+		} else {
+			tw.hdr.Typeflag = TypeReg
+		}
+	}
+
 	// Round ModTime and ignore AccessTime and ChangeTime unless
 	// the format is explicitly chosen.
 	// This ensures nominal usage of WriteHeader (without specifying the format)
diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go
index 24e8da2..30556d2 100644
--- a/src/archive/tar/writer_test.go
+++ b/src/archive/tar/writer_test.go
@@ -461,6 +461,15 @@
 			testHeader{Header{Name: strings.Repeat("123456789/", 30)}, nil},
 			testClose{nil},
 		},
+	}, {
+		// Automatically promote zero value of Typeflag depending on the name.
+		file: "testdata/file-and-dir.tar",
+		tests: []testFnc{
+			testHeader{Header{Name: "small.txt", Size: 5}, nil},
+			testWrite{"Kilts", 5, nil},
+			testHeader{Header{Name: "dir/"}, nil},
+			testClose{nil},
+		},
 	}}
 
 	equalError := func(x, y error) bool {
@@ -809,8 +818,8 @@
 		if err != nil {
 			t.Fatalf("Failed to read header: %s", err)
 		}
-		if header.Typeflag != 0 {
-			t.Fatalf("Typeflag should've been 0, found %d", header.Typeflag)
+		if header.Typeflag != TypeReg {
+			t.Fatalf("Typeflag should've been %d, found %d", TypeReg, header.Typeflag)
 		}
 	}
 }