[release-branch.go1.20] path/filepath: fix various issues in parsing Windows paths

On Windows, A root local device path is a path which begins with
\\?\ or \??\.  A root local device path accesses the DosDevices
object directory, and permits access to any file or device on the
system. For example \??\C:\foo is equivalent to common C:\foo.

The Clean, IsAbs, IsLocal, and VolumeName functions did not
recognize root local device paths beginning with \??\.

Clean could convert a rooted path such as \a\..\??\b into
the root local device path \??\b. It will now convert this
path into .\??\b.

IsAbs now correctly reports paths beginning with \??\
as absolute.

IsLocal now correctly reports paths beginning with \??\
as non-local.

VolumeName now reports the \??\ prefix as a volume name.

Join(`\`, `??`, `b`) could convert a seemingly innocent
sequence of path elements into the root local device path
\??\b. It will now convert this to \.\??\b.

In addition, the IsLocal function did not correctly
detect reserved names in some cases:

  - reserved names followed by spaces, such as "COM1 ".
  - "COM" or "LPT" followed by a superscript 1, 2, or 3.

IsLocal now correctly reports these names as non-local.

For #63713
Fixes #63714
Fixes CVE-2023-45283
Fixes CVE-2023-45284

Change-Id: I446674a58977adfa54de7267d716ac23ab496c54
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/2040691
Reviewed-by: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Run-TryBot: Damien Neil <dneil@google.com>
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/2072597
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/539276
Auto-Submit: Heschi Kreinick <heschi@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index 3e8fb9e..1ec6ee7 100644
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -153,7 +153,7 @@
 
 	unicode, fmt !< net, os, os/signal;
 
-	os/signal, STR
+	os/signal, internal/safefilepath, STR
 	< path/filepath
 	< io/ioutil;
 
diff --git a/src/internal/safefilepath/path_windows.go b/src/internal/safefilepath/path_windows.go
index 909c150..7cfd6ce 100644
--- a/src/internal/safefilepath/path_windows.go
+++ b/src/internal/safefilepath/path_windows.go
@@ -20,15 +20,10 @@
 	for p := path; p != ""; {
 		// Find the next path element.
 		i := 0
-		dot := -1
 		for i < len(p) && p[i] != '/' {
 			switch p[i] {
 			case 0, '\\', ':':
 				return "", errInvalidPath
-			case '.':
-				if dot < 0 {
-					dot = i
-				}
 			}
 			i++
 		}
@@ -39,22 +34,8 @@
 		} else {
 			p = ""
 		}
-		// Trim the extension and look for a reserved name.
-		base := part
-		if dot >= 0 {
-			base = part[:dot]
-		}
-		if isReservedName(base) {
-			if dot < 0 {
-				return "", errInvalidPath
-			}
-			// The path element is a reserved name with an extension.
-			// Some Windows versions consider this a reserved name,
-			// while others do not. Use FullPath to see if the name is
-			// reserved.
-			if p, _ := syscall.FullPath(part); len(p) >= 4 && p[:4] == `\\.\` {
-				return "", errInvalidPath
-			}
+		if IsReservedName(part) {
+			return "", errInvalidPath
 		}
 	}
 	if containsSlash {
@@ -70,23 +51,88 @@
 	return path, nil
 }
 
-// isReservedName reports if name is a Windows reserved device name.
+// IsReservedName reports if name is a Windows reserved device name.
 // It does not detect names with an extension, which are also reserved on some Windows versions.
 //
 // For details, search for PRN in
 // https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file.
-func isReservedName(name string) bool {
-	if 3 <= len(name) && len(name) <= 4 {
-		switch string([]byte{toUpper(name[0]), toUpper(name[1]), toUpper(name[2])}) {
-		case "CON", "PRN", "AUX", "NUL":
-			return len(name) == 3
-		case "COM", "LPT":
-			return len(name) == 4 && '1' <= name[3] && name[3] <= '9'
+func IsReservedName(name string) bool {
+	// Device names can have arbitrary trailing characters following a dot or colon.
+	base := name
+	for i := 0; i < len(base); i++ {
+		switch base[i] {
+		case ':', '.':
+			base = base[:i]
 		}
 	}
+	// Trailing spaces in the last path element are ignored.
+	for len(base) > 0 && base[len(base)-1] == ' ' {
+		base = base[:len(base)-1]
+	}
+	if !isReservedBaseName(base) {
+		return false
+	}
+	if len(base) == len(name) {
+		return true
+	}
+	// The path element is a reserved name with an extension.
+	// Some Windows versions consider this a reserved name,
+	// while others do not. Use FullPath to see if the name is
+	// reserved.
+	if p, _ := syscall.FullPath(name); len(p) >= 4 && p[:4] == `\\.\` {
+		return true
+	}
 	return false
 }
 
+func isReservedBaseName(name string) bool {
+	if len(name) == 3 {
+		switch string([]byte{toUpper(name[0]), toUpper(name[1]), toUpper(name[2])}) {
+		case "CON", "PRN", "AUX", "NUL":
+			return true
+		}
+	}
+	if len(name) >= 4 {
+		switch string([]byte{toUpper(name[0]), toUpper(name[1]), toUpper(name[2])}) {
+		case "COM", "LPT":
+			if len(name) == 4 && '1' <= name[3] && name[3] <= '9' {
+				return true
+			}
+			// Superscript ¹, ², and ³ are considered numbers as well.
+			switch name[3:] {
+			case "\u00b2", "\u00b3", "\u00b9":
+				return true
+			}
+			return false
+		}
+	}
+
+	// Passing CONIN$ or CONOUT$ to CreateFile opens a console handle.
+	// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea#consoles
+	//
+	// While CONIN$ and CONOUT$ aren't documented as being files,
+	// they behave the same as CON. For example, ./CONIN$ also opens the console input.
+	if len(name) == 6 && name[5] == '$' && equalFold(name, "CONIN$") {
+		return true
+	}
+	if len(name) == 7 && name[6] == '$' && equalFold(name, "CONOUT$") {
+		return true
+	}
+	return false
+}
+
+func equalFold(a, b string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := 0; i < len(a); i++ {
+		if toUpper(a[i]) != toUpper(b[i]) {
+			return false
+		}
+	}
+	return true
+}
+
 func toUpper(c byte) byte {
 	if 'a' <= c && c <= 'z' {
 		return c - ('a' - 'A')
diff --git a/src/path/filepath/path.go b/src/path/filepath/path.go
index 326ca00..485ef4e 100644
--- a/src/path/filepath/path.go
+++ b/src/path/filepath/path.go
@@ -15,7 +15,6 @@
 	"errors"
 	"io/fs"
 	"os"
-	"runtime"
 	"sort"
 	"strings"
 )
@@ -163,21 +162,7 @@
 		out.append('.')
 	}
 
-	if runtime.GOOS == "windows" && out.volLen == 0 && out.buf != nil {
-		// If a ':' appears in the path element at the start of a Windows path,
-		// insert a .\ at the beginning to avoid converting relative paths
-		// like a/../c: into c:.
-		for _, c := range out.buf {
-			if os.IsPathSeparator(c) {
-				break
-			}
-			if c == ':' {
-				out.prepend('.', Separator)
-				break
-			}
-		}
-	}
-
+	postClean(&out) // avoid creating absolute paths on Windows
 	return FromSlash(out.string())
 }
 
diff --git a/src/path/filepath/path_nonwindows.go b/src/path/filepath/path_nonwindows.go
new file mode 100644
index 0000000..db69f02
--- /dev/null
+++ b/src/path/filepath/path_nonwindows.go
@@ -0,0 +1,9 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !windows
+
+package filepath
+
+func postClean(out *lazybuf) {}
diff --git a/src/path/filepath/path_test.go b/src/path/filepath/path_test.go
index 1a2e53a..0a22866 100644
--- a/src/path/filepath/path_test.go
+++ b/src/path/filepath/path_test.go
@@ -115,6 +115,9 @@
 	{`a/../c:/a`, `.\c:\a`},
 	{`a/../../c:`, `..\c:`},
 	{`foo:bar`, `foo:bar`},
+
+	// Don't allow cleaning to create a Root Local Device path like \??\a.
+	{`/a/../??/a`, `\.\??\a`},
 }
 
 func TestClean(t *testing.T) {
@@ -176,8 +179,28 @@
 var winislocaltests = []IsLocalTest{
 	{"NUL", false},
 	{"nul", false},
+	{"nul ", false},
 	{"nul.", false},
+	{"a/nul:", false},
+	{"a/nul : a", false},
+	{"com0", true},
 	{"com1", false},
+	{"com2", false},
+	{"com3", false},
+	{"com4", false},
+	{"com5", false},
+	{"com6", false},
+	{"com7", false},
+	{"com8", false},
+	{"com9", false},
+	{"com¹", false},
+	{"com²", false},
+	{"com³", false},
+	{"com¹ : a", false},
+	{"cOm1", false},
+	{"lpt1", false},
+	{"LPT1", false},
+	{"lpt³", false},
 	{"./nul", false},
 	{`\`, false},
 	{`\a`, false},
@@ -383,6 +406,7 @@
 	{[]string{`\\a\`, `b`, `c`}, `\\a\b\c`},
 	{[]string{`//`, `a`}, `\\a`},
 	{[]string{`a:\b\c`, `x\..\y:\..\..\z`}, `a:\b\z`},
+	{[]string{`\`, `??\a`}, `\.\??\a`},
 }
 
 func TestJoin(t *testing.T) {
@@ -953,6 +977,8 @@
 	{`\\host\share\`, true},
 	{`\\host\share\foo`, true},
 	{`//host/share/foo/bar`, true},
+	{`\\?\a\b\c`, true},
+	{`\??\a\b\c`, true},
 }
 
 func TestIsAbs(t *testing.T) {
@@ -1422,7 +1448,8 @@
 var volumenametests = []VolumeNameTest{
 	{`c:/foo/bar`, `c:`},
 	{`c:`, `c:`},
-	{`2:`, ``},
+	{`c:\`, `c:`},
+	{`2:`, `2:`},
 	{``, ``},
 	{`\\\host`, `\\\host`},
 	{`\\\host\`, `\\\host`},
@@ -1442,12 +1469,23 @@
 	{`//host/share//foo///bar////baz`, `\\host\share`},
 	{`\\host\share\foo\..\bar`, `\\host\share`},
 	{`//host/share/foo/../bar`, `\\host\share`},
+	{`//.`, `\\.`},
+	{`//./`, `\\.\`},
 	{`//./NUL`, `\\.\NUL`},
-	{`//?/NUL`, `\\?\NUL`},
+	{`//?/`, `\\?`},
+	{`//./a/b`, `\\.\a`},
+	{`//?/`, `\\?`},
+	{`//?/`, `\\?`},
 	{`//./C:`, `\\.\C:`},
+	{`//./C:/`, `\\.\C:`},
 	{`//./C:/a/b/c`, `\\.\C:`},
 	{`//./UNC/host/share/a/b/c`, `\\.\UNC\host\share`},
 	{`//./UNC/host`, `\\.\UNC\host`},
+	{`//./UNC/host\`, `\\.\UNC\host\`},
+	{`//./UNC`, `\\.\UNC`},
+	{`//./UNC/`, `\\.\UNC\`},
+	{`\\?\x`, `\\?`},
+	{`\??\x`, `\??`},
 }
 
 func TestVolumeName(t *testing.T) {
@@ -1720,3 +1758,28 @@
 		t.Errorf("got directories %v, want %v", saw, want)
 	}
 }
+
+func TestEscaping(t *testing.T) {
+	dir1 := t.TempDir()
+	dir2 := t.TempDir()
+	chdir(t, dir1)
+
+	for _, p := range []string{
+		filepath.Join(dir2, "x"),
+	} {
+		if !filepath.IsLocal(p) {
+			continue
+		}
+		f, err := os.Create(p)
+		if err != nil {
+			f.Close()
+		}
+		ents, err := os.ReadDir(dir2)
+		if err != nil {
+			t.Fatal(err)
+		}
+		for _, e := range ents {
+			t.Fatalf("found: %v", e.Name())
+		}
+	}
+}
diff --git a/src/path/filepath/path_windows.go b/src/path/filepath/path_windows.go
index 4dca9e0..c490424 100644
--- a/src/path/filepath/path_windows.go
+++ b/src/path/filepath/path_windows.go
@@ -5,6 +5,8 @@
 package filepath
 
 import (
+	"internal/safefilepath"
+	"os"
 	"strings"
 	"syscall"
 )
@@ -20,34 +22,6 @@
 	return c
 }
 
-// isReservedName reports if name is a Windows reserved device name or a console handle.
-// It does not detect names with an extension, which are also reserved on some Windows versions.
-//
-// For details, search for PRN in
-// https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file.
-func isReservedName(name string) bool {
-	if 3 <= len(name) && len(name) <= 4 {
-		switch string([]byte{toUpper(name[0]), toUpper(name[1]), toUpper(name[2])}) {
-		case "CON", "PRN", "AUX", "NUL":
-			return len(name) == 3
-		case "COM", "LPT":
-			return len(name) == 4 && '1' <= name[3] && name[3] <= '9'
-		}
-	}
-	// Passing CONIN$ or CONOUT$ to CreateFile opens a console handle.
-	// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea#consoles
-	//
-	// While CONIN$ and CONOUT$ aren't documented as being files,
-	// they behave the same as CON. For example, ./CONIN$ also opens the console input.
-	if len(name) == 6 && name[5] == '$' && strings.EqualFold(name, "CONIN$") {
-		return true
-	}
-	if len(name) == 7 && name[6] == '$' && strings.EqualFold(name, "CONOUT$") {
-		return true
-	}
-	return false
-}
-
 func isLocal(path string) bool {
 	if path == "" {
 		return false
@@ -68,25 +42,8 @@
 		if part == "." || part == ".." {
 			hasDots = true
 		}
-		// Trim the extension and look for a reserved name.
-		base, _, hasExt := strings.Cut(part, ".")
-		if isReservedName(base) {
-			if !hasExt {
-				return false
-			}
-			// The path element is a reserved name with an extension. Some Windows
-			// versions consider this a reserved name, while others do not. Use
-			// FullPath to see if the name is reserved.
-			//
-			// FullPath will convert references to reserved device names to their
-			// canonical form: \\.\${DEVICE_NAME}
-			//
-			// FullPath does not perform this conversion for paths which contain
-			// a reserved device name anywhere other than in the last element,
-			// so check the part rather than the full path.
-			if p, _ := syscall.FullPath(part); len(p) >= 4 && p[:4] == `\\.\` {
-				return false
-			}
+		if safefilepath.IsReservedName(part) {
+			return false
 		}
 	}
 	if hasDots {
@@ -118,40 +75,99 @@
 // volumeNameLen returns length of the leading volume name on Windows.
 // It returns 0 elsewhere.
 //
-// See: https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
+// See:
+// https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
+// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
 func volumeNameLen(path string) int {
-	if len(path) < 2 {
-		return 0
-	}
-	// with drive letter
-	c := path[0]
-	if path[1] == ':' && ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+	switch {
+	case len(path) >= 2 && path[1] == ':':
+		// Path starts with a drive letter.
+		//
+		// Not all Windows functions necessarily enforce the requirement that
+		// drive letters be in the set A-Z, and we don't try to here.
+		//
+		// We don't handle the case of a path starting with a non-ASCII character,
+		// in which case the "drive letter" might be multiple bytes long.
 		return 2
-	}
-	// UNC and DOS device paths start with two slashes.
-	if !isSlash(path[0]) || !isSlash(path[1]) {
+
+	case len(path) == 0 || !isSlash(path[0]):
+		// Path does not have a volume component.
 		return 0
-	}
-	rest := path[2:]
-	p1, rest, _ := cutPath(rest)
-	p2, rest, ok := cutPath(rest)
-	if !ok {
-		return len(path)
-	}
-	if p1 != "." && p1 != "?" {
-		// This is a UNC path: \\${HOST}\${SHARE}\
-		return len(path) - len(rest) - 1
-	}
-	// This is a DOS device path.
-	if len(p2) == 3 && toUpper(p2[0]) == 'U' && toUpper(p2[1]) == 'N' && toUpper(p2[2]) == 'C' {
-		// This is a DOS device path that links to a UNC: \\.\UNC\${HOST}\${SHARE}\
-		_, rest, _ = cutPath(rest)  // host
-		_, rest, ok = cutPath(rest) // share
+
+	case pathHasPrefixFold(path, `\\.\UNC`):
+		// We're going to treat the UNC host and share as part of the volume
+		// prefix for historical reasons, but this isn't really principled;
+		// Windows's own GetFullPathName will happily remove the first
+		// component of the path in this space, converting
+		// \\.\unc\a\b\..\c into \\.\unc\a\c.
+		return uncLen(path, len(`\\.\UNC\`))
+
+	case pathHasPrefixFold(path, `\\.`):
+		// Path starts with \\., and is a Local Device path.
+		//
+		// We currently treat the next component after the \\.\ prefix
+		// as part of the volume name, although there doesn't seem to be
+		// a principled reason to do this.
+		if len(path) == 3 {
+			return 3 // exactly \\.
+		}
+		_, rest, ok := cutPath(path[4:])
 		if !ok {
 			return len(path)
 		}
+		return len(path) - len(rest) - 1
+
+	case pathHasPrefixFold(path, `\\?`) || pathHasPrefixFold(path, `\??`):
+		// Path starts with \\?\ or \??\, and is a Root Local Device path.
+		//
+		// While Windows usually treats / and \ as equivalent,
+		// /??/ does not seem to be recognized as a Root Local Device path.
+		// We treat it as one anyway here to be safe.
+		return 3
+
+	case len(path) >= 2 && isSlash(path[1]):
+		// Path starts with \\, and is a UNC path.
+		return uncLen(path, 2)
 	}
-	return len(path) - len(rest) - 1
+	return 0
+}
+
+// pathHasPrefixFold tests whether the path s begins with prefix,
+// ignoring case and treating all path separators as equivalent.
+// If s is longer than prefix, then s[len(prefix)] must be a path separator.
+func pathHasPrefixFold(s, prefix string) bool {
+	if len(s) < len(prefix) {
+		return false
+	}
+	for i := 0; i < len(prefix); i++ {
+		if isSlash(prefix[i]) {
+			if !isSlash(s[i]) {
+				return false
+			}
+		} else if toUpper(prefix[i]) != toUpper(s[i]) {
+			return false
+		}
+	}
+	if len(s) > len(prefix) && !isSlash(s[len(prefix)]) {
+		return false
+	}
+	return true
+}
+
+// uncLen returns the length of the volume prefix of a UNC path.
+// prefixLen is the prefix prior to the start of the UNC host;
+// for example, for "//host/share", the prefixLen is len("//")==2.
+func uncLen(path string, prefixLen int) int {
+	count := 0
+	for i := prefixLen; i < len(path); i++ {
+		if isSlash(path[i]) {
+			count++
+			if count == 2 {
+				return i
+			}
+		}
+	}
+	return len(path)
 }
 
 // cutPath slices path around the first path separator.
@@ -238,6 +254,12 @@
 			for len(e) > 0 && isSlash(e[0]) {
 				e = e[1:]
 			}
+			// If the path is \ and the next path element is ??,
+			// add an extra .\ to create \.\?? rather than \??\
+			// (a Root Local Device path).
+			if b.Len() == 1 && pathHasPrefixFold(e, "??") {
+				b.WriteString(`.\`)
+			}
 		case lastChar == ':':
 			// If the path ends in a colon, keep the path relative to the current directory
 			// on a drive and don't add a separator. Preserve leading slashes in the next
@@ -304,3 +326,29 @@
 func sameWord(a, b string) bool {
 	return strings.EqualFold(a, b)
 }
+
+// postClean adjusts the results of Clean to avoid turning a relative path
+// into an absolute or rooted one.
+func postClean(out *lazybuf) {
+	if out.volLen != 0 || out.buf == nil {
+		return
+	}
+	// If a ':' appears in the path element at the start of a path,
+	// insert a .\ at the beginning to avoid converting relative paths
+	// like a/../c: into c:.
+	for _, c := range out.buf {
+		if os.IsPathSeparator(c) {
+			break
+		}
+		if c == ':' {
+			out.prepend('.', Separator)
+			return
+		}
+	}
+	// If a path begins with \??\, insert a \. at the beginning
+	// to avoid converting paths like \a\..\??\c:\x into \??\c:\x
+	// (equivalent to c:\x).
+	if len(out.buf) >= 3 && os.IsPathSeparator(out.buf[0]) && out.buf[1] == '?' && out.buf[2] == '?' {
+		out.prepend(Separator, '.')
+	}
+}