cmd/go, cmd/cgo: update gofrontend mangling checks

This is a port of two patches in the master repository.

https://golang.org/cl/259298

    cmd/cgo: split gofrontend mangling checks into cmd/internal/pkgpath

    This is a step toward porting https://golang.org/cl/219817 from the
    gofrontend repo to the main repo.

    Note that this also corrects the implementation of the v2 mangling
    scheme to use ..u and ..U where appropriate.

https://golang.org/cl/259299

    cmd/go: use cmd/internal/pkgpath for gccgo pkgpath symbol

For golang/go#37272
For golang/go#41862

Change-Id: Id276ca1bac7e1d850408a9d222c19e57b26ba001
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/270637
Trust: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/libgo/check-packages.txt b/libgo/check-packages.txt
index efa7d19..da77e84 100644
--- a/libgo/check-packages.txt
+++ b/libgo/check-packages.txt
@@ -22,6 +22,7 @@
 cmd/internal/buildid
 cmd/internal/edit
 cmd/internal/objabi
+cmd/internal/pkgpath
 cmd/internal/test2json
 compress/bzip2
 compress/flate
diff --git a/libgo/go/cmd/cgo/main.go b/libgo/go/cmd/cgo/main.go
index 7fc2508..4952118 100644
--- a/libgo/go/cmd/cgo/main.go
+++ b/libgo/go/cmd/cgo/main.go
@@ -244,8 +244,7 @@
 var gccgo = flag.Bool("gccgo", false, "generate files for use with gccgo")
 var gccgoprefix = flag.String("gccgoprefix", "", "-fgo-prefix option used with gccgo")
 var gccgopkgpath = flag.String("gccgopkgpath", "", "-fgo-pkgpath option used with gccgo")
-var gccgoMangleCheckDone bool
-var gccgoNewmanglingInEffect bool
+var gccgoMangler func(string) string
 var importRuntimeCgo = flag.Bool("import_runtime_cgo", true, "import runtime/cgo in generated code")
 var importSyscall = flag.Bool("import_syscall", true, "import syscall in generated code")
 var goarch, goos string
diff --git a/libgo/go/cmd/cgo/out.go b/libgo/go/cmd/cgo/out.go
index dd03f7d..2ab8425 100644
--- a/libgo/go/cmd/cgo/out.go
+++ b/libgo/go/cmd/cgo/out.go
@@ -6,6 +6,7 @@
 
 import (
 	"bytes"
+	"cmd/internal/pkgpath"
 	"debug/elf"
 	"debug/macho"
 	"debug/pe"
@@ -15,7 +16,6 @@
 	"go/token"
 	"internal/xcoff"
 	"io"
-	"io/ioutil"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -1286,112 +1286,24 @@
 	fmt.Fprintf(fgcch, "%s\n", p.gccExportHeaderProlog())
 }
 
-// gccgoUsesNewMangling reports whether gccgo uses the new collision-free
-// packagepath mangling scheme (see determineGccgoManglingScheme for more
-// info).
-func gccgoUsesNewMangling() bool {
-	if !gccgoMangleCheckDone {
-		gccgoNewmanglingInEffect = determineGccgoManglingScheme()
-		gccgoMangleCheckDone = true
-	}
-	return gccgoNewmanglingInEffect
-}
-
-const mangleCheckCode = `
-package läufer
-func Run(x int) int {
-  return 1
-}
-`
-
-// determineGccgoManglingScheme performs a runtime test to see which
-// flavor of packagepath mangling gccgo is using. Older versions of
-// gccgo use a simple mangling scheme where there can be collisions
-// between packages whose paths are different but mangle to the same
-// string. More recent versions of gccgo use a new mangler that avoids
-// these collisions. Return value is whether gccgo uses the new mangling.
-func determineGccgoManglingScheme() bool {
-
-	// Emit a small Go file for gccgo to compile.
-	filepat := "*_gccgo_manglecheck.go"
-	var f *os.File
-	var err error
-	if f, err = ioutil.TempFile(*objDir, filepat); err != nil {
-		fatalf("%v", err)
-	}
-	gofilename := f.Name()
-	defer os.Remove(gofilename)
-
-	if err = ioutil.WriteFile(gofilename, []byte(mangleCheckCode), 0666); err != nil {
-		fatalf("%v", err)
-	}
-
-	// Compile with gccgo, capturing generated assembly.
-	gccgocmd := os.Getenv("GCCGO")
-	if gccgocmd == "" {
-		gpath, gerr := exec.LookPath("gccgo")
-		if gerr != nil {
-			fatalf("unable to locate gccgo: %v", gerr)
-		}
-		gccgocmd = gpath
-	}
-	cmd := exec.Command(gccgocmd, "-S", "-o", "-", gofilename)
-	buf, cerr := cmd.CombinedOutput()
-	if cerr != nil {
-		fatalf("%s", cerr)
-	}
-
-	// New mangling: expect go.l..u00e4ufer.Run
-	// Old mangling: expect go.l__ufer.Run
-	return regexp.MustCompile(`go\.l\.\.u00e4ufer\.Run`).Match(buf)
-}
-
-// gccgoPkgpathToSymbolNew converts a package path to a gccgo-style
-// package symbol.
-func gccgoPkgpathToSymbolNew(ppath string) string {
-	bsl := []byte{}
-	changed := false
-	for _, c := range []byte(ppath) {
-		switch {
-		case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z',
-			'0' <= c && c <= '9', c == '_':
-			bsl = append(bsl, c)
-		case c == '.':
-			bsl = append(bsl, ".x2e"...)
-		default:
-			changed = true
-			encbytes := []byte(fmt.Sprintf("..z%02x", c))
-			bsl = append(bsl, encbytes...)
-		}
-	}
-	if !changed {
-		return ppath
-	}
-	return string(bsl)
-}
-
-// gccgoPkgpathToSymbolOld converts a package path to a gccgo-style
-// package symbol using the older mangling scheme.
-func gccgoPkgpathToSymbolOld(ppath string) string {
-	clean := func(r rune) rune {
-		switch {
-		case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z',
-			'0' <= r && r <= '9':
-			return r
-		}
-		return '_'
-	}
-	return strings.Map(clean, ppath)
-}
-
 // gccgoPkgpathToSymbol converts a package path to a mangled packagepath
 // symbol.
 func gccgoPkgpathToSymbol(ppath string) string {
-	if gccgoUsesNewMangling() {
-		return gccgoPkgpathToSymbolNew(ppath)
-	} else {
-		return gccgoPkgpathToSymbolOld(ppath)
+	if gccgoMangler == nil {
+		var err error
+		cmd := os.Getenv("GCCGO")
+		if cmd == "" {
+			cmd, err = exec.LookPath("gccgo")
+			if err != nil {
+				fatalf("unable to locate gccgo: %v", err)
+			}
+		}
+		gccgoMangler, err = pkgpath.ToSymbolFunc(cmd, *objDir)
+		if err != nil {
+			fatalf("%v", err)
+		}
 	}
+	return gccgoMangler(ppath)
 }
 
 // Return the package prefix when using gccgo.
diff --git a/libgo/go/cmd/go/internal/work/gccgo.go b/libgo/go/cmd/go/internal/work/gccgo.go
index 63d5c62..dbaff70 100644
--- a/libgo/go/cmd/go/internal/work/gccgo.go
+++ b/libgo/go/cmd/go/internal/work/gccgo.go
@@ -11,11 +11,13 @@
 	"os/exec"
 	"path/filepath"
 	"strings"
+	"sync"
 
 	"cmd/go/internal/base"
 	"cmd/go/internal/cfg"
 	"cmd/go/internal/load"
 	"cmd/go/internal/str"
+	"cmd/internal/pkgpath"
 )
 
 // The Gccgo toolchain.
@@ -174,7 +176,7 @@
 		ofiles = append(ofiles, ofile)
 		sfile = mkAbs(p.Dir, sfile)
 		defs := []string{"-D", "GOOS_" + cfg.Goos, "-D", "GOARCH_" + cfg.Goarch}
-		if pkgpath := gccgoCleanPkgpath(p); pkgpath != "" {
+		if pkgpath := tools.gccgoCleanPkgpath(b, p); pkgpath != "" {
 			defs = append(defs, `-D`, `GOPKGPATH=`+pkgpath)
 		}
 		defs = tools.maybePIC(defs)
@@ -556,7 +558,7 @@
 	cfile = mkAbs(p.Dir, cfile)
 	defs := []string{"-D", "GOOS_" + cfg.Goos, "-D", "GOARCH_" + cfg.Goarch}
 	defs = append(defs, b.gccArchArgs()...)
-	if pkgpath := gccgoCleanPkgpath(p); pkgpath != "" {
+	if pkgpath := tools.gccgoCleanPkgpath(b, p); pkgpath != "" {
 		defs = append(defs, `-D`, `GOPKGPATH="`+pkgpath+`"`)
 	}
 	compiler := envList("CC", cfg.DefaultCC(cfg.Goos, cfg.Goarch))
@@ -596,28 +598,23 @@
 	return p.ImportPath
 }
 
-// gccgoCleanPkgpath returns the form of p's pkgpath that gccgo uses
-// for symbol names. This is like gccgoPkgpathToSymbolNew in cmd/cgo/out.go.
-func gccgoCleanPkgpath(p *load.Package) string {
-	ppath := gccgoPkgpath(p)
-	bsl := []byte{}
-	changed := false
-	for _, c := range []byte(ppath) {
-		switch {
-		case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z',
-			'0' <= c && c <= '9', c == '_':
-			bsl = append(bsl, c)
-		case c == '.':
-			bsl = append(bsl, ".x2e"...)
-			changed = true
-		default:
-			encbytes := []byte(fmt.Sprintf("..z%02x", c))
-			bsl = append(bsl, encbytes...)
-			changed = true
+var gccgoToSymbolFuncOnce sync.Once
+var gccgoToSymbolFunc func(string) string
+
+func (tools gccgoToolchain) gccgoCleanPkgpath(b *Builder, p *load.Package) string {
+	gccgoToSymbolFuncOnce.Do(func() {
+		if cfg.BuildN {
+			gccgoToSymbolFunc = func(s string) string { return s }
+			return
 		}
-	}
-	if !changed {
-		return ppath
-	}
-	return string(bsl)
+		fn, err := pkgpath.ToSymbolFunc(tools.compiler(), b.WorkDir)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "cmd/go: %v\n", err)
+			base.SetExitStatus(2)
+			base.Exit()
+		}
+		gccgoToSymbolFunc = fn
+	})
+
+	return gccgoToSymbolFunc(gccgoPkgpath(p))
 }
diff --git a/libgo/go/cmd/internal/pkgpath/pkgpath.go b/libgo/go/cmd/internal/pkgpath/pkgpath.go
new file mode 100644
index 0000000..0b24468
--- /dev/null
+++ b/libgo/go/cmd/internal/pkgpath/pkgpath.go
@@ -0,0 +1,114 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package pkgpath determines the package path used by gccgo/GoLLVM symbols.
+// This package is not used for the gc compiler.
+package pkgpath
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"strings"
+)
+
+// ToSymbolFunc returns a function that may be used to convert a
+// package path into a string suitable for use as a symbol.
+// cmd is the gccgo/GoLLVM compiler in use, and tmpdir is a temporary
+// directory to pass to ioutil.TempFile.
+// For example, this returns a function that converts "net/http"
+// into a string like "net..z2fhttp". The actual string varies for
+// different gccgo/GoLLVM versions, which is why this returns a function
+// that does the conversion appropriate for the compiler in use.
+func ToSymbolFunc(cmd, tmpdir string) (func(string) string, error) {
+	// To determine the scheme used by cmd, we compile a small
+	// file and examine the assembly code. Older versions of gccgo
+	// use a simple mangling scheme where there can be collisions
+	// between packages whose paths are different but mangle to
+	// the same string. More recent versions use a new mangler
+	// that avoids these collisions.
+	const filepat = "*_gccgo_manglechck.go"
+	f, err := ioutil.TempFile(tmpdir, filepat)
+	if err != nil {
+		return nil, err
+	}
+	gofilename := f.Name()
+	f.Close()
+	defer os.Remove(gofilename)
+
+	if err := ioutil.WriteFile(gofilename, []byte(mangleCheckCode), 0644); err != nil {
+		return nil, err
+	}
+
+	command := exec.Command(cmd, "-S", "-o", "-", gofilename)
+	buf, err := command.Output()
+	if err != nil {
+		return nil, err
+	}
+
+	// New mangling: expect go.l..u00e4ufer.Run
+	// Old mangling: expect go.l__ufer.Run
+	if bytes.Contains(buf, []byte("go.l..u00e4ufer.Run")) {
+		return toSymbolV2, nil
+	} else if bytes.Contains(buf, []byte("go.l__ufer.Run")) {
+		return toSymbolV1, nil
+	} else {
+		return nil, errors.New(cmd + ": unrecognized mangling scheme")
+	}
+}
+
+// mangleCheckCode is the package we compile to determine the mangling scheme.
+const mangleCheckCode = `
+package läufer
+func Run(x int) int {
+  return 1
+}
+`
+
+// toSymbolV1 converts a package path using the original mangling scheme.
+func toSymbolV1(ppath string) string {
+	clean := func(r rune) rune {
+		switch {
+		case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z',
+			'0' <= r && r <= '9':
+			return r
+		}
+		return '_'
+	}
+	return strings.Map(clean, ppath)
+}
+
+// toSymbolV2 converts a package path using the newer mangling scheme.
+func toSymbolV2(ppath string) string {
+	// This has to build at boostrap time, so it has to build
+	// with Go 1.4, so we don't use strings.Builder.
+	bsl := make([]byte, 0, len(ppath))
+	changed := false
+	for _, c := range ppath {
+		if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '_' {
+			bsl = append(bsl, byte(c))
+			continue
+		}
+		var enc string
+		switch {
+		case c == '.':
+			enc = ".x2e"
+		case c < 0x80:
+			enc = fmt.Sprintf("..z%02x", c)
+		case c < 0x10000:
+			enc = fmt.Sprintf("..u%04x", c)
+		default:
+			enc = fmt.Sprintf("..U%08x", c)
+		}
+		bsl = append(bsl, enc...)
+		changed = true
+	}
+	if !changed {
+		return ppath
+	}
+	return string(bsl)
+}
diff --git a/libgo/go/cmd/internal/pkgpath/pkgpath_test.go b/libgo/go/cmd/internal/pkgpath/pkgpath_test.go
new file mode 100644
index 0000000..7355f81
--- /dev/null
+++ b/libgo/go/cmd/internal/pkgpath/pkgpath_test.go
@@ -0,0 +1,121 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgpath
+
+import (
+	"os"
+	"testing"
+)
+
+const testEnvName = "GO_PKGPATH_TEST_COMPILER"
+
+// This init function supports TestToSymbolFunc. For simplicity,
+// we use the test binary itself as a sample gccgo driver.
+// We set an environment variable to specify how it should behave.
+func init() {
+	switch os.Getenv(testEnvName) {
+	case "":
+		return
+	case "v1":
+		os.Stdout.WriteString(`.string	"go.l__ufer.Run"`)
+		os.Exit(0)
+	case "v2":
+		os.Stdout.WriteString(`.string	"go.l..u00e4ufer.Run"`)
+		os.Exit(0)
+	case "error":
+		os.Stdout.WriteString(`unknown string`)
+		os.Exit(0)
+	}
+}
+
+func TestToSymbolFunc(t *testing.T) {
+	const input = "päδΈ–πŸœƒ"
+	tests := []struct {
+		env     string
+		fail    bool
+		mangled string
+	}{
+		{
+			env:     "v1",
+			mangled: "p___",
+		},
+		{
+			env:     "v2",
+			mangled: "p..u00e4..u4e16..U0001f703",
+		},
+		{
+			env:  "error",
+			fail: true,
+		},
+	}
+
+	cmd := os.Args[0]
+	tmpdir := t.TempDir()
+
+	defer os.Unsetenv(testEnvName)
+
+	for _, test := range tests {
+		t.Run(test.env, func(t *testing.T) {
+			os.Setenv(testEnvName, test.env)
+
+			fn, err := ToSymbolFunc(cmd, tmpdir)
+			if err != nil {
+				if !test.fail {
+					t.Errorf("ToSymbolFunc(%q, %q): unexpected error %v", cmd, tmpdir, err)
+				}
+			} else if test.fail {
+				t.Errorf("ToSymbolFunc(%q, %q) succeeded but expected to fail", cmd, tmpdir)
+			} else if got, want := fn(input), test.mangled; got != want {
+				t.Errorf("ToSymbolFunc(%q, %q)(%q) = %q, want %q", cmd, tmpdir, input, got, want)
+			}
+		})
+	}
+}
+
+var symbolTests = []struct {
+	input, v1, v2 string
+}{
+	{
+		"",
+		"",
+		"",
+	},
+	{
+		"bytes",
+		"bytes",
+		"bytes",
+	},
+	{
+		"net/http",
+		"net_http",
+		"net..z2fhttp",
+	},
+	{
+		"golang.org/x/net/http",
+		"golang_org_x_net_http",
+		"golang.x2eorg..z2fx..z2fnet..z2fhttp",
+	},
+	{
+		"päδΈ–.πŸœƒ",
+		"p____",
+		"p..u00e4..u4e16.x2e..U0001f703",
+	},
+}
+
+func TestV1(t *testing.T) {
+	for _, test := range symbolTests {
+		if got, want := toSymbolV1(test.input), test.v1; got != want {
+			t.Errorf("toSymbolV1(%q) = %q, want %q", test.input, got, want)
+		}
+	}
+}
+
+func TestV2(t *testing.T) {
+	for _, test := range symbolTests {
+		if got, want := toSymbolV2(test.input), test.v2; got != want {
+			t.Errorf("toSymbolV2(%q) = %q, want %q", test.input, got, want)
+		}
+	}
+}
diff --git a/libgo/gotool-packages.txt b/libgo/gotool-packages.txt
index 745c34c..c11df02 100644
--- a/libgo/gotool-packages.txt
+++ b/libgo/gotool-packages.txt
@@ -43,6 +43,7 @@
 cmd/internal/diff
 cmd/internal/edit
 cmd/internal/objabi
+cmd/internal/pkgpath
 cmd/internal/sys
 cmd/internal/test2json
 golang.org/x/crypto/ed25519