cmd/link: support cgo internal linking on darwin/arm64

Cgo programs now work with internal linking as well. Internal linking
is still not enabled by default for now.

Enable internal linking tests.

Updates #38485.

Change-Id: I8324a5c263fba221eb4e67d71207ca84fa241e6c
Reviewed-on: https://go-review.googlesource.com/c/go/+/263637
Trust: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/misc/cgo/test/issue4029.c b/misc/cgo/test/issue4029.c
index e6a777f..e79c5a7 100644
--- a/misc/cgo/test/issue4029.c
+++ b/misc/cgo/test/issue4029.c
@@ -3,7 +3,7 @@
 // license that can be found in the LICENSE file.
 
 // +build !windows,!static
-// +build !darwin !internal_pie
+// +build !darwin !internal_pie,!arm64
 
 #include <stdint.h>
 #include <dlfcn.h>
diff --git a/misc/cgo/test/issue4029.go b/misc/cgo/test/issue4029.go
index 8602ce19..b2d1318 100644
--- a/misc/cgo/test/issue4029.go
+++ b/misc/cgo/test/issue4029.go
@@ -3,10 +3,11 @@
 // license that can be found in the LICENSE file.
 
 // +build !windows,!static
-// +build !darwin !internal_pie
+// +build !darwin !internal_pie,!arm64
 
 // Excluded in darwin internal linking PIE mode, as dynamic export is not
 // supported.
+// Excluded in internal linking mode on darwin/arm64, as it is always PIE.
 
 package cgotest
 
diff --git a/misc/cgo/test/issue4029w.go b/misc/cgo/test/issue4029w.go
index de0cf21..b969bdd 100644
--- a/misc/cgo/test/issue4029w.go
+++ b/misc/cgo/test/issue4029w.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build windows static darwin,internal_pie
+// +build windows static darwin,internal_pie darwin,arm64
 
 package cgotest
 
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index bcb12f2..09d69f7 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -946,9 +946,6 @@
 	if goos == "ios" {
 		return false
 	}
-	if goos == "darwin" && goarch == "arm64" {
-		return false
-	}
 	// Internally linking cgo is incomplete on some architectures.
 	// https://golang.org/issue/10373
 	// https://golang.org/issue/14449
@@ -964,7 +961,7 @@
 
 func (t *tester) internalLinkPIE() bool {
 	switch goos + "-" + goarch {
-	case "darwin-amd64",
+	case "darwin-amd64", "darwin-arm64",
 		"linux-amd64", "linux-arm64",
 		"android-arm64",
 		"windows-amd64", "windows-386", "windows-arm":
@@ -1088,7 +1085,7 @@
 
 	pair := gohostos + "-" + goarch
 	switch pair {
-	case "darwin-amd64",
+	case "darwin-amd64", "darwin-arm64",
 		"openbsd-386", "openbsd-amd64",
 		"windows-386", "windows-amd64":
 		// test linkmode=external, but __thread not supported, so skip testtls.
diff --git a/src/cmd/link/internal/arm64/asm.go b/src/cmd/link/internal/arm64/asm.go
index 585c968..e456411 100644
--- a/src/cmd/link/internal/arm64/asm.go
+++ b/src/cmd/link/internal/arm64/asm.go
@@ -71,13 +71,13 @@
 }
 
 func adddynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym, r loader.Reloc, rIdx int) bool {
-
 	targ := r.Sym()
 	var targType sym.SymKind
 	if targ != 0 {
 		targType = ldr.SymType(targ)
 	}
 
+	const pcrel = 1
 	switch r.Type() {
 	default:
 		if r.Type() >= objabi.ElfRelocOffset {
@@ -201,6 +201,75 @@
 		su := ldr.MakeSymbolUpdater(s)
 		su.SetRelocType(rIdx, objabi.R_ARM64_LDST128)
 		return true
+
+	// Handle relocations found in Mach-O object files.
+	case objabi.MachoRelocOffset + ld.MACHO_ARM64_RELOC_UNSIGNED*2:
+		if targType == sym.SDYNIMPORT {
+			ldr.Errorf(s, "unexpected reloc for dynamic symbol %s", ldr.SymName(targ))
+		}
+		su := ldr.MakeSymbolUpdater(s)
+		su.SetRelocType(rIdx, objabi.R_ADDR)
+		if target.IsPIE() && target.IsInternal() {
+			// For internal linking PIE, this R_ADDR relocation cannot
+			// be resolved statically. We need to generate a dynamic
+			// relocation. Let the code below handle it.
+			break
+		}
+		return true
+
+	case objabi.MachoRelocOffset + ld.MACHO_ARM64_RELOC_BRANCH26*2 + pcrel:
+		su := ldr.MakeSymbolUpdater(s)
+		su.SetRelocType(rIdx, objabi.R_CALLARM64)
+		if targType == sym.SDYNIMPORT {
+			addpltsym(target, ldr, syms, targ)
+			su.SetRelocSym(rIdx, syms.PLT)
+			su.SetRelocAdd(rIdx, int64(ldr.SymPlt(targ)))
+		}
+		return true
+
+	case objabi.MachoRelocOffset + ld.MACHO_ARM64_RELOC_PAGE21*2 + pcrel,
+		objabi.MachoRelocOffset + ld.MACHO_ARM64_RELOC_PAGEOFF12*2:
+		if targType == sym.SDYNIMPORT {
+			ldr.Errorf(s, "unexpected relocation for dynamic symbol %s", ldr.SymName(targ))
+		}
+		su := ldr.MakeSymbolUpdater(s)
+		su.SetRelocType(rIdx, objabi.R_ARM64_PCREL)
+		return true
+
+	case objabi.MachoRelocOffset + ld.MACHO_ARM64_RELOC_GOT_LOAD_PAGE21*2 + pcrel,
+		objabi.MachoRelocOffset + ld.MACHO_ARM64_RELOC_GOT_LOAD_PAGEOFF12*2:
+		if targType != sym.SDYNIMPORT {
+			// have symbol
+			// turn MOVD sym@GOT (adrp+ldr) into MOVD $sym (adrp+add)
+			data := ldr.Data(s)
+			off := r.Off()
+			if int(off+3) >= len(data) {
+				ldr.Errorf(s, "unexpected GOT_LOAD reloc for non-dynamic symbol %s", ldr.SymName(targ))
+				return false
+			}
+			o := target.Arch.ByteOrder.Uint32(data[off:])
+			su := ldr.MakeSymbolUpdater(s)
+			switch {
+			case (o>>24)&0x9f == 0x90: // adrp
+				// keep instruction unchanged, change relocation type below
+			case o>>24 == 0xf9: // ldr
+				// rewrite to add
+				o = (0x91 << 24) | (o & (1<<22 - 1))
+				su.MakeWritable()
+				su.SetUint32(target.Arch, int64(off), o)
+			default:
+				ldr.Errorf(s, "unexpected GOT_LOAD reloc for non-dynamic symbol %s", ldr.SymName(targ))
+				return false
+			}
+			su.SetRelocType(rIdx, objabi.R_ARM64_PCREL)
+			return true
+		}
+		ld.AddGotSym(target, ldr, syms, targ, 0)
+		su := ldr.MakeSymbolUpdater(s)
+		su.SetRelocType(rIdx, objabi.R_ARM64_GOT)
+		su.SetRelocSym(rIdx, syms.GOT)
+		su.SetRelocAdd(rIdx, int64(ldr.SymGot(targ)))
+		return true
 	}
 
 	// Reread the reloc to incorporate any changes in type above.
@@ -671,14 +740,28 @@
 			}
 			o0 := (uint32((t>>12)&3) << 29) | (uint32((t>>12>>2)&0x7ffff) << 5)
 			return val | int64(o0), noExtReloc, isOk
-		} else if (val>>24)&0x91 == 0x91 {
-			// R_AARCH64_ADD_ABS_LO12_NC
+		} else if (val>>24)&0x9f == 0x91 {
+			// ELF R_AARCH64_ADD_ABS_LO12_NC or Mach-O ARM64_RELOC_PAGEOFF12
 			// patch instruction: add
 			t := ldr.SymAddr(rs) + r.Add() - ((ldr.SymValue(s) + int64(r.Off())) &^ 0xfff)
 			o1 := uint32(t&0xfff) << 10
 			return val | int64(o1), noExtReloc, isOk
+		} else if (val>>24)&0x3b == 0x39 {
+			// Mach-O ARM64_RELOC_PAGEOFF12
+			// patch ldr/str(b/h/w/d/q) (integer or vector) instructions, which have different scaling factors.
+			// Mach-O uses same relocation type for them.
+			shift := uint32(val) >> 30
+			if shift == 0 && (val>>20)&0x048 == 0x048 { // 128-bit vector load
+				shift = 4
+			}
+			t := ldr.SymAddr(rs) + r.Add() - ((ldr.SymValue(s) + int64(r.Off())) &^ 0xfff)
+			if t&(1<<shift-1) != 0 {
+				ldr.Errorf(s, "invalid address: %x for relocation type: ARM64_RELOC_PAGEOFF12", t)
+			}
+			o1 := (uint32(t&0xfff) >> shift) << 10
+			return val | int64(o1), noExtReloc, isOk
 		} else {
-			ldr.Errorf(s, "unsupported instruction for %x R_PCRELARM64", val)
+			ldr.Errorf(s, "unsupported instruction for %x R_ARM64_PCREL", val)
 		}
 
 	case objabi.R_ARM64_LDST8:
diff --git a/src/cmd/link/internal/ld/config.go b/src/cmd/link/internal/ld/config.go
index 834c87d..a54b96d 100644
--- a/src/cmd/link/internal/ld/config.go
+++ b/src/cmd/link/internal/ld/config.go
@@ -208,9 +208,6 @@
 	if iscgo && objabi.GOOS == "android" {
 		return true, objabi.GOOS + " does not support internal cgo"
 	}
-	if iscgo && objabi.GOOS == "darwin" && objabi.GOARCH == "arm64" {
-		return true, objabi.GOOS + "/" + objabi.GOARCH + " does not support internal cgo"
-	}
 
 	// When the race flag is set, the LLVM tsan relocatable file is linked
 	// into the final binary, which means external linking is required because
diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go
index 47cac04..d861efc 100644
--- a/src/cmd/link/internal/loader/loader.go
+++ b/src/cmd/link/internal/loader/loader.go
@@ -2622,11 +2622,15 @@
 	fmt.Println("Nsyms:", len(l.objSyms))
 	fmt.Println("syms")
 	for i := Sym(1); i < Sym(len(l.objSyms)); i++ {
-		pi := interface{}("")
+		pi := ""
 		if l.IsExternal(i) {
 			pi = fmt.Sprintf("<ext %d>", l.extIndex(i))
 		}
-		fmt.Println(i, l.SymName(i), l.SymType(i), pi)
+		sect := ""
+		if l.SymSect(i) != nil {
+			sect = l.SymSect(i).Name
+		}
+		fmt.Printf("%v %v %v %v %x %v\n", i, l.SymName(i), l.SymType(i), pi, l.SymValue(i), sect)
 	}
 	fmt.Println("symsByName")
 	for name, i := range l.symsByName[0] {
diff --git a/src/cmd/link/internal/loadmacho/ldmacho.go b/src/cmd/link/internal/loadmacho/ldmacho.go
index 864d808..d12f2bc 100644
--- a/src/cmd/link/internal/loadmacho/ldmacho.go
+++ b/src/cmd/link/internal/loadmacho/ldmacho.go
@@ -43,7 +43,7 @@
 THE SOFTWARE.
 */
 
-// TODO(crawshaw): de-duplicate these symbols with cmd/internal/ld
+// TODO(crawshaw): de-duplicate these symbols with cmd/link/internal/ld
 const (
 	MACHO_X86_64_RELOC_UNSIGNED = 0
 	MACHO_X86_64_RELOC_SIGNED   = 1
@@ -172,11 +172,12 @@
 	LdMachoCpuVax         = 1
 	LdMachoCpu68000       = 6
 	LdMachoCpu386         = 7
-	LdMachoCpuAmd64       = 0x1000007
+	LdMachoCpuAmd64       = 1<<24 | 7
 	LdMachoCpuMips        = 8
 	LdMachoCpu98000       = 10
 	LdMachoCpuHppa        = 11
 	LdMachoCpuArm         = 12
+	LdMachoCpuArm64       = 1<<24 | 12
 	LdMachoCpu88000       = 13
 	LdMachoCpuSparc       = 14
 	LdMachoCpu860         = 15
@@ -471,11 +472,14 @@
 	switch arch.Family {
 	default:
 		return errorf("mach-o %s unimplemented", arch.Name)
-
 	case sys.AMD64:
 		if e != binary.LittleEndian || m.cputype != LdMachoCpuAmd64 {
 			return errorf("mach-o object but not amd64")
 		}
+	case sys.ARM64:
+		if e != binary.LittleEndian || m.cputype != LdMachoCpuArm64 {
+			return errorf("mach-o object but not arm64")
+		}
 	}
 
 	m.cmd = make([]ldMachoCmd, ncmd)
@@ -633,7 +637,9 @@
 		}
 
 		bld.SetType(l.SymType(outer))
-		l.AddInteriorSym(outer, s)
+		if l.SymSize(outer) != 0 { // skip empty section (0-sized symbol)
+			l.AddInteriorSym(outer, s)
+		}
 
 		bld.SetValue(int64(machsym.value - sect.addr))
 		if !l.AttrCgoExportDynamic(s) {
@@ -722,27 +728,28 @@
 
 			// Handle X86_64_RELOC_SIGNED referencing a section (rel.extrn == 0).
 			p := l.Data(s)
-			if arch.Family == sys.AMD64 && rel.extrn == 0 && rel.type_ == MACHO_X86_64_RELOC_SIGNED {
-				// Calculate the addend as the offset into the section.
-				//
-				// The rip-relative offset stored in the object file is encoded
-				// as follows:
-				//
-				//    movsd	0x00000360(%rip),%xmm0
-				//
-				// To get the absolute address of the value this rip-relative address is pointing
-				// to, we must add the address of the next instruction to it. This is done by
-				// taking the address of the relocation and adding 4 to it (since the rip-relative
-				// offset can at most be 32 bits long).  To calculate the offset into the section the
-				// relocation is referencing, we subtract the vaddr of the start of the referenced
-				// section found in the original object file.
-				//
-				// [For future reference, see Darwin's /usr/include/mach-o/x86_64/reloc.h]
-				secaddr := c.seg.sect[rel.symnum-1].addr
-
-				rAdd = int64(uint64(int64(int32(e.Uint32(p[rOff:])))+int64(rOff)+4) - secaddr)
-			} else {
-				rAdd = int64(int32(e.Uint32(p[rOff:])))
+			if arch.Family == sys.AMD64 {
+				if rel.extrn == 0 && rel.type_ == MACHO_X86_64_RELOC_SIGNED {
+					// Calculate the addend as the offset into the section.
+					//
+					// The rip-relative offset stored in the object file is encoded
+					// as follows:
+					//
+					//    movsd	0x00000360(%rip),%xmm0
+					//
+					// To get the absolute address of the value this rip-relative address is pointing
+					// to, we must add the address of the next instruction to it. This is done by
+					// taking the address of the relocation and adding 4 to it (since the rip-relative
+					// offset can at most be 32 bits long).  To calculate the offset into the section the
+					// relocation is referencing, we subtract the vaddr of the start of the referenced
+					// section found in the original object file.
+					//
+					// [For future reference, see Darwin's /usr/include/mach-o/x86_64/reloc.h]
+					secaddr := c.seg.sect[rel.symnum-1].addr
+					rAdd = int64(uint64(int64(int32(e.Uint32(p[rOff:])))+int64(rOff)+4) - secaddr)
+				} else {
+					rAdd = int64(int32(e.Uint32(p[rOff:])))
+				}
 			}
 
 			// An unsigned internal relocation has a value offset