cmd/asm: align an instruction or a function's address

Recently, the gVisor project needs an instruction's address
with 128 bytes alignment and a function's start address with
2K bytes alignment to fit the architecture requirement for
interrupt table.

This patch allows aligning the address of an instruction to be
aligned to a specific value (2^n and not higher than 2048) and
the address of a function to be 2048 bytes.

The main changes include:

1. Adds ALIGN2048 flag to align a function's address with
2048 bytes.
e.g. "TEXT ·Add(SB),NOSPLIT|ALIGN2048" indicates that the
address of Add function should be aligned to 2048 bytes.

2. Adds a new element in the FuncInfo structure defined in
cmd/internal/obj/link.go file to record the alignment
information.

3. Adds a new element in the Func structure defined in
cmd/internal/goobj/read.go file to read the alignment
information.

4. Because go introduces a new object file format, also add
a new element in the FuncInfo structure defined in
cmd/internal/goobj2/funcinfo.go to record the alignment
information.

5. Adds the assembler support to align an intruction's offset
with a specific value (2^n and not higher than 2048).
e.g. "PCALIGN $256" indicates that the next instruction should
be aligned to 256 bytes.

This CL also adds a test.

Change-Id: I31cfa6fb5bc35dee2c44bf65913e90cddfcb492a
Reviewed-on: https://go-review.googlesource.com/c/go/+/212767
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go
index e61e95d..48537d2 100644
--- a/src/cmd/internal/goobj/read.go
+++ b/src/cmd/internal/goobj/read.go
@@ -95,6 +95,7 @@
 type Func struct {
 	Args     int64      // size in bytes of argument frame: inputs and outputs
 	Frame    int64      // size in bytes of local variable frame
+	Align    uint32     // alignment requirement in bytes for the address of the function
 	Leaf     bool       // function omits save of link register (ARM)
 	NoSplit  bool       // function omits stack split prologue
 	TopFrame bool       // function is the top of the call stack
@@ -590,6 +591,7 @@
 			s.Func = f
 			f.Args = r.readInt()
 			f.Frame = r.readInt()
+			f.Align = uint32(r.readInt())
 			flags := r.readInt()
 			f.Leaf = flags&(1<<0) != 0
 			f.TopFrame = flags&(1<<4) != 0
diff --git a/src/cmd/internal/goobj/readnew.go b/src/cmd/internal/goobj/readnew.go
index 3f9d0d1..1acf18a 100644
--- a/src/cmd/internal/goobj/readnew.go
+++ b/src/cmd/internal/goobj/readnew.go
@@ -149,6 +149,7 @@
 		f := &Func{
 			Args:     int64(info.Args),
 			Frame:    int64(info.Locals),
+			Align:    info.Align,
 			NoSplit:  info.NoSplit != 0,
 			Leaf:     osym.Leaf(),
 			TopFrame: osym.TopFrame(),
diff --git a/src/cmd/internal/goobj2/funcinfo.go b/src/cmd/internal/goobj2/funcinfo.go
index 8620931..946415b 100644
--- a/src/cmd/internal/goobj2/funcinfo.go
+++ b/src/cmd/internal/goobj2/funcinfo.go
@@ -18,6 +18,7 @@
 
 	Args   uint32
 	Locals uint32
+	Align  uint32
 
 	Pcsp        uint32
 	Pcfile      uint32
@@ -42,6 +43,7 @@
 
 	writeUint32(a.Args)
 	writeUint32(a.Locals)
+	writeUint32(a.Align)
 
 	writeUint32(a.Pcsp)
 	writeUint32(a.Pcfile)
@@ -79,6 +81,7 @@
 
 	a.Args = readUint32()
 	a.Locals = readUint32()
+	a.Align = readUint32()
 
 	a.Pcsp = readUint32()
 	a.Pcfile = readUint32()
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index e8b092a..dbe816e 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -886,25 +886,10 @@
 
 // align code to a certain length by padding bytes.
 func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int {
-	switch alignedValue {
-	case 8:
-		if pc%8 == 4 {
-			return 4
-		}
-	case 16:
-		switch pc % 16 {
-		case 4:
-			return 12
-		case 8:
-			return 8
-		case 12:
-			return 4
-		}
-	default:
-		ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", alignedValue)
+	if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) {
+		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue)
 	}
-
-	return 0
+	return int(-pc & (alignedValue - 1))
 }
 
 func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
@@ -940,8 +925,12 @@
 		if m == 0 {
 			switch p.As {
 			case obj.APCALIGN:
-				a := p.From.Offset
-				m = pcAlignPadLength(pc, a, ctxt)
+				alignedValue := p.From.Offset
+				m = pcAlignPadLength(pc, alignedValue, ctxt)
+				// Update the current text symbol ailgnment value.
+				if int32(alignedValue) > cursym.Func.Align {
+					cursym.Func.Align = int32(alignedValue)
+				}
 				break
 			case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
 				continue
@@ -1017,8 +1006,8 @@
 			if m == 0 {
 				switch p.As {
 				case obj.APCALIGN:
-					a := p.From.Offset
-					m = pcAlignPadLength(pc, a, ctxt)
+					alignedValue := p.From.Offset
+					m = pcAlignPadLength(pc, alignedValue, ctxt)
 					break
 				case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
 					continue
diff --git a/src/cmd/internal/obj/arm64/asm_test.go b/src/cmd/internal/obj/arm64/asm_test.go
index 1691828..9efdb02 100644
--- a/src/cmd/internal/obj/arm64/asm_test.go
+++ b/src/cmd/internal/obj/arm64/asm_test.go
@@ -18,7 +18,9 @@
 
 // TestLarge generates a very large file to verify that large
 // program builds successfully, in particular, too-far
-// conditional branches are fixed.
+// conditional branches are fixed, and also verify that the
+// instruction's pc can be correctly aligned even when branches
+// need to be fixed.
 func TestLarge(t *testing.T) {
 	if testing.Short() {
 		t.Skip("Skip in short mode")
@@ -41,11 +43,28 @@
 		t.Fatalf("can't write output: %v\n", err)
 	}
 
-	// build generated file
-	cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	pattern := `0x0080\s00128\s\(.*\)\tMOVD\t\$3,\sR3`
+
+	// assemble generated file
+	cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile)
 	cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux")
 	out, err := cmd.CombinedOutput()
 	if err != nil {
+		t.Errorf("Assemble failed: %v, output: %s", err, out)
+	}
+	matched, err := regexp.MatchString(pattern, string(out))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !matched {
+		t.Errorf("The alignment is not correct: %t, output:%s\n", matched, out)
+	}
+
+	// build generated file
+	cmd = exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux")
+	out, err = cmd.CombinedOutput()
+	if err != nil {
 		t.Errorf("Build failed: %v, output: %s", err, out)
 	}
 }
@@ -56,6 +75,8 @@
 	fmt.Fprintln(buf, "TBZ $5, R0, label")
 	fmt.Fprintln(buf, "CBZ R0, label")
 	fmt.Fprintln(buf, "BEQ label")
+	fmt.Fprintln(buf, "PCALIGN $128")
+	fmt.Fprintln(buf, "MOVD $3, R3")
 	for i := 0; i < 1<<19; i++ {
 		fmt.Fprintln(buf, "MOVD R0, R1")
 	}
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index d1cc536..0879c61 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -398,6 +398,7 @@
 type FuncInfo struct {
 	Args     int32
 	Locals   int32
+	Align    int32
 	Text     *Prog
 	Autot    map[*LSym]struct{}
 	Pcln     Pcln
diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go
index 7fd97f7..46e8a55 100644
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@@ -346,6 +346,7 @@
 
 	w.writeInt(int64(s.Func.Args))
 	w.writeInt(int64(s.Func.Locals))
+	w.writeInt(int64(s.Func.Align))
 	w.writeBool(s.NoSplit())
 	flags = int64(0)
 	if s.Leaf() {
diff --git a/src/cmd/internal/obj/objfile2.go b/src/cmd/internal/obj/objfile2.go
index 69019e0..626df56 100644
--- a/src/cmd/internal/obj/objfile2.go
+++ b/src/cmd/internal/obj/objfile2.go
@@ -374,6 +374,7 @@
 			NoSplit: nosplit,
 			Args:    uint32(s.Func.Args),
 			Locals:  uint32(s.Func.Locals),
+			Align:   uint32(s.Func.Align),
 		}
 		pc := &s.Func.Pcln
 		o.Pcsp = pcdataoff
diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go
index 7579dd0..be19221 100644
--- a/src/cmd/internal/obj/plist.go
+++ b/src/cmd/internal/obj/plist.go
@@ -133,6 +133,13 @@
 	s.Set(AttrNeedCtxt, flag&NEEDCTXT != 0)
 	s.Set(AttrNoFrame, flag&NOFRAME != 0)
 	s.Set(AttrTopFrame, flag&TOPFRAME != 0)
+	if flag&ALIGN2048 != 0 {
+		if objabi.GOARCH != "arm64" {
+			ctxt.Diag("ALIGN2048 flag only works on ARM64 at present.")
+		}
+		s.Func.Align = 2048
+	}
+
 	s.Type = objabi.STEXT
 	ctxt.Text = append(ctxt.Text, s)
 
diff --git a/src/cmd/internal/obj/textflag.go b/src/cmd/internal/obj/textflag.go
index d2cec73..3681a3b6 100644
--- a/src/cmd/internal/obj/textflag.go
+++ b/src/cmd/internal/obj/textflag.go
@@ -51,4 +51,9 @@
 	// Function is the top of the call stack. Call stack unwinders should stop
 	// at this function.
 	TOPFRAME = 2048
+
+	// ALIGN2048 means that the address of the function must be aligned to a
+	// 2048 bytes boundary.
+	// Only works on arm64 at present.
+	ALIGN2048 = 4096
 )
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index 7ca01c8..31613e5 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -2119,6 +2119,10 @@
 		funcsize = uint64(s.Size)
 	}
 
+	if sect.Align < s.Align {
+		sect.Align = s.Align
+	}
+
 	// On ppc64x a text section should not be larger than 2^26 bytes due to the size of
 	// call target offset field in the bl instruction.  Splitting into smaller text
 	// sections smaller than this limit allows the GNU linker to modify the long calls
diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go
index 0adc395..bd9c6b4 100644
--- a/src/cmd/link/internal/loader/loader.go
+++ b/src/cmd/link/internal/loader/loader.go
@@ -1227,6 +1227,7 @@
 		info.Pcdata = append(info.Pcdata, info.PcdataEnd) // for the ease of knowing where it ends
 		pc.Args = int32(info.Args)
 		pc.Locals = int32(info.Locals)
+		s.Align = int32(info.Align)
 
 		npc := len(info.Pcdata) - 1 // -1 as we appended one above
 		pc.Pcdata = pcDataBatch[:npc:npc]
diff --git a/src/cmd/link/internal/objfile/objfile.go b/src/cmd/link/internal/objfile/objfile.go
index a15d3c3..295acb2 100644
--- a/src/cmd/link/internal/objfile/objfile.go
+++ b/src/cmd/link/internal/objfile/objfile.go
@@ -312,6 +312,7 @@
 
 		pc.Args = r.readInt32()
 		pc.Locals = r.readInt32()
+		s.Align = r.readInt32()
 		if r.readUint8() != 0 {
 			s.Attr |= sym.AttrNoSplit
 		}
diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
index 4f792bd..025e882 100644
--- a/src/cmd/link/link_test.go
+++ b/src/cmd/link/link_test.go
@@ -3,6 +3,7 @@
 import (
 	"bufio"
 	"bytes"
+	"cmd/internal/objabi"
 	"debug/macho"
 	"internal/testenv"
 	"io/ioutil"
@@ -447,3 +448,103 @@
 		t.Errorf("unexpected output:\n%s", out)
 	}
 }
+
+const testFuncAlignSrc = `
+package main
+import (
+	"fmt"
+	"reflect"
+)
+func alignFunc()
+func alignPc()
+
+func main() {
+	addr1 := reflect.ValueOf(alignFunc).Pointer()
+	addr2 := reflect.ValueOf(alignPc).Pointer()
+	switch {
+	case (addr1 % 2048) != 0 && (addr2 % 512) != 0:
+		fmt.Printf("expected 2048 bytes alignment, got %v; expected 512 bytes alignment, got %v\n", addr1, addr2)
+	case (addr2 % 512) != 0:
+		fmt.Printf("expected 512 bytes alignment, got %v\n", addr2)
+	case (addr1 % 2048) != 0:
+		fmt.Printf("expected 2048 bytes alignment, got %v\n", addr1)
+	default:
+		fmt.Printf("PASS")
+	}
+}
+`
+
+const testFuncAlignAsmSrc = `
+#include "textflag.h"
+TEXT	·alignFunc(SB),NOSPLIT|ALIGN2048, $0-0
+	MOVD	$1, R0
+	MOVD	$2, R1
+	RET
+
+TEXT	·alignPc(SB),NOSPLIT, $0-0
+	MOVD	$2, R0
+	PCALIGN	$512
+	MOVD	$3, R1
+	RET
+`
+
+// TestFuncAlign verifies that the address of a function can be aligned
+// with a specfic value on arm64.
+func TestFuncAlign(t *testing.T) {
+	if objabi.GOARCH != "arm64" {
+		t.Skipf("Skipping FuncAlign test on %s", objabi.GOARCH)
+	}
+	testenv.MustHaveGoBuild(t)
+
+	tmpdir, err := ioutil.TempDir("", "TestFuncAlign")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmpdir)
+
+	src := filepath.Join(tmpdir, "falign.go")
+	err = ioutil.WriteFile(src, []byte(testFuncAlignSrc), 0666)
+	if err != nil {
+		t.Fatal(err)
+	}
+	src = filepath.Join(tmpdir, "falign.s")
+	err = ioutil.WriteFile(src, []byte(testFuncAlignAsmSrc), 0666)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Build and run with old object file format.
+	cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "falign")
+	cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux")
+	cmd.Dir = tmpdir
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("build failed: %v", err)
+	}
+	cmd = exec.Command(tmpdir + "/falign")
+	out, err = cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("failed to run with err %v, output: %s", err, out)
+	}
+	if string(out) != "PASS" {
+		t.Errorf("unexpected output: %s\n", out)
+	}
+
+	// Build and run with new object file format.
+	cmd = exec.Command(testenv.GoToolPath(t), "build", "-o", "falign", "-gcflags=all=-newobj", "-asmflags=all=-newobj", "-ldflags=-newobj")
+	cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux")
+	cmd.Dir = tmpdir
+	out, err = cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("build with newobj failed: %v", err)
+	}
+	cmd = exec.Command(tmpdir + "/falign")
+	out, err = cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("failed to run with -newobj, err: %v, output: %s", err, out)
+	}
+	if string(out) != "PASS" {
+		t.Errorf("unexpected output with -newobj: %s\n", out)
+	}
+
+}
diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h
index daca36d..bbbef63 100644
--- a/src/runtime/textflag.h
+++ b/src/runtime/textflag.h
@@ -35,3 +35,8 @@
 // Function is the top of the call stack. Call stack unwinders should stop
 // at this function.
 #define TOPFRAME 2048
+// ALIGN2048 means that the address of the function must be aligned to a
+// 2048 bytes boundary.
+// Only works on arm64 at present.
+#define ALIGN2048 4096
+