[dev.ssa] Merge remote-tracking branch 'origin/master' into mergebranch

Semi-regular merge from tip to dev.ssa.

Conflicts:
	src/runtime/sys_windows_amd64.s

Change-Id: I5f733130049c810e6ceacd46dad85faebca52b29
diff --git a/src/cmd/cgo/doc.go b/src/cmd/cgo/doc.go
index 8ec4301..bd38a5c 100644
--- a/src/cmd/cgo/doc.go
+++ b/src/cmd/cgo/doc.go
@@ -125,11 +125,19 @@
 To access a struct, union, or enum type directly, prefix it with
 struct_, union_, or enum_, as in C.struct_stat.
 
+The size of any C type T is available as C.sizeof_T, as in
+C.sizeof_struct_stat.
+
 As Go doesn't have support for C's union type in the general case,
 C's union types are represented as a Go byte array with the same length.
 
 Go structs cannot embed fields with C types.
 
+Go code can not refer to zero-sized fields that occur at the end of
+non-empty C structs.  To get the address of such a field (which is the
+only operation you can do with a zero-sized field) you must take the
+address of the struct and add the size of the struct.
+
 Cgo translates C types into equivalent unexported Go types.
 Because the translations are unexported, a Go package should not
 expose C types in its exported API: a C type used in one Go package
diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go
index 750b89b..fb5049c 100644
--- a/src/cmd/cgo/gcc.go
+++ b/src/cmd/cgo/gcc.go
@@ -626,9 +626,7 @@
 
 		// Add optional additional arguments for an address
 		// expression.
-		if u, ok := call.Args[i].(*ast.UnaryExpr); ok && u.Op == token.AND {
-			c.Args = p.checkAddrArgs(f, c.Args, u.X)
-		}
+		c.Args = p.checkAddrArgs(f, c.Args, call.Args[i])
 
 		// _cgoCheckPointer returns interface{}.
 		// We need to type assert that to the type we want.
@@ -773,7 +771,19 @@
 // only pass the slice or array if we can refer to it without side
 // effects.
 func (p *Package) checkAddrArgs(f *File, args []ast.Expr, x ast.Expr) []ast.Expr {
-	index, ok := x.(*ast.IndexExpr)
+	// Strip type conversions.
+	for {
+		c, ok := x.(*ast.CallExpr)
+		if !ok || len(c.Args) != 1 || !p.isType(c.Fun) {
+			break
+		}
+		x = c.Args[0]
+	}
+	u, ok := x.(*ast.UnaryExpr)
+	if !ok || u.Op != token.AND {
+		return args
+	}
+	index, ok := u.X.(*ast.IndexExpr)
 	if !ok {
 		// This is the address of something that is not an
 		// index expression.  We only need to examine the
@@ -804,6 +814,42 @@
 	return found
 }
 
+// isType returns whether the expression is definitely a type.
+// This is conservative--it returns false for an unknown identifier.
+func (p *Package) isType(t ast.Expr) bool {
+	switch t := t.(type) {
+	case *ast.SelectorExpr:
+		if t.Sel.Name != "Pointer" {
+			return false
+		}
+		id, ok := t.X.(*ast.Ident)
+		if !ok {
+			return false
+		}
+		return id.Name == "unsafe"
+	case *ast.Ident:
+		// TODO: This ignores shadowing.
+		switch t.Name {
+		case "unsafe.Pointer", "bool", "byte",
+			"complex64", "complex128",
+			"error",
+			"float32", "float64",
+			"int", "int8", "int16", "int32", "int64",
+			"rune", "string",
+			"uint", "uint8", "uint16", "uint32", "uint64", "uintptr":
+
+			return true
+		}
+	case *ast.StarExpr:
+		return p.isType(t.X)
+	case *ast.ArrayType, *ast.StructType, *ast.FuncType, *ast.InterfaceType,
+		*ast.MapType, *ast.ChanType:
+
+		return true
+	}
+	return false
+}
+
 // unsafeCheckPointerName is given the Go version of a C type.  If the
 // type uses unsafe.Pointer, we arrange to build a version of
 // _cgoCheckPointer that returns that type.  This avoids using a type
@@ -832,6 +878,8 @@
 func (p *Package) hasUnsafePointer(t ast.Expr) bool {
 	switch t := t.(type) {
 	case *ast.Ident:
+		// We don't see a SelectorExpr for unsafe.Pointer;
+		// this is created by code in this file.
 		return t.Name == "unsafe.Pointer"
 	case *ast.ArrayType:
 		return p.hasUnsafePointer(t.Elt)
diff --git a/src/cmd/compile/internal/amd64/prog.go b/src/cmd/compile/internal/amd64/prog.go
index b4cc781..b43dde6 100644
--- a/src/cmd/compile/internal/amd64/prog.go
+++ b/src/cmd/compile/internal/amd64/prog.go
@@ -34,6 +34,7 @@
 	obj.ACHECKNIL: {Flags: gc.LeftRead},
 	obj.AVARDEF:   {Flags: gc.Pseudo | gc.RightWrite},
 	obj.AVARKILL:  {Flags: gc.Pseudo | gc.RightWrite},
+	obj.AVARLIVE:  {Flags: gc.Pseudo | gc.LeftRead},
 
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the Intel opcode.
diff --git a/src/cmd/compile/internal/arm/peep.go b/src/cmd/compile/internal/arm/peep.go
index d7a9c5f..bc49ebc 100644
--- a/src/cmd/compile/internal/arm/peep.go
+++ b/src/cmd/compile/internal/arm/peep.go
@@ -1366,6 +1366,7 @@
 		obj.AFUNCDATA,
 		obj.AVARDEF,
 		obj.AVARKILL,
+		obj.AVARLIVE,
 		obj.AUSEFIELD:
 		return 0
 	}
diff --git a/src/cmd/compile/internal/arm/prog.go b/src/cmd/compile/internal/arm/prog.go
index 8a304e2..81be77a 100644
--- a/src/cmd/compile/internal/arm/prog.go
+++ b/src/cmd/compile/internal/arm/prog.go
@@ -33,6 +33,7 @@
 	obj.ACHECKNIL: {Flags: gc.LeftRead},
 	obj.AVARDEF:   {Flags: gc.Pseudo | gc.RightWrite},
 	obj.AVARKILL:  {Flags: gc.Pseudo | gc.RightWrite},
+	obj.AVARLIVE:  {Flags: gc.Pseudo | gc.LeftRead},
 
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the Intel opcode.
diff --git a/src/cmd/compile/internal/arm64/peep.go b/src/cmd/compile/internal/arm64/peep.go
index b61ac6e..daa626f 100644
--- a/src/cmd/compile/internal/arm64/peep.go
+++ b/src/cmd/compile/internal/arm64/peep.go
@@ -711,6 +711,7 @@
 		obj.AFUNCDATA,
 		obj.AVARDEF,
 		obj.AVARKILL,
+		obj.AVARLIVE,
 		obj.AUSEFIELD:
 		return 0
 	}
diff --git a/src/cmd/compile/internal/arm64/prog.go b/src/cmd/compile/internal/arm64/prog.go
index a4b8ebe..a8e8bc5 100644
--- a/src/cmd/compile/internal/arm64/prog.go
+++ b/src/cmd/compile/internal/arm64/prog.go
@@ -34,6 +34,7 @@
 	obj.ACHECKNIL: {Flags: gc.LeftRead},
 	obj.AVARDEF:   {Flags: gc.Pseudo | gc.RightWrite},
 	obj.AVARKILL:  {Flags: gc.Pseudo | gc.RightWrite},
+	obj.AVARLIVE:  {Flags: gc.Pseudo | gc.LeftRead},
 
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the Power opcode.
diff --git a/src/cmd/compile/internal/gc/const.go b/src/cmd/compile/internal/gc/const.go
index d30515a..795b53d 100644
--- a/src/cmd/compile/internal/gc/const.go
+++ b/src/cmd/compile/internal/gc/const.go
@@ -634,6 +634,7 @@
 	var wr int
 	var v Val
 	var norig *Node
+	var nn *Node
 	if nr == nil {
 		// copy numeric value to avoid modifying
 		// nl, in case someone still refers to it (e.g. iota).
@@ -1115,15 +1116,21 @@
 	return
 
 settrue:
-	norig = saveorig(n)
-	*n = *Nodbool(true)
-	n.Orig = norig
+	nn = Nodbool(true)
+	nn.Orig = saveorig(n)
+	if !iscmp[n.Op] {
+		nn.Type = nl.Type
+	}
+	*n = *nn
 	return
 
 setfalse:
-	norig = saveorig(n)
-	*n = *Nodbool(false)
-	n.Orig = norig
+	nn = Nodbool(false)
+	nn.Orig = saveorig(n)
+	if !iscmp[n.Op] {
+		nn.Type = nl.Type
+	}
+	*n = *nn
 	return
 
 illegal:
diff --git a/src/cmd/compile/internal/gc/esc.go b/src/cmd/compile/internal/gc/esc.go
index 293f916..ff983e7 100644
--- a/src/cmd/compile/internal/gc/esc.go
+++ b/src/cmd/compile/internal/gc/esc.go
@@ -299,12 +299,13 @@
 }
 
 type NodeEscState struct {
-	Curfn        *Node
-	Escflowsrc   *NodeList // flow(this, src)
-	Escretval    *NodeList // on OCALLxxx, list of dummy return values
-	Escloopdepth int32     // -1: global, 0: return variables, 1:function top level, increased inside function for every loop or label to mark scopes
-	Esclevel     Level
-	Walkgen      uint32
+	Curfn             *Node
+	Escflowsrc        *NodeList // flow(this, src)
+	Escretval         *NodeList // on OCALLxxx, list of dummy return values
+	Escloopdepth      int32     // -1: global, 0: return variables, 1:function top level, increased inside function for every loop or label to mark scopes
+	Esclevel          Level
+	Walkgen           uint32
+	Maxextraloopdepth int32
 }
 
 func (e *EscState) nodeEscState(n *Node) *NodeEscState {
@@ -1579,7 +1580,13 @@
 		src.Op == ONAME && src.Class == PPARAM && src.Name.Curfn == dst.Name.Curfn
 }
 
+const NOTALOOPDEPTH = -1
+
 func escwalk(e *EscState, level Level, dst *Node, src *Node) {
+	escwalkBody(e, level, dst, src, NOTALOOPDEPTH)
+}
+
+func escwalkBody(e *EscState, level Level, dst *Node, src *Node, extraloopdepth int32) {
 	if src.Op == OLITERAL {
 		return
 	}
@@ -1590,16 +1597,29 @@
 		// convergence.
 		level = level.min(srcE.Esclevel)
 		if level == srcE.Esclevel {
-			return
+			// Have we been here already with an extraloopdepth,
+			// or is the extraloopdepth provided no improvement on
+			// what's already been seen?
+			if srcE.Maxextraloopdepth >= extraloopdepth || srcE.Escloopdepth >= extraloopdepth {
+				return
+			}
+			srcE.Maxextraloopdepth = extraloopdepth
 		}
+	} else { // srcE.Walkgen < e.walkgen -- first time, reset this.
+		srcE.Maxextraloopdepth = NOTALOOPDEPTH
 	}
 
 	srcE.Walkgen = e.walkgen
 	srcE.Esclevel = level
+	modSrcLoopdepth := srcE.Escloopdepth
+
+	if extraloopdepth > modSrcLoopdepth {
+		modSrcLoopdepth = extraloopdepth
+	}
 
 	if Debug['m'] > 1 {
-		fmt.Printf("escwalk: level:%d depth:%d %.*s op=%v %v(%v) scope:%v[%d]\n",
-			level, e.pdepth, e.pdepth, "\t\t\t\t\t\t\t\t\t\t", Oconv(int(src.Op), 0), Nconv(src, obj.FmtShort), Jconv(src, obj.FmtShort), e.curfnSym(src), srcE.Escloopdepth)
+		fmt.Printf("escwalk: level:%d depth:%d %.*s op=%v %v(%v) scope:%v[%d] extraloopdepth=%v\n",
+			level, e.pdepth, e.pdepth, "\t\t\t\t\t\t\t\t\t\t", Oconv(int(src.Op), 0), Nconv(src, obj.FmtShort), Jconv(src, obj.FmtShort), e.curfnSym(src), srcE.Escloopdepth, extraloopdepth)
 	}
 
 	e.pdepth++
@@ -1638,7 +1658,7 @@
 		}
 	}
 
-	leaks = level.int() <= 0 && level.guaranteedDereference() <= 0 && dstE.Escloopdepth < srcE.Escloopdepth
+	leaks = level.int() <= 0 && level.guaranteedDereference() <= 0 && dstE.Escloopdepth < modSrcLoopdepth
 
 	switch src.Op {
 	case ONAME:
@@ -1650,7 +1670,7 @@
 						Warnl(int(src.Lineno), "leaking param content: %v", Nconv(src, obj.FmtShort))
 					} else {
 						Warnl(int(src.Lineno), "leaking param content: %v level=%v dst.eld=%v src.eld=%v dst=%v",
-							Nconv(src, obj.FmtShort), level, dstE.Escloopdepth, srcE.Escloopdepth, Nconv(dst, obj.FmtShort))
+							Nconv(src, obj.FmtShort), level, dstE.Escloopdepth, modSrcLoopdepth, Nconv(dst, obj.FmtShort))
 					}
 				}
 			} else {
@@ -1660,7 +1680,7 @@
 						Warnl(int(src.Lineno), "leaking param: %v", Nconv(src, obj.FmtShort))
 					} else {
 						Warnl(int(src.Lineno), "leaking param: %v level=%v dst.eld=%v src.eld=%v dst=%v",
-							Nconv(src, obj.FmtShort), level, dstE.Escloopdepth, srcE.Escloopdepth, Nconv(dst, obj.FmtShort))
+							Nconv(src, obj.FmtShort), level, dstE.Escloopdepth, modSrcLoopdepth, Nconv(dst, obj.FmtShort))
 					}
 				}
 			}
@@ -1686,15 +1706,17 @@
 				}
 				if Debug['m'] > 1 {
 					Warnl(int(src.Lineno), "%v escapes to heap, level=%v, dst.eld=%v, src.eld=%v",
-						Nconv(p, obj.FmtShort), level, dstE.Escloopdepth, srcE.Escloopdepth)
+						Nconv(p, obj.FmtShort), level, dstE.Escloopdepth, modSrcLoopdepth)
 				} else {
 					Warnl(int(src.Lineno), "%v escapes to heap", Nconv(p, obj.FmtShort))
 				}
 			}
+			escwalkBody(e, level.dec(), dst, src.Left, modSrcLoopdepth)
+			extraloopdepth = modSrcLoopdepth // passes to recursive case, seems likely a no-op
+		} else {
+			escwalk(e, level.dec(), dst, src.Left)
 		}
 
-		escwalk(e, level.dec(), dst, src.Left)
-
 	case OAPPEND:
 		escwalk(e, level, dst, src.List.N)
 
@@ -1704,6 +1726,7 @@
 			if Debug['m'] != 0 {
 				Warnl(int(src.Lineno), "%v escapes to heap", Nconv(src, obj.FmtShort))
 			}
+			extraloopdepth = modSrcLoopdepth
 		}
 		// similar to a slice arraylit and its args.
 		level = level.dec()
@@ -1737,6 +1760,7 @@
 			if Debug['m'] != 0 {
 				Warnl(int(src.Lineno), "%v escapes to heap", Nconv(src, obj.FmtShort))
 			}
+			extraloopdepth = modSrcLoopdepth
 		}
 
 	case ODOT,
@@ -1778,12 +1802,19 @@
 recurse:
 	level = level.copy()
 	for ll := srcE.Escflowsrc; ll != nil; ll = ll.Next {
-		escwalk(e, level, dst, ll.N)
+		escwalkBody(e, level, dst, ll.N, extraloopdepth)
 	}
 
 	e.pdepth--
 }
 
+// This special tag is applied to uintptr variables
+// that we believe may hold unsafe.Pointers for
+// calls into assembly functions.
+// It is logically a constant, but using a var
+// lets us take the address below to get a *string.
+var unsafeUintptrTag = "unsafe-uintptr"
+
 func esctag(e *EscState, func_ *Node) {
 	func_.Esc = EscFuncTagged
 
@@ -1798,6 +1829,29 @@
 			}
 		}
 
+		// Assume that uintptr arguments must be held live across the call.
+		// This is most important for syscall.Syscall.
+		// See golang.org/issue/13372.
+		// This really doesn't have much to do with escape analysis per se,
+		// but we are reusing the ability to annotate an individual function
+		// argument and pass those annotations along to importing code.
+		narg := 0
+		for t := getinargx(func_.Type).Type; t != nil; t = t.Down {
+			narg++
+			if t.Type.Etype == TUINTPTR {
+				if Debug['m'] != 0 {
+					var name string
+					if t.Sym != nil {
+						name = t.Sym.Name
+					} else {
+						name = fmt.Sprintf("arg#%d", narg)
+					}
+					Warnl(int(func_.Lineno), "%v assuming %v is unsafe uintptr", funcSym(func_), name)
+				}
+				t.Note = &unsafeUintptrTag
+			}
+		}
+
 		return
 	}
 
diff --git a/src/cmd/compile/internal/gc/float_test.go b/src/cmd/compile/internal/gc/float_test.go
new file mode 100644
index 0000000..c761e96
--- /dev/null
+++ b/src/cmd/compile/internal/gc/float_test.go
@@ -0,0 +1,102 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import "testing"
+
+// For GO386=387, make sure fucomi* opcodes are not used
+// for comparison operations.
+// Note that this test will fail only on a Pentium MMX
+// processor (with GOARCH=386 GO386=387), as it just runs
+// some code and looks for an unimplemented instruction fault.
+
+//go:noinline
+func compare1(a, b float64) bool {
+	return a < b
+}
+
+//go:noinline
+func compare2(a, b float32) bool {
+	return a < b
+}
+
+func TestFloatCompare(t *testing.T) {
+	if !compare1(3, 5) {
+		t.Errorf("compare1 returned false")
+	}
+	if !compare2(3, 5) {
+		t.Errorf("compare2 returned false")
+	}
+}
+
+// For GO386=387, make sure fucomi* opcodes are not used
+// for float->int conversions.
+
+//go:noinline
+func cvt1(a float64) uint64 {
+	return uint64(a)
+}
+
+//go:noinline
+func cvt2(a float64) uint32 {
+	return uint32(a)
+}
+
+//go:noinline
+func cvt3(a float32) uint64 {
+	return uint64(a)
+}
+
+//go:noinline
+func cvt4(a float32) uint32 {
+	return uint32(a)
+}
+
+//go:noinline
+func cvt5(a float64) int64 {
+	return int64(a)
+}
+
+//go:noinline
+func cvt6(a float64) int32 {
+	return int32(a)
+}
+
+//go:noinline
+func cvt7(a float32) int64 {
+	return int64(a)
+}
+
+//go:noinline
+func cvt8(a float32) int32 {
+	return int32(a)
+}
+
+func TestFloatConvert(t *testing.T) {
+	if got := cvt1(3.5); got != 3 {
+		t.Errorf("cvt1 got %d, wanted 3", got)
+	}
+	if got := cvt2(3.5); got != 3 {
+		t.Errorf("cvt2 got %d, wanted 3", got)
+	}
+	if got := cvt3(3.5); got != 3 {
+		t.Errorf("cvt3 got %d, wanted 3", got)
+	}
+	if got := cvt4(3.5); got != 3 {
+		t.Errorf("cvt4 got %d, wanted 3", got)
+	}
+	if got := cvt5(3.5); got != 3 {
+		t.Errorf("cvt5 got %d, wanted 3", got)
+	}
+	if got := cvt6(3.5); got != 3 {
+		t.Errorf("cvt6 got %d, wanted 3", got)
+	}
+	if got := cvt7(3.5); got != 3 {
+		t.Errorf("cvt7 got %d, wanted 3", got)
+	}
+	if got := cvt8(3.5); got != 3 {
+		t.Errorf("cvt8 got %d, wanted 3", got)
+	}
+}
diff --git a/src/cmd/compile/internal/gc/gen.go b/src/cmd/compile/internal/gc/gen.go
index c9208d9..ac55d4f 100644
--- a/src/cmd/compile/internal/gc/gen.go
+++ b/src/cmd/compile/internal/gc/gen.go
@@ -607,6 +607,9 @@
 	n.Esc = EscNever
 	n.Name.Curfn = Curfn
 	Curfn.Func.Dcl = list(Curfn.Func.Dcl, n)
+	if Debug['h'] != 0 {
+		println("H", n, n.Orig, funcSym(Curfn).Name)
+	}
 
 	dowidth(t)
 	n.Xoffset = 0
@@ -870,6 +873,9 @@
 
 	case OVARKILL:
 		gvarkill(n.Left)
+
+	case OVARLIVE:
+		gvarlive(n.Left)
 	}
 
 ret:
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go
index f17a701..be0a0fb 100644
--- a/src/cmd/compile/internal/gc/gsubr.go
+++ b/src/cmd/compile/internal/gc/gsubr.go
@@ -185,7 +185,7 @@
 			continue
 		}
 
-		if (p.As == obj.AVARDEF || p.As == obj.AVARKILL) && p.To.Node != nil && !((p.To.Node).(*Node)).Used {
+		if (p.As == obj.AVARDEF || p.As == obj.AVARKILL || p.As == obj.AVARLIVE) && p.To.Node != nil && !((p.To.Node).(*Node)).Used {
 			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
 			// VARDEFs are interspersed with other code, and a jump might be using the
 			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
diff --git a/src/cmd/compile/internal/gc/lex.go b/src/cmd/compile/internal/gc/lex.go
index 830c56d..fb30d58 100644
--- a/src/cmd/compile/internal/gc/lex.go
+++ b/src/cmd/compile/internal/gc/lex.go
@@ -694,7 +694,13 @@
 		errorexit()
 	}
 
-	if f.U.(string) == "unsafe" {
+	path_ := f.U.(string)
+
+	if mapped, ok := importMap[path_]; ok {
+		path_ = mapped
+	}
+
+	if path_ == "unsafe" {
 		if safemode != 0 {
 			Yyerror("cannot import package unsafe")
 			errorexit()
@@ -706,12 +712,6 @@
 		return
 	}
 
-	path_ := f.U.(string)
-
-	if mapped, ok := importMap[path_]; ok {
-		path_ = mapped
-	}
-
 	if islocalname(path_) {
 		if path_[0] == '/' {
 			Yyerror("import path cannot be absolute path")
diff --git a/src/cmd/compile/internal/gc/order.go b/src/cmd/compile/internal/gc/order.go
index 84b96c2d..05cd53a 100644
--- a/src/cmd/compile/internal/gc/order.go
+++ b/src/cmd/compile/internal/gc/order.go
@@ -243,6 +243,13 @@
 	var kill *Node
 
 	for l := order.temp; l != mark; l = l.Next {
+		if l.N.Name.Keepalive {
+			l.N.Name.Keepalive = false
+			l.N.Addrtaken = true // ensure SSA keeps the l.N variable
+			kill = Nod(OVARLIVE, l.N, nil)
+			typecheck(&kill, Etop)
+			*out = list(*out, kill)
+		}
 		kill = Nod(OVARKILL, l.N, nil)
 		typecheck(&kill, Etop)
 		*out = list(*out, kill)
@@ -375,6 +382,28 @@
 	orderexpr(&n.Left, order, nil)
 	orderexpr(&n.Right, order, nil) // ODDDARG temp
 	ordercallargs(&n.List, order)
+
+	if n.Op == OCALLFUNC {
+		for l, t := n.List, getinargx(n.Left.Type).Type; l != nil && t != nil; l, t = l.Next, t.Down {
+			// Check for "unsafe-uintptr" tag provided by escape analysis.
+			// If present and the argument is really a pointer being converted
+			// to uintptr, arrange for the pointer to be kept alive until the call
+			// returns, by copying it into a temp and marking that temp
+			// still alive when we pop the temp stack.
+			if t.Note != nil && *t.Note == unsafeUintptrTag {
+				xp := &l.N
+				for (*xp).Op == OCONVNOP && !Isptr[(*xp).Type.Etype] {
+					xp = &(*xp).Left
+				}
+				x := *xp
+				if Isptr[x.Type.Etype] {
+					x = ordercopyexpr(x, x.Type, order, 0)
+					x.Name.Keepalive = true
+					*xp = x
+				}
+			}
+		}
+	}
 }
 
 // Ordermapassign appends n to order->out, introducing temporaries
@@ -464,7 +493,7 @@
 	default:
 		Fatalf("orderstmt %v", Oconv(int(n.Op), 0))
 
-	case OVARKILL:
+	case OVARKILL, OVARLIVE:
 		order.out = list(order.out, n)
 
 	case OAS:
diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go
index 9b65f9c..6e7e10e 100644
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@@ -95,7 +95,11 @@
 
 	switch n.Class {
 	case PAUTO, PPARAM, PPARAMOUT:
-		Thearch.Gins(as, nil, n)
+		if as == obj.AVARLIVE {
+			Thearch.Gins(as, n, nil)
+		} else {
+			Thearch.Gins(as, nil, n)
+		}
 	}
 }
 
@@ -107,13 +111,17 @@
 	gvardefx(n, obj.AVARKILL)
 }
 
+func gvarlive(n *Node) {
+	gvardefx(n, obj.AVARLIVE)
+}
+
 func removevardef(firstp *obj.Prog) {
 	for p := firstp; p != nil; p = p.Link {
-		for p.Link != nil && (p.Link.As == obj.AVARDEF || p.Link.As == obj.AVARKILL) {
+		for p.Link != nil && (p.Link.As == obj.AVARDEF || p.Link.As == obj.AVARKILL || p.Link.As == obj.AVARLIVE) {
 			p.Link = p.Link.Link
 		}
 		if p.To.Type == obj.TYPE_BRANCH {
-			for p.To.Val.(*obj.Prog) != nil && (p.To.Val.(*obj.Prog).As == obj.AVARDEF || p.To.Val.(*obj.Prog).As == obj.AVARKILL) {
+			for p.To.Val.(*obj.Prog) != nil && (p.To.Val.(*obj.Prog).As == obj.AVARDEF || p.To.Val.(*obj.Prog).As == obj.AVARKILL || p.To.Val.(*obj.Prog).As == obj.AVARLIVE) {
 				p.To.Val = p.To.Val.(*obj.Prog).Link
 			}
 		}
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index 7765d2d..458497d 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -809,7 +809,7 @@
 		return
 	}
 
-	fmt.Printf("checkauto %v: %v (%p; class=%d) not found in %v\n", Curfn, n, n, n.Class, p)
+	fmt.Printf("checkauto %v: %v (%p; class=%d) not found in %p %v\n", funcSym(Curfn), n, n, n.Class, p, p)
 	for l := fn.Func.Dcl; l != nil; l = l.Next {
 		fmt.Printf("\t%v (%p; class=%d)\n", l.N, l.N, l.N.Class)
 	}
diff --git a/src/cmd/compile/internal/gc/racewalk.go b/src/cmd/compile/internal/gc/racewalk.go
index 5f35c44..ee4f3ba 100644
--- a/src/cmd/compile/internal/gc/racewalk.go
+++ b/src/cmd/compile/internal/gc/racewalk.go
@@ -143,7 +143,7 @@
 		goto ret
 
 		// can't matter
-	case OCFUNC, OVARKILL:
+	case OCFUNC, OVARKILL, OVARLIVE:
 		goto ret
 
 	case OBLOCK:
diff --git a/src/cmd/compile/internal/gc/reg.go b/src/cmd/compile/internal/gc/reg.go
index f575094..14dc03b 100644
--- a/src/cmd/compile/internal/gc/reg.go
+++ b/src/cmd/compile/internal/gc/reg.go
@@ -1073,6 +1073,9 @@
 
 	for f := firstf; f != nil; f = f.Link {
 		p := f.Prog
+		// AVARLIVE must be considered a use, do not skip it.
+		// Otherwise the variable will be optimized away,
+		// and the whole point of AVARLIVE is to keep it on the stack.
 		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
 			continue
 		}
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 46aaaa7..be9af60 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -849,6 +849,13 @@
 			s.vars[&memVar] = s.newValue1A(ssa.OpVarKill, ssa.TypeMem, n.Left, s.mem())
 		}
 
+	case OVARLIVE:
+		// Insert a varlive op to record that a variable is still live.
+		if !n.Left.Addrtaken {
+			s.Fatalf("VARLIVE variable %s must have Addrtaken set", n.Left)
+		}
+		s.vars[&memVar] = s.newValue1A(ssa.OpVarLive, ssa.TypeMem, n.Left, s.mem())
+
 	case OCHECKNIL:
 		p := s.expr(n.Left)
 		s.nilCheck(p)
@@ -4122,6 +4129,8 @@
 		Gvardef(v.Aux.(*Node))
 	case ssa.OpVarKill:
 		gvarkill(v.Aux.(*Node))
+	case ssa.OpVarLive:
+		gvarlive(v.Aux.(*Node))
 	case ssa.OpAMD64LoweredNilCheck:
 		// Optimization - if the subsequent block has a load or store
 		// at the same address, we don't need to issue this instruction.
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index da23e05..b97cb3f 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -128,6 +128,7 @@
 	Captured  bool // is the variable captured by a closure
 	Byval     bool // is the variable captured by value or by reference
 	Needzero  bool // if it contains pointers, needs to be zeroed on function entry
+	Keepalive bool // mark value live across unknown assembly call
 }
 
 type Param struct {
@@ -342,6 +343,7 @@
 	OCFUNC      // reference to c function pointer (not go func value)
 	OCHECKNIL   // emit code to ensure pointer/interface not nil
 	OVARKILL    // variable is dead
+	OVARLIVE    // variable is alive
 
 	// thearch-specific registers
 	OREGISTER // a register, such as AX.
diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go
index 2244802..f74bb33 100644
--- a/src/cmd/compile/internal/gc/typecheck.go
+++ b/src/cmd/compile/internal/gc/typecheck.go
@@ -687,8 +687,6 @@
 				n.Left = l
 				n.Right = r
 			}
-		} else if n.Op == OANDAND || n.Op == OOROR {
-			evconst(n)
 		}
 
 		if et == TSTRING {
@@ -2025,7 +2023,8 @@
 		OEMPTY,
 		OGOTO,
 		OXFALL,
-		OVARKILL:
+		OVARKILL,
+		OVARLIVE:
 		ok |= Etop
 		break OpSwitch
 
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 838def9..dddcb68 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -216,7 +216,8 @@
 		ODCLCONST,
 		ODCLTYPE,
 		OCHECKNIL,
-		OVARKILL:
+		OVARKILL,
+		OVARLIVE:
 		break
 
 	case OBLOCK:
diff --git a/src/cmd/compile/internal/mips64/peep.go b/src/cmd/compile/internal/mips64/peep.go
index 3d82c81..f97be60 100644
--- a/src/cmd/compile/internal/mips64/peep.go
+++ b/src/cmd/compile/internal/mips64/peep.go
@@ -688,6 +688,7 @@
 		obj.AFUNCDATA,
 		obj.AVARDEF,
 		obj.AVARKILL,
+		obj.AVARLIVE,
 		obj.AUSEFIELD:
 		return 0
 	}
diff --git a/src/cmd/compile/internal/mips64/prog.go b/src/cmd/compile/internal/mips64/prog.go
index bf13d82..b07c7fe 100644
--- a/src/cmd/compile/internal/mips64/prog.go
+++ b/src/cmd/compile/internal/mips64/prog.go
@@ -34,6 +34,7 @@
 	obj.ACHECKNIL: {Flags: gc.LeftRead},
 	obj.AVARDEF:   {Flags: gc.Pseudo | gc.RightWrite},
 	obj.AVARKILL:  {Flags: gc.Pseudo | gc.RightWrite},
+	obj.AVARLIVE:  {Flags: gc.Pseudo | gc.LeftRead},
 
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the MIPS opcode.
diff --git a/src/cmd/compile/internal/ppc64/peep.go b/src/cmd/compile/internal/ppc64/peep.go
index fadaa4a..1ff3109 100644
--- a/src/cmd/compile/internal/ppc64/peep.go
+++ b/src/cmd/compile/internal/ppc64/peep.go
@@ -953,6 +953,7 @@
 		obj.AFUNCDATA,
 		obj.AVARDEF,
 		obj.AVARKILL,
+		obj.AVARLIVE,
 		obj.AUSEFIELD:
 		return 0
 	}
diff --git a/src/cmd/compile/internal/ppc64/prog.go b/src/cmd/compile/internal/ppc64/prog.go
index 92293be..6b48256 100644
--- a/src/cmd/compile/internal/ppc64/prog.go
+++ b/src/cmd/compile/internal/ppc64/prog.go
@@ -34,6 +34,7 @@
 	obj.ACHECKNIL: {Flags: gc.LeftRead},
 	obj.AVARDEF:   {Flags: gc.Pseudo | gc.RightWrite},
 	obj.AVARKILL:  {Flags: gc.Pseudo | gc.RightWrite},
+	obj.AVARLIVE:  {Flags: gc.Pseudo | gc.LeftRead},
 
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the Power opcode.
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index e57dd93..d17f558 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -373,6 +373,7 @@
 
 	{name: "VarDef", typ: "Mem"}, // aux is a *gc.Node of a variable that is about to be initialized.  arg0=mem, returns mem
 	{name: "VarKill"},            // aux is a *gc.Node of a variable that is known to be dead.  arg0=mem, returns mem
+	{name: "VarLive"},            // aux is a *gc.Node of a variable that must be kept live.  arg0=mem, returns mem
 }
 
 //     kind           control    successors       implicit exit
diff --git a/src/cmd/compile/internal/ssa/lower.go b/src/cmd/compile/internal/ssa/lower.go
index 1b50eb6..af0ee4c 100644
--- a/src/cmd/compile/internal/ssa/lower.go
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -21,7 +21,7 @@
 				continue // lowered
 			}
 			switch v.Op {
-			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill:
+			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive:
 				continue // ok not to lower
 			}
 			s := "not lowered: " + v.Op.String() + " " + v.Type.SimpleString()
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 2fd7f6b..433794a 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -552,6 +552,7 @@
 	OpFwdRef
 	OpVarDef
 	OpVarKill
+	OpVarLive
 )
 
 var opcodeTable = [...]opInfo{
@@ -4310,6 +4311,10 @@
 		name:    "VarKill",
 		generic: true,
 	},
+	{
+		name:    "VarLive",
+		generic: true,
+	},
 }
 
 func (o Op) Asm() int       { return opcodeTable[o].asm }
diff --git a/src/cmd/compile/internal/x86/ggen.go b/src/cmd/compile/internal/x86/ggen.go
index e559a9f..139b199 100644
--- a/src/cmd/compile/internal/x86/ggen.go
+++ b/src/cmd/compile/internal/x86/ggen.go
@@ -764,9 +764,7 @@
 				gc.Cgen(nr, &tmp)
 				gc.Cgen(nl, &tmp)
 			}
-
-			gins(x86.AFUCOMIP, &tmp, &n2)
-			gins(x86.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
+			gins(x86.AFUCOMPP, &tmp, &n2)
 		} else {
 			// TODO(rsc): The moves back and forth to memory
 			// here are for truncating the value to 32 bits.
@@ -783,9 +781,9 @@
 			gc.Cgen(nl, &t2)
 			gmove(&t2, &tmp)
 			gins(x86.AFCOMFP, &t1, &tmp)
-			gins(x86.AFSTSW, nil, &ax)
-			gins(x86.ASAHF, nil, nil)
 		}
+		gins(x86.AFSTSW, nil, &ax)
+		gins(x86.ASAHF, nil, nil)
 	} else {
 		// Not 387
 		if !nl.Addable {
diff --git a/src/cmd/compile/internal/x86/gsubr.go b/src/cmd/compile/internal/x86/gsubr.go
index 0397857..9859571 100644
--- a/src/cmd/compile/internal/x86/gsubr.go
+++ b/src/cmd/compile/internal/x86/gsubr.go
@@ -1198,14 +1198,17 @@
 
 		// if 0 > v { answer = 0 }
 		gins(x86.AFMOVD, &zerof, &f0)
-
-		gins(x86.AFUCOMIP, &f0, &f1)
+		gins(x86.AFUCOMP, &f0, &f1)
+		gins(x86.AFSTSW, nil, &ax)
+		gins(x86.ASAHF, nil, nil)
 		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
 
 		// if 1<<64 <= v { answer = 0 too }
 		gins(x86.AFMOVD, &two64f, &f0)
 
-		gins(x86.AFUCOMIP, &f0, &f1)
+		gins(x86.AFUCOMP, &f0, &f1)
+		gins(x86.AFSTSW, nil, &ax)
+		gins(x86.ASAHF, nil, nil)
 		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
 		gc.Patch(p1, gc.Pc)
 		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
@@ -1235,7 +1238,9 @@
 		// actual work
 		gins(x86.AFMOVD, &two63f, &f0)
 
-		gins(x86.AFUCOMIP, &f0, &f1)
+		gins(x86.AFUCOMP, &f0, &f1)
+		gins(x86.AFSTSW, nil, &ax)
+		gins(x86.ASAHF, nil, nil)
 		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
 		gins(x86.AFMOVVP, &f0, t)
 		p3 := gc.Gbranch(obj.AJMP, nil, 0)
diff --git a/src/cmd/compile/internal/x86/prog.go b/src/cmd/compile/internal/x86/prog.go
index 465a21f..3399a28 100644
--- a/src/cmd/compile/internal/x86/prog.go
+++ b/src/cmd/compile/internal/x86/prog.go
@@ -40,6 +40,7 @@
 	obj.ACHECKNIL: {Flags: gc.LeftRead},
 	obj.AVARDEF:   {Flags: gc.Pseudo | gc.RightWrite},
 	obj.AVARKILL:  {Flags: gc.Pseudo | gc.RightWrite},
+	obj.AVARLIVE:  {Flags: gc.Pseudo | gc.LeftRead},
 
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the Intel opcode.
@@ -91,8 +92,12 @@
 	x86.AFCOMDPP:   {Flags: gc.SizeD | gc.LeftAddr | gc.RightRead},
 	x86.AFCOMF:     {Flags: gc.SizeF | gc.LeftAddr | gc.RightRead},
 	x86.AFCOMFP:    {Flags: gc.SizeF | gc.LeftAddr | gc.RightRead},
-	x86.AFUCOMIP:   {Flags: gc.SizeF | gc.LeftAddr | gc.RightRead},
-	x86.AFCHS:      {Flags: gc.SizeD | RightRdwr}, // also SizeF
+	// NOTE(khr): don't use FUCOMI* instructions, not available
+	// on Pentium MMX.  See issue 13923.
+	//x86.AFUCOMIP:   {Flags: gc.SizeF | gc.LeftAddr | gc.RightRead},
+	x86.AFUCOMP:  {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
+	x86.AFUCOMPP: {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
+	x86.AFCHS:    {Flags: gc.SizeD | RightRdwr}, // also SizeF
 
 	x86.AFDIVDP:  {Flags: gc.SizeD | gc.LeftAddr | RightRdwr},
 	x86.AFDIVF:   {Flags: gc.SizeF | gc.LeftAddr | RightRdwr},
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index 679c23b..4cd696a 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -656,7 +656,7 @@
 	case "c-shared":
 		switch pair {
 		case "linux-386", "linux-amd64", "linux-arm", "linux-arm64",
-			"darwin-amd64",
+			"darwin-amd64", "darwin-386",
 			"android-arm", "android-arm64", "android-386":
 			return true
 		}
@@ -913,6 +913,12 @@
 			s = "DYLD_LIBRARY_PATH"
 		}
 		cmd.Env = mergeEnvLists([]string{s + "=."}, os.Environ())
+
+		// On FreeBSD 64-bit architectures, the 32-bit linker looks for
+		// different environment variables.
+		if t.goos == "freebsd" && t.gohostarch == "386" {
+			cmd.Env = mergeEnvLists([]string{"LD_32_LIBRARY_PATH=."}, cmd.Env)
+		}
 	}
 	return cmd.Run()
 }
diff --git a/src/cmd/dist/util.go b/src/cmd/dist/util.go
index 1b42954..1b5d1f9 100644
--- a/src/cmd/dist/util.go
+++ b/src/cmd/dist/util.go
@@ -461,7 +461,7 @@
 		}
 	}
 
-	if gohostarch == "arm" {
+	if gohostarch == "arm" || gohostarch == "mips64" || gohostarch == "mips64le" {
 		maxbg = min(maxbg, runtime.NumCPU())
 	}
 	bginit()
diff --git a/src/cmd/go/build.go b/src/cmd/go/build.go
index 6c6d551..6a8edaf 100644
--- a/src/cmd/go/build.go
+++ b/src/cmd/go/build.go
@@ -354,7 +354,7 @@
 			case "linux/amd64", "linux/arm", "linux/arm64", "linux/386",
 				"android/amd64", "android/arm", "android/arm64", "android/386":
 				codegenArg = "-shared"
-			case "darwin/amd64":
+			case "darwin/amd64", "darwin/386":
 			default:
 				fatalf("-buildmode=c-shared not supported on %s\n", platform)
 			}
@@ -822,7 +822,9 @@
 	pkg := new(Package)
 	pkg.local = true
 	pkg.cmdline = true
+	stk.push("main")
 	pkg.load(&stk, bp, err)
+	stk.pop()
 	pkg.localPrefix = dirToImportPath(dir)
 	pkg.ImportPath = "command-line-arguments"
 	pkg.target = ""
@@ -999,13 +1001,22 @@
 
 		// Install header for cgo in c-archive and c-shared modes.
 		if p.usesCgo() && (buildBuildmode == "c-archive" || buildBuildmode == "c-shared") {
+			hdrTarget := a.target[:len(a.target)-len(filepath.Ext(a.target))] + ".h"
+			if buildContext.Compiler == "gccgo" {
+				// For the header file, remove the "lib"
+				// added by go/build, so we generate pkg.h
+				// rather than libpkg.h.
+				dir, file := filepath.Split(hdrTarget)
+				file = strings.TrimPrefix(file, "lib")
+				hdrTarget = filepath.Join(dir, file)
+			}
 			ah := &action{
 				p:      a.p,
 				deps:   []*action{a.deps[0]},
 				f:      (*builder).installHeader,
 				pkgdir: a.pkgdir,
 				objdir: a.objdir,
-				target: a.target[:len(a.target)-len(filepath.Ext(a.target))] + ".h",
+				target: hdrTarget,
 			}
 			a.deps = append(a.deps, ah)
 		}
@@ -2711,6 +2722,10 @@
 		// libffi.
 		ldflags = append(ldflags, "-Wl,-r", "-nostdlib", "-Wl,--whole-archive", "-lgolibbegin", "-Wl,--no-whole-archive")
 
+		if b.gccSupportsNoPie() {
+			ldflags = append(ldflags, "-no-pie")
+		}
+
 		// We are creating an object file, so we don't want a build ID.
 		ldflags = b.disableBuildID(ldflags)
 
@@ -2718,7 +2733,7 @@
 		out = out + ".o"
 
 	case "c-shared":
-		ldflags = append(ldflags, "-shared", "-nostdlib", "-Wl,--whole-archive", "-lgolibbegin", "-Wl,--no-whole-archive", "-lgo", "-lgcc_s", "-lgcc")
+		ldflags = append(ldflags, "-shared", "-nostdlib", "-Wl,--whole-archive", "-lgolibbegin", "-Wl,--no-whole-archive", "-lgo", "-lgcc_s", "-lgcc", "-lc", "-lgcc")
 
 	default:
 		fatalf("-buildmode=%s not supported for gccgo", ldBuildmode)
@@ -2902,6 +2917,36 @@
 	return a
 }
 
+// On systems with PIE (position independent executables) enabled by default,
+// -no-pie must be passed when doing a partial link with -Wl,-r. But -no-pie is
+// not supported by all compilers.
+func (b *builder) gccSupportsNoPie() bool {
+	if goos != "linux" {
+		// On some BSD platforms, error messages from the
+		// compiler make it to the console despite cmd.Std*
+		// all being nil. As -no-pie is only required on linux
+		// systems so far, we only test there.
+		return false
+	}
+	src := filepath.Join(b.work, "trivial.c")
+	if err := ioutil.WriteFile(src, []byte{}, 0666); err != nil {
+		return false
+	}
+	cmdArgs := b.gccCmd(b.work)
+	cmdArgs = append(cmdArgs, "-no-pie", "-c", "trivial.c")
+	if buildN || buildX {
+		b.showcmd(b.work, "%s", joinUnambiguously(cmdArgs))
+		if buildN {
+			return false
+		}
+	}
+	cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...)
+	cmd.Dir = b.work
+	cmd.Env = envForDir(cmd.Dir, os.Environ())
+	out, err := cmd.CombinedOutput()
+	return err == nil && !bytes.Contains(out, []byte("unrecognized"))
+}
+
 // gccArchArgs returns arguments to pass to gcc based on the architecture.
 func (b *builder) gccArchArgs() []string {
 	switch goarch {
@@ -3158,6 +3203,10 @@
 	}
 	ldflags := stringList(bareLDFLAGS, "-Wl,-r", "-nostdlib", staticLibs)
 
+	if b.gccSupportsNoPie() {
+		ldflags = append(ldflags, "-no-pie")
+	}
+
 	// We are creating an object file, so we don't want a build ID.
 	ldflags = b.disableBuildID(ldflags)
 
diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go
index cc36b43..50c7521 100644
--- a/src/cmd/go/go_test.go
+++ b/src/cmd/go/go_test.go
@@ -961,6 +961,16 @@
 	tg.grepBoth("use of internal package not allowed", "wrote error message for testdata/testinternal2")
 }
 
+func TestRunInternal(t *testing.T) {
+	tg := testgo(t)
+	defer tg.cleanup()
+	dir := filepath.Join(tg.pwd(), "testdata")
+	tg.setenv("GOPATH", dir)
+	tg.run("run", filepath.Join(dir, "src/run/good.go"))
+	tg.runFail("run", filepath.Join(dir, "src/run/bad.go"))
+	tg.grepStderr("use of internal package not allowed", "unexpected error for run/bad.go")
+}
+
 func testMove(t *testing.T, vcs, url, base, config string) {
 	testenv.MustHaveExternalNetwork(t)
 
diff --git a/src/cmd/go/pkg.go b/src/cmd/go/pkg.go
index 3361fc3..0507841 100644
--- a/src/cmd/go/pkg.go
+++ b/src/cmd/go/pkg.go
@@ -348,11 +348,9 @@
 	// TODO: After Go 1, decide when to pass build.AllowBinary here.
 	// See issue 3268 for mistakes to avoid.
 	buildMode := build.ImportComment
-	if go15VendorExperiment && mode&useVendor != 0 && path == origPath {
-		// We've already searched the vendor directories and didn't find anything.
-		// Let Import search them again so that, if the package is not found anywhere,
-		// the error includes the vendor directories in the list of places considered.
-		buildMode |= build.AllowVendor
+	if !go15VendorExperiment || mode&useVendor == 0 || path != origPath {
+		// Not vendoring, or we already found the vendored path.
+		buildMode |= build.IgnoreVendor
 	}
 	bp, err := buildContext.Import(path, srcDir, buildMode)
 	bp.ImportPath = importPath
@@ -422,7 +420,7 @@
 			continue
 		}
 		targ := filepath.Join(dir[:i], vpath)
-		if isDir(targ) {
+		if isDir(targ) && hasGoFiles(targ) {
 			// We started with parent's dir c:\gopath\src\foo\bar\baz\quux\xyzzy.
 			// We know the import path for parent's dir.
 			// We chopped off some number of path elements and
@@ -445,6 +443,20 @@
 	return path
 }
 
+// hasGoFiles reports whether dir contains any files with names ending in .go.
+// For a vendor check we must exclude directories that contain no .go files.
+// Otherwise it is not possible to vendor just a/b/c and still import the
+// non-vendored a/b. See golang.org/issue/13832.
+func hasGoFiles(dir string) bool {
+	fis, _ := ioutil.ReadDir(dir)
+	for _, fi := range fis {
+		if !fi.IsDir() && strings.HasSuffix(fi.Name(), ".go") {
+			return true
+		}
+	}
+	return false
+}
+
 // reusePackage reuses package p to satisfy the import at the top
 // of the import stack stk.  If this use causes an import loop,
 // reusePackage updates p's error information to record the loop.
@@ -504,7 +516,7 @@
 		i-- // rewind over slash in ".../internal"
 	}
 	parent := p.Dir[:i+len(p.Dir)-len(p.ImportPath)]
-	if hasPathPrefix(filepath.ToSlash(srcDir), filepath.ToSlash(parent)) {
+	if hasFilePathPrefix(filepath.Clean(srcDir), filepath.Clean(parent)) {
 		return p
 	}
 
@@ -601,7 +613,7 @@
 		return p
 	}
 	parent := p.Dir[:truncateTo]
-	if hasPathPrefix(filepath.ToSlash(srcDir), filepath.ToSlash(parent)) {
+	if hasFilePathPrefix(filepath.Clean(srcDir), filepath.Clean(parent)) {
 		return p
 	}
 
diff --git a/src/cmd/go/testdata/src/run/bad.go b/src/cmd/go/testdata/src/run/bad.go
new file mode 100644
index 0000000..c1cc3ac
--- /dev/null
+++ b/src/cmd/go/testdata/src/run/bad.go
@@ -0,0 +1,5 @@
+package main
+
+import _ "run/subdir/internal/private"
+
+func main() {}
diff --git a/src/cmd/go/testdata/src/run/good.go b/src/cmd/go/testdata/src/run/good.go
new file mode 100644
index 0000000..0b67dce
--- /dev/null
+++ b/src/cmd/go/testdata/src/run/good.go
@@ -0,0 +1,5 @@
+package main
+
+import _ "run/internal"
+
+func main() {}
diff --git a/src/cmd/go/testdata/src/run/internal/internal.go b/src/cmd/go/testdata/src/run/internal/internal.go
new file mode 100644
index 0000000..5bf0569
--- /dev/null
+++ b/src/cmd/go/testdata/src/run/internal/internal.go
@@ -0,0 +1 @@
+package internal
diff --git a/src/cmd/go/testdata/src/run/subdir/internal/private/private.go b/src/cmd/go/testdata/src/run/subdir/internal/private/private.go
new file mode 100644
index 0000000..735e4dc
--- /dev/null
+++ b/src/cmd/go/testdata/src/run/subdir/internal/private/private.go
@@ -0,0 +1 @@
+package private
diff --git a/src/cmd/go/testdata/src/vend/dir1/dir1.go b/src/cmd/go/testdata/src/vend/dir1/dir1.go
new file mode 100644
index 0000000..b719ead
--- /dev/null
+++ b/src/cmd/go/testdata/src/vend/dir1/dir1.go
@@ -0,0 +1 @@
+package dir1
diff --git a/src/cmd/go/testdata/src/vend/vendor/vend/dir1/dir2/dir2.go b/src/cmd/go/testdata/src/vend/vendor/vend/dir1/dir2/dir2.go
new file mode 100644
index 0000000..6fe35e9
--- /dev/null
+++ b/src/cmd/go/testdata/src/vend/vendor/vend/dir1/dir2/dir2.go
@@ -0,0 +1 @@
+package dir2
diff --git a/src/cmd/go/testdata/src/vend/x/x.go b/src/cmd/go/testdata/src/vend/x/x.go
index ae526eb..bdcde57 100644
--- a/src/cmd/go/testdata/src/vend/x/x.go
+++ b/src/cmd/go/testdata/src/vend/x/x.go
@@ -3,3 +3,5 @@
 import _ "p"
 import _ "q"
 import _ "r"
+import _ "vend/dir1"      // not vendored
+import _ "vend/dir1/dir2" // vendored
diff --git a/src/cmd/go/vendor_test.go b/src/cmd/go/vendor_test.go
index ed73be3..006a8c9 100644
--- a/src/cmd/go/vendor_test.go
+++ b/src/cmd/go/vendor_test.go
@@ -24,12 +24,14 @@
 	tg.run("list", "-f", "{{.ImportPath}} {{.Imports}}", "vend/...")
 	want := `
 		vend [vend/vendor/p r]
+		vend/dir1 []
 		vend/hello [fmt vend/vendor/strings]
 		vend/subdir [vend/vendor/p r]
 		vend/vendor/p []
 		vend/vendor/q []
 		vend/vendor/strings []
-		vend/x [vend/x/vendor/p vend/vendor/q vend/x/vendor/r]
+		vend/vendor/vend/dir1/dir2 []
+		vend/x [vend/x/vendor/p vend/vendor/q vend/x/vendor/r vend/dir1 vend/vendor/vend/dir1/dir2]
 		vend/x/invalid [vend/x/invalid/vendor/foo]
 		vend/x/vendor/p []
 		vend/x/vendor/p/p [notfound]
@@ -45,6 +47,14 @@
 	}
 }
 
+func TestVendorBuild(t *testing.T) {
+	tg := testgo(t)
+	defer tg.cleanup()
+	tg.setenv("GOPATH", filepath.Join(tg.pwd(), "testdata"))
+	tg.setenv("GO15VENDOREXPERIMENT", "1")
+	tg.run("build", "vend/x")
+}
+
 func TestVendorRun(t *testing.T) {
 	tg := testgo(t)
 	defer tg.cleanup()
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index 511e409..bc89823 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -282,6 +282,7 @@
 	AUSEFIELD
 	AVARDEF
 	AVARKILL
+	AVARLIVE
 	A_ARCHSPECIFIC
 )
 
@@ -609,6 +610,12 @@
 	Version            int
 	Textp              *LSym
 	Etextp             *LSym
+
+	// state for writing objects
+	Text  *LSym
+	Data  *LSym
+	Etext *LSym
+	Edata *LSym
 }
 
 // The smallest possible offset from the hardware stack pointer to a local
diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go
index 13930aa..8d4a506 100644
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@@ -111,6 +111,11 @@
 // out a Go object file.  The linker does not call this; the linker
 // does not write out object files.
 func Writeobjdirect(ctxt *Link, b *Biobuf) {
+	Flushplist(ctxt)
+	Writeobjfile(ctxt, b)
+}
+
+func Flushplist(ctxt *Link) {
 	var flag int
 	var s *LSym
 	var p *Prog
@@ -119,13 +124,11 @@
 
 	// Build list of symbols, and assign instructions to lists.
 	// Ignore ctxt->plist boundaries. There are no guarantees there,
-	// and the C compilers and assemblers just use one big list.
-	var text *LSym
-
+	// and the assemblers just use one big list.
 	var curtext *LSym
-	var data *LSym
+	var text *LSym
 	var etext *LSym
-	var edata *LSym
+
 	for pl := ctxt.Plist; pl != nil; pl = pl.Link {
 		for p = pl.Firstpc; p != nil; p = plink {
 			if ctxt.Debugasm != 0 && ctxt.Debugvlog != 0 {
@@ -174,10 +177,10 @@
 					log.Fatalf("symbol %s listed multiple times", s.Name)
 				}
 				s.Onlist = 1
-				if data == nil {
-					data = s
+				if ctxt.Data == nil {
+					ctxt.Data = s
 				} else {
-					edata.Next = s
+					ctxt.Edata.Next = s
 				}
 				s.Next = nil
 				s.Size = p.To.Offset
@@ -195,7 +198,7 @@
 				} else if flag&TLSBSS != 0 {
 					s.Type = STLSBSS
 				}
-				edata = s
+				ctxt.Edata = s
 				continue
 			}
 
@@ -298,6 +301,17 @@
 		linkpcln(ctxt, s)
 	}
 
+	// Add to running list in ctxt.
+	if ctxt.Etext == nil {
+		ctxt.Text = text
+	} else {
+		ctxt.Etext.Next = text
+	}
+	ctxt.Etext = etext
+	ctxt.Plist = nil
+}
+
+func Writeobjfile(ctxt *Link, b *Biobuf) {
 	// Emit header.
 	Bputc(b, 0)
 
@@ -312,10 +326,10 @@
 	wrstring(b, "")
 
 	// Emit symbols.
-	for s := text; s != nil; s = s.Next {
+	for s := ctxt.Text; s != nil; s = s.Next {
 		writesym(ctxt, b, s)
 	}
-	for s := data; s != nil; s = s.Next {
+	for s := ctxt.Data; s != nil; s = s.Next {
 		writesym(ctxt, b, s)
 	}
 
diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go
index 5846470..ea59d46 100644
--- a/src/cmd/internal/obj/util.go
+++ b/src/cmd/internal/obj/util.go
@@ -611,7 +611,7 @@
 }
 
 func Aconv(a int) string {
-	if a < A_ARCHSPECIFIC {
+	if 0 <= a && a < len(Anames) {
 		return Anames[a]
 	}
 	for i := range aSpace {
@@ -643,6 +643,7 @@
 	"USEFIELD",
 	"VARDEF",
 	"VARKILL",
+	"VARLIVE",
 }
 
 func Bool2int(b bool) int {
diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go
index 244aa15..4ee8cfb 100644
--- a/src/cmd/internal/obj/x86/a.out.go
+++ b/src/cmd/internal/obj/x86/a.out.go
@@ -181,6 +181,7 @@
 	APAUSE
 	APOPAL
 	APOPAW
+	APOPCNT
 	APOPFL
 	APOPFW
 	APOPL
@@ -500,6 +501,7 @@
 	AXADDQ
 	AXCHGQ
 	AXORQ
+	AXGETBV
 
 	// media
 	AADDPD
@@ -614,6 +616,9 @@
 	APCMPGTL
 	APCMPGTW
 	APEXTRW
+	APEXTRB
+	APEXTRD
+	APEXTRQ
 	APFACC
 	APFADD
 	APFCMPEQ
@@ -632,6 +637,7 @@
 	APFSUB
 	APFSUBR
 	APINSRW
+	APINSRB
 	APINSRD
 	APINSRQ
 	APMADDWL
diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go
index 9eb57b0..392899c 100644
--- a/src/cmd/internal/obj/x86/anames.go
+++ b/src/cmd/internal/obj/x86/anames.go
@@ -149,6 +149,7 @@
 	"PAUSE",
 	"POPAL",
 	"POPAW",
+	"POPCNT",
 	"POPFL",
 	"POPFW",
 	"POPL",
@@ -451,6 +452,7 @@
 	"XADDQ",
 	"XCHGQ",
 	"XORQ",
+	"XGETBV",
 	"ADDPD",
 	"ADDPS",
 	"ADDSD",
@@ -563,6 +565,9 @@
 	"PCMPGTL",
 	"PCMPGTW",
 	"PEXTRW",
+	"PEXTRB",
+	"PEXTRD",
+	"PEXTRQ",
 	"PFACC",
 	"PFADD",
 	"PFCMPEQ",
@@ -581,6 +586,7 @@
 	"PFSUB",
 	"PFSUBR",
 	"PINSRW",
+	"PINSRB",
 	"PINSRD",
 	"PINSRQ",
 	"PMADDWL",
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 8bb4dff..164dbd6 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -187,6 +187,7 @@
 	Zm_r_xm_nr
 	Zr_m_xm_nr
 	Zibm_r /* mmx1,mmx2/mem64,imm8 */
+	Zibr_m
 	Zmb_r
 	Zaut_r
 	Zo_m
@@ -219,6 +220,7 @@
 	Pf2   = 0xf2 /* xmm escape 1: f2 0f */
 	Pf3   = 0xf3 /* xmm escape 2: f3 0f */
 	Pq3   = 0x67 /* xmm escape 3: 66 48 0f */
+	Pfw   = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
 	Pvex1 = 0xc5 /* 66.0f escape, vex encoding */
 	Pvex2 = 0xc6 /* f3.0f escape, vex encoding */
 	Pvex3 = 0xc7 /* 66.0f38 escape, vex encoding */
@@ -720,6 +722,10 @@
 	{Yu8, Yxr, Yrl, Zibm_r, 2},
 }
 
+var yextr = []ytab{
+	{Yu8, Yxr, Ymm, Zibr_m, 3},
+}
+
 var yinsrw = []ytab{
 	{Yu8, Yml, Yxr, Zibm_r, 2},
 }
@@ -1162,6 +1168,9 @@
 	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
 	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
 	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
+	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
+	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
+	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
 	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
 	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
 	{API2FL, ymfp, Px, [23]uint8{0x0d}},
@@ -1183,6 +1192,7 @@
 	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
 	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
 	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
+	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
 	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
 	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
 	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
@@ -1198,6 +1208,7 @@
 	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
 	{APOPAL, ynone, P32, [23]uint8{0x61}},
 	{APOPAW, ynone, Pe, [23]uint8{0x61}},
+	{APOPCNT, yml_rl, Pfw, [23]uint8{0xb8}},
 	{APOPFL, ynone, P32, [23]uint8{0x9d}},
 	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
 	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
@@ -1533,6 +1544,7 @@
 	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
 	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
 	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
+	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
 	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
 	{obj.ATYPE, nil, 0, [23]uint8{}},
 	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
@@ -3194,6 +3206,15 @@
 				ctxt.Andptr[0] = Pm
 				ctxt.Andptr = ctxt.Andptr[1:]
 
+			case Pfw: /* first escape, Rex.w, and second escape */
+				ctxt.Andptr[0] = Pf3
+				ctxt.Andptr = ctxt.Andptr[1:]
+
+				ctxt.Andptr[0] = Pw
+				ctxt.Andptr = ctxt.Andptr[1:]
+				ctxt.Andptr[0] = Pm
+				ctxt.Andptr = ctxt.Andptr[1:]
+
 			case Pm: /* opcode escape */
 				ctxt.Andptr[0] = Pm
 				ctxt.Andptr = ctxt.Andptr[1:]
@@ -3343,7 +3364,7 @@
 				ctxt.Andptr[0] = byte(op)
 				ctxt.Andptr = ctxt.Andptr[1:]
 
-			case Zibm_r:
+			case Zibm_r, Zibr_m:
 				for {
 					tmp1 := z
 					z++
@@ -3354,7 +3375,11 @@
 					ctxt.Andptr[0] = byte(op)
 					ctxt.Andptr = ctxt.Andptr[1:]
 				}
-				asmand(ctxt, p, p.From3, &p.To)
+				if yt.zcase == Zibr_m {
+					asmand(ctxt, p, &p.To, p.From3)
+				} else {
+					asmand(ctxt, p, p.From3, &p.To)
+				}
 				ctxt.Andptr[0] = byte(p.From.Offset)
 				ctxt.Andptr = ctxt.Andptr[1:]
 
diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go
index 5bb206a..7561250 100644
--- a/src/cmd/link/internal/ld/lib.go
+++ b/src/cmd/link/internal/ld/lib.go
@@ -1068,7 +1068,7 @@
 		argv = append(argv, "-pie")
 	case BuildmodeCShared:
 		if HEADTYPE == obj.Hdarwin {
-			argv = append(argv, "-dynamiclib")
+			argv = append(argv, "-dynamiclib", "-Wl,-read_only_relocs,suppress")
 		} else {
 			// ELF.
 			argv = append(argv, "-Wl,-Bsymbolic")
diff --git a/src/cmd/link/internal/ld/macho.go b/src/cmd/link/internal/ld/macho.go
index af93361..1c7f3a0 100644
--- a/src/cmd/link/internal/ld/macho.go
+++ b/src/cmd/link/internal/ld/macho.go
@@ -566,6 +566,25 @@
 		}
 	}
 
+	if Linkmode == LinkInternal {
+		// For lldb, must say LC_VERSION_MIN_MACOSX or else
+		// it won't know that this Mach-O binary is from OS X
+		// (could be iOS or WatchOS intead).
+		// Go on iOS uses linkmode=external, and linkmode=external
+		// adds this itself. So we only need this code for linkmode=internal
+		// and we can assume OS X.
+		//
+		// See golang.org/issues/12941.
+		const (
+			LC_VERSION_MIN_MACOSX   = 0x24
+			LC_VERSION_MIN_IPHONEOS = 0x25
+			LC_VERSION_MIN_WATCHOS  = 0x30
+		)
+		ml := newMachoLoad(LC_VERSION_MIN_MACOSX, 2)
+		ml.data[0] = 10<<16 | 7<<8 | 0<<0 // OS X version 10.7.0
+		ml.data[1] = 10<<16 | 7<<8 | 0<<0 // SDK 10.7.0
+	}
+
 	// TODO: dwarf headers go in ms too
 	if Debug['s'] == 0 {
 		dwarfaddmachoheaders(ms)
diff --git a/src/cmd/link/internal/ld/pe.go b/src/cmd/link/internal/ld/pe.go
index 16ce7bd..00fbb17 100644
--- a/src/cmd/link/internal/ld/pe.go
+++ b/src/cmd/link/internal/ld/pe.go
@@ -1217,12 +1217,17 @@
 	// larger size, as verified with VMMap.
 
 	// Go code would be OK with 64k stacks, but we need larger stacks for cgo.
-	// That default stack reserve size affects only the main thread,
-	// for other threads we specify stack size in runtime explicitly
+	//
+	// The default stack reserve size affects only the main
+	// thread, ctrlhandler thread, and profileloop thread. For
+	// these, it must be greater than the stack size assumed by
+	// externalthreadhandler.
+	//
+	// For other threads we specify stack size in runtime explicitly
 	// (runtime knows whether cgo is enabled or not).
-	// If you change stack reserve sizes here,
-	// change STACKSIZE in runtime/cgo/gcc_windows_{386,amd64}.c and correspondent
-	// CreateThread parameter in runtime.newosproc as well.
+	// For these, the reserve must match STACKSIZE in
+	// runtime/cgo/gcc_windows_{386,amd64}.c and the correspondent
+	// CreateThread parameter in runtime.newosproc.
 	if !iscgo {
 		oh64.SizeOfStackReserve = 0x00020000
 		oh.SizeOfStackReserve = 0x00020000