cmd/internal/gc: emit write barriers at lower level

This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.

name                                       old                     new          delta
BenchmarkBinaryTree17              18.2s × (0.99,1.01)     17.9s × (0.99,1.01)  -1.57%
BenchmarkFannkuch11                4.44s × (1.00,1.00)     4.42s × (1.00,1.00)  -0.40%
BenchmarkFmtFprintfEmpty           119ns × (0.95,1.02)     118ns × (0.96,1.02)  ~
BenchmarkFmtFprintfString          501ns × (0.99,1.02)     486ns × (0.99,1.01)  -2.89%
BenchmarkFmtFprintfInt             474ns × (0.99,1.00)     457ns × (0.99,1.01)  -3.59%
BenchmarkFmtFprintfIntInt          792ns × (1.00,1.00)     768ns × (1.00,1.01)  -3.03%
BenchmarkFmtFprintfPrefixedInt     574ns × (1.00,1.01)     584ns × (0.99,1.03)  +1.83%
BenchmarkFmtFprintfFloat           749ns × (1.00,1.00)     739ns × (0.99,1.00)  -1.34%
BenchmarkFmtManyArgs              2.94µs × (1.00,1.01)    2.77µs × (1.00,1.00)  -5.76%
BenchmarkGobDecode                39.5ms × (0.99,1.01)    39.3ms × (0.99,1.01)  ~
BenchmarkGobEncode                39.4ms × (1.00,1.01)    39.4ms × (0.99,1.00)  ~
BenchmarkGzip                      658ms × (1.00,1.01)     661ms × (0.99,1.01)  ~
BenchmarkGunzip                    142ms × (1.00,1.00)     142ms × (1.00,1.00)  +0.22%
BenchmarkHTTPClientServer          134µs × (0.99,1.01)     133µs × (0.98,1.01)  ~
BenchmarkJSONEncode               57.1ms × (0.99,1.01)    56.5ms × (0.99,1.01)  ~
BenchmarkJSONDecode                141ms × (1.00,1.00)     143ms × (1.00,1.00)  +1.09%
BenchmarkMandelbrot200            6.01ms × (1.00,1.00)    6.01ms × (1.00,1.00)  ~
BenchmarkGoParse                  10.1ms × (0.91,1.09)     9.6ms × (0.94,1.07)  ~
BenchmarkRegexpMatchEasy0_32       207ns × (1.00,1.01)     210ns × (1.00,1.00)  +1.45%
BenchmarkRegexpMatchEasy0_1K       592ns × (0.99,1.00)     596ns × (0.99,1.01)  +0.68%
BenchmarkRegexpMatchEasy1_32       184ns × (0.99,1.01)     184ns × (0.99,1.01)  ~
BenchmarkRegexpMatchEasy1_1K      1.01µs × (1.00,1.00)    1.01µs × (0.99,1.01)  ~
BenchmarkRegexpMatchMedium_32      327ns × (0.99,1.00)     327ns × (1.00,1.01)  ~
BenchmarkRegexpMatchMedium_1K     92.5µs × (1.00,1.00)    93.0µs × (1.00,1.02)  +0.48%
BenchmarkRegexpMatchHard_32       4.79µs × (0.95,1.00)    4.76µs × (0.95,1.01)  ~
BenchmarkRegexpMatchHard_1K        136µs × (1.00,1.00)     136µs × (1.00,1.01)  ~
BenchmarkRevcomp                   900ms × (0.99,1.01)     892ms × (1.00,1.01)  ~
BenchmarkTemplate                  170ms × (0.99,1.01)     175ms × (0.99,1.00)  +2.95%
BenchmarkTimeParse                 645ns × (1.00,1.00)     638ns × (1.00,1.00)  -1.16%
BenchmarkTimeFormat                740ns × (1.00,1.00)     772ns × (1.00,1.00)  +4.39%

Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/internal/gc/align.go b/src/cmd/internal/gc/align.go
index 954e599..789e59b 100644
--- a/src/cmd/internal/gc/align.go
+++ b/src/cmd/internal/gc/align.go
@@ -411,6 +411,8 @@
 	defercalc = 0
 }
 
+var itable *Type // distinguished *byte
+
 func typeinit() {
 	if Widthptr == 0 {
 		Fatal("typeinit before betypeinit")
@@ -664,6 +666,9 @@
 
 	dowidth(Types[TSTRING])
 	dowidth(idealstring)
+
+	itable = typ(Tptr)
+	itable.Type = Types[TUINT8]
 }
 
 /*
diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go
index b6691ef..c996be7 100644
--- a/src/cmd/internal/gc/cgen.go
+++ b/src/cmd/internal/gc/cgen.go
@@ -13,11 +13,20 @@
  * generate:
  *	res = n;
  * simplifies and calls Thearch.Gmove.
+ * if wb is true, need to emit write barriers.
  */
-func Cgen(n *Node, res *Node) {
+func Cgen(n, res *Node) {
+	cgen_wb(n, res, false)
+}
+
+func cgen_wb(n, res *Node, wb bool) {
 	if Debug['g'] != 0 {
-		Dump("\ncgen-n", n)
-		Dump("cgen-res", res)
+		op := "cgen"
+		if wb {
+			op = "cgen_wb"
+		}
+		Dump("\n"+op+"-n", n)
+		Dump(op+"-res", res)
 	}
 
 	if n == nil || n.Type == nil {
@@ -34,29 +43,29 @@
 
 	switch n.Op {
 	case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
-		if res.Op != ONAME || !res.Addable {
+		if res.Op != ONAME || !res.Addable || wb {
 			var n1 Node
 			Tempname(&n1, n.Type)
 			Cgen_slice(n, &n1)
-			Cgen(&n1, res)
+			cgen_wb(&n1, res, wb)
 		} else {
 			Cgen_slice(n, res)
 		}
 		return
 
 	case OEFACE:
-		if res.Op != ONAME || !res.Addable {
+		if res.Op != ONAME || !res.Addable || wb {
 			var n1 Node
 			Tempname(&n1, n.Type)
 			Cgen_eface(n, &n1)
-			Cgen(&n1, res)
+			cgen_wb(&n1, res, wb)
 		} else {
 			Cgen_eface(n, res)
 		}
 		return
 
 	case ODOTTYPE:
-		cgen_dottype(n, res, nil)
+		cgen_dottype(n, res, nil, wb)
 		return
 	}
 
@@ -68,7 +77,7 @@
 			var n1 Node
 			Tempname(&n1, n.Type)
 			Cgen(n, &n1)
-			Cgen(&n1, res)
+			cgen_wb(&n1, res, wb)
 			return
 		}
 	}
@@ -77,7 +86,7 @@
 		if n.Type.Width < 0 {
 			Fatal("forgot to compute width for %v", n.Type)
 		}
-		sgen(n, res, n.Type.Width)
+		sgen_wb(n, res, n.Type.Width, wb)
 		return
 	}
 
@@ -87,7 +96,7 @@
 				var n1 Node
 				Tempname(&n1, n.Type)
 				Cgen(n, &n1)
-				Cgen(&n1, res)
+				cgen_wb(&n1, res, wb)
 				return
 			}
 
@@ -100,70 +109,67 @@
 				Fatal("loop in cgen")
 			}
 
-			Cgen(&n1, res)
+			cgen_wb(&n1, res, wb)
 			Regfree(&n1)
 			return
 		}
 
 		var f int
-		if res.Ullman >= UINF {
-			goto gen
-		}
+		if res.Ullman < UINF {
+			if Complexop(n, res) {
+				Complexgen(n, res)
+				return
+			}
 
-		if Complexop(n, res) {
-			Complexgen(n, res)
-			return
-		}
+			f = 1 // gen thru register
+			switch n.Op {
+			case OLITERAL:
+				if Smallintconst(n) {
+					f = 0
+				}
 
-		f = 1 // gen thru register
-		switch n.Op {
-		case OLITERAL:
-			if Smallintconst(n) {
+			case OREGISTER:
 				f = 0
 			}
 
-		case OREGISTER:
-			f = 0
-		}
-
-		if !Iscomplex[n.Type.Etype] && Ctxt.Arch.Regsize == 8 {
-			a := Thearch.Optoas(OAS, res.Type)
-			var addr obj.Addr
-			if Thearch.Sudoaddable(a, res, &addr) {
-				var p1 *obj.Prog
-				if f != 0 {
-					var n2 Node
-					Regalloc(&n2, res.Type, nil)
-					Cgen(n, &n2)
-					p1 = Thearch.Gins(a, &n2, nil)
-					Regfree(&n2)
-				} else {
-					p1 = Thearch.Gins(a, n, nil)
+			if !Iscomplex[n.Type.Etype] && Ctxt.Arch.Regsize == 8 && !wb {
+				a := Thearch.Optoas(OAS, res.Type)
+				var addr obj.Addr
+				if Thearch.Sudoaddable(a, res, &addr) {
+					var p1 *obj.Prog
+					if f != 0 {
+						var n2 Node
+						Regalloc(&n2, res.Type, nil)
+						Cgen(n, &n2)
+						p1 = Thearch.Gins(a, &n2, nil)
+						Regfree(&n2)
+					} else {
+						p1 = Thearch.Gins(a, n, nil)
+					}
+					p1.To = addr
+					if Debug['g'] != 0 {
+						fmt.Printf("%v [ignore previous line]\n", p1)
+					}
+					Thearch.Sudoclean()
+					return
 				}
-				p1.To = addr
-				if Debug['g'] != 0 {
-					fmt.Printf("%v [ignore previous line]\n", p1)
-				}
-				Thearch.Sudoclean()
-				return
 			}
 		}
 
-	gen:
 		if Ctxt.Arch.Thechar == '8' {
 			// no registers to speak of
 			var n1, n2 Node
 			Tempname(&n1, n.Type)
 			Cgen(n, &n1)
 			Igen(res, &n2, nil)
-			Thearch.Gmove(&n1, &n2)
+			cgen_wb(&n1, &n2, wb)
 			Regfree(&n2)
 			return
 		}
 
 		var n1 Node
 		Igen(res, &n1, nil)
-		Cgen(n, &n1)
+		cgen_wb(n, &n1, wb)
 		Regfree(&n1)
 		return
 	}
@@ -186,6 +192,22 @@
 		n.Addable = n.Left.Addable
 	}
 
+	if wb {
+		if int(Simtype[res.Type.Etype]) != Tptr {
+			Fatal("cgen_wb of type %v", res.Type)
+		}
+		if n.Ullman >= UINF {
+			var n1 Node
+			Tempname(&n1, n.Type)
+			Cgen(n, &n1)
+			n = &n1
+		}
+		cgen_wbptr(n, res)
+		return
+	}
+
+	// Write barrier now handled. Code below this line can ignore wb.
+
 	if Ctxt.Arch.Thechar == '5' { // TODO(rsc): Maybe more often?
 		// if both are addressable, move
 		if n.Addable && res.Addable {
@@ -765,6 +787,73 @@
 	cgen_norm(n, &n1, res)
 }
 
+var sys_wbptr *Node
+
+func cgen_wbptr(n, res *Node) {
+	if Debug_wb > 0 {
+		Warn("write barrier")
+	}
+	var dst, src Node
+	Agenr(res, &dst, nil)
+	Cgenr(n, &src, nil)
+	p := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &dst, nil)
+	a := &p.To
+	a.Type = obj.TYPE_MEM
+	a.Reg = int16(Thearch.REGSP)
+	a.Offset = 0
+	if HasLinkRegister() {
+		a.Offset += int64(Widthptr)
+	}
+	p2 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil)
+	p2.To = p.To
+	p2.To.Offset += int64(Widthptr)
+	Regfree(&dst)
+	Regfree(&src)
+	if sys_wbptr == nil {
+		sys_wbptr = writebarrierfn("writebarrierptr", Types[Tptr], Types[Tptr])
+	}
+	Ginscall(sys_wbptr, 0)
+}
+
+func cgen_wbfat(n, res *Node) {
+	if Debug_wb > 0 {
+		Warn("write barrier")
+	}
+	needType := true
+	funcName := "typedmemmove"
+	var dst, src Node
+	if n.Ullman >= res.Ullman {
+		Agenr(n, &src, nil)
+		Agenr(res, &dst, nil)
+	} else {
+		Agenr(res, &dst, nil)
+		Agenr(n, &src, nil)
+	}
+	p := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &dst, nil)
+	a := &p.To
+	a.Type = obj.TYPE_MEM
+	a.Reg = int16(Thearch.REGSP)
+	a.Offset = 0
+	if HasLinkRegister() {
+		a.Offset += int64(Widthptr)
+	}
+	if needType {
+		a.Offset += int64(Widthptr)
+	}
+	p2 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil)
+	p2.To = p.To
+	p2.To.Offset += int64(Widthptr)
+	Regfree(&dst)
+	Regfree(&src)
+	if needType {
+		p3 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), typename(n.Type), nil)
+		p3.To = p2.To
+		p3.To.Offset -= 2 * int64(Widthptr)
+		Regfree(&src)
+	}
+	Ginscall(writebarrierfn(funcName, Types[Tptr], Types[Tptr]), 0)
+}
+
 // cgen_norm moves n1 to res, truncating to expected type if necessary.
 // n1 is a register, and cgen_norm frees it.
 func cgen_norm(n, n1, res *Node) {
@@ -2118,10 +2207,15 @@
 /*
  * block copy:
  *	memmove(&ns, &n, w);
+ * if wb is true, needs write barrier.
  */
-func sgen(n *Node, ns *Node, w int64) {
+func sgen_wb(n *Node, ns *Node, w int64, wb bool) {
 	if Debug['g'] != 0 {
-		fmt.Printf("\nsgen w=%d\n", w)
+		op := "sgen"
+		if wb {
+			op = "sgen-wb"
+		}
+		fmt.Printf("\n%s w=%d\n", op, w)
 		Dump("r", n)
 		Dump("res", ns)
 	}
@@ -2145,7 +2239,7 @@
 	}
 
 	// Avoid taking the address for simple enough types.
-	if Componentgen(n, ns) {
+	if componentgen_wb(n, ns, wb) {
 		return
 	}
 
@@ -2163,15 +2257,29 @@
 	osrc := stkof(n)
 	odst := stkof(ns)
 
-	if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) {
+	if odst != -1000 {
+		// on stack, write barrier not needed after all
+		wb = false
+	}
+
+	if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) || wb && osrc != -1000 {
 		// osrc and odst both on stack, and at least one is in
 		// an unknown position.  Could generate code to test
 		// for forward/backward copy, but instead just copy
 		// to a temporary location first.
+		//
+		// OR: write barrier needed and source is on stack.
+		// Invoking the write barrier will use the stack to prepare its call.
+		// Copy to temporary.
 		var tmp Node
 		Tempname(&tmp, n.Type)
-		sgen(n, &tmp, w)
-		sgen(&tmp, ns, w)
+		sgen_wb(n, &tmp, w, false)
+		sgen_wb(&tmp, ns, w, wb)
+		return
+	}
+
+	if wb {
+		cgen_wbfat(n, ns)
 		return
 	}
 
diff --git a/src/cmd/internal/gc/esc.go b/src/cmd/internal/gc/esc.go
index 8d195cd..4a44de7 100644
--- a/src/cmd/internal/gc/esc.go
+++ b/src/cmd/internal/gc/esc.go
@@ -537,7 +537,7 @@
 	// This assignment is a no-op for escape analysis,
 	// it does not store any new pointers into b that were not already there.
 	// However, without this special case b will escape, because we assign to OIND/ODOTPTR.
-	case OAS, OASOP:
+	case OAS, OASOP, OASWB:
 		if (n.Left.Op == OIND || n.Left.Op == ODOTPTR) && n.Left.Left.Op == ONAME && // dst is ONAME dereference
 			(n.Right.Op == OSLICE || n.Right.Op == OSLICE3 || n.Right.Op == OSLICESTR) && // src is slice operation
 			(n.Right.Left.Op == OIND || n.Right.Left.Op == ODOTPTR) && n.Right.Left.Left.Op == ONAME && // slice is applied to ONAME dereference
diff --git a/src/cmd/internal/gc/fmt.go b/src/cmd/internal/gc/fmt.go
index 547b873..1a991a0 100644
--- a/src/cmd/internal/gc/fmt.go
+++ b/src/cmd/internal/gc/fmt.go
@@ -816,7 +816,7 @@
 		// Don't export "v = <N>" initializing statements, hope they're always
 	// preceded by the DCL which will be re-parsed and typecheck to reproduce
 	// the "v = <N>" again.
-	case OAS:
+	case OAS, OASWB:
 		if fmtmode == FExp && n.Right == nil {
 			break
 		}
diff --git a/src/cmd/internal/gc/gen.go b/src/cmd/internal/gc/gen.go
index dda33c9..4c03915 100644
--- a/src/cmd/internal/gc/gen.go
+++ b/src/cmd/internal/gc/gen.go
@@ -403,7 +403,7 @@
  * n.Left is x
  * n.Type is T
  */
-func cgen_dottype(n *Node, res, resok *Node) {
+func cgen_dottype(n *Node, res, resok *Node, wb bool) {
 	if Debug_typeassert > 0 {
 		Warn("type assertion inlined")
 	}
@@ -441,16 +441,17 @@
 	Cgen(typename(n.Type), &r2)
 	Thearch.Gins(Thearch.Optoas(OCMP, byteptr), &r1, &r2)
 	p := Gbranch(Thearch.Optoas(ONE, byteptr), nil, -1)
+	Regfree(&r2) // not needed for success path; reclaimed on one failure path
 	iface.Xoffset += int64(Widthptr)
 	Cgen(&iface, &r1)
 	Regfree(&iface)
 
 	if resok == nil {
 		r1.Type = res.Type
-		Cgen(&r1, res)
+		cgen_wb(&r1, res, wb)
 		q := Gbranch(obj.AJMP, nil, 0)
 		Patch(p, Pc)
-
+		Regrealloc(&r2) // reclaim from above, for this failure path
 		fn := syslook("panicdottype", 0)
 		dowidth(fn.Type)
 		call := Nod(OCALLFUNC, fn, nil)
@@ -467,10 +468,9 @@
 		// This half is handling the res, resok = x.(T) case,
 		// which is called from gen, not cgen, and is consequently fussier
 		// about blank assignments. We have to avoid calling cgen for those.
-		Regfree(&r2)
 		r1.Type = res.Type
 		if !isblank(res) {
-			Cgen(&r1, res)
+			cgen_wb(&r1, res, wb)
 		}
 		Regfree(&r1)
 		if !isblank(resok) {
@@ -979,8 +979,11 @@
 		}
 		Cgen_as(n.Left, n.Right)
 
+	case OASWB:
+		Cgen_as_wb(n.Left, n.Right, true)
+
 	case OAS2DOTTYPE:
-		cgen_dottype(n.Rlist.N, n.List.N, n.List.Next.N)
+		cgen_dottype(n.Rlist.N, n.List.N, n.List.Next.N, false)
 
 	case OCALLMETH:
 		cgen_callmeth(n, 0)
@@ -1023,10 +1026,18 @@
 	lineno = lno
 }
 
-func Cgen_as(nl *Node, nr *Node) {
+func Cgen_as(nl, nr *Node) {
+	Cgen_as_wb(nl, nr, false)
+}
+
+func Cgen_as_wb(nl, nr *Node, wb bool) {
 	if Debug['g'] != 0 {
-		Dump("cgen_as", nl)
-		Dump("cgen_as = ", nr)
+		op := "cgen_as"
+		if wb {
+			op = "cgen_as_wb"
+		}
+		Dump(op, nl)
+		Dump(op+" = ", nr)
 	}
 
 	for nr != nil && nr.Op == OCONVNOP {
@@ -1065,7 +1076,7 @@
 		return
 	}
 
-	Cgen(nr, nl)
+	cgen_wb(nr, nl, wb)
 }
 
 func cgen_callmeth(n *Node, proc int) {
@@ -1126,31 +1137,40 @@
 // Slices, strings and interfaces are supported. Small structs or arrays with
 // elements of basic type are also supported.
 // nr is nil when assigning a zero value.
-func Componentgen(nr *Node, nl *Node) bool {
+func Componentgen(nr, nl *Node) bool {
+	return componentgen_wb(nr, nl, false)
+}
+
+// componentgen_wb is like componentgen but if wb==true emits write barriers for pointer updates.
+func componentgen_wb(nr, nl *Node, wb bool) bool {
+	// Don't generate any code for complete copy of a variable into itself.
+	// It's useless, and the VARDEF will incorrectly mark the old value as dead.
+	// (This check assumes that the arguments passed to componentgen did not
+	// themselves come from Igen, or else we could have Op==ONAME but
+	// with a Type and Xoffset describing an individual field, not the entire
+	// variable.)
+	if nl.Op == ONAME && nl == nr {
+		return true
+	}
+
 	// Count number of moves required to move components.
+	// If using write barrier, can only emit one pointer.
+	// TODO(rsc): Allow more pointers, for reflect.Value.
 	const maxMoves = 8
 	n := 0
+	numPtr := 0
 	visitComponents(nl.Type, 0, func(t *Type, offset int64) bool {
 		n++
-		return n <= maxMoves
+		if int(Simtype[t.Etype]) == Tptr && t != itable {
+			numPtr++
+		}
+		return n <= maxMoves && (!wb || numPtr <= 1)
 	})
-	if n > maxMoves {
+	if n > maxMoves || wb && numPtr > 1 {
 		return false
 	}
 
-	isConstString := Isconst(nr, CTSTR)
-	nodl := *nl
-	if !cadable(nl) {
-		if nr != nil && !cadable(nr) && !isConstString {
-			return false
-		}
-		Igen(nl, &nodl, nil)
-		defer Regfree(&nodl)
-	}
-	lbase := nodl.Xoffset
-
-	// Must call emitVardef on every path out of this function,
-	// but only after evaluating rhs.
+	// Must call emitVardef after evaluating rhs but before writing to lhs.
 	emitVardef := func() {
 		// Emit vardef if needed.
 		if nl.Op == ONAME {
@@ -1161,6 +1181,26 @@
 		}
 	}
 
+	isConstString := Isconst(nr, CTSTR)
+
+	if !cadable(nl) && nr != nil && !cadable(nr) && !isConstString {
+		return false
+	}
+
+	var nodl Node
+	if cadable(nl) {
+		nodl = *nl
+	} else {
+		if nr != nil && !cadable(nr) && !isConstString {
+			return false
+		}
+		if nr == nil || isConstString || nl.Ullman >= nr.Ullman {
+			Igen(nl, &nodl, nil)
+			defer Regfree(&nodl)
+		}
+	}
+	lbase := nodl.Xoffset
+
 	// Special case: zeroing.
 	var nodr Node
 	if nr == nil {
@@ -1218,23 +1258,34 @@
 	// General case: copy nl = nr.
 	nodr = *nr
 	if !cadable(nr) {
+		if nr.Ullman >= UINF && nodl.Op == OINDREG {
+			Fatal("miscompile")
+		}
 		Igen(nr, &nodr, nil)
 		defer Regfree(&nodr)
 	}
 	rbase := nodr.Xoffset
 
-	// Don't generate any code for complete copy of a variable into itself.
-	// It's useless, and the VARDEF will incorrectly mark the old value as dead.
-	// (This check assumes that the arguments passed to componentgen did not
-	// themselves come from Igen, or else we could have Op==ONAME but
-	// with a Type and Xoffset describing an individual field, not the entire
-	// variable.)
-	if nl.Op == ONAME && nr.Op == ONAME && nl == nr {
-		return true
+	if nodl.Op == 0 {
+		Igen(nl, &nodl, nil)
+		defer Regfree(&nodl)
+		lbase = nodl.Xoffset
 	}
 
 	emitVardef()
+	var (
+		ptrType   *Type
+		ptrOffset int64
+	)
 	visitComponents(nl.Type, 0, func(t *Type, offset int64) bool {
+		if wb && int(Simtype[t.Etype]) == Tptr && t != itable {
+			if ptrType != nil {
+				Fatal("componentgen_wb %v", Tconv(nl.Type, 0))
+			}
+			ptrType = t
+			ptrOffset = offset
+			return true
+		}
 		nodl.Type = t
 		nodl.Xoffset = lbase + offset
 		nodr.Type = t
@@ -1242,6 +1293,13 @@
 		Thearch.Gmove(&nodr, &nodl)
 		return true
 	})
+	if ptrType != nil {
+		nodl.Type = ptrType
+		nodl.Xoffset = lbase + ptrOffset
+		nodr.Type = ptrType
+		nodr.Xoffset = rbase + ptrOffset
+		cgen_wbptr(&nodr, &nodl)
+	}
 	return true
 }
 
@@ -1283,7 +1341,7 @@
 			f(Types[TFLOAT64], startOffset+8)
 
 	case TINTER:
-		return f(Ptrto(Types[TUINT8]), startOffset) &&
+		return f(itable, startOffset) &&
 			f(Ptrto(Types[TUINT8]), startOffset+int64(Widthptr))
 		return true
 
diff --git a/src/cmd/internal/gc/init.go b/src/cmd/internal/gc/init.go
index b3a6a00..b5d1e50 100644
--- a/src/cmd/internal/gc/init.go
+++ b/src/cmd/internal/gc/init.go
@@ -57,7 +57,7 @@
 		case ODCLFUNC, ODCLCONST, ODCLTYPE, OEMPTY:
 			break
 
-		case OAS:
+		case OAS, OASWB:
 			if isblank(l.N.Left) && candiscard(l.N.Right) {
 				break
 			}
diff --git a/src/cmd/internal/gc/racewalk.go b/src/cmd/internal/gc/racewalk.go
index 934cfe7..e7f3500 100644
--- a/src/cmd/internal/gc/racewalk.go
+++ b/src/cmd/internal/gc/racewalk.go
@@ -137,7 +137,7 @@
 	default:
 		Fatal("racewalk: unknown node type %v", Oconv(int(n.Op), 0))
 
-	case OAS, OAS2FUNC:
+	case OAS, OASWB, OAS2FUNC:
 		racewalknode(&n.Left, init, 1, 0)
 		racewalknode(&n.Right, init, 0, 0)
 		goto ret
diff --git a/src/cmd/internal/gc/subr.go b/src/cmd/internal/gc/subr.go
index cfe1b87..381079e 100644
--- a/src/cmd/internal/gc/subr.go
+++ b/src/cmd/internal/gc/subr.go
@@ -1614,7 +1614,7 @@
 		}
 		goto out
 
-	case OCALL, OCALLFUNC, OCALLMETH, OCALLINTER:
+	case OCALL, OCALLFUNC, OCALLMETH, OCALLINTER, OASWB:
 		ul = UINF
 		goto out
 
diff --git a/src/cmd/internal/gc/syntax.go b/src/cmd/internal/gc/syntax.go
index d448188..e9593fd 100644
--- a/src/cmd/internal/gc/syntax.go
+++ b/src/cmd/internal/gc/syntax.go
@@ -170,6 +170,7 @@
 	OAS2MAPR         // x, ok = m["foo"]
 	OAS2DOTTYPE      // x, ok = I.(int)
 	OASOP            // x += y
+	OASWB            // OAS but with write barrier
 	OCALL            // function call, method call or type conversion, possibly preceded by defer or go.
 	OCALLFUNC        // f()
 	OCALLMETH        // t.Method()
diff --git a/src/cmd/internal/gc/walk.go b/src/cmd/internal/gc/walk.go
index 043edc9..79a3038 100644
--- a/src/cmd/internal/gc/walk.go
+++ b/src/cmd/internal/gc/walk.go
@@ -2218,10 +2218,14 @@
 		if Curfn != nil && Curfn.Func.Nowritebarrier {
 			Yyerror("write barrier prohibited")
 		}
+		t := n.Left.Type
+		if t.Width == int64(Widthptr) {
+			n.Op = OASWB
+			return n
+		}
 		if Debug_wb > 0 {
 			Warnl(int(n.Lineno), "write barrier")
 		}
-		t := n.Left.Type
 		l := Nod(OADDR, n.Left, nil)
 		l.Etype = 1 // addr does not escape
 		if t.Width == int64(Widthptr) {
@@ -2274,7 +2278,6 @@
 			n = mkcall1(writebarrierfn("typedmemmove", t, r.Left.Type), nil, init, typename(t), l, r)
 		}
 	}
-
 	return n
 }