cmd/internal/gc: optimize slice + write barrier

The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.

If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.

For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.

For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.

For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.

No write to base, no write barrier.

The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)

This CL by itself:
name                   old mean              new mean              delta
BinaryTree17            5.81s × (0.98,1.03)   5.71s × (0.96,1.05)     ~    (p=0.101)
Fannkuch11              4.35s × (1.00,1.00)   4.39s × (1.00,1.00)   +0.79% (p=0.000)
FmtFprintfEmpty        86.0ns × (0.94,1.11)  82.6ns × (0.98,1.04)   -3.86% (p=0.048)
FmtFprintfString        276ns × (0.98,1.04)   273ns × (0.98,1.02)     ~    (p=0.235)
FmtFprintfInt           274ns × (0.98,1.06)   270ns × (0.99,1.01)     ~    (p=0.119)
FmtFprintfIntInt        506ns × (0.99,1.01)   475ns × (0.99,1.01)   -6.02% (p=0.000)
FmtFprintfPrefixedInt   391ns × (0.99,1.01)   393ns × (1.00,1.01)     ~    (p=0.139)
FmtFprintfFloat         566ns × (0.99,1.01)   574ns × (1.00,1.01)   +1.33% (p=0.001)
FmtManyArgs            1.91µs × (0.99,1.01)  1.87µs × (0.99,1.02)   -1.83% (p=0.000)
GobDecode              15.3ms × (0.99,1.02)  15.0ms × (0.98,1.05)   -1.84% (p=0.042)
GobEncode              11.5ms × (0.97,1.03)  11.4ms × (0.99,1.03)     ~    (p=0.152)
Gzip                    645ms × (0.99,1.01)   647ms × (0.99,1.01)     ~    (p=0.265)
Gunzip                  142ms × (1.00,1.00)   143ms × (1.00,1.01)   +0.90% (p=0.000)
HTTPClientServer       90.5µs × (0.97,1.04)  88.5µs × (0.99,1.03)   -2.27% (p=0.014)
JSONEncode             32.0ms × (0.98,1.03)  29.6ms × (0.98,1.01)   -7.51% (p=0.000)
JSONDecode              114ms × (0.99,1.01)   104ms × (1.00,1.01)   -8.60% (p=0.000)
Mandelbrot200          6.04ms × (1.00,1.01)  6.02ms × (1.00,1.00)     ~    (p=0.057)
GoParse                6.47ms × (0.97,1.05)  6.37ms × (0.97,1.04)     ~    (p=0.105)
RegexpMatchEasy0_32     171ns × (0.93,1.07)   152ns × (0.99,1.01)  -11.09% (p=0.000)
RegexpMatchEasy0_1K     550ns × (0.98,1.01)   530ns × (1.00,1.00)   -3.78% (p=0.000)
RegexpMatchEasy1_32     135ns × (0.99,1.02)   134ns × (0.99,1.01)   -1.33% (p=0.002)
RegexpMatchEasy1_1K     879ns × (1.00,1.01)   865ns × (1.00,1.00)   -1.58% (p=0.000)
RegexpMatchMedium_32    243ns × (1.00,1.00)   233ns × (1.00,1.00)   -4.30% (p=0.000)
RegexpMatchMedium_1K   70.3µs × (1.00,1.00)  69.5µs × (1.00,1.00)   -1.13% (p=0.000)
RegexpMatchHard_32     3.82µs × (1.00,1.01)  3.74µs × (1.00,1.00)   -1.95% (p=0.000)
RegexpMatchHard_1K      117µs × (1.00,1.00)   115µs × (1.00,1.00)   -1.69% (p=0.000)
Revcomp                 917ms × (0.97,1.04)   920ms × (0.97,1.04)     ~    (p=0.786)
Template                114ms × (0.99,1.01)   117ms × (0.99,1.01)   +2.58% (p=0.000)
TimeParse               622ns × (0.99,1.01)   615ns × (0.99,1.00)   -1.06% (p=0.000)
TimeFormat              665ns × (0.99,1.01)   654ns × (0.99,1.00)   -1.70% (p=0.000)

This CL and previous CL (append) combined:
name                   old mean              new mean              delta
BinaryTree17            5.68s × (0.97,1.04)   5.71s × (0.96,1.05)     ~    (p=0.638)
Fannkuch11              4.41s × (0.98,1.03)   4.39s × (1.00,1.00)     ~    (p=0.474)
FmtFprintfEmpty        92.7ns × (0.91,1.16)  82.6ns × (0.98,1.04)  -10.89% (p=0.004)
FmtFprintfString        281ns × (0.96,1.08)   273ns × (0.98,1.02)     ~    (p=0.078)
FmtFprintfInt           288ns × (0.97,1.06)   270ns × (0.99,1.01)   -6.37% (p=0.000)
FmtFprintfIntInt        493ns × (0.97,1.04)   475ns × (0.99,1.01)   -3.53% (p=0.002)
FmtFprintfPrefixedInt   423ns × (0.97,1.04)   393ns × (1.00,1.01)   -7.07% (p=0.000)
FmtFprintfFloat         598ns × (0.99,1.01)   574ns × (1.00,1.01)   -4.02% (p=0.000)
FmtManyArgs            1.89µs × (0.98,1.05)  1.87µs × (0.99,1.02)     ~    (p=0.305)
GobDecode              14.8ms × (0.98,1.03)  15.0ms × (0.98,1.05)     ~    (p=0.237)
GobEncode              12.3ms × (0.98,1.01)  11.4ms × (0.99,1.03)   -6.95% (p=0.000)
Gzip                    656ms × (0.99,1.05)   647ms × (0.99,1.01)     ~    (p=0.101)
Gunzip                  142ms × (1.00,1.00)   143ms × (1.00,1.01)   +0.58% (p=0.001)
HTTPClientServer       91.2µs × (0.97,1.04)  88.5µs × (0.99,1.03)   -3.02% (p=0.003)
JSONEncode             32.6ms × (0.97,1.08)  29.6ms × (0.98,1.01)   -9.10% (p=0.000)
JSONDecode              114ms × (0.97,1.05)   104ms × (1.00,1.01)   -8.74% (p=0.000)
Mandelbrot200          6.11ms × (0.98,1.04)  6.02ms × (1.00,1.00)     ~    (p=0.090)
GoParse                6.66ms × (0.97,1.04)  6.37ms × (0.97,1.04)   -4.41% (p=0.000)
RegexpMatchEasy0_32     159ns × (0.99,1.00)   152ns × (0.99,1.01)   -4.69% (p=0.000)
RegexpMatchEasy0_1K     538ns × (1.00,1.01)   530ns × (1.00,1.00)   -1.57% (p=0.000)
RegexpMatchEasy1_32     138ns × (1.00,1.00)   134ns × (0.99,1.01)   -2.91% (p=0.000)
RegexpMatchEasy1_1K     869ns × (0.99,1.01)   865ns × (1.00,1.00)   -0.51% (p=0.012)
RegexpMatchMedium_32    252ns × (0.99,1.01)   233ns × (1.00,1.00)   -7.85% (p=0.000)
RegexpMatchMedium_1K   72.7µs × (1.00,1.00)  69.5µs × (1.00,1.00)   -4.43% (p=0.000)
RegexpMatchHard_32     3.85µs × (1.00,1.00)  3.74µs × (1.00,1.00)   -2.74% (p=0.000)
RegexpMatchHard_1K      118µs × (1.00,1.00)   115µs × (1.00,1.00)   -2.24% (p=0.000)
Revcomp                 920ms × (0.97,1.07)   920ms × (0.97,1.04)     ~    (p=0.998)
Template                129ms × (0.98,1.03)   117ms × (0.99,1.01)   -9.79% (p=0.000)
TimeParse               619ns × (0.99,1.01)   615ns × (0.99,1.00)   -0.57% (p=0.011)
TimeFormat              661ns × (0.98,1.04)   654ns × (0.99,1.00)     ~    (p=0.223)

Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go
index 3763a36..a4b4f0b 100644
--- a/src/cmd/internal/gc/cgen.go
+++ b/src/cmd/internal/gc/cgen.go
@@ -43,14 +43,7 @@
 
 	switch n.Op {
 	case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
-		if res.Op != ONAME || !res.Addable || wb {
-			var n1 Node
-			Tempname(&n1, n.Type)
-			Cgen_slice(n, &n1)
-			cgen_wb(&n1, res, wb)
-		} else {
-			Cgen_slice(n, res)
-		}
+		cgen_slice(n, res, wb)
 		return
 
 	case OEFACE:
@@ -2970,3 +2963,568 @@
 		i++
 	}
 }
+
+// Generate res = n, where n is x[i:j] or x[i:j:k].
+// If wb is true, need write barrier updating res's base pointer.
+// On systems with 32-bit ints, i, j, k are guaranteed to be 32-bit values.
+func cgen_slice(n, res *Node, wb bool) {
+	needFullUpdate := !samesafeexpr(n.Left, res)
+
+	// orderexpr has made sure that x is safe (but possibly expensive)
+	// and i, j, k are cheap. On a system with registers (anything but 386)
+	// we can evaluate x first and then know we have enough registers
+	// for i, j, k as well.
+	var x, xbase, xlen, xcap, i, j, k Node
+	if n.Op != OSLICEARR && n.Op != OSLICE3ARR {
+		Igen(n.Left, &x, nil)
+	}
+
+	indexRegType := Types[TUINT]
+	if Widthreg > Widthptr { // amd64p32
+		indexRegType = Types[TUINT64]
+	}
+
+	// On most systems, we use registers.
+	// The 386 has basically no registers, so substitute functions
+	// that can work with temporaries instead.
+	regalloc := Regalloc
+	ginscon := Thearch.Ginscon
+	gins := Thearch.Gins
+	if Thearch.Thechar == '8' {
+		regalloc = func(n *Node, t *Type, reuse *Node) {
+			Tempname(n, t)
+		}
+		ginscon = func(as int, c int64, n *Node) {
+			var n1 Node
+			Regalloc(&n1, n.Type, n)
+			Thearch.Gmove(n, &n1)
+			Thearch.Ginscon(as, c, &n1)
+			Thearch.Gmove(&n1, n)
+			Regfree(&n1)
+		}
+		gins = func(as int, f, t *Node) *obj.Prog {
+			var n1 Node
+			Regalloc(&n1, t.Type, t)
+			Thearch.Gmove(t, &n1)
+			Thearch.Gins(as, f, &n1)
+			Thearch.Gmove(&n1, t)
+			Regfree(&n1)
+			return nil
+		}
+	}
+
+	panics := make([]*obj.Prog, 0, 6) // 3 loads + 3 checks
+
+	loadlen := func() {
+		if xlen.Op != 0 {
+			return
+		}
+		if n.Op == OSLICEARR || n.Op == OSLICE3ARR {
+			Nodconst(&xlen, indexRegType, n.Left.Type.Type.Bound)
+			return
+		}
+		if n.Op == OSLICESTR && Isconst(n.Left, CTSTR) {
+			Nodconst(&xlen, indexRegType, int64(len(n.Left.Val.U.Sval)))
+			return
+		}
+		regalloc(&xlen, indexRegType, nil)
+		x.Xoffset += int64(Widthptr)
+		x.Type = Types[TUINT]
+		Thearch.Gmove(&x, &xlen)
+		x.Xoffset -= int64(Widthptr)
+	}
+
+	loadcap := func() {
+		if xcap.Op != 0 {
+			return
+		}
+		if n.Op == OSLICEARR || n.Op == OSLICE3ARR || n.Op == OSLICESTR {
+			loadlen()
+			xcap = xlen
+			if xcap.Op == OREGISTER {
+				Regrealloc(&xcap)
+			}
+			return
+		}
+		regalloc(&xcap, indexRegType, nil)
+		x.Xoffset += 2 * int64(Widthptr)
+		x.Type = Types[TUINT]
+		Thearch.Gmove(&x, &xcap)
+		x.Xoffset -= 2 * int64(Widthptr)
+	}
+
+	var x1, x2, x3 *Node // unevaluated index arguments
+	x1 = n.Right.Left
+	switch n.Op {
+	default:
+		x2 = n.Right.Right
+	case OSLICE3, OSLICE3ARR:
+		x2 = n.Right.Right.Left
+		x3 = n.Right.Right.Right
+	}
+
+	// load computes src into targ, but if src refers to the len or cap of n.Left,
+	// load copies those from xlen, xcap, loading xlen if needed.
+	// If targ.Op == OREGISTER on return, it must be Regfreed,
+	// but it should not be modified without first checking whether it is
+	// xlen or xcap's register.
+	load := func(src, targ *Node) {
+		if src == nil {
+			return
+		}
+		switch src.Op {
+		case OLITERAL:
+			*targ = *src
+			return
+		case OLEN:
+			// NOTE(rsc): This doesn't actually trigger, because order.go
+			// has pulled all the len and cap calls into separate assignments
+			// to temporaries. There are tests in test/sliceopt.go that could
+			// be enabled if this is fixed.
+			if samesafeexpr(n.Left, src.Left) {
+				if Debug_slice > 0 {
+					Warn("slice: reuse len")
+				}
+				loadlen()
+				*targ = xlen
+				if targ.Op == OREGISTER {
+					Regrealloc(targ)
+				}
+				return
+			}
+		case OCAP:
+			// NOTE(rsc): This doesn't actually trigger; see note in case OLEN above.
+			if samesafeexpr(n.Left, src.Left) {
+				if Debug_slice > 0 {
+					Warn("slice: reuse cap")
+				}
+				loadcap()
+				*targ = xcap
+				if targ.Op == OREGISTER {
+					Regrealloc(targ)
+				}
+				return
+			}
+		}
+		if i.Op != 0 && samesafeexpr(x1, src) {
+			if Debug_slice > 0 {
+				Warn("slice: reuse 1st index")
+			}
+			*targ = i
+			if targ.Op == OREGISTER {
+				Regrealloc(targ)
+			}
+			return
+		}
+		if j.Op != 0 && samesafeexpr(x2, src) {
+			if Debug_slice > 0 {
+				Warn("slice: reuse 2nd index")
+			}
+			*targ = j
+			if targ.Op == OREGISTER {
+				Regrealloc(targ)
+			}
+			return
+		}
+		if Thearch.Cgenindex != nil {
+			regalloc(targ, indexRegType, nil)
+			p := Thearch.Cgenindex(src, targ, false)
+			if p != nil {
+				panics = append(panics, p)
+			}
+		} else if Thearch.Igenindex != nil {
+			p := Thearch.Igenindex(src, targ, false)
+			if p != nil {
+				panics = append(panics, p)
+			}
+		} else {
+			regalloc(targ, indexRegType, nil)
+			var tmp Node
+			Cgenr(src, &tmp, targ)
+			Thearch.Gmove(&tmp, targ)
+			Regfree(&tmp)
+		}
+	}
+
+	load(x1, &i)
+	load(x2, &j)
+	load(x3, &k)
+
+	// i defaults to 0.
+	if i.Op == 0 {
+		Nodconst(&i, indexRegType, 0)
+	}
+
+	// j defaults to len(x)
+	if j.Op == 0 {
+		loadlen()
+		j = xlen
+		if j.Op == OREGISTER {
+			Regrealloc(&j)
+		}
+	}
+
+	// k defaults to cap(x)
+	// Only need to load it if we're recalculating cap or doing a full update.
+	if k.Op == 0 && n.Op != OSLICESTR && (!iszero(&i) || needFullUpdate) {
+		loadcap()
+		k = xcap
+		if k.Op == OREGISTER {
+			Regrealloc(&k)
+		}
+	}
+
+	// Check constant indexes for negative values, and against constant length if known.
+	// The func obvious below checks for out-of-order constant indexes.
+	var bound int64 = -1
+	if n.Op == OSLICEARR || n.Op == OSLICE3ARR {
+		bound = n.Left.Type.Type.Bound
+	} else if n.Op == OSLICESTR && Isconst(n.Left, CTSTR) {
+		bound = int64(len(n.Left.Val.U.Sval))
+	}
+	if Isconst(&i, CTINT) {
+		if mpcmpfixc(i.Val.U.Xval, 0) < 0 || bound >= 0 && mpcmpfixc(i.Val.U.Xval, bound) > 0 {
+			Yyerror("slice index out of bounds")
+		}
+	}
+	if Isconst(&j, CTINT) {
+		if mpcmpfixc(j.Val.U.Xval, 0) < 0 || bound >= 0 && mpcmpfixc(j.Val.U.Xval, bound) > 0 {
+			Yyerror("slice index out of bounds")
+		}
+	}
+	if Isconst(&k, CTINT) {
+		if mpcmpfixc(k.Val.U.Xval, 0) < 0 || bound >= 0 && mpcmpfixc(k.Val.U.Xval, bound) > 0 {
+			Yyerror("slice index out of bounds")
+		}
+	}
+
+	// same reports whether n1 and n2 are the same register or constant.
+	same := func(n1, n2 *Node) bool {
+		return n1.Op == OREGISTER && n2.Op == OREGISTER && n1.Reg == n2.Reg ||
+			n1.Op == ONAME && n2.Op == ONAME && n1.Orig == n2.Orig && n1.Type == n2.Type && n1.Xoffset == n2.Xoffset ||
+			n1.Op == OLITERAL && n2.Op == OLITERAL && Mpcmpfixfix(n1.Val.U.Xval, n2.Val.U.Xval) == 0
+	}
+
+	// obvious reports whether n1 <= n2 is obviously true,
+	// and it calls Yyerror if n1 <= n2 is obviously false.
+	obvious := func(n1, n2 *Node) bool {
+		if Debug['B'] != 0 { // -B disables bounds checks
+			return true
+		}
+		if same(n1, n2) {
+			return true // n1 == n2
+		}
+		if iszero(n1) {
+			return true // using unsigned compare, so 0 <= n2 always true
+		}
+		if xlen.Op != 0 && same(n1, &xlen) && xcap.Op != 0 && same(n2, &xcap) {
+			return true // len(x) <= cap(x) always true
+		}
+		if Isconst(n1, CTINT) && Isconst(n2, CTINT) {
+			if Mpcmpfixfix(n1.Val.U.Xval, n2.Val.U.Xval) <= 0 {
+				return true // n1, n2 constants such that n1 <= n2
+			}
+			Yyerror("slice index out of bounds")
+			return true
+		}
+		return false
+	}
+
+	compare := func(n1, n2 *Node) {
+		p := Thearch.Ginscmp(OGT, indexRegType, n1, n2, -1)
+		panics = append(panics, p)
+	}
+
+	loadcap()
+	max := &xcap
+	if k.Op != 0 && (n.Op == OSLICE3 || n.Op == OSLICE3ARR) {
+		if obvious(&k, max) {
+			if Debug_slice > 0 {
+				Warn("slice: omit check for 3rd index")
+			}
+		} else {
+			compare(&k, max)
+		}
+		max = &k
+	}
+	if j.Op != 0 {
+		if obvious(&j, max) {
+			if Debug_slice > 0 {
+				Warn("slice: omit check for 2nd index")
+			}
+		} else {
+			compare(&j, max)
+		}
+		max = &j
+	}
+	if i.Op != 0 {
+		if obvious(&i, max) {
+			if Debug_slice > 0 {
+				Warn("slice: omit check for 1st index")
+			}
+		} else {
+			compare(&i, max)
+		}
+		max = &i
+	}
+	if k.Op != 0 && i.Op != 0 {
+		obvious(&i, &k) // emit compile-time error for x[3:n:2]
+	}
+
+	if len(panics) > 0 {
+		p := Gbranch(obj.AJMP, nil, 0)
+		for _, q := range panics {
+			Patch(q, Pc)
+		}
+		Ginscall(panicslice, -1)
+		Patch(p, Pc)
+	}
+
+	// Checks are done.
+	// Compute new len as j-i, cap as k-i.
+	// If i and j are same register, len is constant 0.
+	// If i and k are same register, cap is constant 0.
+	// If j and k are same register, len and cap are same.
+
+	// Done with xlen and xcap.
+	// Now safe to modify j and k even if they alias xlen, xcap.
+	if xlen.Op == OREGISTER {
+		Regfree(&xlen)
+	}
+	if xcap.Op == OREGISTER {
+		Regfree(&xcap)
+	}
+
+	// are j and k the same value?
+	sameJK := same(&j, &k)
+
+	if i.Op != 0 {
+		// j -= i
+		if same(&i, &j) {
+			if Debug_slice > 0 {
+				Warn("slice: result len == 0")
+			}
+			if j.Op == OREGISTER {
+				Regfree(&j)
+			}
+			Nodconst(&j, indexRegType, 0)
+		} else {
+			switch j.Op {
+			case OLITERAL:
+				if Isconst(&i, CTINT) {
+					Nodconst(&j, indexRegType, Mpgetfix(j.Val.U.Xval)-Mpgetfix(i.Val.U.Xval))
+					if Debug_slice > 0 {
+						Warn("slice: result len == %d", Mpgetfix(j.Val.U.Xval))
+					}
+					break
+				}
+				fallthrough
+			case ONAME:
+				if !istemp(&j) {
+					var r Node
+					regalloc(&r, indexRegType, nil)
+					Thearch.Gmove(&j, &r)
+					j = r
+				}
+				fallthrough
+			case OREGISTER:
+				if i.Op == OLITERAL {
+					v := Mpgetfix(i.Val.U.Xval)
+					if v != 0 {
+						ginscon(Thearch.Optoas(OSUB, indexRegType), v, &j)
+					}
+				} else {
+					gins(Thearch.Optoas(OSUB, indexRegType), &i, &j)
+				}
+			}
+		}
+
+		// k -= i if k different from j and cap is needed.j
+		// (The modifications to j above cannot affect i: if j and i were aliased,
+		// we replace j with a constant 0 instead of doing a subtraction,
+		// leaving i unmodified.)
+		if k.Op == 0 {
+			if Debug_slice > 0 && n.Op != OSLICESTR {
+				Warn("slice: result cap not computed")
+			}
+			// no need
+		} else if same(&i, &k) {
+			if k.Op == OREGISTER {
+				Regfree(&k)
+			}
+			Nodconst(&k, indexRegType, 0)
+			if Debug_slice > 0 {
+				Warn("slice: result cap == 0")
+			}
+		} else if sameJK {
+			if Debug_slice > 0 {
+				Warn("slice: result cap == result len")
+			}
+			// k and j were the same value; make k-i the same as j-i.
+			if k.Op == OREGISTER {
+				Regfree(&k)
+			}
+			k = j
+			if k.Op == OREGISTER {
+				Regrealloc(&k)
+			}
+		} else {
+			switch k.Op {
+			case OLITERAL:
+				if Isconst(&i, CTINT) {
+					Nodconst(&k, indexRegType, Mpgetfix(k.Val.U.Xval)-Mpgetfix(i.Val.U.Xval))
+					if Debug_slice > 0 {
+						Warn("slice: result cap == %d", Mpgetfix(k.Val.U.Xval))
+					}
+					break
+				}
+				fallthrough
+			case ONAME:
+				if !istemp(&k) {
+					var r Node
+					regalloc(&r, indexRegType, nil)
+					Thearch.Gmove(&k, &r)
+					k = r
+				}
+				fallthrough
+			case OREGISTER:
+				if same(&i, &k) {
+					Regfree(&k)
+					Nodconst(&k, indexRegType, 0)
+					if Debug_slice > 0 {
+						Warn("slice: result cap == 0")
+					}
+				} else if i.Op == OLITERAL {
+					v := Mpgetfix(i.Val.U.Xval)
+					if v != 0 {
+						ginscon(Thearch.Optoas(OSUB, indexRegType), v, &k)
+					}
+				} else {
+					gins(Thearch.Optoas(OSUB, indexRegType), &i, &k)
+				}
+			}
+		}
+	}
+
+	adjustBase := true
+	if i.Op == 0 || iszero(&i) {
+		if Debug_slice > 0 {
+			Warn("slice: skip base adjustment for 1st index 0")
+		}
+		adjustBase = false
+	} else if k.Op != 0 && iszero(&k) || k.Op == 0 && iszero(&j) {
+		if Debug_slice > 0 {
+			if n.Op == OSLICESTR {
+				Warn("slice: skip base adjustment for string len == 0")
+			} else {
+				Warn("slice: skip base adjustment for cap == 0")
+			}
+		}
+		adjustBase = false
+	}
+
+	if !adjustBase && !needFullUpdate {
+		if Debug_slice > 0 {
+			if k.Op != 0 {
+				Warn("slice: len/cap-only update")
+			} else {
+				Warn("slice: len-only update")
+			}
+		}
+		if i.Op == OREGISTER {
+			Regfree(&i)
+		}
+		// Write len (and cap if needed) back to x.
+		x.Xoffset += int64(Widthptr)
+		x.Type = Types[TUINT]
+		Thearch.Gmove(&j, &x)
+		x.Xoffset -= int64(Widthptr)
+		if k.Op != 0 {
+			x.Xoffset += 2 * int64(Widthptr)
+			x.Type = Types[TUINT]
+			Thearch.Gmove(&k, &x)
+			x.Xoffset -= 2 * int64(Widthptr)
+		}
+		Regfree(&x)
+	} else {
+		// Compute new base. May smash i.
+		if n.Op == OSLICEARR || n.Op == OSLICE3ARR {
+			Cgenr(n.Left, &xbase, nil)
+			Cgen_checknil(&xbase)
+		} else {
+			regalloc(&xbase, Ptrto(res.Type.Type), nil)
+			x.Type = xbase.Type
+			Thearch.Gmove(&x, &xbase)
+			Regfree(&x)
+		}
+		if i.Op != 0 && adjustBase {
+			// Branch around the base adjustment if the resulting cap will be 0.
+			var p *obj.Prog
+			size := &k
+			if k.Op == 0 {
+				size = &j
+			}
+			if Isconst(size, CTINT) {
+				// zero was checked above, must be non-zero.
+			} else {
+				var tmp Node
+				Nodconst(&tmp, indexRegType, 0)
+				p = Thearch.Ginscmp(OEQ, indexRegType, size, &tmp, -1)
+			}
+			var w int64
+			if n.Op == OSLICESTR {
+				w = 1 // res is string, elem size is 1 (byte)
+			} else {
+				w = res.Type.Type.Width // res is []T, elem size is T.width
+			}
+			if Isconst(&i, CTINT) {
+				ginscon(Thearch.Optoas(OADD, xbase.Type), Mpgetfix(i.Val.U.Xval)*w, &xbase)
+			} else if Thearch.AddIndex != nil && Thearch.AddIndex(&i, w, &xbase) {
+				// done by back end
+			} else if w == 1 {
+				gins(Thearch.Optoas(OADD, xbase.Type), &i, &xbase)
+			} else {
+				if i.Op == ONAME && !istemp(&i) {
+					var tmp Node
+					Tempname(&tmp, i.Type)
+					Thearch.Gmove(&i, &tmp)
+					i = tmp
+				}
+				ginscon(Thearch.Optoas(OMUL, i.Type), w, &i)
+				gins(Thearch.Optoas(OADD, xbase.Type), &i, &xbase)
+			}
+			if p != nil {
+				Patch(p, Pc)
+			}
+		}
+		if i.Op == OREGISTER {
+			Regfree(&i)
+		}
+
+		// Write len, cap, base to result.
+		if res.Op == ONAME {
+			Gvardef(res)
+		}
+		Igen(res, &x, nil)
+		x.Xoffset += int64(Widthptr)
+		x.Type = Types[TUINT]
+		Thearch.Gmove(&j, &x)
+		x.Xoffset -= int64(Widthptr)
+		if k.Op != 0 {
+			x.Xoffset += 2 * int64(Widthptr)
+			Thearch.Gmove(&k, &x)
+			x.Xoffset -= 2 * int64(Widthptr)
+		}
+		x.Type = xbase.Type
+		cgen_wb(&xbase, &x, wb)
+		Regfree(&xbase)
+		Regfree(&x)
+	}
+
+	if j.Op == OREGISTER {
+		Regfree(&j)
+	}
+	if k.Op == OREGISTER {
+		Regfree(&k)
+	}
+}