cmd/internal/gc: optimize slice + write barrier

The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.

If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.

For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.

For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.

For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.

No write to base, no write barrier.

The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)

This CL by itself:
name                   old mean              new mean              delta
BinaryTree17            5.81s × (0.98,1.03)   5.71s × (0.96,1.05)     ~    (p=0.101)
Fannkuch11              4.35s × (1.00,1.00)   4.39s × (1.00,1.00)   +0.79% (p=0.000)
FmtFprintfEmpty        86.0ns × (0.94,1.11)  82.6ns × (0.98,1.04)   -3.86% (p=0.048)
FmtFprintfString        276ns × (0.98,1.04)   273ns × (0.98,1.02)     ~    (p=0.235)
FmtFprintfInt           274ns × (0.98,1.06)   270ns × (0.99,1.01)     ~    (p=0.119)
FmtFprintfIntInt        506ns × (0.99,1.01)   475ns × (0.99,1.01)   -6.02% (p=0.000)
FmtFprintfPrefixedInt   391ns × (0.99,1.01)   393ns × (1.00,1.01)     ~    (p=0.139)
FmtFprintfFloat         566ns × (0.99,1.01)   574ns × (1.00,1.01)   +1.33% (p=0.001)
FmtManyArgs            1.91µs × (0.99,1.01)  1.87µs × (0.99,1.02)   -1.83% (p=0.000)
GobDecode              15.3ms × (0.99,1.02)  15.0ms × (0.98,1.05)   -1.84% (p=0.042)
GobEncode              11.5ms × (0.97,1.03)  11.4ms × (0.99,1.03)     ~    (p=0.152)
Gzip                    645ms × (0.99,1.01)   647ms × (0.99,1.01)     ~    (p=0.265)
Gunzip                  142ms × (1.00,1.00)   143ms × (1.00,1.01)   +0.90% (p=0.000)
HTTPClientServer       90.5µs × (0.97,1.04)  88.5µs × (0.99,1.03)   -2.27% (p=0.014)
JSONEncode             32.0ms × (0.98,1.03)  29.6ms × (0.98,1.01)   -7.51% (p=0.000)
JSONDecode              114ms × (0.99,1.01)   104ms × (1.00,1.01)   -8.60% (p=0.000)
Mandelbrot200          6.04ms × (1.00,1.01)  6.02ms × (1.00,1.00)     ~    (p=0.057)
GoParse                6.47ms × (0.97,1.05)  6.37ms × (0.97,1.04)     ~    (p=0.105)
RegexpMatchEasy0_32     171ns × (0.93,1.07)   152ns × (0.99,1.01)  -11.09% (p=0.000)
RegexpMatchEasy0_1K     550ns × (0.98,1.01)   530ns × (1.00,1.00)   -3.78% (p=0.000)
RegexpMatchEasy1_32     135ns × (0.99,1.02)   134ns × (0.99,1.01)   -1.33% (p=0.002)
RegexpMatchEasy1_1K     879ns × (1.00,1.01)   865ns × (1.00,1.00)   -1.58% (p=0.000)
RegexpMatchMedium_32    243ns × (1.00,1.00)   233ns × (1.00,1.00)   -4.30% (p=0.000)
RegexpMatchMedium_1K   70.3µs × (1.00,1.00)  69.5µs × (1.00,1.00)   -1.13% (p=0.000)
RegexpMatchHard_32     3.82µs × (1.00,1.01)  3.74µs × (1.00,1.00)   -1.95% (p=0.000)
RegexpMatchHard_1K      117µs × (1.00,1.00)   115µs × (1.00,1.00)   -1.69% (p=0.000)
Revcomp                 917ms × (0.97,1.04)   920ms × (0.97,1.04)     ~    (p=0.786)
Template                114ms × (0.99,1.01)   117ms × (0.99,1.01)   +2.58% (p=0.000)
TimeParse               622ns × (0.99,1.01)   615ns × (0.99,1.00)   -1.06% (p=0.000)
TimeFormat              665ns × (0.99,1.01)   654ns × (0.99,1.00)   -1.70% (p=0.000)

This CL and previous CL (append) combined:
name                   old mean              new mean              delta
BinaryTree17            5.68s × (0.97,1.04)   5.71s × (0.96,1.05)     ~    (p=0.638)
Fannkuch11              4.41s × (0.98,1.03)   4.39s × (1.00,1.00)     ~    (p=0.474)
FmtFprintfEmpty        92.7ns × (0.91,1.16)  82.6ns × (0.98,1.04)  -10.89% (p=0.004)
FmtFprintfString        281ns × (0.96,1.08)   273ns × (0.98,1.02)     ~    (p=0.078)
FmtFprintfInt           288ns × (0.97,1.06)   270ns × (0.99,1.01)   -6.37% (p=0.000)
FmtFprintfIntInt        493ns × (0.97,1.04)   475ns × (0.99,1.01)   -3.53% (p=0.002)
FmtFprintfPrefixedInt   423ns × (0.97,1.04)   393ns × (1.00,1.01)   -7.07% (p=0.000)
FmtFprintfFloat         598ns × (0.99,1.01)   574ns × (1.00,1.01)   -4.02% (p=0.000)
FmtManyArgs            1.89µs × (0.98,1.05)  1.87µs × (0.99,1.02)     ~    (p=0.305)
GobDecode              14.8ms × (0.98,1.03)  15.0ms × (0.98,1.05)     ~    (p=0.237)
GobEncode              12.3ms × (0.98,1.01)  11.4ms × (0.99,1.03)   -6.95% (p=0.000)
Gzip                    656ms × (0.99,1.05)   647ms × (0.99,1.01)     ~    (p=0.101)
Gunzip                  142ms × (1.00,1.00)   143ms × (1.00,1.01)   +0.58% (p=0.001)
HTTPClientServer       91.2µs × (0.97,1.04)  88.5µs × (0.99,1.03)   -3.02% (p=0.003)
JSONEncode             32.6ms × (0.97,1.08)  29.6ms × (0.98,1.01)   -9.10% (p=0.000)
JSONDecode              114ms × (0.97,1.05)   104ms × (1.00,1.01)   -8.74% (p=0.000)
Mandelbrot200          6.11ms × (0.98,1.04)  6.02ms × (1.00,1.00)     ~    (p=0.090)
GoParse                6.66ms × (0.97,1.04)  6.37ms × (0.97,1.04)   -4.41% (p=0.000)
RegexpMatchEasy0_32     159ns × (0.99,1.00)   152ns × (0.99,1.01)   -4.69% (p=0.000)
RegexpMatchEasy0_1K     538ns × (1.00,1.01)   530ns × (1.00,1.00)   -1.57% (p=0.000)
RegexpMatchEasy1_32     138ns × (1.00,1.00)   134ns × (0.99,1.01)   -2.91% (p=0.000)
RegexpMatchEasy1_1K     869ns × (0.99,1.01)   865ns × (1.00,1.00)   -0.51% (p=0.012)
RegexpMatchMedium_32    252ns × (0.99,1.01)   233ns × (1.00,1.00)   -7.85% (p=0.000)
RegexpMatchMedium_1K   72.7µs × (1.00,1.00)  69.5µs × (1.00,1.00)   -4.43% (p=0.000)
RegexpMatchHard_32     3.85µs × (1.00,1.00)  3.74µs × (1.00,1.00)   -2.74% (p=0.000)
RegexpMatchHard_1K      118µs × (1.00,1.00)   115µs × (1.00,1.00)   -2.24% (p=0.000)
Revcomp                 920ms × (0.97,1.07)   920ms × (0.97,1.04)     ~    (p=0.998)
Template                129ms × (0.98,1.03)   117ms × (0.99,1.01)   -9.79% (p=0.000)
TimeParse               619ns × (0.99,1.01)   615ns × (0.99,1.00)   -0.57% (p=0.011)
TimeFormat              661ns × (0.98,1.04)   654ns × (0.99,1.00)     ~    (p=0.223)

Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/internal/gc/walk.go b/src/cmd/internal/gc/walk.go
index bef08ae..495acb1 100644
--- a/src/cmd/internal/gc/walk.go
+++ b/src/cmd/internal/gc/walk.go
@@ -1280,56 +1280,39 @@
 	case ORECV:
 		Fatal("walkexpr ORECV") // should see inside OAS only
 
-	case OSLICE:
-		if n.Right != nil && n.Right.Left == nil && n.Right.Right == nil { // noop
-			walkexpr(&n.Left, init)
-			n = n.Left
-			goto ret
-		}
-		fallthrough
-
-	case OSLICEARR, OSLICESTR:
-		if n.Right == nil { // already processed
-			goto ret
-		}
-
+	case OSLICE, OSLICEARR, OSLICESTR:
 		walkexpr(&n.Left, init)
-
-		// cgen_slice can't handle string literals as source
-		// TODO the OINDEX case is a bug elsewhere that needs to be traced.  it causes a crash on ([2][]int{ ... })[1][lo:hi]
-		if (n.Op == OSLICESTR && n.Left.Op == OLITERAL) || (n.Left.Op == OINDEX) {
-			n.Left = copyexpr(n.Left, n.Left.Type, init)
-		} else {
-			n.Left = safeexpr(n.Left, init)
-		}
 		walkexpr(&n.Right.Left, init)
-		n.Right.Left = safeexpr(n.Right.Left, init)
+		if n.Right.Left != nil && iszero(n.Right.Left) {
+			// Reduce x[0:j] to x[:j].
+			n.Right.Left = nil
+		}
 		walkexpr(&n.Right.Right, init)
-		n.Right.Right = safeexpr(n.Right.Right, init)
-		n = sliceany(n, init) // chops n.Right, sets n.List
+		n = reduceSlice(n)
 		goto ret
 
 	case OSLICE3, OSLICE3ARR:
-		if n.Right == nil { // already processed
+		walkexpr(&n.Left, init)
+		walkexpr(&n.Right.Left, init)
+		if n.Right.Left != nil && iszero(n.Right.Left) {
+			// Reduce x[0:j:k] to x[:j:k].
+			n.Right.Left = nil
+		}
+		walkexpr(&n.Right.Right.Left, init)
+		walkexpr(&n.Right.Right.Right, init)
+
+		r := n.Right.Right.Right
+		if r != nil && r.Op == OCAP && samesafeexpr(n.Left, r.Left) {
+			// Reduce x[i:j:cap(x)] to x[i:j].
+			n.Right.Right = n.Right.Right.Left
+			if n.Op == OSLICE3 {
+				n.Op = OSLICE
+			} else {
+				n.Op = OSLICEARR
+			}
+			n = reduceSlice(n)
 			goto ret
 		}
-
-		walkexpr(&n.Left, init)
-
-		// TODO the OINDEX case is a bug elsewhere that needs to be traced.  it causes a crash on ([2][]int{ ... })[1][lo:hi]
-		// TODO the comment on the previous line was copied from case OSLICE. it might not even be true.
-		if n.Left.Op == OINDEX {
-			n.Left = copyexpr(n.Left, n.Left.Type, init)
-		} else {
-			n.Left = safeexpr(n.Left, init)
-		}
-		walkexpr(&n.Right.Left, init)
-		n.Right.Left = safeexpr(n.Right.Left, init)
-		walkexpr(&n.Right.Right.Left, init)
-		n.Right.Right.Left = safeexpr(n.Right.Right.Left, init)
-		walkexpr(&n.Right.Right.Right, init)
-		n.Right.Right.Right = safeexpr(n.Right.Right.Right, init)
-		n = sliceany(n, init) // chops n.Right, sets n.List
 		goto ret
 
 	case OADDR:
@@ -1660,6 +1643,22 @@
 	*np = n
 }
 
+func reduceSlice(n *Node) *Node {
+	r := n.Right.Right
+	if r != nil && r.Op == OLEN && samesafeexpr(n.Left, r.Left) {
+		// Reduce x[i:len(x)] to x[i:].
+		n.Right.Right = nil
+	}
+	if (n.Op == OSLICE || n.Op == OSLICESTR) && n.Right.Left == nil && n.Right.Right == nil {
+		// Reduce x[:] to x.
+		if Debug_slice > 0 {
+			Warn("slice: omit slice operation")
+		}
+		return n.Left
+	}
+	return n
+}
+
 func ascompatee1(op int, l *Node, r *Node, init **NodeList) *Node {
 	// convas will turn map assigns into function calls,
 	// making it impossible for reorder3 to work.
@@ -3178,213 +3177,6 @@
 	return nlen
 }
 
-// Generate frontend part for OSLICE[3][ARR|STR]
-//
-func sliceany(n *Node, init **NodeList) *Node {
-	var hb *Node
-	var cb *Node
-
-	//	print("before sliceany: %+N\n", n);
-
-	src := n.Left
-
-	lb := n.Right.Left
-	slice3 := n.Op == OSLICE3 || n.Op == OSLICE3ARR
-	if slice3 {
-		hb = n.Right.Right.Left
-		cb = n.Right.Right.Right
-	} else {
-		hb = n.Right.Right
-		cb = nil
-	}
-
-	bounded := int(n.Etype)
-
-	var bound *Node
-	if n.Op == OSLICESTR {
-		bound = Nod(OLEN, src, nil)
-	} else {
-		bound = Nod(OCAP, src, nil)
-	}
-
-	typecheck(&bound, Erv)
-	walkexpr(&bound, init) // if src is an array, bound will be a const now.
-
-	// static checks if possible
-	bv := int64(1 << 50)
-
-	if Isconst(bound, CTINT) {
-		if !Smallintconst(bound) {
-			Yyerror("array len too large")
-		} else {
-			bv = Mpgetfix(bound.Val.U.Xval)
-		}
-	}
-
-	if Isconst(cb, CTINT) {
-		cbv := Mpgetfix(cb.Val.U.Xval)
-		if cbv < 0 || cbv > bv {
-			Yyerror("slice index out of bounds")
-		}
-	}
-
-	if Isconst(hb, CTINT) {
-		hbv := Mpgetfix(hb.Val.U.Xval)
-		if hbv < 0 || hbv > bv {
-			Yyerror("slice index out of bounds")
-		}
-	}
-
-	if Isconst(lb, CTINT) {
-		lbv := Mpgetfix(lb.Val.U.Xval)
-		if lbv < 0 || lbv > bv {
-			Yyerror("slice index out of bounds")
-			lbv = -1
-		}
-
-		if lbv == 0 {
-			lb = nil
-		}
-	}
-
-	// Checking src[lb:hb:cb] or src[lb:hb].
-	// if chk0 || chk1 || chk2 { panicslice() }
-
-	// All comparisons are unsigned to avoid testing < 0.
-	bt := Types[Simtype[TUINT]]
-
-	if cb != nil && cb.Type.Width > 4 {
-		bt = Types[TUINT64]
-	}
-	if hb != nil && hb.Type.Width > 4 {
-		bt = Types[TUINT64]
-	}
-	if lb != nil && lb.Type.Width > 4 {
-		bt = Types[TUINT64]
-	}
-
-	bound = cheapexpr(conv(bound, bt), init)
-
-	var chk0 *Node // cap(src) < cb
-	if cb != nil {
-		cb = cheapexpr(conv(cb, bt), init)
-		if bounded == 0 {
-			chk0 = Nod(OLT, bound, cb)
-		}
-	} else if slice3 {
-		// When we figure out what this means, implement it.
-		Fatal("slice3 with cb == N") // rejected by parser
-	}
-
-	var chk1 *Node // cb < hb for src[lb:hb:cb]; cap(src) < hb for src[lb:hb]
-	if hb != nil {
-		hb = cheapexpr(conv(hb, bt), init)
-		if bounded == 0 {
-			if cb != nil {
-				chk1 = Nod(OLT, cb, hb)
-			} else {
-				chk1 = Nod(OLT, bound, hb)
-			}
-		}
-	} else if slice3 {
-		// When we figure out what this means, implement it.
-		Fatal("slice3 with hb == N") // rejected by parser
-	} else if n.Op == OSLICEARR {
-		hb = bound
-	} else {
-		hb = Nod(OLEN, src, nil)
-		typecheck(&hb, Erv)
-		walkexpr(&hb, init)
-		hb = cheapexpr(conv(hb, bt), init)
-	}
-
-	var chk2 *Node // hb < lb
-	if lb != nil {
-		lb = cheapexpr(conv(lb, bt), init)
-		if bounded == 0 {
-			chk2 = Nod(OLT, hb, lb)
-		}
-	}
-
-	if chk0 != nil || chk1 != nil || chk2 != nil {
-		chk := Nod(OIF, nil, nil)
-		chk.Nbody = list1(mkcall("panicslice", nil, init))
-		chk.Likely = -1
-		if chk0 != nil {
-			chk.Ntest = chk0
-		}
-		if chk1 != nil {
-			if chk.Ntest == nil {
-				chk.Ntest = chk1
-			} else {
-				chk.Ntest = Nod(OOROR, chk.Ntest, chk1)
-			}
-		}
-
-		if chk2 != nil {
-			if chk.Ntest == nil {
-				chk.Ntest = chk2
-			} else {
-				chk.Ntest = Nod(OOROR, chk.Ntest, chk2)
-			}
-		}
-
-		typecheck(&chk, Etop)
-		walkstmt(&chk)
-		*init = concat(*init, chk.Ninit)
-		chk.Ninit = nil
-		*init = list(*init, chk)
-	}
-
-	// prepare new cap, len and offs for backend cgen_slice
-	// cap = bound [ - lo ]
-	n.Right = nil
-
-	n.List = nil
-	if !slice3 {
-		cb = bound
-	}
-	if lb == nil {
-		bound = conv(cb, Types[Simtype[TUINT]])
-	} else {
-		bound = Nod(OSUB, conv(cb, Types[Simtype[TUINT]]), conv(lb, Types[Simtype[TUINT]]))
-	}
-	typecheck(&bound, Erv)
-	walkexpr(&bound, init)
-	n.List = list(n.List, bound)
-
-	// len = hi [ - lo]
-	if lb == nil {
-		hb = conv(hb, Types[Simtype[TUINT]])
-	} else {
-		hb = Nod(OSUB, conv(hb, Types[Simtype[TUINT]]), conv(lb, Types[Simtype[TUINT]]))
-	}
-	typecheck(&hb, Erv)
-	walkexpr(&hb, init)
-	n.List = list(n.List, hb)
-
-	// offs = [width *] lo, but omit if zero
-	if lb != nil {
-		var w int64
-		if n.Op == OSLICESTR {
-			w = 1
-		} else {
-			w = n.Type.Type.Width
-		}
-		lb = conv(lb, Types[TUINTPTR])
-		if w > 1 {
-			lb = Nod(OMUL, Nodintconst(w), lb)
-		}
-		typecheck(&lb, Erv)
-		walkexpr(&lb, init)
-		n.List = list(n.List, lb)
-	}
-
-	//	print("after sliceany: %+N\n", n);
-
-	return n
-}
-
 func eqfor(t *Type, needsize *int) *Node {
 	// Should only arrive here with large memory or
 	// a struct/array containing a non-memory field/element.