cmd/gc: optimize memclr of slices and arrays
Recognize loops of the form
for i := range a {
a[i] = zero
}
in which the evaluation of a is free from side effects.
Replace these loops with calls to memclr.
This occurs in the stdlib in 18 places.
The motivating example is clearing a byte slice:
benchmark old ns/op new ns/op delta
BenchmarkGoMemclr5 3.31 3.26 -1.51%
BenchmarkGoMemclr16 13.7 3.28 -76.06%
BenchmarkGoMemclr64 50.8 4.14 -91.85%
BenchmarkGoMemclr256 157 6.02 -96.17%
Update #5373.
Change-Id: I99d3e6f5f268e8c6499b7e661df46403e5eb83e4
Reviewed-on: https://go-review.googlesource.com/2520
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/gc/builtin.c b/src/cmd/gc/builtin.c
index b11aa1e..e2e14f0 100644
--- a/src/cmd/gc/builtin.c
+++ b/src/cmd/gc/builtin.c
@@ -126,6 +126,7 @@
"func @\"\".makeslice (@\"\".typ·2 *byte, @\"\".nel·3 int64, @\"\".cap·4 int64) (@\"\".ary·1 []any)\n"
"func @\"\".growslice (@\"\".typ·2 *byte, @\"\".old·3 []any, @\"\".n·4 int64) (@\"\".ary·1 []any)\n"
"func @\"\".memmove (@\"\".to·1 *any, @\"\".frm·2 *any, @\"\".length·3 uintptr)\n"
+ "func @\"\".memclr (@\"\".ptr·1 *byte, @\"\".length·2 uintptr)\n"
"func @\"\".memequal (@\"\".x·2 *any, @\"\".y·3 *any, @\"\".size·4 uintptr) (? bool)\n"
"func @\"\".memequal8 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
"func @\"\".memequal16 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index 5150026..21cf8b8 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -1451,6 +1451,7 @@
* typecheck.c
*/
int islvalue(Node *n);
+int samesafeexpr(Node *l, Node *r);
Node* typecheck(Node **np, int top);
void typechecklist(NodeList *l, int top);
Node* typecheckdef(Node *n);
diff --git a/src/cmd/gc/range.c b/src/cmd/gc/range.c
index 4ed4528..947b458 100644
--- a/src/cmd/gc/range.c
+++ b/src/cmd/gc/range.c
@@ -144,6 +144,72 @@
fatal("walkrange");
case TARRAY:
+ // Lower n into runtime·memclr if possible, for
+ // fast zeroing of slices and arrays (issue 5373).
+ // Look for instances of
+ //
+ // for i := range a {
+ // a[i] = zero
+ // }
+ //
+ // in which the evaluation of a is side-effect-free.
+ if(!debug['N'])
+ if(!flag_race)
+ if(v1 != N)
+ if(v2 == N)
+ if(n->nbody != nil)
+ if(n->nbody->n != N) // at least one statement in body
+ if(n->nbody->next == nil) { // at most one statement in body
+ tmp = n->nbody->n; // first statement of body
+ if(tmp->op == OAS)
+ if(tmp->left->op == OINDEX)
+ if(samesafeexpr(tmp->left->left, a))
+ if(samesafeexpr(tmp->left->right, v1))
+ if(t->type->width > 0)
+ if(iszero(tmp->right)) {
+ // Convert to
+ // if len(a) != 0 {
+ // hp = &a[0]
+ // hn = len(a)*sizeof(elem(a))
+ // memclr(hp, hn)
+ // i = len(a) - 1
+ // }
+ n->op = OIF;
+ n->nbody = nil;
+ n->ntest = nod(ONE, nod(OLEN, a, N), nodintconst(0));
+ n->nincr = nil;
+
+ // hp = &a[0]
+ hp = temp(ptrto(types[TUINT8]));
+ tmp = nod(OINDEX, a, nodintconst(0));
+ tmp->bounded = 1;
+ tmp = nod(OADDR, tmp, N);
+ tmp = nod(OCONVNOP, tmp, N);
+ n->nbody = list(n->nbody, nod(OAS, hp, tmp));
+
+ // hn = len(a) * sizeof(elem(a))
+ hn = temp(types[TUINTPTR]);
+ tmp = nod(OLEN, a, N);
+ tmp = nod(OMUL, tmp, nodintconst(t->type->width));
+ tmp = conv(tmp, types[TUINTPTR]);
+ n->nbody = list(n->nbody, nod(OAS, hn, tmp));
+
+ // memclr(hp, hn)
+ fn = mkcall("memclr", T, nil, hp, hn);
+ n->nbody = list(n->nbody, fn);
+
+ // i = len(a) - 1
+ v1 = nod(OAS, v1, nod(OSUB, nod(OLEN, a, N), nodintconst(1)));
+ n->nbody = list(n->nbody, v1);
+
+ typecheck(&n->ntest, Erv);
+ typechecklist(n->nbody, Etop);
+ walkstmt(&n);
+ lineno = lno;
+ return;
+ }
+ }
+
// orderstmt arranged for a copy of the array/slice variable if needed.
ha = a;
hv1 = temp(types[TINT]);
diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go
index a294456..463bb3a 100644
--- a/src/cmd/gc/runtime.go
+++ b/src/cmd/gc/runtime.go
@@ -160,6 +160,7 @@
func makeslice(typ *byte, nel int64, cap int64) (ary []any)
func growslice(typ *byte, old []any, n int64) (ary []any)
func memmove(to *any, frm *any, length uintptr)
+func memclr(ptr *byte, length uintptr)
func memequal(x, y *any, size uintptr) bool
func memequal8(x, y *any) bool
diff --git a/src/cmd/gc/typecheck.c b/src/cmd/gc/typecheck.c
index aa693af..8a3b486 100644
--- a/src/cmd/gc/typecheck.c
+++ b/src/cmd/gc/typecheck.c
@@ -2831,7 +2831,7 @@
// Check whether l and r are the same side effect-free expression,
// so that it is safe to reuse one instead of computing both.
-static int
+int
samesafeexpr(Node *l, Node *r)
{
if(l->op != r->op || !eqtype(l->type, r->type))
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index ffda4fe..29c62cc 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -162,6 +162,20 @@
func BenchmarkMemclr4096(b *testing.B) { bmMemclr(b, 4096) }
func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) }
+func bmGoMemclr(b *testing.B, n int) {
+ x := make([]byte, n)
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ for j := range x {
+ x[j] = 0
+ }
+ }
+}
+func BenchmarkGoMemclr5(b *testing.B) { bmGoMemclr(b, 5) }
+func BenchmarkGoMemclr16(b *testing.B) { bmGoMemclr(b, 16) }
+func BenchmarkGoMemclr64(b *testing.B) { bmGoMemclr(b, 64) }
+func BenchmarkGoMemclr256(b *testing.B) { bmGoMemclr(b, 256) }
+
func BenchmarkClearFat8(b *testing.B) {
for i := 0; i < b.N; i++ {
var x [8 / 4]uint32
diff --git a/test/fixedbugs/issue5373.go b/test/fixedbugs/issue5373.go
new file mode 100644
index 0000000..17ce189
--- /dev/null
+++ b/test/fixedbugs/issue5373.go
@@ -0,0 +1,71 @@
+// run
+
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Ensure that zeroing range loops have the requisite side-effects.
+
+package main
+
+import (
+ "fmt"
+ "os"
+)
+
+func check(n int) {
+ // When n == 0, i is untouched by the range loop.
+ // Picking an initial value of -1 for i makes the
+ // "want" calculation below correct in all cases.
+ i := -1
+ s := make([]byte, n)
+ for i = range s {
+ s[i] = 0
+ }
+ if want := n - 1; i != want {
+ fmt.Printf("index after range with side-effect = %d want %d\n", i, want)
+ os.Exit(1)
+ }
+
+ i = n + 1
+ // i is shadowed here, so its value should be unchanged.
+ for i := range s {
+ s[i] = 0
+ }
+ if want := n + 1; i != want {
+ fmt.Printf("index after range without side-effect = %d want %d\n", i, want)
+ os.Exit(1)
+ }
+
+ // Index variable whose evaluation has side-effects
+ var x int
+ f := func() int {
+ x++
+ return 0
+ }
+ var a [1]int
+ for a[f()] = range s {
+ s[a[f()]] = 0
+ }
+ if want := n * 2; x != want {
+ fmt.Printf("index function calls = %d want %d\n", x, want)
+ os.Exit(1)
+ }
+
+ // Range expression whose evaluation has side-effects
+ x = 0
+ b := [1][]byte{s}
+ for i := range b[f()] {
+ b[f()][i] = 0
+ }
+ if want := n + 1; x != n+1 {
+ fmt.Printf("range expr function calls = %d want %d\n", x, want)
+ os.Exit(1)
+ }
+}
+
+func main() {
+ check(0)
+ check(1)
+ check(15)
+}