cmd/gc: faster code, mainly for rotate

* Eliminate bounds check on known small shifts.
* Rewrite x<<s | x>>(32-s) as a rotate (constant s).
* More aggressive (but still minimal) range analysis.

R=ken, dave, iant
CC=golang-dev
https://golang.org/cl/6209077
diff --git a/test/bounds.go b/test/bounds.go
new file mode 100644
index 0000000..7b2b528
--- /dev/null
+++ b/test/bounds.go
@@ -0,0 +1,277 @@
+// errchk -0 $G -m -l $D/$F.go
+
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test, using compiler diagnostic flags, that bounds check elimination
+// is eliminating the correct checks.
+
+package foo
+
+var (
+	s []int
+
+	a1 [1]int
+	a1k [1000]int
+	a100k [100000]int
+
+	p1 *[1]int
+	p1k *[1000]int
+	p100k *[100000]int
+
+	i int
+	ui uint
+	i8 int8
+	ui8 uint8
+	i16 int16
+	ui16 uint16
+	i32 int32
+	ui32 uint32
+	i64 int64
+	ui64 uint64
+)
+
+func main() {
+	// Most things need checks.
+	use(s[i])
+	use(a1[i])
+	use(a1k[i])
+	use(a100k[i])
+	use(p1[i])
+	use(p1k[i])
+	use(p100k[i])
+
+	use(s[ui])
+	use(a1[ui])
+	use(a1k[ui])
+	use(a100k[ui])
+	use(p1[ui])
+	use(p1k[ui])
+	use(p100k[ui])
+
+	use(s[i8])
+	use(a1[i8])
+	use(a1k[i8])
+	use(a100k[i8])
+	use(p1[i8])
+	use(p1k[i8])
+	use(p100k[i8])
+
+	// Unsigned 8-bit numbers don't need checks for len >= 2⁸.
+	use(s[ui8])
+	use(a1[ui8])
+	use(a1k[ui8])  // ERROR "index bounds check elided"
+	use(a100k[ui8])  // ERROR "index bounds check elided"
+	use(p1[ui8])
+	use(p1k[ui8])  // ERROR "index bounds check elided"
+	use(p100k[ui8])  // ERROR "index bounds check elided"
+
+	use(s[i16])
+	use(a1[i16])
+	use(a1k[i16])
+	use(a100k[i16])
+	use(p1[i16])
+	use(p1k[i16])
+	use(p100k[i16])
+
+	// Unsigned 16-bit numbers don't need checks for len >= 2¹⁶.
+	use(s[ui16])
+	use(a1[ui16])
+	use(a1k[ui16])
+	use(a100k[ui16])  // ERROR "index bounds check elided"
+	use(p1[ui16])
+	use(p1k[ui16])
+	use(p100k[ui16])  // ERROR "index bounds check elided"
+
+	use(s[i32])
+	use(a1[i32])
+	use(a1k[i32])
+	use(a100k[i32])
+	use(p1[i32])
+	use(p1k[i32])
+	use(p100k[i32])
+
+	use(s[ui32])
+	use(a1[ui32])
+	use(a1k[ui32])
+	use(a100k[ui32])
+	use(p1[ui32])
+	use(p1k[ui32])
+	use(p100k[ui32])
+
+	use(s[i64])
+	use(a1[i64])
+	use(a1k[i64])
+	use(a100k[i64])
+	use(p1[i64])
+	use(p1k[i64])
+	use(p100k[i64])
+
+	use(s[ui64])
+	use(a1[ui64])
+	use(a1k[ui64])
+	use(a100k[ui64])
+	use(p1[ui64])
+	use(p1k[ui64])
+	use(p100k[ui64])
+
+	// Mod truncates the maximum value to one less than the argument,
+	// but signed mod can be negative, so only unsigned mod counts.
+	use(s[i%999])
+	use(a1[i%999])
+	use(a1k[i%999])
+	use(a100k[i%999])
+	use(p1[i%999])
+	use(p1k[i%999])
+	use(p100k[i%999])
+
+	use(s[ui%999])
+	use(a1[ui%999])
+	use(a1k[ui%999])  // ERROR "index bounds check elided"
+	use(a100k[ui%999])  // ERROR "index bounds check elided"
+	use(p1[ui%999])
+	use(p1k[ui%999])  // ERROR "index bounds check elided"
+	use(p100k[ui%999])  // ERROR "index bounds check elided"
+
+	use(s[i%1000])
+	use(a1[i%1000])
+	use(a1k[i%1000])
+	use(a100k[i%1000])
+	use(p1[i%1000])
+	use(p1k[i%1000])
+	use(p100k[i%1000])
+
+	use(s[ui%1000])
+	use(a1[ui%1000])
+	use(a1k[ui%1000])  // ERROR "index bounds check elided"
+	use(a100k[ui%1000])  // ERROR "index bounds check elided"
+	use(p1[ui%1000])
+	use(p1k[ui%1000])  // ERROR "index bounds check elided"
+	use(p100k[ui%1000])  // ERROR "index bounds check elided"
+
+	use(s[i%1001])
+	use(a1[i%1001])
+	use(a1k[i%1001])
+	use(a100k[i%1001])
+	use(p1[i%1001])
+	use(p1k[i%1001])
+	use(p100k[i%1001])
+
+	use(s[ui%1001])
+	use(a1[ui%1001])
+	use(a1k[ui%1001])
+	use(a100k[ui%1001])  // ERROR "index bounds check elided"
+	use(p1[ui%1001])
+	use(p1k[ui%1001])
+	use(p100k[ui%1001])  // ERROR "index bounds check elided"
+
+	// Bitwise and truncates the maximum value to the mask value.
+	// The result (for a positive mask) cannot be negative, so elision
+	// applies to both signed and unsigned indexes.
+	use(s[i&999])
+	use(a1[i&999])
+	use(a1k[i&999])  // ERROR "index bounds check elided"
+	use(a100k[i&999])  // ERROR "index bounds check elided"
+	use(p1[i&999])
+	use(p1k[i&999])  // ERROR "index bounds check elided"
+	use(p100k[i&999])  // ERROR "index bounds check elided"
+
+	use(s[ui&999])
+	use(a1[ui&999])
+	use(a1k[ui&999])  // ERROR "index bounds check elided"
+	use(a100k[ui&999])  // ERROR "index bounds check elided"
+	use(p1[ui&999])
+	use(p1k[ui&999])  // ERROR "index bounds check elided"
+	use(p100k[ui&999])  // ERROR "index bounds check elided"
+
+	use(s[i&1000])
+	use(a1[i&1000])
+	use(a1k[i&1000])
+	use(a100k[i&1000])  // ERROR "index bounds check elided"
+	use(p1[i&1000])
+	use(p1k[i&1000])
+	use(p100k[i&1000])  // ERROR "index bounds check elided"
+
+	use(s[ui&1000])
+	use(a1[ui&1000])
+	use(a1k[ui&1000])
+	use(a100k[ui&1000])  // ERROR "index bounds check elided"
+	use(p1[ui&1000])
+	use(p1k[ui&1000])
+	use(p100k[ui&1000])  // ERROR "index bounds check elided"
+
+	// Right shift cuts the effective number of bits in the index,
+	// but only for unsigned (signed stays negative).
+	use(s[i32>>22])
+	use(a1[i32>>22])
+	use(a1k[i32>>22])
+	use(a100k[i32>>22])
+	use(p1[i32>>22])
+	use(p1k[i32>>22])
+	use(p100k[i32>>22])
+
+	use(s[ui32>>22])
+	use(a1[ui32>>22])
+	use(a1k[ui32>>22])
+	use(a100k[ui32>>22])  // ERROR "index bounds check elided"
+	use(p1[ui32>>22])
+	use(p1k[ui32>>22])
+	use(p100k[ui32>>22])  // ERROR "index bounds check elided"
+
+	use(s[i32>>23])
+	use(a1[i32>>23])
+	use(a1k[i32>>23])
+	use(a100k[i32>>23])
+	use(p1[i32>>23])
+	use(p1k[i32>>23])
+	use(p100k[i32>>23])
+
+	use(s[ui32>>23])
+	use(a1[ui32>>23])
+	use(a1k[ui32>>23])  // ERROR "index bounds check elided"
+	use(a100k[ui32>>23])  // ERROR "index bounds check elided"
+	use(p1[ui32>>23])
+	use(p1k[ui32>>23])  // ERROR "index bounds check elided"
+	use(p100k[ui32>>23])  // ERROR "index bounds check elided"
+
+	// Division cuts the range like right shift does.
+	use(s[i/1e6])
+	use(a1[i/1e6])
+	use(a1k[i/1e6])
+	use(a100k[i/1e6])
+	use(p1[i/1e6])
+	use(p1k[i/1e6])
+	use(p100k[i/1e6])
+
+	use(s[ui/1e6])
+	use(a1[ui/1e6])
+	use(a1k[ui/1e6])
+	use(a100k[ui/1e6])  // ERROR "index bounds check elided"
+	use(p1[ui/1e6])
+	use(p1k[ui/1e6])
+	use(p100k[ui/1e6])  // ERROR "index bounds check elided"
+
+	use(s[i/1e7])
+	use(a1[i/1e7])
+	use(a1k[i/1e7])
+	use(a100k[i/1e7])
+	use(p1[i/1e7])
+	use(p1k[i/1e7])
+	use(p100k[i/1e7])
+
+	use(s[ui/1e7])
+	use(a1[ui/1e7])
+	use(a1k[ui/1e7])  // ERROR "index bounds check elided"
+	use(a100k[ui/1e7])  // ERROR "index bounds check elided"
+	use(p1[ui/1e7])
+	use(p1k[ui/1e7])  // ERROR "index bounds check elided"
+	use(p100k[ui/1e7])  // ERROR "index bounds check elided"
+
+}
+
+var sum int 
+
+func use(x int) {
+	sum += x
+}
diff --git a/test/rotate.go b/test/rotate.go
new file mode 100644
index 0000000..67d32d7
--- /dev/null
+++ b/test/rotate.go
@@ -0,0 +1,141 @@
+// $G $D/$F.go && $L $F.$A &&
+// ./$A.out >tmp.go && $G tmp.go && $L -o $A.out1 tmp.$A && ./$A.out1
+// rm -f tmp.go $A.out1
+
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Generate test of shift and rotate by constants.
+// The output is compiled and run.
+//
+// The output takes around a minute to compile, link, and run
+// but it is only done during ./run, not in normal builds using run.go.
+
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+)
+
+func main() {
+	flag.Parse()
+
+	b := bufio.NewWriter(os.Stdout)
+	defer b.Flush()
+
+	fmt.Fprintf(b, "%s\n", prolog)
+
+	for logBits := uint(3); logBits <= 6; logBits++ {
+		for mode := 0; mode < 1<<2; mode++ {
+			gentest(b, 1<<logBits, mode&1 != 0, mode&2 != 0)
+		}
+	}
+}
+
+const prolog = `
+
+package main
+
+import (
+	"fmt"
+	"os"
+)
+
+var (
+	i8 int8 = 0x12
+	i16 int16 = 0x1234
+	i32 int32 = 0x12345678
+	i64 int64 = 0x123456789abcdef0
+	ui8 uint8 = 0x12
+	ui16 uint16 = 0x1234
+	ui32 uint32 = 0x12345678
+	ui64 uint64 = 0x123456789abcdef0
+
+	ni8 = ^i8
+	ni16 = ^i16
+	ni32 = ^i32
+	ni64 = ^i64
+	nui8 = ^ui8
+	nui16 = ^ui16
+	nui32 = ^ui32
+	nui64 = ^ui64
+)
+
+var nfail = 0
+
+func check(desc string, have, want interface{}) {
+	if have != want {
+		nfail++
+		fmt.Printf("%s = %T(%#x), want %T(%#x)\n", desc, have, have, want, want)
+		if nfail >= 100 {
+			fmt.Printf("BUG: stopping after 100 failures\n")
+			os.Exit(0)
+		}
+	}
+}
+
+func main() {
+	if nfail > 0 {
+		fmt.Printf("BUG\n")
+	}
+}
+
+`
+
+func gentest(b *bufio.Writer, bits uint, unsigned, inverted bool) {
+	fmt.Fprintf(b, "func init() {\n")
+	defer fmt.Fprintf(b, "}\n")
+	n := 0
+
+	// Generate tests for left/right and right/left.
+	for l := uint(0); l <= bits; l++ {
+		for r := uint(0); r <= bits; r++ {
+			typ := fmt.Sprintf("int%d", bits)
+			v := fmt.Sprintf("i%d", bits)
+			if unsigned {
+				typ = "u" + typ
+				v = "u" + v
+			}
+			v0 := int64(0x123456789abcdef0)
+			if inverted {
+				v = "n" + v
+				v0 = ^v0
+			}
+			expr1 := fmt.Sprintf("%s<<%d | %s>>%d", v, l, v, r)
+			expr2 := fmt.Sprintf("%s>>%d | %s<<%d", v, r, v, l)
+			
+			var result string
+			if unsigned {
+				v := uint64(v0) >> (64 - bits)
+				v = v<<l | v>>r
+				v <<= 64 - bits
+				v >>= 64 - bits
+				result = fmt.Sprintf("%#x", v)
+			} else {
+				v := int64(v0) >> (64 - bits)
+				v = v<<l | v>>r
+				v <<= 64 - bits
+				v >>= 64 - bits
+				result = fmt.Sprintf("%#x", v)
+			}
+
+			fmt.Fprintf(b, "\tcheck(%q, %s, %s(%s))\n", expr1, expr1, typ, result)
+			fmt.Fprintf(b, "\tcheck(%q, %s, %s(%s))\n", expr2, expr2, typ, result)
+
+			// Chop test into multiple functions so that there's not one
+			// enormous function to compile/link.
+			// All the functions are named init so we don't have to do
+			// anything special to call them.  ☺
+			if n++; n >= 100 {
+				fmt.Fprintf(b, "}\n")
+				fmt.Fprintf(b, "func init() {\n")
+				n = 0
+			}
+		}
+	}
+}
+