database/sql: reduce lock contention in Stmt.connStmt

Previously, Stmt.connStmt called DB.connIfFree on each Stmt.css.
Since Stmt.connStmt locks Stmt.mu, concurrent use of a Stmt caused lock
contention on Stmt.mu.
Additionally, DB.connIfFree locks DB.mu which is shared by DB.addDep and
DB.removeDep.

This change removes DB.connIfFree and instead uses the first unused
connection in the idle connection pool, reducing lock contention
without complicating the code.

Fixes #9484

On EC2 c3.8xlarge (E5-2680 v2 @ 2.80GHz * 32 vCPU):

benchmark                           old ns/op     new ns/op     delta
BenchmarkManyConcurrentQuery-8      40249         34721         -13.73%
BenchmarkManyConcurrentQuery-16     45610         40176         -11.91%
BenchmarkManyConcurrentQuery-32     109831        43179         -60.69%

benchmark                           old allocs     new allocs     delta
BenchmarkManyConcurrentQuery-8      25             25             +0.00%
BenchmarkManyConcurrentQuery-16     25             25             +0.00%
BenchmarkManyConcurrentQuery-32     25             25             +0.00%

benchmark                           old bytes     new bytes     delta
BenchmarkManyConcurrentQuery-8      3980          3969          -0.28%
BenchmarkManyConcurrentQuery-16     3980          3982          +0.05%
BenchmarkManyConcurrentQuery-32     3993          3990          -0.08%

Change-Id: Ic96296922c465bac38a260018c58324dae1531d9
Reviewed-on: https://go-review.googlesource.com/2207
Reviewed-by: Mikio Hara <mikioh.mikioh@gmail.com>
diff --git a/src/database/sql/sql_test.go b/src/database/sql/sql_test.go
index 34efdf2..60bdefa 100644
--- a/src/database/sql/sql_test.go
+++ b/src/database/sql/sql_test.go
@@ -1764,56 +1764,6 @@
 	wg.Wait()
 }
 
-func manyConcurrentQueries(t testing.TB) {
-	maxProcs, numReqs := 16, 500
-	if testing.Short() {
-		maxProcs, numReqs = 4, 50
-	}
-	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(maxProcs))
-
-	db := newTestDB(t, "people")
-	defer closeDB(t, db)
-
-	stmt, err := db.Prepare("SELECT|people|name|")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer stmt.Close()
-
-	var wg sync.WaitGroup
-	wg.Add(numReqs)
-
-	reqs := make(chan bool)
-	defer close(reqs)
-
-	for i := 0; i < maxProcs*2; i++ {
-		go func() {
-			for range reqs {
-				rows, err := stmt.Query()
-				if err != nil {
-					t.Errorf("error on query:  %v", err)
-					wg.Done()
-					continue
-				}
-
-				var name string
-				for rows.Next() {
-					rows.Scan(&name)
-				}
-				rows.Close()
-
-				wg.Done()
-			}
-		}()
-	}
-
-	for i := 0; i < numReqs; i++ {
-		reqs <- true
-	}
-
-	wg.Wait()
-}
-
 func TestIssue6081(t *testing.T) {
 	db := newTestDB(t, "people")
 	defer closeDB(t, db)
@@ -1985,3 +1935,31 @@
 		doConcurrentTest(b, ct)
 	}
 }
+
+func BenchmarkManyConcurrentQueries(b *testing.B) {
+	b.ReportAllocs()
+	// To see lock contention in Go 1.4, 16~ cores and 128~ goroutines are required.
+	const parallelism = 16
+
+	db := newTestDB(b, "magicquery")
+	defer closeDB(b, db)
+	db.SetMaxIdleConns(runtime.GOMAXPROCS(0) * parallelism)
+
+	stmt, err := db.Prepare("SELECT|magicquery|op|op=?,millis=?")
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer stmt.Close()
+
+	b.SetParallelism(parallelism)
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			rows, err := stmt.Query("sleep", 1)
+			if err != nil {
+				b.Error(err)
+				return
+			}
+			rows.Close()
+		}
+	})
+}