buildletclient: simplify Close

Now Close always means destroy.

The old code & API was a half-baked implementation of a (lack of)
design where there was a difference between being done with a buildlet
(with it possibly being reused by somebody else?) and you wanting to
nuke it completely. Unfortunately this just grew messier and more
broken over time.

This attempts to clean it all up.

We can add the sharing ideas back later when there's actually a design
and implementation. (We're not losing anything with this CL, because
nothing ever shared buildlets)

In fact, this CL fixes a problem where reverse buildlets weren't
getting their underlying net.Conns closed when their healthchecks
failed, leading to 4+ hour (and counting) build hangs due to buildlets
getting killed at inopportune moments (e.g. me testing running a
reverse buildlet on my home mac to help out with the dashboard
backlog)

Change-Id: I07be09f4d5f0f09d35e51e41c48b1296b71bb9b5
Reviewed-on: https://go-review.googlesource.com/14585
Reviewed-by: David Crawshaw <crawshaw@golang.org>
Reviewed-by: Andrew Gerrand <adg@golang.org>
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index a344714..4a79e52 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -1240,7 +1240,6 @@
 		return fmt.Errorf("failed to get a buildlet: %v", err)
 	}
 	defer bc.Close()
-	defer nukeIfBroken(bc)
 	st.mu.Lock()
 	st.bc = bc
 	st.mu.Unlock()
@@ -1966,7 +1965,6 @@
 			go func(bc *buildlet.Client) {
 				defer st.logEventTime("closed_helper", bc.Name())
 				defer bc.Close()
-				defer nukeIfBroken(bc)
 				if devPause {
 					defer time.Sleep(5 * time.Minute)
 					defer st.logEventTime("DEV_HELPER_SLEEP", bc.Name())
@@ -2642,12 +2640,4 @@
 	return slurp, nil
 }
 
-func nukeIfBroken(bc *buildlet.Client) {
-	if bc.IsBroken() {
-		// It may not have come from the reverse pool, but it's harmless if
-		// it didn't.
-		reversePool.nukeBuildlet(bc)
-	}
-}
-
 var nl = []byte("\n")
diff --git a/cmd/coordinator/gce.go b/cmd/coordinator/gce.go
index fc75a02..a2b87d4 100644
--- a/cmd/coordinator/gce.go
+++ b/cmd/coordinator/gce.go
@@ -266,7 +266,9 @@
 		return nil, err
 	}
 	bc.SetDescription("GCE VM: " + instName)
-	bc.SetCloseFunc(func() error { return p.putBuildlet(bc, typ, instName) })
+	bc.SetOnHeartbeatFailure(func() {
+		p.putBuildlet(bc, typ, instName)
+	})
 	return bc, nil
 }
 
@@ -274,8 +276,12 @@
 	// TODO(bradfitz): add the buildlet to a freelist (of max N
 	// items) for up to 10 minutes since when it got started if
 	// it's never seen a command execution failure, and we can
-	// wipe all its disk content. (perhaps wipe its disk content when
-	// it's retrieved, not put back on the freelist)
+	// wipe all its disk content? (perhaps wipe its disk content
+	// when it's retrieved, like the reverse buildlet pool) But
+	// this will require re-introducing a distinction in the
+	// buildlet client library between Close, Destroy/Halt, and
+	// tracking execution errors.  That was all half-baked before
+	// and thus removed. Now Close always destroys everything.
 	deleteVM(projectZone, instName)
 	p.setInstanceUsed(instName, false)
 
diff --git a/cmd/coordinator/remote.go b/cmd/coordinator/remote.go
index bf6b1f5..41dd59d 100644
--- a/cmd/coordinator/remote.go
+++ b/cmd/coordinator/remote.go
@@ -66,11 +66,6 @@
 	}
 }
 
-func destroyCloseBuildlet(bc *buildlet.Client) {
-	bc.Destroy()
-	bc.Close()
-}
-
 func expireBuildlets() {
 	defer cleanTimer.Reset(remoteBuildletCleanInterval)
 	remoteBuildlets.Lock()
@@ -78,7 +73,7 @@
 	now := time.Now()
 	for name, rb := range remoteBuildlets.m {
 		if !rb.Expires.IsZero() && rb.Expires.Before(now) {
-			go destroyCloseBuildlet(rb.buildlet)
+			go rb.buildlet.Close()
 			delete(remoteBuildlets.m, name)
 		}
 	}
@@ -261,7 +256,7 @@
 	}
 
 	if r.Method == "POST" && r.URL.Path == "/halt" {
-		err := rb.buildlet.Destroy()
+		err := rb.buildlet.Close()
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 		}
diff --git a/cmd/coordinator/reverse.go b/cmd/coordinator/reverse.go
index 96e7953..36bd9af 100644
--- a/cmd/coordinator/reverse.go
+++ b/cmd/coordinator/reverse.go
@@ -79,10 +79,6 @@
 			// Found an unused match.
 			b.inUseAs = machineType
 			b.inUseTime = time.Now()
-			b.client.SetCloseFunc(func() error {
-				p.nukeBuildlet(b.client)
-				return nil
-			})
 			return b.client, nil
 		}
 	}
@@ -441,6 +437,7 @@
 		},
 	})
 	client.SetDescription(fmt.Sprintf("reverse peer %s/%s for modes %v", hostname, r.RemoteAddr, modes))
+	client.SetOnHeartbeatFailure(func() { conn.Close() })
 	tstatus := time.Now()
 	status, err := client.Status()
 	if err != nil {