acme/autocert: revoke dangling pending authzs

We now keep track of pending authorization requests during verify() and
defer the asynchronous revocation of the ones that failed.
This should help avoid letsencrypt's "too many currently pending
authorizations" error.

Fixes golang/go#23426

Change-Id: Ibffb10f59733962d45e43b67fc42a2ec7c5faf51
Reviewed-on: https://go-review.googlesource.com/100078
Run-TryBot: Filippo Valsorda <filippo@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Leo Antunes <costela@gmail.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
diff --git a/acme/autocert/autocert.go b/acme/autocert/autocert.go
index 263b291..97281d8 100644
--- a/acme/autocert/autocert.go
+++ b/acme/autocert/autocert.go
@@ -542,6 +542,20 @@
 	return der, leaf, nil
 }
 
+// revokePending revokes all provided authorizations (passed as a map of URIs)
+func (m *Manager) revokePending(ctx context.Context, pendingAuthzURIs map[string]bool) {
+	f := func(ctx context.Context) {
+		for uri := range pendingAuthzURIs {
+			m.Client.RevokeAuthorization(ctx, uri)
+		}
+	}
+	if waitForRevocations {
+		f(ctx)
+	} else {
+		go f(context.Background())
+	}
+}
+
 // verify runs the identifier (domain) authorization flow
 // using each applicable ACME challenge type.
 func (m *Manager) verify(ctx context.Context, client *acme.Client, domain string) error {
@@ -554,6 +568,10 @@
 	}
 	m.tokensMu.RUnlock()
 
+	// we keep track of pending authzs and revoke the ones that did not validate.
+	pendingAuthzs := make(map[string]bool)
+	defer m.revokePending(ctx, pendingAuthzs)
+
 	var nextTyp int // challengeType index of the next challenge type to try
 	for {
 		// Start domain authorization and get the challenge.
@@ -570,6 +588,8 @@
 			return fmt.Errorf("acme/autocert: invalid authorization %q", authz.URI)
 		}
 
+		pendingAuthzs[authz.URI] = true
+
 		// Pick the next preferred challenge.
 		var chal *acme.Challenge
 		for chal == nil && nextTyp < len(challengeTypes) {
@@ -590,6 +610,7 @@
 
 		// A challenge is fulfilled and accepted: wait for the CA to validate.
 		if _, err := client.WaitAuthorization(ctx, authz.URI); err == nil {
+			delete(pendingAuthzs, authz.URI)
 			return nil
 		}
 	}
@@ -959,4 +980,7 @@
 
 	// Called when a state is removed.
 	testDidRemoveState = func(domain string) {}
+
+	// make testing of revokePending synchronous
+	waitForRevocations = false
 )
diff --git a/acme/autocert/autocert_test.go b/acme/autocert/autocert_test.go
index 2da1912..54d38e2 100644
--- a/acme/autocert/autocert_test.go
+++ b/acme/autocert/autocert_test.go
@@ -19,10 +19,12 @@
 	"fmt"
 	"html/template"
 	"io"
+	"io/ioutil"
 	"math/big"
 	"net/http"
 	"net/http/httptest"
 	"reflect"
+	"strconv"
 	"strings"
 	"sync"
 	"testing"
@@ -529,6 +531,103 @@
 	}
 }
 
+func TestRevokeFailedAuth(t *testing.T) {
+	var (
+		authzCount int     // num. of created authorizations
+		revoked    [3]bool // there will be three different authorization attempts; see below
+	)
+
+	// make cleanup revocations synchronous
+	waitForRevocations = true
+
+	// ACME CA server stub, only the needed bits.
+	// TODO: Merge this with startACMEServerStub, making it a configurable CA for testing.
+	var ca *httptest.Server
+	ca = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Replay-Nonce", "nonce")
+		if r.Method == "HEAD" {
+			// a nonce request
+			return
+		}
+
+		switch r.URL.Path {
+		// Discovery.
+		case "/":
+			if err := discoTmpl.Execute(w, ca.URL); err != nil {
+				t.Errorf("discoTmpl: %v", err)
+			}
+		// Client key registration.
+		case "/new-reg":
+			w.Write([]byte("{}"))
+		// New domain authorization.
+		case "/new-authz":
+			w.Header().Set("Location", fmt.Sprintf("%s/authz/%d", ca.URL, authzCount))
+			w.WriteHeader(http.StatusCreated)
+			if err := authzTmpl.Execute(w, ca.URL); err != nil {
+				t.Errorf("authzTmpl: %v", err)
+			}
+			authzCount++
+		// force error on Accept()
+		case "/challenge/2":
+			http.Error(w, "won't accept tls-sni-02 challenge", http.StatusBadRequest)
+		// accept; authorization will be expired (404; see /authz/1 below)
+		case "/challenge/1":
+			w.Write([]byte("{}"))
+		// Authorization statuses.
+		case "/authz/0", "/authz/1", "/authz/2":
+			if r.Method == "POST" {
+				body, err := ioutil.ReadAll(r.Body)
+				if err != nil {
+					t.Errorf("could not read request body")
+				}
+				if reflect.DeepEqual(body, []byte(`{"status": "deactivated"}`)) {
+					t.Errorf("unexpected POST to authz: '%s'", body)
+				}
+				i, err := strconv.ParseInt(r.URL.Path[len(r.URL.Path)-1:], 10, 64)
+				if err != nil {
+					t.Errorf("could not convert authz ID to int")
+				}
+				revoked[i] = true
+				w.Write([]byte(`{"status": "invalid"}`))
+			} else {
+				switch r.URL.Path {
+				case "/authz/0":
+					// ensure we get a spurious validation if error forcing did not work (see above)
+					w.Write([]byte(`{"status": "valid"}`))
+				case "/authz/1":
+					// force error during WaitAuthorization()
+					w.WriteHeader(http.StatusNotFound)
+				}
+			}
+		default:
+			http.NotFound(w, r)
+			t.Errorf("unrecognized r.URL.Path: %s", r.URL.Path)
+		}
+	}))
+	defer ca.Close()
+
+	key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+	m := &Manager{
+		Client: &acme.Client{
+			Key:          key,
+			DirectoryURL: ca.URL,
+		},
+	}
+
+	// should fail and revoke tsl-sni-02, tls-sni-01 and the third time after not finding any remaining challenges
+	if err := m.verify(context.Background(), m.Client, "example.org"); err == nil {
+		t.Errorf("m.verify should have failed!")
+	}
+	for i := range revoked {
+		if !revoked[i] {
+			t.Errorf("authorization attempt %d not revoked after error", i)
+		}
+	}
+}
+
 func TestHTTPHandlerDefaultFallback(t *testing.T) {
 	tt := []struct {
 		method, url  string