acme/autocert: remove failed state entries

This change makes the Manager try creating a certificate
again, after a previously unsuccessful attempt.

The implementation is based on a timer, to prevent hitting
an ACME CA with too high QPS when under a heavy load.
The timer is hardcoded to 1 minute.

Fixes golang/go#17740.
Change-Id: I46a49201cf423be3360633a89209d7b2bccc1d76
Reviewed-on: https://go-review.googlesource.com/41694
Run-TryBot: Alex Vaghin <ddos@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/acme/autocert/autocert.go b/acme/autocert/autocert.go
index 679ff4d..2b5d068 100644
--- a/acme/autocert/autocert.go
+++ b/acme/autocert/autocert.go
@@ -33,6 +33,12 @@
 	"golang.org/x/crypto/acme"
 )
 
+// createCertRetryAfter is how much time to wait before removing a failed state
+// entry due to an unsuccessful createCert call.
+// This is a variable instead of a const for testing.
+// TODO: Consider making it configurable or an exp backoff?
+var createCertRetryAfter = time.Minute
+
 // pseudoRand is safe for concurrent use.
 var pseudoRand *lockedMathRand
 
@@ -363,6 +369,23 @@
 
 	der, leaf, err := m.authorizedCert(ctx, state.key, domain)
 	if err != nil {
+		// Remove the failed state after some time,
+		// making the manager call createCert again on the following TLS hello.
+		time.AfterFunc(createCertRetryAfter, func() {
+			defer testDidRemoveState(domain)
+			m.stateMu.Lock()
+			defer m.stateMu.Unlock()
+			// Verify the state hasn't changed and it's still invalid
+			// before deleting.
+			s, ok := m.state[domain]
+			if !ok {
+				return
+			}
+			if _, err := validCert(domain, s.cert, s.key); err == nil {
+				return
+			}
+			delete(m.state, domain)
+		})
 		return nil, err
 	}
 	state.cert = der
@@ -411,7 +434,6 @@
 // authorizedCert starts domain ownership verification process and requests a new cert upon success.
 // The key argument is the certificate private key.
 func (m *Manager) authorizedCert(ctx context.Context, key crypto.Signer, domain string) (der [][]byte, leaf *x509.Certificate, err error) {
-	// TODO: make m.verify retry or retry m.verify calls here
 	if err := m.verify(ctx, domain); err != nil {
 		return nil, nil, err
 	}
@@ -782,5 +804,10 @@
 	return n
 }
 
-// for easier testing
-var timeNow = time.Now
+// For easier testing.
+var (
+	timeNow = time.Now
+
+	// Called when a state is removed.
+	testDidRemoveState = func(domain string) {}
+)
diff --git a/acme/autocert/autocert_test.go b/acme/autocert/autocert_test.go
index 3166c1f..4bb4640 100644
--- a/acme/autocert/autocert_test.go
+++ b/acme/autocert/autocert_test.go
@@ -210,6 +210,56 @@
 	testGetCertificate(t, man, "example.org", hello)
 }
 
+func TestGetCertificate_failedAttempt(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadRequest)
+	}))
+	defer ts.Close()
+
+	const example = "example.org"
+	d := createCertRetryAfter
+	f := testDidRemoveState
+	defer func() {
+		createCertRetryAfter = d
+		testDidRemoveState = f
+	}()
+	createCertRetryAfter = 0
+	done := make(chan struct{})
+	testDidRemoveState = func(domain string) {
+		if domain != example {
+			t.Errorf("testDidRemoveState: domain = %q; want %q", domain, example)
+		}
+		close(done)
+	}
+
+	key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+	man := &Manager{
+		Prompt: AcceptTOS,
+		Client: &acme.Client{
+			Key:          key,
+			DirectoryURL: ts.URL,
+		},
+	}
+	defer man.stopRenew()
+	hello := &tls.ClientHelloInfo{ServerName: example}
+	if _, err := man.GetCertificate(hello); err == nil {
+		t.Error("GetCertificate: err is nil")
+	}
+	select {
+	case <-time.After(5 * time.Second):
+		t.Errorf("took too long to remove the %q state", example)
+	case <-done:
+		man.stateMu.Lock()
+		defer man.stateMu.Unlock()
+		if v, exist := man.state[example]; exist {
+			t.Errorf("state exists for %q: %+v", example, v)
+		}
+	}
+}
+
 // startACMEServerStub runs an ACME server
 // The domain argument is the expected domain name of a certificate request.
 func startACMEServerStub(t *testing.T, man *Manager, domain string) (url string, finish func()) {