acme: clarify retries and backoff algorithm

There's been some confusion about failed request retries.
Rightfully so: some requests are retried, others are not.

This change attempts to clarify the issue and unify backoff
usage in all Client's methods by introducing a new exported
optional field RetryBackoff and adding retry logic where missing.

Also, updates golang/go#22457.

Change-Id: Ied434edf998d52925a48b6b3b2407d45a6e9d2ee
Reviewed-on: https://go-review.googlesource.com/109615
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/acme/acme.go b/acme/acme.go
index 1f4fb69..0d7579c 100644
--- a/acme/acme.go
+++ b/acme/acme.go
@@ -14,7 +14,6 @@
 package acme
 
 import (
-	"bytes"
 	"context"
 	"crypto"
 	"crypto/ecdsa"
@@ -33,7 +32,6 @@
 	"io/ioutil"
 	"math/big"
 	"net/http"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -76,6 +74,22 @@
 	// will have no effect.
 	DirectoryURL string
 
+	// RetryBackoff computes the duration after which the nth retry of a failed request
+	// should occur. The value of n for the first call on failure is 1.
+	// The values of r and resp are the request and response of the last failed attempt.
+	// If the returned value is negative or zero, no more retries are done and an error
+	// is returned to the caller of the original method.
+	//
+	// Requests which result in a 4xx client error are not retried,
+	// except for 400 Bad Request due to "bad nonce" errors and 429 Too Many Requests.
+	//
+	// If RetryBackoff is nil, a truncated exponential backoff algorithm
+	// with the ceiling of 10 seconds is used, where each subsequent retry n
+	// is done after either ("Retry-After" + jitter) or (2^n seconds + jitter),
+	// preferring the former if "Retry-After" header is found in the resp.
+	// The jitter is a random value up to 1 second.
+	RetryBackoff func(n int, r *http.Request, resp *http.Response) time.Duration
+
 	dirMu sync.Mutex // guards writes to dir
 	dir   *Directory // cached result of Client's Discover method
 
@@ -99,15 +113,12 @@
 	if dirURL == "" {
 		dirURL = LetsEncryptURL
 	}
-	res, err := c.get(ctx, dirURL)
+	res, err := c.get(ctx, dirURL, wantStatus(http.StatusOK))
 	if err != nil {
 		return Directory{}, err
 	}
 	defer res.Body.Close()
 	c.addNonce(res.Header)
-	if res.StatusCode != http.StatusOK {
-		return Directory{}, responseError(res)
-	}
 
 	var v struct {
 		Reg    string `json:"new-reg"`
@@ -166,14 +177,11 @@
 		req.NotAfter = now.Add(exp).Format(time.RFC3339)
 	}
 
-	res, err := c.retryPostJWS(ctx, c.Key, c.dir.CertURL, req)
+	res, err := c.post(ctx, c.Key, c.dir.CertURL, req, wantStatus(http.StatusCreated))
 	if err != nil {
 		return nil, "", err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusCreated {
-		return nil, "", responseError(res)
-	}
 
 	curl := res.Header.Get("Location") // cert permanent URL
 	if res.ContentLength == 0 {
@@ -196,26 +204,11 @@
 // Callers are encouraged to parse the returned value to ensure the certificate is valid
 // and has expected features.
 func (c *Client) FetchCert(ctx context.Context, url string, bundle bool) ([][]byte, error) {
-	for {
-		res, err := c.get(ctx, url)
-		if err != nil {
-			return nil, err
-		}
-		defer res.Body.Close()
-		if res.StatusCode == http.StatusOK {
-			return c.responseCert(ctx, res, bundle)
-		}
-		if res.StatusCode > 299 {
-			return nil, responseError(res)
-		}
-		d := retryAfter(res.Header.Get("Retry-After"), 3*time.Second)
-		select {
-		case <-time.After(d):
-			// retry
-		case <-ctx.Done():
-			return nil, ctx.Err()
-		}
+	res, err := c.get(ctx, url, wantStatus(http.StatusOK))
+	if err != nil {
+		return nil, err
 	}
+	return c.responseCert(ctx, res, bundle)
 }
 
 // RevokeCert revokes a previously issued certificate cert, provided in DER format.
@@ -241,14 +234,11 @@
 	if key == nil {
 		key = c.Key
 	}
-	res, err := c.retryPostJWS(ctx, key, c.dir.RevokeURL, body)
+	res, err := c.post(ctx, key, c.dir.RevokeURL, body, wantStatus(http.StatusOK))
 	if err != nil {
 		return err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusOK {
-		return responseError(res)
-	}
 	return nil
 }
 
@@ -329,14 +319,11 @@
 		Resource:   "new-authz",
 		Identifier: authzID{Type: "dns", Value: domain},
 	}
-	res, err := c.retryPostJWS(ctx, c.Key, c.dir.AuthzURL, req)
+	res, err := c.post(ctx, c.Key, c.dir.AuthzURL, req, wantStatus(http.StatusCreated))
 	if err != nil {
 		return nil, err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusCreated {
-		return nil, responseError(res)
-	}
 
 	var v wireAuthz
 	if err := json.NewDecoder(res.Body).Decode(&v); err != nil {
@@ -353,14 +340,11 @@
 // If a caller needs to poll an authorization until its status is final,
 // see the WaitAuthorization method.
 func (c *Client) GetAuthorization(ctx context.Context, url string) (*Authorization, error) {
-	res, err := c.get(ctx, url)
+	res, err := c.get(ctx, url, wantStatus(http.StatusOK, http.StatusAccepted))
 	if err != nil {
 		return nil, err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusAccepted {
-		return nil, responseError(res)
-	}
 	var v wireAuthz
 	if err := json.NewDecoder(res.Body).Decode(&v); err != nil {
 		return nil, fmt.Errorf("acme: invalid response: %v", err)
@@ -387,14 +371,11 @@
 		Status:   "deactivated",
 		Delete:   true,
 	}
-	res, err := c.retryPostJWS(ctx, c.Key, url, req)
+	res, err := c.post(ctx, c.Key, url, req, wantStatus(http.StatusOK))
 	if err != nil {
 		return err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusOK {
-		return responseError(res)
-	}
 	return nil
 }
 
@@ -406,44 +387,42 @@
 // In all other cases WaitAuthorization returns an error.
 // If the Status is StatusInvalid, the returned error is of type *AuthorizationError.
 func (c *Client) WaitAuthorization(ctx context.Context, url string) (*Authorization, error) {
-	sleep := sleeper(ctx)
 	for {
-		res, err := c.get(ctx, url)
+		res, err := c.get(ctx, url, wantStatus(http.StatusOK, http.StatusAccepted))
 		if err != nil {
 			return nil, err
 		}
-		if res.StatusCode >= 400 && res.StatusCode <= 499 {
-			// Non-retriable error. For instance, Let's Encrypt may return 404 Not Found
-			// when requesting an expired authorization.
-			defer res.Body.Close()
-			return nil, responseError(res)
-		}
 
-		retry := res.Header.Get("Retry-After")
-		if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusAccepted {
-			res.Body.Close()
-			if err := sleep(retry, 1); err != nil {
-				return nil, err
-			}
-			continue
-		}
 		var raw wireAuthz
 		err = json.NewDecoder(res.Body).Decode(&raw)
 		res.Body.Close()
-		if err != nil {
-			if err := sleep(retry, 0); err != nil {
-				return nil, err
-			}
-			continue
-		}
-		if raw.Status == StatusValid {
+		switch {
+		case err != nil:
+			// Skip and retry.
+		case raw.Status == StatusValid:
 			return raw.authorization(url), nil
-		}
-		if raw.Status == StatusInvalid {
+		case raw.Status == StatusInvalid:
 			return nil, raw.error(url)
 		}
-		if err := sleep(retry, 0); err != nil {
-			return nil, err
+
+		// Exponential backoff is implemented in c.get above.
+		// This is just to prevent continuously hitting the CA
+		// while waiting for a final authorization status.
+		d := retryAfter(res.Header.Get("Retry-After"))
+		if d == 0 {
+			// Given that the fastest challenges TLS-SNI and HTTP-01
+			// require a CA to make at least 1 network round trip
+			// and most likely persist a challenge state,
+			// this default delay seems reasonable.
+			d = time.Second
+		}
+		t := time.NewTimer(d)
+		select {
+		case <-ctx.Done():
+			t.Stop()
+			return nil, ctx.Err()
+		case <-t.C:
+			// Retry.
 		}
 	}
 }
@@ -452,14 +431,11 @@
 //
 // A client typically polls a challenge status using this method.
 func (c *Client) GetChallenge(ctx context.Context, url string) (*Challenge, error) {
-	res, err := c.get(ctx, url)
+	res, err := c.get(ctx, url, wantStatus(http.StatusOK, http.StatusAccepted))
 	if err != nil {
 		return nil, err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusAccepted {
-		return nil, responseError(res)
-	}
 	v := wireChallenge{URI: url}
 	if err := json.NewDecoder(res.Body).Decode(&v); err != nil {
 		return nil, fmt.Errorf("acme: invalid response: %v", err)
@@ -486,16 +462,14 @@
 		Type:     chal.Type,
 		Auth:     auth,
 	}
-	res, err := c.retryPostJWS(ctx, c.Key, chal.URI, req)
+	res, err := c.post(ctx, c.Key, chal.URI, req, wantStatus(
+		http.StatusOK,       // according to the spec
+		http.StatusAccepted, // Let's Encrypt: see https://goo.gl/WsJ7VT (acme-divergences.md)
+	))
 	if err != nil {
 		return nil, err
 	}
 	defer res.Body.Close()
-	// Note: the protocol specifies 200 as the expected response code, but
-	// letsencrypt seems to be returning 202.
-	if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusAccepted {
-		return nil, responseError(res)
-	}
 
 	var v wireChallenge
 	if err := json.NewDecoder(res.Body).Decode(&v); err != nil {
@@ -619,14 +593,14 @@
 		req.Contact = acct.Contact
 		req.Agreement = acct.AgreedTerms
 	}
-	res, err := c.retryPostJWS(ctx, c.Key, url, req)
+	res, err := c.post(ctx, c.Key, url, req, wantStatus(
+		http.StatusOK,      // updates and deletes
+		http.StatusCreated, // new account creation
+	))
 	if err != nil {
 		return nil, err
 	}
 	defer res.Body.Close()
-	if res.StatusCode < 200 || res.StatusCode > 299 {
-		return nil, responseError(res)
-	}
 
 	var v struct {
 		Contact        []string
@@ -656,59 +630,6 @@
 	}, nil
 }
 
-// retryPostJWS will retry calls to postJWS if there is a badNonce error,
-// clearing the stored nonces after each error.
-// If the response was 4XX-5XX, then responseError is called on the body,
-// the body is closed, and the error returned.
-func (c *Client) retryPostJWS(ctx context.Context, key crypto.Signer, url string, body interface{}) (*http.Response, error) {
-	sleep := sleeper(ctx)
-	for {
-		res, err := c.postJWS(ctx, key, url, body)
-		if err != nil {
-			return nil, err
-		}
-		// handle errors 4XX-5XX with responseError
-		if res.StatusCode >= 400 && res.StatusCode <= 599 {
-			err := responseError(res)
-			res.Body.Close()
-			// according to spec badNonce is urn:ietf:params:acme:error:badNonce
-			// however, acme servers in the wild return their version of the error
-			// https://tools.ietf.org/html/draft-ietf-acme-acme-02#section-5.4
-			if ae, ok := err.(*Error); ok && strings.HasSuffix(strings.ToLower(ae.ProblemType), ":badnonce") {
-				// clear any nonces that we might've stored that might now be
-				// considered bad
-				c.clearNonces()
-				retry := res.Header.Get("Retry-After")
-				if err := sleep(retry, 1); err != nil {
-					return nil, err
-				}
-				continue
-			}
-			return nil, err
-		}
-		return res, nil
-	}
-}
-
-// postJWS signs the body with the given key and POSTs it to the provided url.
-// The body argument must be JSON-serializable.
-func (c *Client) postJWS(ctx context.Context, key crypto.Signer, url string, body interface{}) (*http.Response, error) {
-	nonce, err := c.popNonce(ctx, url)
-	if err != nil {
-		return nil, err
-	}
-	b, err := jwsEncodeJSON(body, key, nonce)
-	if err != nil {
-		return nil, err
-	}
-	res, err := c.post(ctx, url, "application/jose+json", bytes.NewReader(b))
-	if err != nil {
-		return nil, err
-	}
-	c.addNonce(res.Header)
-	return res, nil
-}
-
 // popNonce returns a nonce value previously stored with c.addNonce
 // or fetches a fresh one from the given URL.
 func (c *Client) popNonce(ctx context.Context, url string) (string, error) {
@@ -749,58 +670,12 @@
 	c.nonces[v] = struct{}{}
 }
 
-func (c *Client) httpClient() *http.Client {
-	if c.HTTPClient != nil {
-		return c.HTTPClient
-	}
-	return http.DefaultClient
-}
-
-func (c *Client) get(ctx context.Context, urlStr string) (*http.Response, error) {
-	req, err := http.NewRequest("GET", urlStr, nil)
-	if err != nil {
-		return nil, err
-	}
-	return c.do(ctx, req)
-}
-
-func (c *Client) head(ctx context.Context, urlStr string) (*http.Response, error) {
-	req, err := http.NewRequest("HEAD", urlStr, nil)
-	if err != nil {
-		return nil, err
-	}
-	return c.do(ctx, req)
-}
-
-func (c *Client) post(ctx context.Context, urlStr, contentType string, body io.Reader) (*http.Response, error) {
-	req, err := http.NewRequest("POST", urlStr, body)
-	if err != nil {
-		return nil, err
-	}
-	req.Header.Set("Content-Type", contentType)
-	return c.do(ctx, req)
-}
-
-func (c *Client) do(ctx context.Context, req *http.Request) (*http.Response, error) {
-	res, err := c.httpClient().Do(req.WithContext(ctx))
-	if err != nil {
-		select {
-		case <-ctx.Done():
-			// Prefer the unadorned context error.
-			// (The acme package had tests assuming this, previously from ctxhttp's
-			// behavior, predating net/http supporting contexts natively)
-			// TODO(bradfitz): reconsider this in the future. But for now this
-			// requires no test updates.
-			return nil, ctx.Err()
-		default:
-			return nil, err
-		}
-	}
-	return res, nil
-}
-
 func (c *Client) fetchNonce(ctx context.Context, url string) (string, error) {
-	resp, err := c.head(ctx, url)
+	r, err := http.NewRequest("HEAD", url, nil)
+	if err != nil {
+		return "", err
+	}
+	resp, err := c.doNoRetry(ctx, r)
 	if err != nil {
 		return "", err
 	}
@@ -852,24 +727,6 @@
 	return cert, nil
 }
 
-// responseError creates an error of Error type from resp.
-func responseError(resp *http.Response) error {
-	// don't care if ReadAll returns an error:
-	// json.Unmarshal will fail in that case anyway
-	b, _ := ioutil.ReadAll(resp.Body)
-	e := &wireError{Status: resp.StatusCode}
-	if err := json.Unmarshal(b, e); err != nil {
-		// this is not a regular error response:
-		// populate detail with anything we received,
-		// e.Status will already contain HTTP response code value
-		e.Detail = string(b)
-		if e.Detail == "" {
-			e.Detail = resp.Status
-		}
-	}
-	return e.error(resp.Header)
-}
-
 // chainCert fetches CA certificate chain recursively by following "up" links.
 // Each recursive call increments the depth by 1, resulting in an error
 // if the recursion level reaches maxChainLen.
@@ -880,14 +737,11 @@
 		return nil, errors.New("acme: certificate chain is too deep")
 	}
 
-	res, err := c.get(ctx, url)
+	res, err := c.get(ctx, url, wantStatus(http.StatusOK))
 	if err != nil {
 		return nil, err
 	}
 	defer res.Body.Close()
-	if res.StatusCode != http.StatusOK {
-		return nil, responseError(res)
-	}
 	b, err := ioutil.ReadAll(io.LimitReader(res.Body, maxCertSize+1))
 	if err != nil {
 		return nil, err
@@ -932,65 +786,6 @@
 	return links
 }
 
-// sleeper returns a function that accepts the Retry-After HTTP header value
-// and an increment that's used with backoff to increasingly sleep on
-// consecutive calls until the context is done. If the Retry-After header
-// cannot be parsed, then backoff is used with a maximum sleep time of 10
-// seconds.
-func sleeper(ctx context.Context) func(ra string, inc int) error {
-	var count int
-	return func(ra string, inc int) error {
-		count += inc
-		d := backoff(count, 10*time.Second)
-		d = retryAfter(ra, d)
-		wakeup := time.NewTimer(d)
-		defer wakeup.Stop()
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-wakeup.C:
-			return nil
-		}
-	}
-}
-
-// retryAfter parses a Retry-After HTTP header value,
-// trying to convert v into an int (seconds) or use http.ParseTime otherwise.
-// It returns d if v cannot be parsed.
-func retryAfter(v string, d time.Duration) time.Duration {
-	if i, err := strconv.Atoi(v); err == nil {
-		return time.Duration(i) * time.Second
-	}
-	t, err := http.ParseTime(v)
-	if err != nil {
-		return d
-	}
-	return t.Sub(timeNow())
-}
-
-// backoff computes a duration after which an n+1 retry iteration should occur
-// using truncated exponential backoff algorithm.
-//
-// The n argument is always bounded between 0 and 30.
-// The max argument defines upper bound for the returned value.
-func backoff(n int, max time.Duration) time.Duration {
-	if n < 0 {
-		n = 0
-	}
-	if n > 30 {
-		n = 30
-	}
-	var d time.Duration
-	if x, err := rand.Int(rand.Reader, big.NewInt(1000)); err == nil {
-		d = time.Duration(x.Int64()) * time.Millisecond
-	}
-	d += time.Duration(1<<uint(n)) * time.Second
-	if d > max {
-		return max
-	}
-	return d
-}
-
 // keyAuth generates a key authorization string for a given token.
 func keyAuth(pub crypto.PublicKey, token string) (string, error) {
 	th, err := JWKThumbprint(pub)
diff --git a/acme/acme_test.go b/acme/acme_test.go
index 63cb79b..0cd5cb7 100644
--- a/acme/acme_test.go
+++ b/acme/acme_test.go
@@ -15,7 +15,6 @@
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
-	"io/ioutil"
 	"math/big"
 	"net/http"
 	"net/http/httptest"
@@ -485,88 +484,37 @@
 }
 
 func TestWaitAuthorization(t *testing.T) {
-	var count int
-	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		count++
-		w.Header().Set("Retry-After", "0")
-		if count > 1 {
-			fmt.Fprintf(w, `{"status":"valid"}`)
-			return
+	t.Run("wait loop", func(t *testing.T) {
+		var count int
+		authz, err := runWaitAuthorization(context.Background(), t, func(w http.ResponseWriter, r *http.Request) {
+			count++
+			w.Header().Set("Retry-After", "0")
+			if count > 1 {
+				fmt.Fprintf(w, `{"status":"valid"}`)
+				return
+			}
+			fmt.Fprintf(w, `{"status":"pending"}`)
+		})
+		if err != nil {
+			t.Fatalf("non-nil error: %v", err)
 		}
-		fmt.Fprintf(w, `{"status":"pending"}`)
-	}))
-	defer ts.Close()
-
-	type res struct {
-		authz *Authorization
-		err   error
-	}
-	done := make(chan res)
-	defer close(done)
-	go func() {
-		var client Client
-		a, err := client.WaitAuthorization(context.Background(), ts.URL)
-		done <- res{a, err}
-	}()
-
-	select {
-	case <-time.After(5 * time.Second):
-		t.Fatal("WaitAuthz took too long to return")
-	case res := <-done:
-		if res.err != nil {
-			t.Fatalf("res.err =  %v", res.err)
+		if authz == nil {
+			t.Fatal("authz is nil")
 		}
-		if res.authz == nil {
-			t.Fatal("res.authz is nil")
-		}
-	}
-}
-
-func TestWaitAuthorizationInvalid(t *testing.T) {
-	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		fmt.Fprintf(w, `{"status":"invalid"}`)
-	}))
-	defer ts.Close()
-
-	res := make(chan error)
-	defer close(res)
-	go func() {
-		var client Client
-		_, err := client.WaitAuthorization(context.Background(), ts.URL)
-		res <- err
-	}()
-
-	select {
-	case <-time.After(3 * time.Second):
-		t.Fatal("WaitAuthz took too long to return")
-	case err := <-res:
-		if err == nil {
-			t.Error("err is nil")
-		}
+	})
+	t.Run("invalid status", func(t *testing.T) {
+		_, err := runWaitAuthorization(context.Background(), t, func(w http.ResponseWriter, r *http.Request) {
+			fmt.Fprintf(w, `{"status":"invalid"}`)
+		})
 		if _, ok := err.(*AuthorizationError); !ok {
-			t.Errorf("err is %T; want *AuthorizationError", err)
+			t.Errorf("err is %v (%T); want non-nil *AuthorizationError", err, err)
 		}
-	}
-}
-
-func TestWaitAuthorizationClientError(t *testing.T) {
-	const code = http.StatusBadRequest
-	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(code)
-	}))
-	defer ts.Close()
-
-	ch := make(chan error, 1)
-	go func() {
-		var client Client
-		_, err := client.WaitAuthorization(context.Background(), ts.URL)
-		ch <- err
-	}()
-
-	select {
-	case <-time.After(3 * time.Second):
-		t.Fatal("WaitAuthz took too long to return")
-	case err := <-ch:
+	})
+	t.Run("non-retriable error", func(t *testing.T) {
+		const code = http.StatusBadRequest
+		_, err := runWaitAuthorization(context.Background(), t, func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(code)
+		})
 		res, ok := err.(*Error)
 		if !ok {
 			t.Fatalf("err is %v (%T); want a non-nil *Error", err, err)
@@ -574,34 +522,60 @@
 		if res.StatusCode != code {
 			t.Errorf("res.StatusCode = %d; want %d", res.StatusCode, code)
 		}
+	})
+	for _, code := range []int{http.StatusTooManyRequests, http.StatusInternalServerError} {
+		t.Run(fmt.Sprintf("retriable %d error", code), func(t *testing.T) {
+			var count int
+			authz, err := runWaitAuthorization(context.Background(), t, func(w http.ResponseWriter, r *http.Request) {
+				count++
+				w.Header().Set("Retry-After", "0")
+				if count > 1 {
+					fmt.Fprintf(w, `{"status":"valid"}`)
+					return
+				}
+				w.WriteHeader(code)
+			})
+			if err != nil {
+				t.Fatalf("non-nil error: %v", err)
+			}
+			if authz == nil {
+				t.Fatal("authz is nil")
+			}
+		})
 	}
-}
-
-func TestWaitAuthorizationCancel(t *testing.T) {
-	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Retry-After", "60")
-		fmt.Fprintf(w, `{"status":"pending"}`)
-	}))
-	defer ts.Close()
-
-	res := make(chan error)
-	defer close(res)
-	go func() {
-		var client Client
+	t.Run("context cancel", func(t *testing.T) {
 		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
 		defer cancel()
-		_, err := client.WaitAuthorization(ctx, ts.URL)
-		res <- err
-	}()
-
-	select {
-	case <-time.After(time.Second):
-		t.Fatal("WaitAuthz took too long to return")
-	case err := <-res:
+		_, err := runWaitAuthorization(ctx, t, func(w http.ResponseWriter, r *http.Request) {
+			w.Header().Set("Retry-After", "60")
+			fmt.Fprintf(w, `{"status":"pending"}`)
+		})
 		if err == nil {
 			t.Error("err is nil")
 		}
+	})
+}
+func runWaitAuthorization(ctx context.Context, t *testing.T, h http.HandlerFunc) (*Authorization, error) {
+	t.Helper()
+	ts := httptest.NewServer(h)
+	defer ts.Close()
+	type res struct {
+		authz *Authorization
+		err   error
 	}
+	ch := make(chan res, 1)
+	go func() {
+		var client Client
+		a, err := client.WaitAuthorization(ctx, ts.URL)
+		ch <- res{a, err}
+	}()
+	select {
+	case <-time.After(3 * time.Second):
+		t.Fatal("WaitAuthorization took too long to return")
+	case v := <-ch:
+		return v.authz, v.err
+	}
+	panic("runWaitAuthorization: out of select")
 }
 
 func TestRevokeAuthorization(t *testing.T) {
@@ -628,7 +602,7 @@
 				t.Errorf("req.Delete is false")
 			}
 		case "/2":
-			w.WriteHeader(http.StatusInternalServerError)
+			w.WriteHeader(http.StatusBadRequest)
 		}
 	}))
 	defer ts.Close()
@@ -849,7 +823,7 @@
 	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if count < 1 {
 			w.Header().Set("Retry-After", "0")
-			w.WriteHeader(http.StatusAccepted)
+			w.WriteHeader(http.StatusTooManyRequests)
 			count++
 			return
 		}
@@ -1096,44 +1070,6 @@
 	}
 }
 
-func TestRetryPostJWS(t *testing.T) {
-	var count int
-	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		count++
-		w.Header().Set("Replay-Nonce", fmt.Sprintf("nonce%d", count))
-		if r.Method == "HEAD" {
-			// We expect the client to do 2 head requests to fetch
-			// nonces, one to start and another after getting badNonce
-			return
-		}
-
-		head, err := decodeJWSHead(r)
-		if err != nil {
-			t.Errorf("decodeJWSHead: %v", err)
-		} else if head.Nonce == "" {
-			t.Error("head.Nonce is empty")
-		} else if head.Nonce == "nonce1" {
-			// return a badNonce error to force the call to retry
-			w.WriteHeader(http.StatusBadRequest)
-			w.Write([]byte(`{"type":"urn:ietf:params:acme:error:badNonce"}`))
-			return
-		}
-		// Make client.Authorize happy; we're not testing its result.
-		w.WriteHeader(http.StatusCreated)
-		w.Write([]byte(`{"status":"valid"}`))
-	}))
-	defer ts.Close()
-
-	client := Client{Key: testKey, dir: &Directory{AuthzURL: ts.URL}}
-	// This call will fail with badNonce, causing a retry
-	if _, err := client.Authorize(context.Background(), "example.com"); err != nil {
-		t.Errorf("client.Authorize 1: %v", err)
-	}
-	if count != 4 {
-		t.Errorf("total requests count: %d; want 4", count)
-	}
-}
-
 func TestLinkHeader(t *testing.T) {
 	h := http.Header{"Link": {
 		`<https://example.com/acme/new-authz>;rel="next"`,
@@ -1157,37 +1093,6 @@
 	}
 }
 
-func TestErrorResponse(t *testing.T) {
-	s := `{
-		"status": 400,
-		"type": "urn:acme:error:xxx",
-		"detail": "text"
-	}`
-	res := &http.Response{
-		StatusCode: 400,
-		Status:     "400 Bad Request",
-		Body:       ioutil.NopCloser(strings.NewReader(s)),
-		Header:     http.Header{"X-Foo": {"bar"}},
-	}
-	err := responseError(res)
-	v, ok := err.(*Error)
-	if !ok {
-		t.Fatalf("err = %+v (%T); want *Error type", err, err)
-	}
-	if v.StatusCode != 400 {
-		t.Errorf("v.StatusCode = %v; want 400", v.StatusCode)
-	}
-	if v.ProblemType != "urn:acme:error:xxx" {
-		t.Errorf("v.ProblemType = %q; want urn:acme:error:xxx", v.ProblemType)
-	}
-	if v.Detail != "text" {
-		t.Errorf("v.Detail = %q; want text", v.Detail)
-	}
-	if !reflect.DeepEqual(v.Header, res.Header) {
-		t.Errorf("v.Header = %+v; want %+v", v.Header, res.Header)
-	}
-}
-
 func TestTLSSNI01ChallengeCert(t *testing.T) {
 	const (
 		token = "evaGxfADs6pSRb2LAv9IZf17Dt3juxGJ-PCt92wr-oA"
@@ -1353,28 +1258,3 @@
 		t.Errorf("val = %q; want %q", val, value)
 	}
 }
-
-func TestBackoff(t *testing.T) {
-	tt := []struct{ min, max time.Duration }{
-		{time.Second, 2 * time.Second},
-		{2 * time.Second, 3 * time.Second},
-		{4 * time.Second, 5 * time.Second},
-		{8 * time.Second, 9 * time.Second},
-	}
-	for i, test := range tt {
-		d := backoff(i, time.Minute)
-		if d < test.min || test.max < d {
-			t.Errorf("%d: d = %v; want between %v and %v", i, d, test.min, test.max)
-		}
-	}
-
-	min, max := time.Second, 2*time.Second
-	if d := backoff(-1, time.Minute); d < min || max < d {
-		t.Errorf("d = %v; want between %v and %v", d, min, max)
-	}
-
-	bound := 10 * time.Second
-	if d := backoff(100, bound); d != bound {
-		t.Errorf("d = %v; want %v", d, bound)
-	}
-}
diff --git a/acme/http.go b/acme/http.go
new file mode 100644
index 0000000..56ba53a
--- /dev/null
+++ b/acme/http.go
@@ -0,0 +1,276 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package acme
+
+import (
+	"bytes"
+	"context"
+	"crypto"
+	"crypto/rand"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"math/big"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// retryTimer encapsulates common logic for retrying unsuccessful requests.
+// It is not safe for concurrent use.
+type retryTimer struct {
+	// backoffFn provides backoff delay sequence for retries.
+	// See Client.RetryBackoff doc comment.
+	backoffFn func(n int, r *http.Request, res *http.Response) time.Duration
+	// n is the current retry attempt.
+	n int
+}
+
+func (t *retryTimer) inc() {
+	t.n++
+}
+
+// backoff pauses the current goroutine as described in Client.RetryBackoff.
+func (t *retryTimer) backoff(ctx context.Context, r *http.Request, res *http.Response) error {
+	d := t.backoffFn(t.n, r, res)
+	if d <= 0 {
+		return fmt.Errorf("acme: no more retries for %s; tried %d time(s)", r.URL, t.n)
+	}
+	wakeup := time.NewTimer(d)
+	defer wakeup.Stop()
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-wakeup.C:
+		return nil
+	}
+}
+
+func (c *Client) retryTimer() *retryTimer {
+	f := c.RetryBackoff
+	if f == nil {
+		f = defaultBackoff
+	}
+	return &retryTimer{backoffFn: f}
+}
+
+// defaultBackoff provides default Client.RetryBackoff implementation
+// using a truncated exponential backoff algorithm,
+// as described in Client.RetryBackoff.
+//
+// The n argument is always bounded between 1 and 30.
+// The returned value is always greater than 0.
+func defaultBackoff(n int, r *http.Request, res *http.Response) time.Duration {
+	const max = 10 * time.Second
+	var jitter time.Duration
+	if x, err := rand.Int(rand.Reader, big.NewInt(1000)); err == nil {
+		// Set the minimum to 1ms to avoid a case where
+		// an invalid Retry-After value is parsed into 0 below,
+		// resulting in the 0 returned value which would unintentionally
+		// stop the retries.
+		jitter = (1 + time.Duration(x.Int64())) * time.Millisecond
+	}
+	if v, ok := res.Header["Retry-After"]; ok {
+		return retryAfter(v[0]) + jitter
+	}
+
+	if n < 1 {
+		n = 1
+	}
+	if n > 30 {
+		n = 30
+	}
+	d := time.Duration(1<<uint(n-1))*time.Second + jitter
+	if d > max {
+		return max
+	}
+	return d
+}
+
+// retryAfter parses a Retry-After HTTP header value,
+// trying to convert v into an int (seconds) or use http.ParseTime otherwise.
+// It returns zero value if v cannot be parsed.
+func retryAfter(v string) time.Duration {
+	if i, err := strconv.Atoi(v); err == nil {
+		return time.Duration(i) * time.Second
+	}
+	t, err := http.ParseTime(v)
+	if err != nil {
+		return 0
+	}
+	return t.Sub(timeNow())
+}
+
+// resOkay is a function that reports whether the provided response is okay.
+// It is expected to keep the response body unread.
+type resOkay func(*http.Response) bool
+
+// wantStatus returns a function which reports whether the code
+// matches the status code of a response.
+func wantStatus(codes ...int) resOkay {
+	return func(res *http.Response) bool {
+		for _, code := range codes {
+			if code == res.StatusCode {
+				return true
+			}
+		}
+		return false
+	}
+}
+
+// get issues an unsigned GET request to the specified URL.
+// It returns a non-error value only when ok reports true.
+//
+// get retries unsuccessful attempts according to c.RetryBackoff
+// until the context is done or a non-retriable error is received.
+func (c *Client) get(ctx context.Context, url string, ok resOkay) (*http.Response, error) {
+	retry := c.retryTimer()
+	for {
+		req, err := http.NewRequest("GET", url, nil)
+		if err != nil {
+			return nil, err
+		}
+		res, err := c.doNoRetry(ctx, req)
+		switch {
+		case err != nil:
+			return nil, err
+		case ok(res):
+			return res, nil
+		case isRetriable(res.StatusCode):
+			res.Body.Close()
+			retry.inc()
+			if err := retry.backoff(ctx, req, res); err != nil {
+				return nil, err
+			}
+		default:
+			defer res.Body.Close()
+			return nil, responseError(res)
+		}
+	}
+}
+
+// post issues a signed POST request in JWS format using the provided key
+// to the specified URL.
+// It returns a non-error value only when ok reports true.
+//
+// post retries unsuccessful attempts according to c.RetryBackoff
+// until the context is done or a non-retriable error is received.
+// It uses postNoRetry to make individual requests.
+func (c *Client) post(ctx context.Context, key crypto.Signer, url string, body interface{}, ok resOkay) (*http.Response, error) {
+	retry := c.retryTimer()
+	for {
+		res, req, err := c.postNoRetry(ctx, key, url, body)
+		if err != nil {
+			return nil, err
+		}
+		if ok(res) {
+			return res, nil
+		}
+		err = responseError(res)
+		res.Body.Close()
+		switch {
+		// Check for bad nonce before isRetriable because it may have been returned
+		// with an unretriable response code such as 400 Bad Request.
+		case isBadNonce(err):
+			// Consider any previously stored nonce values to be invalid.
+			c.clearNonces()
+		case !isRetriable(res.StatusCode):
+			return nil, err
+		}
+		retry.inc()
+		if err := retry.backoff(ctx, req, res); err != nil {
+			return nil, err
+		}
+	}
+}
+
+// postNoRetry signs the body with the given key and POSTs it to the provided url.
+// The body argument must be JSON-serializable.
+// It is used by c.post to retry unsuccessful attempts.
+func (c *Client) postNoRetry(ctx context.Context, key crypto.Signer, url string, body interface{}) (*http.Response, *http.Request, error) {
+	nonce, err := c.popNonce(ctx, url)
+	if err != nil {
+		return nil, nil, err
+	}
+	b, err := jwsEncodeJSON(body, key, nonce)
+	if err != nil {
+		return nil, nil, err
+	}
+	req, err := http.NewRequest("POST", url, bytes.NewReader(b))
+	if err != nil {
+		return nil, nil, err
+	}
+	req.Header.Set("Content-Type", "application/jose+json")
+	res, err := c.doNoRetry(ctx, req)
+	if err != nil {
+		return nil, nil, err
+	}
+	c.addNonce(res.Header)
+	return res, req, nil
+}
+
+// doNoRetry issues a request req, replacing its context (if any) with ctx.
+func (c *Client) doNoRetry(ctx context.Context, req *http.Request) (*http.Response, error) {
+	res, err := c.httpClient().Do(req.WithContext(ctx))
+	if err != nil {
+		select {
+		case <-ctx.Done():
+			// Prefer the unadorned context error.
+			// (The acme package had tests assuming this, previously from ctxhttp's
+			// behavior, predating net/http supporting contexts natively)
+			// TODO(bradfitz): reconsider this in the future. But for now this
+			// requires no test updates.
+			return nil, ctx.Err()
+		default:
+			return nil, err
+		}
+	}
+	return res, nil
+}
+
+func (c *Client) httpClient() *http.Client {
+	if c.HTTPClient != nil {
+		return c.HTTPClient
+	}
+	return http.DefaultClient
+}
+
+// isBadNonce reports whether err is an ACME "badnonce" error.
+func isBadNonce(err error) bool {
+	// According to the spec badNonce is urn:ietf:params:acme:error:badNonce.
+	// However, ACME servers in the wild return their versions of the error.
+	// See https://tools.ietf.org/html/draft-ietf-acme-acme-02#section-5.4
+	// and https://github.com/letsencrypt/boulder/blob/0e07eacb/docs/acme-divergences.md#section-66.
+	ae, ok := err.(*Error)
+	return ok && strings.HasSuffix(strings.ToLower(ae.ProblemType), ":badnonce")
+}
+
+// isRetriable reports whether a request can be retried
+// based on the response status code.
+//
+// Note that a "bad nonce" error is returned with a non-retriable 400 Bad Request code.
+// Callers should parse the response and check with isBadNonce.
+func isRetriable(code int) bool {
+	return code <= 399 || code >= 500 || code == http.StatusTooManyRequests
+}
+
+// responseError creates an error of Error type from resp.
+func responseError(resp *http.Response) error {
+	// don't care if ReadAll returns an error:
+	// json.Unmarshal will fail in that case anyway
+	b, _ := ioutil.ReadAll(resp.Body)
+	e := &wireError{Status: resp.StatusCode}
+	if err := json.Unmarshal(b, e); err != nil {
+		// this is not a regular error response:
+		// populate detail with anything we received,
+		// e.Status will already contain HTTP response code value
+		e.Detail = string(b)
+		if e.Detail == "" {
+			e.Detail = resp.Status
+		}
+	}
+	return e.error(resp.Header)
+}
diff --git a/acme/http_test.go b/acme/http_test.go
new file mode 100644
index 0000000..c9ce1d6
--- /dev/null
+++ b/acme/http_test.go
@@ -0,0 +1,158 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package acme
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"reflect"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestDefaultBackoff(t *testing.T) {
+	tt := []struct {
+		nretry     int
+		retryAfter string        // Retry-After header
+		out        time.Duration // expected min; max = min + jitter
+	}{
+		{-1, "", time.Second},       // verify the lower bound is 1
+		{0, "", time.Second},        // verify the lower bound is 1
+		{100, "", 10 * time.Second}, // verify the ceiling
+		{1, "3600", time.Hour},      // verify the header value is used
+		{1, "", 1 * time.Second},
+		{2, "", 2 * time.Second},
+		{3, "", 4 * time.Second},
+		{4, "", 8 * time.Second},
+	}
+	for i, test := range tt {
+		r := httptest.NewRequest("GET", "/", nil)
+		resp := &http.Response{Header: http.Header{}}
+		if test.retryAfter != "" {
+			resp.Header.Set("Retry-After", test.retryAfter)
+		}
+		d := defaultBackoff(test.nretry, r, resp)
+		max := test.out + time.Second // + max jitter
+		if d < test.out || max < d {
+			t.Errorf("%d: defaultBackoff(%v) = %v; want between %v and %v", i, test.nretry, d, test.out, max)
+		}
+	}
+}
+
+func TestErrorResponse(t *testing.T) {
+	s := `{
+		"status": 400,
+		"type": "urn:acme:error:xxx",
+		"detail": "text"
+	}`
+	res := &http.Response{
+		StatusCode: 400,
+		Status:     "400 Bad Request",
+		Body:       ioutil.NopCloser(strings.NewReader(s)),
+		Header:     http.Header{"X-Foo": {"bar"}},
+	}
+	err := responseError(res)
+	v, ok := err.(*Error)
+	if !ok {
+		t.Fatalf("err = %+v (%T); want *Error type", err, err)
+	}
+	if v.StatusCode != 400 {
+		t.Errorf("v.StatusCode = %v; want 400", v.StatusCode)
+	}
+	if v.ProblemType != "urn:acme:error:xxx" {
+		t.Errorf("v.ProblemType = %q; want urn:acme:error:xxx", v.ProblemType)
+	}
+	if v.Detail != "text" {
+		t.Errorf("v.Detail = %q; want text", v.Detail)
+	}
+	if !reflect.DeepEqual(v.Header, res.Header) {
+		t.Errorf("v.Header = %+v; want %+v", v.Header, res.Header)
+	}
+}
+
+func TestPostWithRetries(t *testing.T) {
+	var count int
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		count++
+		w.Header().Set("Replay-Nonce", fmt.Sprintf("nonce%d", count))
+		if r.Method == "HEAD" {
+			// We expect the client to do 2 head requests to fetch
+			// nonces, one to start and another after getting badNonce
+			return
+		}
+
+		head, err := decodeJWSHead(r)
+		if err != nil {
+			t.Errorf("decodeJWSHead: %v", err)
+		} else if head.Nonce == "" {
+			t.Error("head.Nonce is empty")
+		} else if head.Nonce == "nonce1" {
+			// return a badNonce error to force the call to retry
+			w.WriteHeader(http.StatusBadRequest)
+			w.Write([]byte(`{"type":"urn:ietf:params:acme:error:badNonce"}`))
+			return
+		}
+		// Make client.Authorize happy; we're not testing its result.
+		w.WriteHeader(http.StatusCreated)
+		w.Write([]byte(`{"status":"valid"}`))
+	}))
+	defer ts.Close()
+
+	client := &Client{Key: testKey, dir: &Directory{AuthzURL: ts.URL}}
+	// This call will fail with badNonce, causing a retry
+	if _, err := client.Authorize(context.Background(), "example.com"); err != nil {
+		t.Errorf("client.Authorize 1: %v", err)
+	}
+	if count != 4 {
+		t.Errorf("total requests count: %d; want 4", count)
+	}
+}
+
+func TestRetryBackoffArgs(t *testing.T) {
+	const resCode = http.StatusInternalServerError
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Replay-Nonce", "test-nonce")
+		w.WriteHeader(resCode)
+	}))
+	defer ts.Close()
+
+	// Canceled in backoff.
+	ctx, cancel := context.WithCancel(context.Background())
+
+	var nretry int
+	backoff := func(n int, r *http.Request, res *http.Response) time.Duration {
+		nretry++
+		if n != nretry {
+			t.Errorf("n = %d; want %d", n, nretry)
+		}
+		if nretry == 3 {
+			cancel()
+		}
+
+		if r == nil {
+			t.Error("r is nil")
+		}
+		if res.StatusCode != resCode {
+			t.Errorf("res.StatusCode = %d; want %d", res.StatusCode, resCode)
+		}
+		return time.Millisecond
+	}
+
+	client := &Client{
+		Key:          testKey,
+		RetryBackoff: backoff,
+		dir:          &Directory{AuthzURL: ts.URL},
+	}
+	if _, err := client.Authorize(ctx, "example.com"); err == nil {
+		t.Error("err is nil")
+	}
+	if nretry != 3 {
+		t.Errorf("nretry = %d; want 3", nretry)
+	}
+}
diff --git a/acme/types.go b/acme/types.go
index 3e19974..00457c6 100644
--- a/acme/types.go
+++ b/acme/types.go
@@ -104,7 +104,7 @@
 	if e.Header == nil {
 		return 0, true
 	}
-	return retryAfter(e.Header.Get("Retry-After"), 0), true
+	return retryAfter(e.Header.Get("Retry-After")), true
 }
 
 // Account is a user account. It is associated with a private key.