internal/httpsfv: add support for consuming Display String and Date types

This CL adds the consumeDisplayString() and consumeDate() functions,
meaning that we can now consume all types that are defined within RFC 9651.
In a future CL, we will add the corresponding parsing functions for all the
types, so callers of this package will not have to implement their own
parsing / formatting.

For golang/go#75500

Change-Id: I90aa132d3ab1385b310d821997da13a095cd71bc
Reviewed-on: https://go-review.googlesource.com/c/net/+/708015
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Nicholas Husin <husin@google.com>
diff --git a/internal/httpsfv/httpsfv.go b/internal/httpsfv/httpsfv.go
index d821e81..7567d05 100644
--- a/internal/httpsfv/httpsfv.go
+++ b/internal/httpsfv/httpsfv.go
@@ -8,6 +8,7 @@
 
 import (
 	"slices"
+	"unicode/utf8"
 )
 
 func isLCAlpha(b byte) bool {
@@ -48,6 +49,27 @@
 	return i
 }
 
+// decOctetHex decodes two hex digits (0-9, lowercase a-f) into one byte; see https://www.rfc-editor.org/rfc/rfc4648#section-8.
+func decOctetHex(ch1, ch2 byte) (ch byte, ok bool) {
+	decBase16 := func(in byte) (out byte, ok bool) {
+		if !isDigit(in) && !(in >= 'a' && in <= 'f') {
+			return 0, false
+		}
+		if isDigit(in) {
+			return in - '0', true
+		}
+		return in - 'a' + 10, true
+	}
+
+	if ch1, ok = decBase16(ch1); !ok {
+		return 0, ok
+	}
+	if ch2, ok = decBase16(ch2); !ok {
+		return 0, ok
+	}
+	return ch1<<4 | ch2, true
+}
+
 // TODO(nsh): Implement corresponding parse functions for all consume functions
 // that exists.
 
@@ -409,14 +431,85 @@
 	return "", s, false
 }
 
+// consumeDate consumes "@" plus a number without '.' from the start of s; see https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-date.
+func consumeDate(s string) (consumed, rest string, ok bool) {
+	if len(s) == 0 || s[0] != '@' {
+		return "", s, false
+	}
+	if _, rest, ok = consumeIntegerOrDecimal(s[1:]); !ok {
+		return "", s, ok
+	}
+	consumed = s[:len(s)-len(rest)]
+	if slices.Contains([]byte(consumed), '.') {
+		return "", s, false
+	}
+	return consumed, rest, ok
+}
+
+// consumeDisplayString consumes a Display String at the start of s. See
+// https://www.rfc-editor.org/rfc/rfc9651.html#name-parsing-a-display-string.
+func consumeDisplayString(s string) (consumed, rest string, ok bool) {
+	// To prevent excessive allocation, especially when input is large, we
+	// maintain a buffer of 4 bytes to keep track of the last rune we
+	// encounter. This way, we can validate that the display string conforms to
+	// UTF-8 without actually building the whole string.
+	var lastRune [4]byte
+	var runeLen int
+	isPartOfValidRune := func(ch byte) bool {
+		lastRune[runeLen] = ch
+		runeLen++
+		if utf8.FullRune(lastRune[:runeLen]) {
+			r, size := utf8.DecodeRune(lastRune[:runeLen])
+			// Invalid input decodes as (RuneError, 1); a wider RuneError is a
+			// correctly encoded U+FFFD, which is valid UTF-8 and is accepted.
+			if r == utf8.RuneError && size == 1 {
+				return false
+			}
+			copy(lastRune[:], lastRune[size:runeLen])
+			runeLen -= size
+			return true
+		}
+		return runeLen <= 4
+	}
+
+	if len(s) <= 1 || s[:2] != `%"` {
+		return "", s, false
+	}
+	i := 2
+	for i < len(s) {
+		ch := s[i]
+		if !isVChar(ch) && !isSP(ch) {
+			return "", s, false
+		}
+		switch ch {
+		case '"':
+			if runeLen > 0 {
+				return "", s, false
+			}
+			return s[:i+1], s[i+1:], true
+		case '%':
+			if i+2 >= len(s) {
+				return "", s, false
+			}
+			if ch, ok = decOctetHex(s[i+1], s[i+2]); !ok || !isPartOfValidRune(ch) {
+				return "", s, false
+			}
+			i += 3
+		default:
+			if ok = isPartOfValidRune(ch); !ok {
+				return "", s, ok
+			}
+			i++
+		}
+	}
+	return "", s, false
+}
+
 // https://www.rfc-editor.org/rfc/rfc9651.html#parse-bare-item.
 func consumeBareItem(s string) (consumed, rest string, ok bool) {
 	if len(s) == 0 {
 		return "", s, false
 	}
-
-	// TODO(nsh): This is currently only up to date with RFC 8941. Implement
-	// Date and Display string for full feature parity with RFC 9651.
 	ch := s[0]
 	switch {
 	case ch == '-' || isDigit(ch):
@@ -429,6 +522,10 @@
 		return consumeByteSequence(s)
 	case ch == '?':
 		return consumeBoolean(s)
+	case ch == '@':
+		return consumeDate(s)
+	case ch == '%':
+		return consumeDisplayString(s)
 	default:
 		return "", s, false
 	}
diff --git a/internal/httpsfv/httpsfv_test.go b/internal/httpsfv/httpsfv_test.go
index a1ecec7..ab1cd0f 100644
--- a/internal/httpsfv/httpsfv_test.go
+++ b/internal/httpsfv/httpsfv_test.go
@@ -832,3 +832,175 @@
 		}
 	}
 }
+
+func TestConsumeDate(t *testing.T) {
+	tests := []struct {
+		name   string
+		in     string
+		want   string
+		wantOk bool
+	}{
+		{
+			name:   "valid zero date",
+			in:     "@0",
+			want:   "@0",
+			wantOk: true,
+		},
+		{
+			name:   "valid positive date",
+			in:     "@1659578233",
+			want:   "@1659578233",
+			wantOk: true,
+		},
+		{
+			name:   "valid negative date",
+			in:     "@-1659578233",
+			want:   "@-1659578233",
+			wantOk: true,
+		},
+		{
+			name:   "valid large date",
+			in:     "@25340221440",
+			want:   "@25340221440",
+			wantOk: true,
+		},
+		{
+			name:   "valid small date",
+			in:     "@-62135596800",
+			want:   "@-62135596800",
+			wantOk: true,
+		},
+		{
+			name: "invalid decimal date",
+			in:   "@1.2",
+		},
+		{
+			name:   "valid date with more content after",
+			in:     "@1659578233, foo;bar",
+			want:   "@1659578233",
+			wantOk: true,
+		},
+	}
+
+	for _, tc := range tests {
+		got, gotRest, ok := consumeDate(tc.in)
+		if ok != tc.wantOk {
+			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
+		}
+		if tc.want != got {
+			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
+		}
+		if got+gotRest != tc.in {
+			t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
+		}
+	}
+}
+
+func TestConsumeDisplayString(t *testing.T) {
+	tests := []struct {
+		name   string
+		in     string
+		want   string
+		wantOk bool
+	}{
+		{
+			name:   "valid ascii string",
+			in:     "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
+			want:   "%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\"",
+			wantOk: true,
+		},
+		{
+			name:   "valid lowercase non-ascii string",
+			in:     `%"f%c3%bc%c3%bc"`,
+			want:   `%"f%c3%bc%c3%bc"`,
+			wantOk: true,
+		},
+		{
+			name: "invalid uppercase non-ascii string",
+			in:   `%"f%C3%BC%C3%BC"`,
+		},
+		{
+			name: "invalid unqouted string",
+			in:   "%foo",
+		},
+		{
+			name: "invalid string missing initial quote",
+			in:   `%foo"`,
+		},
+		{
+			name: "invalid string missing closing quote",
+			in:   `%"foo`,
+		},
+		{
+			name: "invalid tab in string",
+			in:   "%\"\t\"",
+		},
+		{
+			name: "invalid newline in string",
+			in:   "%\"\n\"",
+		},
+		{
+			name: "invalid single quoted string",
+			in:   `%'foo'`,
+		},
+		{
+			name: "invalid string bad escaping",
+			in:   `%\"foo %a"`,
+		},
+		{
+			name:   "valid string with escaped quotes",
+			in:     `%"foo %22bar%22 \\ baz"`,
+			want:   `%"foo %22bar%22 \\ baz"`,
+			wantOk: true,
+		},
+		{
+			name: "invalid sequence id utf-8 string",
+			in:   `%"%a0%a1"`,
+		},
+		{
+			name: "invalid 2 bytes sequence utf-8 string",
+			in:   `%"%c3%28"`,
+		},
+		{
+			name: "invalid 3 bytes sequence utf-8 string",
+			in:   `%"%e2%28%a1"`,
+		},
+		{
+			name: "invalid 4 bytes sequence utf-8 string",
+			in:   `%"%f0%28%8c%28"`,
+		},
+		{
+			name: "invalid hex utf-8 string",
+			in:   `%"%g0%1w"`,
+		},
+		{
+			name:   "valid byte order mark in display string",
+			in:     `%"BOM: %ef%bb%bf"`,
+			want:   `%"BOM: %ef%bb%bf"`,
+			wantOk: true,
+		},
+		{
+			name:   "valid string with content after",
+			in:     `%"foo\nbar", foo;bar`,
+			want:   `%"foo\nbar"`,
+			wantOk: true,
+		},
+		{
+			name: "invalid unfinished 4 bytes rune",
+			in:   `%"%f0%9f%98"`,
+		},
+	}
+
+	for _, tc := range tests {
+		got, gotRest, ok := consumeDisplayString(tc.in)
+		if ok != tc.wantOk {
+			t.Fatalf("test %q: want ok to be %v, got: %v", tc.name, tc.wantOk, ok)
+		}
+		if tc.want != got {
+			t.Fatalf("test %q: mismatch.\n got: %#v\nwant: %#v\n", tc.name, got, tc.want)
+		}
+		if got+gotRest != tc.in {
+			t.Fatalf("test %q: %#v + %#v != %#v", tc.name, got, gotRest, tc.in)
+		}
+	}
+}