| // Copyright 2023 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build goexperiment.jsonv2 |
| |
| // Package jsonwire implements stateless functionality for handling JSON text. |
| package jsonwire |
| |
| import ( |
| "cmp" |
| "errors" |
| "strconv" |
| "strings" |
| "unicode" |
| "unicode/utf16" |
| "unicode/utf8" |
| ) |
| |
| // TrimSuffixWhitespace trims JSON from the end of b. |
| func TrimSuffixWhitespace(b []byte) []byte { |
| // NOTE: The arguments and logic are kept simple to keep this inlinable. |
| n := len(b) - 1 |
| for n >= 0 && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') { |
| n-- |
| } |
| return b[:n+1] |
| } |
| |
| // TrimSuffixString trims a valid JSON string at the end of b. |
| // The behavior is undefined if there is not a valid JSON string present. |
| func TrimSuffixString(b []byte) []byte { |
| // NOTE: The arguments and logic are kept simple to keep this inlinable. |
| if len(b) > 0 && b[len(b)-1] == '"' { |
| b = b[:len(b)-1] |
| } |
| for len(b) >= 2 && !(b[len(b)-1] == '"' && b[len(b)-2] != '\\') { |
| b = b[:len(b)-1] // trim all characters except an unescaped quote |
| } |
| if len(b) > 0 && b[len(b)-1] == '"' { |
| b = b[:len(b)-1] |
| } |
| return b |
| } |
| |
| // HasSuffixByte reports whether b ends with c. |
| func HasSuffixByte(b []byte, c byte) bool { |
| // NOTE: The arguments and logic are kept simple to keep this inlinable. |
| return len(b) > 0 && b[len(b)-1] == c |
| } |
| |
| // TrimSuffixByte removes c from the end of b if it is present. |
| func TrimSuffixByte(b []byte, c byte) []byte { |
| // NOTE: The arguments and logic are kept simple to keep this inlinable. |
| if len(b) > 0 && b[len(b)-1] == c { |
| return b[:len(b)-1] |
| } |
| return b |
| } |
| |
| // QuoteRune quotes the first rune in the input. |
| func QuoteRune[Bytes ~[]byte | ~string](b Bytes) string { |
| r, n := utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) |
| if r == utf8.RuneError && n == 1 { |
| return `'\x` + strconv.FormatUint(uint64(b[0]), 16) + `'` |
| } |
| return strconv.QuoteRune(r) |
| } |
| |
| // CompareUTF16 lexicographically compares x to y according |
| // to the UTF-16 codepoints of the UTF-8 encoded input strings. |
| // This implements the ordering specified in RFC 8785, section 3.2.3. |
| func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int { |
| // NOTE: This is an optimized, mostly allocation-free implementation |
| // of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the |
| // two implementations agree on the result of comparing any two strings. |
| isUTF16Self := func(r rune) bool { |
| return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF') |
| } |
| |
| for { |
| if len(x) == 0 || len(y) == 0 { |
| return cmp.Compare(len(x), len(y)) |
| } |
| |
| // ASCII fast-path. |
| if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf { |
| if x[0] != y[0] { |
| return cmp.Compare(x[0], y[0]) |
| } |
| x, y = x[1:], y[1:] |
| continue |
| } |
| |
| // Decode next pair of runes as UTF-8. |
| rx, nx := utf8.DecodeRuneInString(string(truncateMaxUTF8(x))) |
| ry, ny := utf8.DecodeRuneInString(string(truncateMaxUTF8(y))) |
| |
| selfx := isUTF16Self(rx) |
| selfy := isUTF16Self(ry) |
| switch { |
| // The x rune is a single UTF-16 codepoint, while |
| // the y rune is a surrogate pair of UTF-16 codepoints. |
| case selfx && !selfy: |
| ry, _ = utf16.EncodeRune(ry) |
| // The y rune is a single UTF-16 codepoint, while |
| // the x rune is a surrogate pair of UTF-16 codepoints. |
| case selfy && !selfx: |
| rx, _ = utf16.EncodeRune(rx) |
| } |
| if rx != ry { |
| return cmp.Compare(rx, ry) |
| } |
| |
| // Check for invalid UTF-8, in which case, |
| // we just perform a byte-for-byte comparison. |
| if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) { |
| if x[0] != y[0] { |
| return cmp.Compare(x[0], y[0]) |
| } |
| } |
| x, y = x[nx:], y[ny:] |
| } |
| } |
| |
| // truncateMaxUTF8 truncates b such it contains at least one rune. |
| // |
| // The utf8 package currently lacks generic variants, which complicates |
| // generic functions that operates on either []byte or string. |
| // As a hack, we always call the utf8 function operating on strings, |
| // but always truncate the input such that the result is identical. |
| // |
| // Example usage: |
| // |
| // utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) |
| // |
| // Converting a []byte to a string is stack allocated since |
| // truncateMaxUTF8 guarantees that the []byte is short. |
| func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes { |
| // TODO(https://go.dev/issue/56948): Remove this function and |
| // instead directly call generic utf8 functions wherever used. |
| if len(b) > utf8.UTFMax { |
| return b[:utf8.UTFMax] |
| } |
| return b |
| } |
| |
| // TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead. |
| var ErrInvalidUTF8 = errors.New("invalid UTF-8") |
| |
| func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error { |
| what := QuoteRune(prefix) |
| return errors.New("invalid character " + what + " " + where) |
| } |
| |
| func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error { |
| label := "escape sequence" |
| if len(what) > 6 { |
| label = "surrogate pair" |
| } |
| needEscape := strings.IndexFunc(string(what), func(r rune) bool { |
| return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r) |
| }) >= 0 |
| if needEscape { |
| return errors.New("invalid " + label + " " + strconv.Quote(string(what)) + " in string") |
| } else { |
| return errors.New("invalid " + label + " `" + string(what) + "` in string") |
| } |
| } |
| |
| // TruncatePointer optionally truncates the JSON pointer, |
| // enforcing that the length roughly does not exceed n. |
| func TruncatePointer(s string, n int) string { |
| if len(s) <= n { |
| return s |
| } |
| i := n / 2 |
| j := len(s) - n/2 |
| |
| // Avoid truncating a name if there are multiple names present. |
| if k := strings.LastIndexByte(s[:i], '/'); k > 0 { |
| i = k |
| } |
| if k := strings.IndexByte(s[j:], '/'); k >= 0 { |
| j += k + len("/") |
| } |
| |
| // Avoid truncation in the middle of a UTF-8 rune. |
| for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) { |
| i-- |
| } |
| for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) { |
| j++ |
| } |
| |
| // Determine the right middle fragment to use. |
| var middle string |
| switch strings.Count(s[i:j], "/") { |
| case 0: |
| middle = "…" |
| case 1: |
| middle = "…/…" |
| default: |
| middle = "…/…/…" |
| } |
| if strings.HasPrefix(s[i:j], "/") && middle != "…" { |
| middle = strings.TrimPrefix(middle, "…") |
| } |
| if strings.HasSuffix(s[i:j], "/") && middle != "…" { |
| middle = strings.TrimSuffix(middle, "…") |
| } |
| return s[:i] + middle + s[j:] |
| } |
| |
| func isInvalidUTF8(r rune, rn int) bool { |
| return r == utf8.RuneError && rn == 1 |
| } |