| // Copyright 2020 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build goexperiment.jsonv2 |
| |
| package jsontext |
| |
| import ( |
| "bytes" |
| "errors" |
| "io" |
| "slices" |
| "sync" |
| |
| "encoding/json/internal/jsonflags" |
| "encoding/json/internal/jsonwire" |
| ) |
| |
| // NOTE: Value is analogous to v1 json.RawMessage. |
| |
| // AppendFormat formats the JSON value in src and appends it to dst |
| // according to the specified options. |
| // See [Value.Format] for more details about the formatting behavior. |
| // |
| // The dst and src may overlap. |
| // If an error is reported, then the entirety of src is appended to dst. |
| func AppendFormat(dst, src []byte, opts ...Options) ([]byte, error) { |
| e := getBufferedEncoder(opts...) |
| defer putBufferedEncoder(e) |
| e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) |
| if err := e.s.WriteValue(src); err != nil { |
| return append(dst, src...), err |
| } |
| return append(dst, e.s.Buf...), nil |
| } |
| |
// Value holds the raw bytes of a single JSON value, which is one of:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - an entire JSON object (e.g., {"fizz":"buzz"} )
//   - an entire JSON array (e.g., [1,2,3] )
//
// Unlike [Token], a Value can represent entire array or object values.
// A Value may contain leading and/or trailing whitespace.
type Value []byte

// Clone returns a copy of v.
func (v Value) Clone() Value {
	dup := bytes.Clone([]byte(v))
	return Value(dup)
}

// String returns the string formatting of v.
// A nil Value is reported as "null".
func (v Value) String() string {
	if v != nil {
		return string(v)
	}
	return "null"
}
| |
| // IsValid reports whether the raw JSON value is syntactically valid |
| // according to the specified options. |
| // |
| // By default (if no options are specified), it validates according to RFC 7493. |
| // It verifies whether the input is properly encoded as UTF-8, |
| // that escape sequences within strings decode to valid Unicode codepoints, and |
| // that all names in each object are unique. |
| // It does not verify whether numbers are representable within the limits |
| // of any common numeric type (e.g., float64, int64, or uint64). |
| // |
| // Relevant options include: |
| // - [AllowDuplicateNames] |
| // - [AllowInvalidUTF8] |
| // |
| // All other options are ignored. |
| func (v Value) IsValid(opts ...Options) bool { |
| // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. |
| d := getBufferedDecoder(v, opts...) |
| defer putBufferedDecoder(d) |
| _, errVal := d.ReadValue() |
| _, errEOF := d.ReadToken() |
| return errVal == nil && errEOF == io.EOF |
| } |
| |
| // Format formats the raw JSON value in place. |
| // |
| // By default (if no options are specified), it validates according to RFC 7493 |
| // and produces the minimal JSON representation, where |
| // all whitespace is elided and JSON strings use the shortest encoding. |
| // |
| // Relevant options include: |
| // - [AllowDuplicateNames] |
| // - [AllowInvalidUTF8] |
| // - [EscapeForHTML] |
| // - [EscapeForJS] |
| // - [PreserveRawStrings] |
| // - [CanonicalizeRawInts] |
| // - [CanonicalizeRawFloats] |
| // - [ReorderRawObjects] |
| // - [SpaceAfterColon] |
| // - [SpaceAfterComma] |
| // - [Multiline] |
| // - [WithIndent] |
| // - [WithIndentPrefix] |
| // |
| // All other options are ignored. |
| // |
| // It is guaranteed to succeed if the value is valid according to the same options. |
| // If the value is already formatted, then the buffer is not mutated. |
| func (v *Value) Format(opts ...Options) error { |
| // TODO: Document support for [WithByteLimit] and [WithDepthLimit]. |
| return v.format(opts, nil) |
| } |
| |
| // format accepts two []Options to avoid the allocation appending them together. |
| // It is equivalent to v.Format(append(opts1, opts2...)...). |
| func (v *Value) format(opts1, opts2 []Options) error { |
| e := getBufferedEncoder(opts1...) |
| defer putBufferedEncoder(e) |
| e.s.Join(opts2...) |
| e.s.Flags.Set(jsonflags.OmitTopLevelNewline | 1) |
| if err := e.s.WriteValue(*v); err != nil { |
| return err |
| } |
| if !bytes.Equal(*v, e.s.Buf) { |
| *v = append((*v)[:0], e.s.Buf...) |
| } |
| return nil |
| } |
| |
| // Compact removes all whitespace from the raw JSON value. |
| // |
| // It does not reformat JSON strings or numbers to use any other representation. |
| // To maximize the set of JSON values that can be formatted, |
| // this permits values with duplicate names and invalid UTF-8. |
| // |
| // Compact is equivalent to calling [Value.Format] with the following options: |
| // - [AllowDuplicateNames](true) |
| // - [AllowInvalidUTF8](true) |
| // - [PreserveRawStrings](true) |
| // |
| // Any options specified by the caller are applied after the initial set |
| // and may deliberately override prior options. |
| func (v *Value) Compact(opts ...Options) error { |
| return v.format([]Options{ |
| AllowDuplicateNames(true), |
| AllowInvalidUTF8(true), |
| PreserveRawStrings(true), |
| }, opts) |
| } |
| |
| // Indent reformats the whitespace in the raw JSON value so that each element |
| // in a JSON object or array begins on a indented line according to the nesting. |
| // |
| // It does not reformat JSON strings or numbers to use any other representation. |
| // To maximize the set of JSON values that can be formatted, |
| // this permits values with duplicate names and invalid UTF-8. |
| // |
| // Indent is equivalent to calling [Value.Format] with the following options: |
| // - [AllowDuplicateNames](true) |
| // - [AllowInvalidUTF8](true) |
| // - [PreserveRawStrings](true) |
| // - [Multiline](true) |
| // |
| // Any options specified by the caller are applied after the initial set |
| // and may deliberately override prior options. |
| func (v *Value) Indent(opts ...Options) error { |
| return v.format([]Options{ |
| AllowDuplicateNames(true), |
| AllowInvalidUTF8(true), |
| PreserveRawStrings(true), |
| Multiline(true), |
| }, opts) |
| } |
| |
| // Canonicalize canonicalizes the raw JSON value according to the |
| // JSON Canonicalization Scheme (JCS) as defined by RFC 8785 |
| // where it produces a stable representation of a JSON value. |
| // |
| // JSON strings are formatted to use their minimal representation, |
| // JSON numbers are formatted as double precision numbers according |
| // to some stable serialization algorithm. |
| // JSON object members are sorted in ascending order by name. |
| // All whitespace is removed. |
| // |
| // The output stability is dependent on the stability of the application data |
| // (see RFC 8785, Appendix E). It cannot produce stable output from |
| // fundamentally unstable input. For example, if the JSON value |
| // contains ephemeral data (e.g., a frequently changing timestamp), |
| // then the value is still unstable regardless of whether this is called. |
| // |
| // Canonicalize is equivalent to calling [Value.Format] with the following options: |
| // - [CanonicalizeRawInts](true) |
| // - [CanonicalizeRawFloats](true) |
| // - [ReorderRawObjects](true) |
| // |
| // Any options specified by the caller are applied after the initial set |
| // and may deliberately override prior options. |
| // |
| // Note that JCS treats all JSON numbers as IEEE 754 double precision numbers. |
| // Any numbers with precision beyond what is representable by that form |
| // will lose their precision when canonicalized. For example, integer values |
| // beyond ±2⁵³ will lose their precision. To preserve the original representation |
| // of JSON integers, additionally set [CanonicalizeRawInts] to false: |
| // |
| // v.Canonicalize(jsontext.CanonicalizeRawInts(false)) |
| func (v *Value) Canonicalize(opts ...Options) error { |
| return v.format([]Options{ |
| CanonicalizeRawInts(true), |
| CanonicalizeRawFloats(true), |
| ReorderRawObjects(true), |
| }, opts) |
| } |
| |
| // MarshalJSON returns v as the JSON encoding of v. |
| // It returns the stored value as the raw JSON output without any validation. |
| // If v is nil, then this returns a JSON null. |
| func (v Value) MarshalJSON() ([]byte, error) { |
| // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON. |
| if v == nil { |
| return []byte("null"), nil |
| } |
| return v, nil |
| } |
| |
| // UnmarshalJSON sets v as the JSON encoding of b. |
| // It stores a copy of the provided raw JSON input without any validation. |
| func (v *Value) UnmarshalJSON(b []byte) error { |
| // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON. |
| if v == nil { |
| return errors.New("jsontext.Value: UnmarshalJSON on nil pointer") |
| } |
| *v = append((*v)[:0], b...) |
| return nil |
| } |
| |
| // Kind returns the starting token kind. |
| // For a valid value, this will never include '}' or ']'. |
| func (v Value) Kind() Kind { |
| if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 { |
| return Kind(v[0]).normalize() |
| } |
| return invalidKind |
| } |
| |
| const commaAndWhitespace = ", \n\r\t" |
| |
| type objectMember struct { |
| // name is the unquoted name. |
| name []byte // e.g., "name" |
| // buffer is the entirety of the raw JSON object member |
| // starting from right after the previous member (or opening '{') |
| // until right after the member value. |
| buffer []byte // e.g., `, \n\r\t"name": "value"` |
| } |
| |
| func (x objectMember) Compare(y objectMember) int { |
| if c := jsonwire.CompareUTF16(x.name, y.name); c != 0 { |
| return c |
| } |
| // With [AllowDuplicateNames] or [AllowInvalidUTF8], |
| // names could be identical, so also sort using the member value. |
| return jsonwire.CompareUTF16( |
| bytes.TrimLeft(x.buffer, commaAndWhitespace), |
| bytes.TrimLeft(y.buffer, commaAndWhitespace)) |
| } |
| |
| var objectMemberPool = sync.Pool{New: func() any { return new([]objectMember) }} |
| |
| func getObjectMembers() *[]objectMember { |
| ns := objectMemberPool.Get().(*[]objectMember) |
| *ns = (*ns)[:0] |
| return ns |
| } |
| func putObjectMembers(ns *[]objectMember) { |
| if cap(*ns) < 1<<10 { |
| clear(*ns) // avoid pinning name and buffer |
| objectMemberPool.Put(ns) |
| } |
| } |
| |
| // mustReorderObjects reorders in-place all object members in a JSON value, |
| // which must be valid otherwise it panics. |
| func mustReorderObjects(b []byte) { |
| // Obtain a buffered encoder just to use its internal buffer as |
| // a scratch buffer for reordering object members. |
| e2 := getBufferedEncoder() |
| defer putBufferedEncoder(e2) |
| |
| // Disable unnecessary checks to syntactically parse the JSON value. |
| d := getBufferedDecoder(b) |
| defer putBufferedDecoder(d) |
| d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) |
| mustReorderObjectsFromDecoder(d, &e2.s.Buf) // per RFC 8785, section 3.2.3 |
| } |
| |
// mustReorderObjectsFromDecoder recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// The scratch argument points to a reusable buffer in which the sorted members
// of an object are staged before being copied back over the original members.
// It is replaced by the staging buffer whenever more of it was used than
// the current length of *scratch.
//
// Pre-conditions:
//   - The value is valid (i.e., no decoder errors should ever occur).
//   - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
//   - Exactly one JSON value is read from the Decoder.
//   - All fully-parsed JSON objects are reordered by directly moving
//     the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func mustReorderObjectsFromDecoder(d *Decoder, scratch *[]byte) {
	// The first token decides how the value is handled: objects are collected
	// and sorted, arrays are only recursed into, and anything else is
	// left as is.
	switch tok, err := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		// The member list is handed back to the pool when this call returns.
		members := getObjectMembers()
		defer putObjectMembers(members)
		var prevMember objectMember
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags jsonwire.ValueFlags
			// The error is ignored since the value is valid by pre-condition.
			name, _ := d.s.ReadValue(&flags)
			name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
			// Consume the member value, reordering any objects within it first.
			mustReorderObjectsFromDecoder(d, scratch)
			afterValue := d.InputOffset()

			// The member buffer aliases the decoder's input buffer,
			// spanning from before the name to after the value.
			currMember := objectMember{name, d.s.buf[beforeName:afterValue]}
			// Stay "sorted" only while each member compares strictly greater
			// than the one before it.
			if isSorted && len(*members) > 0 {
				isSorted = objectMember.Compare(prevMember, currMember) < 0
			}
			*members = append(*members, currMember)
			prevMember = currMember
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()                // consume the closing '}'

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		// Remember which buffer came first before and after sorting,
		// since their comma-and-whitespace prefixes are exchanged below.
		firstBufferBeforeSorting := (*members)[0].buffer
		slices.SortFunc(*members, objectMember.Compare)
		firstBufferAfterSorting := (*members)[0].buffer

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		//
		// commaAndWhitespacePrefix returns the leading portion of b that
		// consists solely of commas and whitespace.
		commaAndWhitespacePrefix := func(b []byte) []byte {
			return b[:len(b)-len(bytes.TrimLeft(b, commaAndWhitespace))]
		}
		sorted := (*scratch)[:0]
		for i, member := range *members {
			// Members are told apart by the address of their first byte,
			// as every member buffer is a slice of the same input buffer.
			switch {
			case i == 0 && &member.buffer[0] != &firstBufferBeforeSorting[0]:
				// First member after sorting is not the first member before sorting,
				// so use the prefix of the first member before sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferBeforeSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			case i != 0 && &member.buffer[0] == &firstBufferBeforeSorting[0]:
				// Later member after sorting is the first member before sorting,
				// so use the prefix of the first member after sorting.
				sorted = append(sorted, commaAndWhitespacePrefix(firstBufferAfterSorting)...)
				sorted = append(sorted, bytes.TrimLeft(member.buffer, commaAndWhitespace)...)
			default:
				// The member keeps its own prefix.
				sorted = append(sorted, member.buffer...)
			}
		}
		// Only prefixes were exchanged, so the total size must be unchanged;
		// anything else would corrupt the bytes surrounding the object body.
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		copy(d.s.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		// Arrays keep their element order; only recurse into each element.
		for d.PeekKind() != ']' {
			mustReorderObjectsFromDecoder(d, scratch)
		}
		d.ReadToken() // consume the closing ']'
	default:
		// Any other token needs no reordering. A decoder error here violates
		// the pre-condition and is reported as a panic.
		if err != nil {
			panic("BUG: " + err.Error())
		}
	}
}