// internal/encoding/text/encode.go - protobuf - Git at Google

 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package text

 import (
 	"math"
 	"math/bits"
 	"strconv"
 	"strings"
 	"unicode/utf8"

 	"google.golang.org/protobuf/internal/detrand"
 	"google.golang.org/protobuf/internal/errors"
 )

 // encType identifies the kind of construct the Encoder wrote most recently.
 // Every kind is a distinct bit so that several kinds can be matched with a
 // single mask, e.g. scalar|messageClose.
 type encType uint8

 // The zero encType is intentionally left unnamed; it is the value lastType
 // holds in a zero-valued encoderState. The named kinds are the bit flags
 // 1, 2, 4 and 8.
 const (
 	name encType = 1 << iota
 	scalar
 	messageOpen
 	messageClose
 )

 // Encoder provides methods to write out textproto constructs and values. The user is
 // responsible for producing valid sequences of constructs and values.
 //
 // Output is accumulated in memory; the bytes written so far are available
 // through Bytes.
 type Encoder struct {
 	// encoderState is the mutable part of the Encoder. It is embedded so that
 	// Snapshot can copy it out and Reset can restore it as a unit.
 	encoderState

 	// indent is the whitespace added per nesting level. When it is empty the
 	// Encoder emits single-line output.
 	indent      string
 	// delims holds the opening and closing message delimiters, in that order.
 	delims      [2]byte
 	// outputASCII is forwarded to appendString when writing string values.
 	outputASCII bool
 }

 // encoderState is the part of an Encoder that changes as output is written.
 // Snapshot returns a copy of it and Reset replaces it wholesale.
 type encoderState struct {
 	// lastType is the kind of the most recently written construct; prepareNext
 	// reads it to decide on spacing and then overwrites it.
 	lastType encType
 	// indents is the indentation emitted after each newline in multi-line
 	// mode; prepareNext grows and shrinks it by one indent per nesting level.
 	indents  []byte
 	// out is the encoded output accumulated so far.
 	out      []byte
 }

 // NewEncoder returns an Encoder configured with the given options.
 //
 // indent selects the layout. An empty indent produces single-line output; a
 // non-empty indent produces multi-line output in which entries of a message
 // are placed on their own lines, indented by one copy of indent per nesting
 // level. indent may contain only spaces and tabs, otherwise an error is
 // returned.
 //
 // delims selects the characters that open and close a message. The zero value
 // defaults to "{}"; "{}" and "<>" are used as given; any other pair results in
 // an error.
 //
 // outputASCII, when true, makes strings serialize with multi-byte UTF-8
 // sequences escaped, so that the overall output is ASCII rather than UTF-8.
 func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
 	// Trimming every space and tab leaves a remainder only if indent contains
 	// some other character. An empty indent trivially passes.
 	if strings.Trim(indent, " \t") != "" {
 		return nil, errors.New("indent may only be composed of space and tab characters")
 	}

 	enc := &Encoder{indent: indent, outputASCII: outputASCII}
 	switch string(delims[:]) {
 	case "\x00\x00":
 		// Zero value: fall back to the default delimiters.
 		enc.delims = [2]byte{'{', '}'}
 	case "{}", "<>":
 		enc.delims = delims
 	default:
 		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
 	}
 	return enc, nil
 }

 // Bytes returns the content of the written bytes.
 //
 // The returned slice is the Encoder's own output buffer, not a copy.
 func (e *Encoder) Bytes() []byte {
 	return e.out
 }

 // StartMessage opens a message: after prepareNext has emitted any required
 // spacing, the configured opening delimiter ('{' or '<') is appended.
 func (e *Encoder) StartMessage() {
 	e.prepareNext(messageOpen)
 	opener := e.delims[0]
 	e.out = append(e.out, opener)
 }

 // EndMessage closes a message: after prepareNext has emitted any required
 // spacing, the configured closing delimiter ('}' or '>') is appended.
 func (e *Encoder) EndMessage() {
 	e.prepareNext(messageClose)
 	closer := e.delims[1]
 	e.out = append(e.out, closer)
 }

 // WriteName emits a field name immediately followed by the ':' separator.
 // The name is written verbatim; no quoting or escaping is applied.
 func (e *Encoder) WriteName(s string) {
 	e.prepareNext(name)
 	e.out = append(append(e.out, s...), ':')
 }

 // WriteBool emits the boolean as the unquoted literal "true" or "false".
 func (e *Encoder) WriteBool(b bool) {
 	lit := "false"
 	if b {
 		lit = "true"
 	}
 	e.WriteLiteral(lit)
 }

 // WriteString emits s as a double-quoted, escaped string value. Escaping is
 // delegated to appendString, which receives the Encoder's outputASCII setting.
 func (e *Encoder) WriteString(s string) {
 	e.prepareNext(scalar)
 	quoted := appendString(e.out, s, e.outputASCII)
 	e.out = quoted
 }

 // appendString appends in to out as a double-quoted string and returns the
 // extended slice.
 //
 // Escaping rules:
 //   - '"' and '\\' are backslash-escaped; newline, carriage return and tab
 //     become \n, \r and \t.
 //   - Other bytes below 0x20, the byte 0x7f, and bytes that are not part of
 //     valid UTF-8 become a two-digit \xNN escape.
 //   - Runes in U+0080..U+009F, and every non-ASCII rune when outputASCII is
 //     true, become \uNNNN (or \UNNNNNNNN above U+FFFF).
 //   - Everything else is copied through unchanged.
 func appendString(out []byte, in string, outputASCII bool) []byte {
 	out = append(out, '"')

 	// Fast path: copy the leading run of bytes that certainly need no escape.
 	safe := indexNeedEscapeInString(in)
 	out = append(out, in[:safe]...)
 	in = in[safe:]

 	for len(in) > 0 {
 		r, size := utf8.DecodeRuneInString(in)

 		// We do not report invalid UTF-8 because strings in the text format
 		// are used to represent both the proto string and bytes type. An
 		// undecodable byte is escaped as that raw byte instead.
 		invalid := r == utf8.RuneError && size == 1
 		if invalid {
 			r = rune(in[0])
 		}

 		if invalid || r < ' ' || r == '"' || r == '\\' || r == 0x7f {
 			out = append(out, '\\')
 			switch r {
 			case '"', '\\':
 				out = append(out, byte(r))
 			case '\n':
 				out = append(out, 'n')
 			case '\r':
 				out = append(out, 'r')
 			case '\t':
 				out = append(out, 't')
 			default:
 				// Left-pad with zeros to two hex digits. For r == 0 the
 				// integer division truncates toward zero, which yields one
 				// padding zero.
 				out = append(out, 'x')
 				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
 				out = strconv.AppendUint(out, uint64(r), 16)
 			}
 			in = in[size:]
 			continue
 		}

 		if r >= utf8.RuneSelf && (outputASCII || r <= 0x009f) {
 			out = append(out, '\\')
 			if r <= math.MaxUint16 {
 				// Left-pad to four hex digits.
 				out = append(out, 'u')
 				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
 			} else {
 				// Left-pad to eight hex digits.
 				out = append(out, 'U')
 				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
 			}
 			out = strconv.AppendUint(out, uint64(r), 16)
 			in = in[size:]
 			continue
 		}

 		// The rune is emitted as-is, together with the run of bytes after it
 		// that need no escaping.
 		end := size + indexNeedEscapeInString(in[size:])
 		out = append(out, in[:end]...)
 		in = in[end:]
 	}

 	return append(out, '"')
 }

 // indexNeedEscapeInString reports the offset of the first byte in s that may
 // need escaping, or len(s) when there is none.
 //
 // A byte is flagged when it is below 0x20, at or above 0x7f, or one of the
 // characters '"', '\'' and '\\'.
 func indexNeedEscapeInString(s string) int {
 	for i, c := range []byte(s) {
 		switch {
 		case c < ' ', c >= 0x7f, c == '"', c == '\'', c == '\\':
 			return i
 		}
 	}
 	return len(s)
 }

 // WriteFloat emits n as a scalar value. bitSize is passed on to appendFloat,
 // which performs the formatting.
 func (e *Encoder) WriteFloat(n float64, bitSize int) {
 	e.prepareNext(scalar)
 	formatted := appendFloat(e.out, n, bitSize)
 	e.out = formatted
 }

 // appendFloat appends the text form of n to out and returns the extended
 // slice. NaN and the infinities are written as "nan", "inf" and "-inf"; any
 // other value is formatted by strconv.AppendFloat with the 'g' format,
 // precision -1 and the given bitSize.
 func appendFloat(out []byte, n float64, bitSize int) []byte {
 	if math.IsNaN(n) {
 		return append(out, "nan"...)
 	}
 	if math.IsInf(n, 0) {
 		if n > 0 {
 			return append(out, "inf"...)
 		}
 		return append(out, "-inf"...)
 	}
 	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
 }

 // WriteInt writes out the given signed integer value in base 10.
 //
 // The digits are appended straight into the output buffer with
 // strconv.AppendInt rather than going through an intermediate string.
 func (e *Encoder) WriteInt(n int64) {
 	e.prepareNext(scalar)
 	e.out = strconv.AppendInt(e.out, n, 10)
 }

 // WriteUint writes out the given unsigned integer value in base 10.
 //
 // The digits are appended straight into the output buffer with
 // strconv.AppendUint rather than going through an intermediate string.
 func (e *Encoder) WriteUint(n uint64) {
 	e.prepareNext(scalar)
 	e.out = strconv.AppendUint(e.out, n, 10)
 }

 // WriteLiteral emits s verbatim as a scalar value, without quotes or
 // escaping. It is used for enum literal strings; WriteBool also uses it for
 // "true" and "false".
 func (e *Encoder) WriteLiteral(s string) {
 	e.prepareNext(scalar)
 	buf := append(e.out, s...)
 	e.out = buf
 }

 // prepareNext emits whatever whitespace belongs between the previously
 // written construct and the one about to be written (next), and records next
 // as the new e.lastType.
 //
 // With an empty indent the output stays on one line and only a separating
 // space is added before a field name that follows a value or a closed
 // message. With a non-empty indent a space is added after a field name, and
 // entries are placed on their own lines at the current nesting depth.
 func (e *Encoder) prepareNext(next encType) {
 	// Record the transition up front; every decision below depends only on
 	// the kind that preceded next.
 	prev := e.lastType
 	e.lastType = next

 	afterValue := prev&(scalar|messageClose) != 0

 	// Single line.
 	if e.indent == "" {
 		// Separate consecutive fields with a space.
 		if afterValue && next == name {
 			e.out = append(e.out, ' ')
 			// Add a random extra space to make output unstable.
 			if detrand.Bool() {
 				e.out = append(e.out, ' ')
 			}
 		}
 		return
 	}

 	// Multi-line: the value of a field stays on the same line as its name.
 	if prev == name {
 		e.out = append(e.out, ' ')
 		// Add a random extra space after name: to make output unstable.
 		if detrand.Bool() {
 			e.out = append(e.out, ' ')
 		}
 		return
 	}

 	if prev == messageOpen && next != messageClose {
 		// First entry of a non-empty message: go one level deeper.
 		e.indents = append(e.indents, e.indent...)
 	} else if afterValue {
 		// A closing delimiter sits one level shallower than the entries.
 		if next == messageClose {
 			e.indents = e.indents[:len(e.indents)-len(e.indent)]
 		}
 	} else {
 		// Nothing written yet, or an empty message ("{}"): no line break.
 		return
 	}
 	e.out = append(e.out, '\n')
 	e.out = append(e.out, e.indents...)
 }

 // Snapshot returns the current snapshot for use in Reset.
 //
 // The snapshot is a copy of the embedded encoderState struct. Its slice
 // fields still refer to the same backing arrays as the Encoder's.
 func (e *Encoder) Snapshot() encoderState {
 	return e.encoderState
 }

 // Reset resets the Encoder to the given encoderState from a Snapshot.
 //
 // Only the embedded encoderState (lastType, indents, out) is replaced; the
 // indent, delims and outputASCII settings are left untouched.
 func (e *Encoder) Reset(es encoderState) {
 	e.encoderState = es
 }
	// Copyright 2018 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package text

	import (
	"math"
	"math/bits"
	"strconv"
	"strings"
	"unicode/utf8"

	"google.golang.org/protobuf/internal/detrand"
	"google.golang.org/protobuf/internal/errors"
	)

	// encType identifies the kind of construct the Encoder wrote most recently.
	// Every kind is a distinct bit so that several kinds can be matched with a
	// single mask, e.g. scalar|messageClose.
	type encType uint8

	// The zero encType is intentionally left unnamed; it is the value lastType
	// holds in a zero-valued encoderState. The named kinds are the bit flags
	// 1, 2, 4 and 8.
	const (
		name encType = 1 << iota
		scalar
		messageOpen
		messageClose
	)

	// Encoder provides methods to write out textproto constructs and values. The user is
	// responsible for producing valid sequences of constructs and values.
	//
	// Output is accumulated in memory; the bytes written so far are available
	// through Bytes.
	type Encoder struct {
	// encoderState is the mutable part of the Encoder. It is embedded so that
	// Snapshot can copy it out and Reset can restore it as a unit.
	encoderState

	// indent is the whitespace added per nesting level. When it is empty the
	// Encoder emits single-line output.
	indent string
	// delims holds the opening and closing message delimiters, in that order.
	delims [2]byte
	// outputASCII is forwarded to appendString when writing string values.
	outputASCII bool
	}

	// encoderState is the part of an Encoder that changes as output is written.
	// Snapshot returns a copy of it and Reset replaces it wholesale.
	type encoderState struct {
	// lastType is the kind of the most recently written construct; prepareNext
	// reads it to decide on spacing and then overwrites it.
	lastType encType
	// indents is the indentation emitted after each newline in multi-line
	// mode; prepareNext grows and shrinks it by one indent per nesting level.
	indents []byte
	// out is the encoded output accumulated so far.
	out []byte
	}

	// NewEncoder returns an Encoder configured with the given options.
	//
	// indent selects the layout. An empty indent produces single-line output; a
	// non-empty indent produces multi-line output in which entries of a message
	// are placed on their own lines, indented by one copy of indent per nesting
	// level. indent may contain only spaces and tabs, otherwise an error is
	// returned.
	//
	// delims selects the characters that open and close a message. The zero value
	// defaults to "{}"; "{}" and "<>" are used as given; any other pair results in
	// an error.
	//
	// outputASCII, when true, makes strings serialize with multi-byte UTF-8
	// sequences escaped, so that the overall output is ASCII rather than UTF-8.
	func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
		// Trimming every space and tab leaves a remainder only if indent contains
		// some other character. An empty indent trivially passes.
		if strings.Trim(indent, " \t") != "" {
			return nil, errors.New("indent may only be composed of space and tab characters")
		}

		enc := &Encoder{indent: indent, outputASCII: outputASCII}
		switch string(delims[:]) {
		case "\x00\x00":
			// Zero value: fall back to the default delimiters.
			enc.delims = [2]byte{'{', '}'}
		case "{}", "<>":
			enc.delims = delims
		default:
			return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
		}
		return enc, nil
	}

	// Bytes returns everything written so far. The returned slice is the
	// Encoder's own output buffer, not a copy.
	func (e *Encoder) Bytes() []byte {
		return e.out
	}

	// StartMessage opens a message: after prepareNext has emitted any required
	// spacing, the configured opening delimiter ('{' or '<') is appended.
	func (e *Encoder) StartMessage() {
		e.prepareNext(messageOpen)
		opener := e.delims[0]
		e.out = append(e.out, opener)
	}

	// EndMessage closes a message: after prepareNext has emitted any required
	// spacing, the configured closing delimiter ('}' or '>') is appended.
	func (e *Encoder) EndMessage() {
		e.prepareNext(messageClose)
		closer := e.delims[1]
		e.out = append(e.out, closer)
	}

	// WriteName emits a field name immediately followed by the ':' separator.
	// The name is written verbatim; no quoting or escaping is applied.
	func (e *Encoder) WriteName(s string) {
		e.prepareNext(name)
		e.out = append(append(e.out, s...), ':')
	}

	// WriteBool emits the boolean as the unquoted literal "true" or "false".
	func (e *Encoder) WriteBool(b bool) {
		lit := "false"
		if b {
			lit = "true"
		}
		e.WriteLiteral(lit)
	}

	// WriteString emits s as a double-quoted, escaped string value. Escaping is
	// delegated to appendString, which receives the Encoder's outputASCII setting.
	func (e *Encoder) WriteString(s string) {
		e.prepareNext(scalar)
		quoted := appendString(e.out, s, e.outputASCII)
		e.out = quoted
	}

	func appendString(out []byte, in string, outputASCII bool) []byte {
	out = append(out, '"')
	i := indexNeedEscapeInString(in)
	in, out = in[i:], append(out, in[:i]...)
	for len(in) > 0 {
	switch r, n := utf8.DecodeRuneInString(in); {
	case r == utf8.RuneError && n == 1:
	// We do not report invalid UTF-8 because strings in the text format
	// are used to represent both the proto string and bytes type.
	r = rune(in[0])
	fallthrough
	case r < ' ' \|\| r == '"' \|\| r == '\\' \|\| r == 0x7f:
	out = append(out, '\\')
	switch r {
	case '"', '\\':
	out = append(out, byte(r))
	case '\n':
	out = append(out, 'n')
	case '\r':
	out = append(out, 'r')
	case '\t':
	out = append(out, 't')
	default:
	out = append(out, 'x')
	out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
	out = strconv.AppendUint(out, uint64(r), 16)
	}
	in = in[n:]
	case r >= utf8.RuneSelf && (outputASCII \|\| r <= 0x009f):
	out = append(out, '\\')
	if r <= math.MaxUint16 {
	out = append(out, 'u')
	out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
	out = strconv.AppendUint(out, uint64(r), 16)
	} else {
	out = append(out, 'U')
	out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
	out = strconv.AppendUint(out, uint64(r), 16)
	}
	in = in[n:]
	default:
	i := indexNeedEscapeInString(in[n:])
	in, out = in[n+i:], append(out, in[:n+i]...)
	}
	}
	out = append(out, '"')
	return out
	}

	// indexNeedEscapeInString returns the index of the first byte in s that may
	// need escaping. If no byte needs escaping, this returns the input length.
	//
	// A byte is flagged when it is below 0x20, at or above 0x7f, or one of the
	// characters '"', '\'' and '\\'.
	func indexNeedEscapeInString(s string) int {
		for i := 0; i < len(s); i++ {
			if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f {
				return i
			}
		}
		return len(s)
	}

	// WriteFloat emits n as a scalar value. bitSize is passed on to appendFloat,
	// which performs the formatting.
	func (e *Encoder) WriteFloat(n float64, bitSize int) {
		e.prepareNext(scalar)
		formatted := appendFloat(e.out, n, bitSize)
		e.out = formatted
	}

	// appendFloat appends the text form of n to out and returns the extended
	// slice. NaN and the infinities are written as "nan", "inf" and "-inf"; any
	// other value is formatted by strconv.AppendFloat with the 'g' format,
	// precision -1 and the given bitSize.
	func appendFloat(out []byte, n float64, bitSize int) []byte {
		if math.IsNaN(n) {
			return append(out, "nan"...)
		}
		if math.IsInf(n, 0) {
			if n > 0 {
				return append(out, "inf"...)
			}
			return append(out, "-inf"...)
		}
		return strconv.AppendFloat(out, n, 'g', -1, bitSize)
	}

	// WriteInt writes out the given signed integer value in base 10.
	//
	// The digits are appended straight into the output buffer with
	// strconv.AppendInt rather than going through an intermediate string.
	func (e *Encoder) WriteInt(n int64) {
		e.prepareNext(scalar)
		e.out = strconv.AppendInt(e.out, n, 10)
	}

	// WriteUint writes out the given unsigned integer value in base 10.
	//
	// The digits are appended straight into the output buffer with
	// strconv.AppendUint rather than going through an intermediate string.
	func (e *Encoder) WriteUint(n uint64) {
		e.prepareNext(scalar)
		e.out = strconv.AppendUint(e.out, n, 10)
	}

	// WriteLiteral emits s verbatim as a scalar value, without quotes or
	// escaping. It is used for enum literal strings; WriteBool also uses it for
	// "true" and "false".
	func (e *Encoder) WriteLiteral(s string) {
		e.prepareNext(scalar)
		buf := append(e.out, s...)
		e.out = buf
	}

	// prepareNext adds possible space and indentation for the next value based
	// on last encType and indent option. It also updates e.lastType to next.
	func (e *Encoder) prepareNext(next encType) {
	defer func() {
	e.lastType = next
	}()

	// Single line.
	if len(e.indent) == 0 {
	// Add space after each field before the next one.
	if e.lastType&(scalar\|messageClose) != 0 && next == name {
	e.out = append(e.out, ' ')
	// Add a random extra space to make output unstable.
	if detrand.Bool() {
	e.out = append(e.out, ' ')
	}
	}
	return
	}

	// Multi-line.
	switch {
	case e.lastType == name:
	e.out = append(e.out, ' ')
	// Add a random extra space after name: to make output unstable.
	if detrand.Bool() {
	e.out = append(e.out, ' ')
	}

	case e.lastType == messageOpen && next != messageClose:
	e.indents = append(e.indents, e.indent...)
	e.out = append(e.out, '\n')
	e.out = append(e.out, e.indents...)

	case e.lastType&(scalar\|messageClose) != 0:
	if next == messageClose {
	e.indents = e.indents[:len(e.indents)-len(e.indent)]
	}
	e.out = append(e.out, '\n')
	e.out = append(e.out, e.indents...)
	}
	}

	// Snapshot captures the Encoder's current encoderState so that it can later
	// be handed to Reset. The result is a copy of the struct; its slice fields
	// still refer to the same backing arrays as the Encoder's.
	func (e *Encoder) Snapshot() encoderState {
		snap := e.encoderState
		return snap
	}

	// Reset restores the Encoder to a state previously obtained from Snapshot.
	// Only the embedded encoderState (lastType, indents, out) is replaced; the
	// indent, delims and outputASCII settings are left untouched.
	func (e *Encoder) Reset(es encoderState) {
		e.encoderState = es
	}