// internal/strs/strings.go - protobuf - Git at Google

 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package strs provides string manipulation functionality specific to protobuf.
 package strs

 import (
 	"go/token"
 	"strings"
 	"unicode"
 	"unicode/utf8"

 	"google.golang.org/protobuf/internal/flags"
 	"google.golang.org/protobuf/reflect/protoreflect"
 )

 // EnforceUTF8 reports whether strict UTF-8 validation should be applied to fd.
 //
 // When flags.ProtoLegacy is set or fd uses editions syntax, a descriptor that
 // provides its own EnforceUTF8 method is asked directly. In every other case
 // the answer is whether fd uses proto3 syntax.
 func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
 	type utf8Enforcer interface{ EnforceUTF8() bool }

 	// Neither legacy mode nor editions: only the syntax matters.
 	if !flags.ProtoLegacy && fd.Syntax() != protoreflect.Editions {
 		return fd.Syntax() == protoreflect.Proto3
 	}
 	// Let the descriptor decide when it knows how to.
 	if e, ok := fd.(utf8Enforcer); ok {
 		return e.EnforceUTF8()
 	}
 	return fd.Syntax() == protoreflect.Proto3
 }

 // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
 //
 // The input is scanned byte by byte as a sequence of words:
 //   - a '.' or interior '_' directly followed by an ASCII lower case letter is
 //     dropped, and that letter starts a new (capitalized) word;
 //   - any other '.' is emitted as '_';
 //   - a '_' at the very start, or directly after a '.', is emitted as 'X';
 //   - any other '_' is kept as is;
 //   - ASCII digits are copied unchanged;
 //   - every other byte starts a word: it is upper-cased if it is an ASCII
 //     lower case letter, and the run of lower case letters after it is copied.
 func GoCamelCase(s string) string {
 	var out []byte
 	n := len(s)
 	for i := 0; i < n; i++ {
 		ch := s[i]
 		lowerNext := i+1 < n && isASCIILower(s[i+1])

 		switch ch {
 		case '.':
 			// ".x" drops the dot; any other dot becomes '_'.
 			if !lowerNext {
 				out = append(out, '_')
 			}
 			continue
 		case '_':
 			switch {
 			case i == 0 || s[i-1] == '.':
 				// Leading '_' (or '_' right after '.') becomes 'X' so that
 				// the result starts with a capital letter.
 				out = append(out, 'X')
 			case lowerNext:
 				// "_x" drops the underscore; x is capitalized next round.
 			default:
 				out = append(out, '_')
 			}
 			continue
 		}

 		if isASCIIDigit(ch) {
 			out = append(out, ch)
 			continue
 		}

 		// Start of a word: assume a letter (anything else is a bogus
 		// identifier) and make sure it is upper case.
 		if isASCIILower(ch) {
 			ch -= 'a' - 'A'
 		}
 		out = append(out, ch)

 		// Copy the lower case tail of the word verbatim.
 		for i+1 < n && isASCIILower(s[i+1]) {
 			i++
 			out = append(out, s[i])
 		}
 	}
 	return string(out)
 }

 // GoSanitized converts a string to a valid Go identifier.
 //
 // Every rune that is neither a Unicode letter nor a Unicode decimal digit is
 // replaced by '_'. If the result is a Go keyword, or does not begin with a
 // Unicode letter (which includes the empty string), a '_' is prepended.
 func GoSanitized(s string) string {
 	keepOrUnderscore := func(r rune) rune {
 		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
 			return '_'
 		}
 		return r
 	}
 	ident := strings.Map(keepOrUnderscore, s)

 	first, _ := utf8.DecodeRuneInString(ident)
 	switch {
 	case token.Lookup(ident).IsKeyword(), !unicode.IsLetter(first):
 		// Keyword clash or invalid leading rune: guard with an underscore.
 		return "_" + ident
 	default:
 		return ident
 	}
 }

 // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
 // according to the protobuf JSON specification.
 //
 // Underscores are removed, and an ASCII lower case letter that directly
 // follows an underscore is upper-cased. The input is processed byte-wise.
 func JSONCamelCase(s string) string {
 	var out []byte
 	upperNext := false
 	for _, ch := range []byte(s) { // proto identifiers are always ASCII
 		if ch == '_' {
 			upperNext = true
 			continue
 		}
 		if upperNext && isASCIILower(ch) {
 			ch -= 'a' - 'A'
 		}
 		upperNext = false
 		out = append(out, ch)
 	}
 	return string(out)
 }

 // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
 // according to the protobuf JSON specification.
 //
 // Each ASCII upper case letter is replaced by '_' followed by its lower case
 // form; all other bytes are copied unchanged.
 func JSONSnakeCase(s string) string {
 	var out []byte
 	for _, ch := range []byte(s) { // proto identifiers are always ASCII
 		if !isASCIIUpper(ch) {
 			out = append(out, ch)
 			continue
 		}
 		out = append(out, '_', ch+('a'-'A'))
 	}
 	return string(out)
 }

 // MapEntryName derives the name of the map entry message given the field name.
 // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
 //
 // Underscores are dropped, the first character and every character after an
 // underscore are upper-cased, and "Entry" is appended to the result.
 func MapEntryName(s string) string {
 	var out []byte
 	capitalize := true
 	for _, r := range s {
 		if r == '_' {
 			capitalize = true
 			continue
 		}
 		if capitalize {
 			r = unicode.ToUpper(r)
 			capitalize = false
 		}
 		out = append(out, byte(r))
 	}
 	return string(append(out, "Entry"...))
 }

 // EnumValueName derives the camel-cased enum value name.
 // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
 //
 // Underscores are dropped; the first character and every character after an
 // underscore are upper-cased, and all remaining characters are lower-cased.
 func EnumValueName(s string) string {
 	var out []byte
 	capitalize := true
 	for _, r := range s {
 		if r == '_' {
 			capitalize = true
 			continue
 		}
 		if capitalize {
 			r = unicode.ToUpper(r)
 		} else {
 			r = unicode.ToLower(r)
 		}
 		capitalize = false
 		out = append(out, byte(r))
 	}
 	return string(out)
 }

 // TrimEnumPrefix trims the enum name prefix from an enum value name,
 // where the prefix is all lowercase without underscores.
 // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
 //
 // Underscores in s are skipped while matching, and the remaining bytes are
 // compared case-insensitively against prefix. The original s is returned
 // unchanged when the prefix does not match or when trimming would leave
 // nothing behind.
 func TrimEnumPrefix(s, prefix string) string {
 	rest, want := s, prefix
 	for rest != "" && want != "" {
 		head := rest[0]
 		rest = rest[1:]
 		if head == '_' {
 			continue // underscores never count against the prefix
 		}
 		if unicode.ToLower(rune(head)) != rune(want[0]) {
 			return s // no prefix match
 		}
 		want = want[1:]
 	}
 	if want != "" {
 		return s // s ran out before the prefix did
 	}
 	if trimmed := strings.TrimLeft(rest, "_"); trimmed != "" {
 		return trimmed
 	}
 	return s // avoid returning empty string
 }

 // isASCIILower reports whether c is an ASCII lower case letter ('a'..'z').
 func isASCIILower(c byte) bool {
 	// Unsigned wrap-around sends every byte below 'a' above the range width.
 	return c-'a' <= 'z'-'a'
 }
 // isASCIIUpper reports whether c is an ASCII upper case letter ('A'..'Z').
 func isASCIIUpper(c byte) bool {
 	// Unsigned wrap-around sends every byte below 'A' above the range width.
 	return c-'A' <= 'Z'-'A'
 }
 // isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
 func isASCIIDigit(c byte) bool {
 	// Unsigned wrap-around sends every byte below '0' above the range width.
 	return c-'0' <= '9'-'0'
 }
	// Copyright 2019 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package strs provides string manipulation functionality specific to protobuf.
	package strs

	import (
	"go/token"
	"strings"
	"unicode"
	"unicode/utf8"

	"google.golang.org/protobuf/internal/flags"
	"google.golang.org/protobuf/reflect/protoreflect"
	)

	// EnforceUTF8 reports whether to enforce strict UTF-8 validation.
	func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
	if flags.ProtoLegacy \|\| fd.Syntax() == protoreflect.Editions {
	if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
	return fd.EnforceUTF8()
	}
	}
	return fd.Syntax() == protoreflect.Proto3
	}

	// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
	//
	// If there is an interior underscore followed by a lower case letter,
	// drop the underscore and convert the letter to upper case.
	func GoCamelCase(s string) string {
	// Invariant: if the next letter is lower case, it must be converted
	// to upper case.
	// That is, we process a word at a time, where words are marked by _ or
	// upper case letter. Digits are treated as words.
	var b []byte
	for i := 0; i < len(s); i++ {
	c := s[i]
	switch {
	case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
	// Skip over '.' in ".{{lowercase}}".
	case c == '.':
	b = append(b, '_') // convert '.' to '_'
	case c == '_' && (i == 0 \|\| s[i-1] == '.'):
	// Convert initial '_' to ensure we start with a capital letter.
	// Do the same for '_' after '.' to match historic behavior.
	b = append(b, 'X') // convert '_' to 'X'
	case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
	// Skip over '_' in "_{{lowercase}}".
	case isASCIIDigit(c):
	b = append(b, c)
	default:
	// Assume we have a letter now - if not, it's a bogus identifier.
	// The next word is a sequence of characters that must start upper case.
	if isASCIILower(c) {
	c -= 'a' - 'A' // convert lowercase to uppercase
	}
	b = append(b, c)

	// Accept lower case sequence that follows.
	for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
	b = append(b, s[i+1])
	}
	}
	}
	return string(b)
	}

	// GoSanitized converts a string to a valid Go identifier.
	//
	// Runes that are not Unicode letters or decimal digits are replaced by '_'.
	// A '_' is prepended when the result is a Go keyword or does not start with
	// a Unicode letter; the empty string therefore yields "_".
	func GoSanitized(s string) string {
		// Sanitize the input to the set of valid characters,
		// which must be '_', a Unicode letter, or a Unicode decimal digit.
		s = strings.Map(func(r rune) rune {
			if unicode.IsLetter(r) || unicode.IsDigit(r) {
				return r
			}
			return '_'
		}, s)

		// Prepend '_' in the event of a Go keyword conflict or if
		// the identifier is invalid (does not start in the Unicode L category).
		r, _ := utf8.DecodeRuneInString(s)
		if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
			return "_" + s
		}
		return s
	}

	// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
	// according to the protobuf JSON specification.
	//
	// Every '_' is removed; an ASCII lower case letter immediately after a '_'
	// is emitted in upper case.
	func JSONCamelCase(s string) string {
		var buf []byte
		prevUnderscore := false
		for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
			switch c := s[i]; {
			case c == '_':
				prevUnderscore = true
			case prevUnderscore && isASCIILower(c):
				buf = append(buf, c-('a'-'A')) // convert to uppercase
				prevUnderscore = false
			default:
				buf = append(buf, c)
				prevUnderscore = false
			}
		}
		return string(buf)
	}

	// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
	// according to the protobuf JSON specification.
	//
	// An ASCII upper case letter becomes '_' plus its lower case form; every
	// other byte is copied through.
	func JSONSnakeCase(s string) string {
		var buf []byte
		for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
			switch c := s[i]; {
			case isASCIIUpper(c):
				buf = append(buf, '_', c+('a'-'A')) // convert to lowercase
			default:
				buf = append(buf, c)
			}
		}
		return string(buf)
	}

	// MapEntryName derives the name of the map entry message given the field name.
	// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
	//
	// The field name is camel-cased (underscores removed, the character after
	// each underscore and the first character upper-cased) and suffixed with
	// "Entry".
	func MapEntryName(s string) string {
		var name []byte
		atWordStart := true
		for _, r := range s {
			if r == '_' {
				atWordStart = true
				continue
			}
			ch := byte(r)
			if atWordStart {
				ch = byte(unicode.ToUpper(r))
				atWordStart = false
			}
			name = append(name, ch)
		}
		name = append(name, "Entry"...)
		return string(name)
	}

	// EnumValueName derives the camel-cased enum value name.
	// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
	//
	// Underscores are removed. The first character of the name and of each
	// underscore-separated word is upper-cased; the rest are lower-cased.
	func EnumValueName(s string) string {
		var name []byte
		atWordStart := true
		for _, r := range s {
			if r == '_' {
				atWordStart = true
				continue
			}
			convert := unicode.ToLower
			if atWordStart {
				convert = unicode.ToUpper
			}
			name = append(name, byte(convert(r)))
			atWordStart = false
		}
		return string(name)
	}

	// TrimEnumPrefix trims the enum name prefix from an enum value name,
	// where the prefix is all lowercase without underscores.
	// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
	//
	// Matching ignores underscores in s and the case of its bytes. When the
	// prefix does not match, or nothing would remain after trimming, s is
	// returned as given.
	func TrimEnumPrefix(s, prefix string) string {
		i, j := 0, 0 // cursors into s and prefix
		for i < len(s) && j < len(prefix) {
			if s[i] == '_' {
				i++
				continue
			}
			if unicode.ToLower(rune(s[i])) != rune(prefix[j]) {
				return s // no prefix match
			}
			i++
			j++
		}
		if j < len(prefix) {
			return s // no prefix match
		}
		rest := strings.TrimLeft(s[i:], "_")
		if rest == "" {
			return s // avoid returning empty string
		}
		return rest
	}

	// isASCIILower reports whether c lies in the ASCII range 'a' through 'z'.
	func isASCIILower(c byte) bool {
		return c >= 'a' && c <= 'z'
	}
	// isASCIIUpper reports whether c lies in the ASCII range 'A' through 'Z'.
	func isASCIIUpper(c byte) bool {
		return c >= 'A' && c <= 'Z'
	}
	// isASCIIDigit reports whether c lies in the ASCII range '0' through '9'.
	func isASCIIDigit(c byte) bool {
		return c >= '0' && c <= '9'
	}