| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package strs provides string manipulation functionality specific to protobuf. |
| package strs |
| |
| import ( |
| "go/token" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| |
| "google.golang.org/protobuf/internal/flags" |
| "google.golang.org/protobuf/reflect/protoreflect" |
| ) |
| |
| // EnforceUTF8 reports whether to enforce strict UTF-8 validation. |
| func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { |
| if flags.ProtoLegacy || fd.Syntax() == protoreflect.Editions { |
| if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok { |
| return fd.EnforceUTF8() |
| } |
| } |
| return fd.Syntax() == protoreflect.Proto3 |
| } |
| |
| // GoCamelCase camel-cases a protobuf name for use as a Go identifier. |
| // |
| // If there is an interior underscore followed by a lower case letter, |
| // drop the underscore and convert the letter to upper case. |
| func GoCamelCase(s string) string { |
| // Invariant: if the next letter is lower case, it must be converted |
| // to upper case. |
| // That is, we process a word at a time, where words are marked by _ or |
| // upper case letter. Digits are treated as words. |
| var b []byte |
| for i := 0; i < len(s); i++ { |
| c := s[i] |
| switch { |
| case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): |
| // Skip over '.' in ".{{lowercase}}". |
| case c == '.': |
| b = append(b, '_') // convert '.' to '_' |
| case c == '_' && (i == 0 || s[i-1] == '.'): |
| // Convert initial '_' to ensure we start with a capital letter. |
| // Do the same for '_' after '.' to match historic behavior. |
| b = append(b, 'X') // convert '_' to 'X' |
| case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): |
| // Skip over '_' in "_{{lowercase}}". |
| case isASCIIDigit(c): |
| b = append(b, c) |
| default: |
| // Assume we have a letter now - if not, it's a bogus identifier. |
| // The next word is a sequence of characters that must start upper case. |
| if isASCIILower(c) { |
| c -= 'a' - 'A' // convert lowercase to uppercase |
| } |
| b = append(b, c) |
| |
| // Accept lower case sequence that follows. |
| for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { |
| b = append(b, s[i+1]) |
| } |
| } |
| } |
| return string(b) |
| } |
| |
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced with '_'. If the result is a Go keyword, or does not begin with a
// letter (which includes the empty string), a '_' is prepended.
func GoSanitized(s string) string {
	// Keep letters and digits; everything else collapses to '_'.
	keepOrUnderscore := func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			return '_'
		}
		return r
	}
	ident := strings.Map(keepOrUnderscore, s)

	// Only a non-keyword that starts with a letter can be used as-is.
	first, _ := utf8.DecodeRuneInString(ident)
	if unicode.IsLetter(first) && !token.Lookup(ident).IsKeyword() {
		return ident
	}
	return "_" + ident
}
| |
| // JSONCamelCase converts a snake_case identifier to a camelCase identifier, |
| // according to the protobuf JSON specification. |
| func JSONCamelCase(s string) string { |
| var b []byte |
| var wasUnderscore bool |
| for i := 0; i < len(s); i++ { // proto identifiers are always ASCII |
| c := s[i] |
| if c != '_' { |
| if wasUnderscore && isASCIILower(c) { |
| c -= 'a' - 'A' // convert to uppercase |
| } |
| b = append(b, c) |
| } |
| wasUnderscore = c == '_' |
| } |
| return string(b) |
| } |
| |
| // JSONSnakeCase converts a camelCase identifier to a snake_case identifier, |
| // according to the protobuf JSON specification. |
| func JSONSnakeCase(s string) string { |
| var b []byte |
| for i := 0; i < len(s); i++ { // proto identifiers are always ASCII |
| c := s[i] |
| if isASCIIUpper(c) { |
| b = append(b, '_') |
| c += 'a' - 'A' // convert to lowercase |
| } |
| b = append(b, c) |
| } |
| return string(b) |
| } |
| |
| // MapEntryName derives the name of the map entry message given the field name. |
| // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057 |
| func MapEntryName(s string) string { |
| var b []byte |
| upperNext := true |
| for _, c := range s { |
| switch { |
| case c == '_': |
| upperNext = true |
| case upperNext: |
| b = append(b, byte(unicode.ToUpper(c))) |
| upperNext = false |
| default: |
| b = append(b, byte(c)) |
| } |
| } |
| b = append(b, "Entry"...) |
| return string(b) |
| } |
| |
| // EnumValueName derives the camel-cased enum value name. |
| // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313 |
| func EnumValueName(s string) string { |
| var b []byte |
| upperNext := true |
| for _, c := range s { |
| switch { |
| case c == '_': |
| upperNext = true |
| case upperNext: |
| b = append(b, byte(unicode.ToUpper(c))) |
| upperNext = false |
| default: |
| b = append(b, byte(unicode.ToLower(c))) |
| upperNext = false |
| } |
| } |
| return string(b) |
| } |
| |
| // TrimEnumPrefix trims the enum name prefix from an enum value name, |
| // where the prefix is all lowercase without underscores. |
| // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375 |
| func TrimEnumPrefix(s, prefix string) string { |
| s0 := s // original input |
| for len(s) > 0 && len(prefix) > 0 { |
| if s[0] == '_' { |
| s = s[1:] |
| continue |
| } |
| if unicode.ToLower(rune(s[0])) != rune(prefix[0]) { |
| return s0 // no prefix match |
| } |
| s, prefix = s[1:], prefix[1:] |
| } |
| if len(prefix) > 0 { |
| return s0 // no prefix match |
| } |
| s = strings.TrimLeft(s, "_") |
| if len(s) == 0 { |
| return s0 // avoid returning empty string |
| } |
| return s |
| } |
| |
// isASCIILower reports whether c is an ASCII lower case letter ('a'-'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
// isASCIIUpper reports whether c is an ASCII upper case letter ('A'-'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'-'9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}