compiler/protogen, internal/strs, internal/impl: expose enum Go name derivation
In order to migrate v1 to wrap v2, we need a way to reproduce
the awful enum "names" that v1 used, which were the concatenation of
the proto package with the Go identifier used for the enum.
To support this:
* Move the camel case logic from compiler/protogen to internal/strs
* Add a small stub in internal/impl to expose this functionality
Change-Id: I8ff31daa9ae541e5788dc04d2e89eae1574877e4
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/191637
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/cmd/protoc-gen-go/internal_gengo/main.go b/cmd/protoc-gen-go/internal_gengo/main.go
index 0bb1f4f..dfc3102 100644
--- a/cmd/protoc-gen-go/internal_gengo/main.go
+++ b/cmd/protoc-gen-go/internal_gengo/main.go
@@ -780,14 +780,7 @@
func fieldProtobufTagValue(field *protogen.Field) string {
var enumName string
if field.Desc.Kind() == protoreflect.EnumKind {
- // For historical reasons, the name used in the tag is neither
- // the protobuf full name nor the fully qualified Go identifier,
- // but an odd mix of both.
- enumName = field.Enum.GoIdent.GoName
- protoPkg := string(field.Enum.Desc.ParentFile().Package())
- if protoPkg != "" {
- enumName = protoPkg + "." + enumName
- }
+ enumName = protoimpl.X.LegacyEnumName(field.Enum.Desc)
}
return tag.Marshal(field.Desc, enumName)
}
diff --git a/compiler/protogen/names.go b/compiler/protogen/names.go
deleted file mode 100644
index ae41a5a..0000000
--- a/compiler/protogen/names.go
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package protogen
-
-import (
- "fmt"
- "go/token"
- "strconv"
- "strings"
- "unicode"
- "unicode/utf8"
-
- "google.golang.org/protobuf/reflect/protoreflect"
-)
-
-// A GoIdent is a Go identifier, consisting of a name and import path.
-// The name is a single identifier and may not be a dot-qualified selector.
-type GoIdent struct {
- GoName string
- GoImportPath GoImportPath
-}
-
-func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
-
-// newGoIdent returns the Go identifier for a descriptor.
-func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
- name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
- return GoIdent{
- GoName: camelCase(name),
- GoImportPath: f.GoImportPath,
- }
-}
-
-// A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
-type GoImportPath string
-
-func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
-
-// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
-func (p GoImportPath) Ident(s string) GoIdent {
- return GoIdent{GoName: s, GoImportPath: p}
-}
-
-// A GoPackageName is the name of a Go package. e.g., "protobuf".
-type GoPackageName string
-
-// cleanPackageName converts a string to a valid Go package name.
-func cleanPackageName(name string) GoPackageName {
- return GoPackageName(cleanGoName(name))
-}
-
-// cleanGoName converts a string to a valid Go identifier.
-func cleanGoName(s string) string {
- // Sanitize the input to the set of valid characters,
- // which must be '_' or be in the Unicode L or N categories.
- s = strings.Map(func(r rune) rune {
- if unicode.IsLetter(r) || unicode.IsDigit(r) {
- return r
- }
- return '_'
- }, s)
-
- // Prepend '_' in the event of a Go keyword conflict or if
- // the identifier is invalid (does not start in the Unicode L category).
- r, _ := utf8.DecodeRuneInString(s)
- if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
- return "_" + s
- }
- return s
-}
-
-// baseName returns the last path element of the name, with the last dotted suffix removed.
-func baseName(name string) string {
- // First, find the last element
- if i := strings.LastIndex(name, "/"); i >= 0 {
- name = name[i+1:]
- }
- // Now drop the suffix
- if i := strings.LastIndex(name, "."); i >= 0 {
- name = name[:i]
- }
- return name
-}
-
-// camelCase converts a name to CamelCase.
-//
-// If there is an interior underscore followed by a lower case letter,
-// drop the underscore and convert the letter to upper case.
-// There is a remote possibility of this rewrite causing a name collision,
-// but it's so remote we're prepared to pretend it's nonexistent - since the
-// C++ generator lowercases names, it's extremely unlikely to have two fields
-// with different capitalizations.
-func camelCase(s string) string {
- // Invariant: if the next letter is lower case, it must be converted
- // to upper case.
- // That is, we process a word at a time, where words are marked by _ or
- // upper case letter. Digits are treated as words.
- var b []byte
- for i := 0; i < len(s); i++ {
- c := s[i]
- switch {
- case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
- // Skip over '.' in ".{{lowercase}}".
- case c == '.':
- b = append(b, '_') // convert '.' to '_'
- case c == '_' && (i == 0 || s[i-1] == '.'):
- // Convert initial '_' to ensure we start with a capital letter.
- // Do the same for '_' after '.' to match historic behavior.
- b = append(b, 'X') // convert '_' to 'X'
- case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
- // Skip over '_' in "_{{lowercase}}".
- case isASCIIDigit(c):
- b = append(b, c)
- default:
- // Assume we have a letter now - if not, it's a bogus identifier.
- // The next word is a sequence of characters that must start upper case.
- if isASCIILower(c) {
- c -= 'a' - 'A' // convert lowercase to uppercase
- }
- b = append(b, c)
-
- // Accept lower case sequence that follows.
- for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
- b = append(b, s[i+1])
- }
- }
- }
- return string(b)
-}
-
-// Is c an ASCII lower-case letter?
-func isASCIILower(c byte) bool {
- return 'a' <= c && c <= 'z'
-}
-
-// Is c an ASCII digit?
-func isASCIIDigit(c byte) bool {
- return '0' <= c && c <= '9'
-}
diff --git a/compiler/protogen/names_test.go b/compiler/protogen/names_test.go
deleted file mode 100644
index 6f03cc9..0000000
--- a/compiler/protogen/names_test.go
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package protogen
-
-import "testing"
-
-func TestCamelCase(t *testing.T) {
- tests := []struct {
- in, want string
- }{
- {"", ""},
- {"one", "One"},
- {"one_two", "OneTwo"},
- {"_my_field_name_2", "XMyFieldName_2"},
- {"Something_Capped", "Something_Capped"},
- {"my_Name", "My_Name"},
- {"OneTwo", "OneTwo"},
- {"_", "X"},
- {"_a_", "XA_"},
- {"one.two", "OneTwo"},
- {"one.Two", "One_Two"},
- {"one_two.three_four", "OneTwoThreeFour"},
- {"one_two.Three_four", "OneTwo_ThreeFour"},
- {"_one._two", "XOne_XTwo"},
- {"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
- {"double__underscore", "Double_Underscore"},
- {"camelCase", "CamelCase"},
- {"go2proto", "Go2Proto"},
- {"世界", "世界"},
- {"x世界", "X世界"},
- {"foo_bar世界", "FooBar世界"},
- }
- for _, tc := range tests {
- if got := camelCase(tc.in); got != tc.want {
- t.Errorf("CamelCase(%q) = %q, want %q", tc.in, got, tc.want)
- }
- }
-}
-
-func TestCleanGoName(t *testing.T) {
- tests := []struct {
- in, want string
- }{
- {"", "_"},
- {"boo", "boo"},
- {"Boo", "Boo"},
- {"ßoo", "ßoo"},
- {"default", "_default"},
- {"hello", "hello"},
- {"hello-world!!", "hello_world__"},
- {"hello-\xde\xad\xbe\xef\x00", "hello_____"},
- {"hello 世界", "hello_世界"},
- {"世界", "世界"},
- }
- for _, tc := range tests {
- if got := cleanGoName(tc.in); got != tc.want {
- t.Errorf("cleanGoName(%q) = %q, want %q", tc.in, got, tc.want)
- }
- }
-}
diff --git a/compiler/protogen/protogen.go b/compiler/protogen/protogen.go
index ec03704..3be898d 100644
--- a/compiler/protogen/protogen.go
+++ b/compiler/protogen/protogen.go
@@ -30,6 +30,7 @@
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/internal/fieldnum"
+ "google.golang.org/protobuf/internal/strs"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
@@ -431,7 +432,7 @@
}
}
f.GoDescriptorIdent = GoIdent{
- GoName: "File_" + cleanGoName(p.GetName()),
+ GoName: "File_" + strs.GoSanitized(p.GetName()),
GoImportPath: f.GoImportPath,
}
f.GeneratedFilenamePrefix = prefix
@@ -499,6 +500,8 @@
}
// A semicolon-delimited suffix delimits the import path and package name.
if i := strings.Index(opt, ";"); i >= 0 {
+ // TODO: The package name is explicitly provided by the .proto file.
+ // Rather than sanitizing it, we should pass it verbatim.
return cleanPackageName(opt[i+1:]), GoImportPath(opt[:i])
}
// The presence of a slash implies there's an import path.
@@ -756,7 +759,7 @@
default:
loc = message.Location.appendPath(fieldnum.DescriptorProto_Field, int32(desc.Index()))
}
- camelCased := camelCase(string(desc.Name()))
+ camelCased := strs.GoCamelCase(string(desc.Name()))
var parentPrefix string
if message != nil {
parentPrefix = message.GoIdent.GoName + "_"
@@ -826,7 +829,7 @@
func newOneof(gen *Plugin, f *File, message *Message, desc protoreflect.OneofDescriptor) *Oneof {
loc := message.Location.appendPath(fieldnum.DescriptorProto_OneofDecl, int32(desc.Index()))
- camelCased := camelCase(string(desc.Name()))
+ camelCased := strs.GoCamelCase(string(desc.Name()))
parentPrefix := message.GoIdent.GoName + "_"
return &Oneof{
Desc: desc,
@@ -860,7 +863,7 @@
loc := f.location(fieldnum.FileDescriptorProto_Service, int32(desc.Index()))
service := &Service{
Desc: desc,
- GoName: camelCase(string(desc.Name())),
+ GoName: strs.GoCamelCase(string(desc.Name())),
Location: loc,
Comments: f.comments[newPathKey(loc.Path)],
}
@@ -889,7 +892,7 @@
loc := service.Location.appendPath(fieldnum.ServiceDescriptorProto_Method, int32(desc.Index()))
method := &Method{
Desc: desc,
- GoName: camelCase(string(desc.Name())),
+ GoName: strs.GoCamelCase(string(desc.Name())),
Parent: service,
Location: loc,
Comments: f.comments[newPathKey(loc.Path)],
@@ -1183,6 +1186,56 @@
return string(b), nil
}
+// A GoIdent is a Go identifier, consisting of a name and import path.
+// The name is a single identifier and may not be a dot-qualified selector.
+type GoIdent struct {
+ GoName string
+ GoImportPath GoImportPath
+}
+
+func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
+
+// newGoIdent returns the Go identifier for a descriptor.
+func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
+ name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
+ return GoIdent{
+ GoName: strs.GoCamelCase(name),
+ GoImportPath: f.GoImportPath,
+ }
+}
+
+// A GoImportPath is the import path of a Go package.
+// For example: "google.golang.org/protobuf/compiler/protogen"
+type GoImportPath string
+
+func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
+
+// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
+func (p GoImportPath) Ident(s string) GoIdent {
+ return GoIdent{GoName: s, GoImportPath: p}
+}
+
+// A GoPackageName is the name of a Go package. e.g., "protobuf".
+type GoPackageName string
+
+// cleanPackageName converts a string to a valid Go package name.
+func cleanPackageName(name string) GoPackageName {
+ return GoPackageName(strs.GoSanitized(name))
+}
+
+// baseName returns the last path element of the name, with the last dotted suffix removed.
+func baseName(name string) string {
+ // First, find the last element
+ if i := strings.LastIndex(name, "/"); i >= 0 {
+ name = name[i+1:]
+ }
+ // Now drop the suffix
+ if i := strings.LastIndex(name, "."); i >= 0 {
+ name = name[:i]
+ }
+ return name
+}
+
type pathType int
const (
diff --git a/internal/impl/legacy_enum.go b/internal/impl/legacy_enum.go
index 279baa9..4ec31df 100644
--- a/internal/impl/legacy_enum.go
+++ b/internal/impl/legacy_enum.go
@@ -11,10 +11,27 @@
"sync"
"google.golang.org/protobuf/internal/filedesc"
+ "google.golang.org/protobuf/internal/strs"
"google.golang.org/protobuf/reflect/protoreflect"
pref "google.golang.org/protobuf/reflect/protoreflect"
)
+// legacyEnumName returns the name that legacy code uses for the enum ed:
+// the proto package, a ".", and the Go-camel-cased package-relative name.
+// If the package is empty or ed has no parent file, the prefix is omitted.
+func legacyEnumName(ed pref.EnumDescriptor) string {
+ var protoPkg string
+ enumName := string(ed.FullName())
+ if fd := ed.ParentFile(); fd != nil {
+ protoPkg = string(fd.Package())
+ enumName = strings.TrimPrefix(enumName, protoPkg+".")
+ }
+ if protoPkg == "" {
+ return strs.GoCamelCase(enumName)
+ }
+ return protoPkg + "." + strs.GoCamelCase(enumName)
+}
+
// legacyWrapEnum wraps v as a protoreflect.Enum,
// where v must be a int32 kind and not implement the v2 API already.
func legacyWrapEnum(v reflect.Value) pref.Enum {
diff --git a/internal/impl/legacy_export.go b/internal/impl/legacy_export.go
index 07c16b5..29c1b01 100644
--- a/internal/impl/legacy_export.go
+++ b/internal/impl/legacy_export.go
@@ -21,6 +21,11 @@
// These functions exist to support exported APIs in generated protobufs.
// While these are deprecated, they cannot be removed for compatibility reasons.
+// LegacyEnumName returns the legacy-code name of ed; it delegates to legacyEnumName.
+func (Export) LegacyEnumName(ed pref.EnumDescriptor) string {
+ return legacyEnumName(ed)
+}
+
// UnmarshalJSONEnum unmarshals an enum from a JSON-encoded input.
// The input can either be a string representing the enum value by name,
// or a number representing the enum number itself.
diff --git a/internal/impl/legacy_extension.go b/internal/impl/legacy_extension.go
index b484067..ec5420d 100644
--- a/internal/impl/legacy_extension.go
+++ b/internal/impl/legacy_extension.go
@@ -77,31 +77,10 @@
}
}
- // Reconstruct the legacy enum full name, which is an odd mixture of the
- // proto package name with the Go type name.
+ // Reconstruct the legacy enum full name.
var enumName string
if xd.Kind() == pref.EnumKind {
- // Derive Go type name.
- t := extType
- if t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice {
- t = t.Elem()
- }
- enumName = t.Name()
-
- // Derive the proto package name.
- // For legacy enums, obtain the proto package from the raw descriptor.
- var protoPkg string
- if fd := xd.Enum().ParentFile(); fd != nil {
- protoPkg = string(fd.Package())
- }
- if ed, ok := reflect.Zero(t).Interface().(enumV1); ok && protoPkg == "" {
- b, _ := ed.EnumDescriptor()
- protoPkg = string(legacyLoadFileDesc(b).Package())
- }
-
- if protoPkg != "" {
- enumName = protoPkg + "." + enumName
- }
+ enumName = legacyEnumName(xd.Enum())
}
// Derive the proto file that the extension was declared within.
diff --git a/internal/strs/strings.go b/internal/strs/strings.go
index 2208ff2..0b74e76 100644
--- a/internal/strs/strings.go
+++ b/internal/strs/strings.go
@@ -6,8 +6,10 @@
package strs
import (
+ "go/token"
"strings"
"unicode"
+ "unicode/utf8"
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/reflect/protoreflect"
@@ -23,6 +25,68 @@
return fd.Syntax() == protoreflect.Proto3
}
+// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
+//
+// A '_' at the start or right after a '.' becomes 'X'. Otherwise a '_' or '.' before
+// an ASCII lower case letter is dropped and the letter upper-cased; other '.' become '_'.
+func GoCamelCase(s string) string {
+ // Invariant: if the next letter is lower case, it must be converted
+ // to upper case.
+ // That is, we process a word at a time, where words are marked by _ or
+ // upper case letter. Digits are treated as words.
+ var b []byte
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ switch {
+ case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
+ // Skip over '.' in ".{{lowercase}}".
+ case c == '.':
+ b = append(b, '_') // convert '.' to '_'
+ case c == '_' && (i == 0 || s[i-1] == '.'):
+ // Convert initial '_' to ensure we start with a capital letter.
+ // Do the same for '_' after '.' to match historic behavior.
+ b = append(b, 'X') // convert '_' to 'X'
+ case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
+ // Skip over '_' in "_{{lowercase}}".
+ case isASCIIDigit(c):
+ b = append(b, c)
+ default:
+ // Assume we have a letter now - if not, it's a bogus identifier.
+ // The next word is a sequence of characters that must start upper case.
+ if isASCIILower(c) {
+ c -= 'a' - 'A' // convert lowercase to uppercase
+ }
+ b = append(b, c)
+
+ // Accept lower case sequence that follows.
+ for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
+ b = append(b, s[i+1])
+ }
+ }
+ }
+ return string(b)
+}
+
+// GoSanitized converts s to a valid Go identifier, substituting or prefixing '_'.
+func GoSanitized(s string) string {
+ // Replace every rune that is neither a Unicode letter (category L)
+ // nor a Unicode decimal digit (category Nd) with '_'.
+ s = strings.Map(func(r rune) rune {
+ if unicode.IsLetter(r) || unicode.IsDigit(r) {
+ return r
+ }
+ return '_'
+ }, s)
+
+ // Prepend '_' in the event of a Go keyword conflict or if
+ // the identifier is invalid (does not start in the Unicode L category).
+ r, _ := utf8.DecodeRuneInString(s)
+ if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
+ return "_" + s
+ }
+ return s
+}
+
// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
// according to the protobuf JSON specification.
func JSONCamelCase(s string) string {
@@ -31,8 +95,7 @@
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
c := s[i]
if c != '_' {
- isLower := 'a' <= c && c <= 'z'
- if wasUnderscore && isLower {
+ if wasUnderscore && isASCIILower(c) {
c -= 'a' - 'A' // convert to uppercase
}
b = append(b, c)
@@ -48,8 +111,7 @@
var b []byte
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
c := s[i]
- isUpper := 'A' <= c && c <= 'Z'
- if isUpper {
+ if isASCIIUpper(c) {
b = append(b, '_')
c += 'a' - 'A' // convert to lowercase
}
@@ -122,3 +184,13 @@
}
return s
}
+
+func isASCIILower(c byte) bool {
+ return 'a' <= c && c <= 'z'
+}
+func isASCIIUpper(c byte) bool {
+ return 'A' <= c && c <= 'Z'
+}
+func isASCIIDigit(c byte) bool {
+ return '0' <= c && c <= '9'
+}
diff --git a/internal/strs/strings_test.go b/internal/strs/strings_test.go
index 2c4c2ad..0bb894a 100644
--- a/internal/strs/strings_test.go
+++ b/internal/strs/strings_test.go
@@ -9,6 +9,61 @@
"testing"
)
+func TestGoCamelCase(t *testing.T) {
+ tests := []struct {
+ in, want string
+ }{
+ {"", ""},
+ {"one", "One"},
+ {"one_two", "OneTwo"},
+ {"_my_field_name_2", "XMyFieldName_2"},
+ {"Something_Capped", "Something_Capped"},
+ {"my_Name", "My_Name"},
+ {"OneTwo", "OneTwo"},
+ {"_", "X"},
+ {"_a_", "XA_"},
+ {"one.two", "OneTwo"},
+ {"one.Two", "One_Two"},
+ {"one_two.three_four", "OneTwoThreeFour"},
+ {"one_two.Three_four", "OneTwo_ThreeFour"},
+ {"_one._two", "XOne_XTwo"},
+ {"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
+ {"double__underscore", "Double_Underscore"},
+ {"camelCase", "CamelCase"},
+ {"go2proto", "Go2Proto"},
+ {"世界", "世界"},
+ {"x世界", "X世界"},
+ {"foo_bar世界", "FooBar世界"},
+ }
+ for _, tc := range tests {
+ if got := GoCamelCase(tc.in); got != tc.want {
+ t.Errorf("GoCamelCase(%q) = %q, want %q", tc.in, got, tc.want)
+ }
+ }
+}
+
+func TestGoSanitized(t *testing.T) {
+ tests := []struct {
+ in, want string
+ }{
+ {"", "_"},
+ {"boo", "boo"},
+ {"Boo", "Boo"},
+ {"ßoo", "ßoo"},
+ {"default", "_default"},
+ {"hello", "hello"},
+ {"hello-world!!", "hello_world__"},
+ {"hello-\xde\xad\xbe\xef\x00", "hello_____"},
+ {"hello 世界", "hello_世界"},
+ {"世界", "世界"},
+ }
+ for _, tc := range tests {
+ if got := GoSanitized(tc.in); got != tc.want {
+ t.Errorf("GoSanitized(%q) = %q, want %q", tc.in, got, tc.want)
+ }
+ }
+}
+
func TestName(t *testing.T) {
tests := []struct {
in string