protogen: generate message skeletons
Copy generator.CamelCase for camel-casing names, with one change: Convert
'.' in names to '_'. This removes the need for the CamelCaseSlice function
which operates on a []string representing a name split along '.'s.
Add protogen.Message.
Reformat generated code.
Add regenerate.bash, largely copied from regenerate.sh.
Change-Id: Iecf0bfc43b552f53e458499a328b933b0c9c5f82
Reviewed-on: https://go-review.googlesource.com/130915
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/cmd/protoc-gen-go/main.go b/cmd/protoc-gen-go/main.go
index 9ea1420..7973895 100644
--- a/cmd/protoc-gen-go/main.go
+++ b/cmd/protoc-gen-go/main.go
@@ -30,6 +30,21 @@
g.P("// source: ", f.Desc.GetName())
g.P()
g.P("package TODO")
+ g.P()
+
+ for _, m := range f.Messages {
+ genMessage(gen, g, m)
+ }
// TODO: Everything.
}
+
+// genMessage writes an empty struct type declaration named m.GoIdent to g,
+// then calls g.P with no arguments as a separator, and finally does the
+// same for every message nested in m, depth first.
+func genMessage(gen *protogen.Plugin, g *protogen.GeneratedFile, m *protogen.Message) {
+	g.P("type ", m.GoIdent, " struct {")
+	g.P("}")
+	g.P()
+
+	// Nested messages are emitted after their parent, as sibling Go types.
+	for i := range m.Messages {
+		genMessage(gen, g, m.Messages[i])
+	}
+}
diff --git a/cmd/protoc-gen-go/testdata/proto2/nested_messages.pb.go b/cmd/protoc-gen-go/testdata/proto2/nested_messages.pb.go
new file mode 100644
index 0000000..014d3a4
--- /dev/null
+++ b/cmd/protoc-gen-go/testdata/proto2/nested_messages.pb.go
@@ -0,0 +1,13 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// source: proto2/nested_messages.proto
+
+package TODO
+
+type Layer1 struct {
+}
+
+type Layer1_Layer2 struct {
+}
+
+type Layer1_Layer2_Layer3 struct {
+}
diff --git a/cmd/protoc-gen-go/testdata/proto2/nested_messages.proto b/cmd/protoc-gen-go/testdata/proto2/nested_messages.proto
new file mode 100644
index 0000000..85e77b2
--- /dev/null
+++ b/cmd/protoc-gen-go/testdata/proto2/nested_messages.proto
@@ -0,0 +1,19 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+syntax = "proto2";
+
+package goproto.protoc.proto2;
+
+option go_package = "google.golang.org/proto/cmd/protoc-gen-go/testdata/proto2";
+
+// Layer1 nests message declarations three levels deep. The golden Go
+// output for this file declares them as Layer1, Layer1_Layer2 and
+// Layer1_Layer2_Layer3.
+message Layer1 {
+ message Layer2 {
+ message Layer3 {
+ }
+ optional Layer3 l3 = 1;
+ }
+ optional Layer2 l2 = 1;
+ // Refers to the doubly nested type by its path relative to Layer1.
+ optional Layer2.Layer3 l3 = 2;
+}
diff --git a/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go b/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go
index 558363d..771028e 100644
--- a/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go
+++ b/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go
@@ -2,3 +2,6 @@
// source: proto2/proto2.proto
package TODO
+
+type Message struct {
+}
diff --git a/protogen/names.go b/protogen/names.go
index 1976cfd..b97c47d 100644
--- a/protogen/names.go
+++ b/protogen/names.go
@@ -8,6 +8,9 @@
"unicode/utf8"
)
+// A GoIdent is a Go identifier, such as the name of a generated Go type.
+type GoIdent string
+
// A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
type GoImportPath string
@@ -52,3 +55,62 @@
}
return name
}
+
+// camelCase converts a name to CamelCase and returns it as a GoIdent.
+//
+// If there is an interior underscore followed by a lower case letter,
+// drop the underscore and convert the letter to upper case.
+// There is a remote possibility of this rewrite causing a name collision,
+// but it's so remote we're prepared to pretend it's nonexistent - since the
+// C++ generator lowercases names, it's extremely unlikely to have two fields
+// with different capitalizations.
+//
+// Each '.' in s is replaced by '_', and a '_' that starts s or directly
+// follows a '.' is replaced by 'X'. For example, "one_two.three_four"
+// becomes "OneTwo_ThreeFour" and "_one._two" becomes "XOne_XTwo".
+// Only ASCII letters and digits are recognized; the empty string maps to
+// the empty GoIdent.
+func camelCase(s string) GoIdent {
+ if s == "" {
+ return ""
+ }
+ var t []byte
+ i := 0
+ // Invariant: if the next letter is lower case, it must be converted
+ // to upper case.
+ // That is, we process a word at a time, where words are marked by _ or
+ // upper case letter. Digits are treated as words.
+ for ; i < len(s); i++ {
+ c := s[i]
+ switch {
+ case c == '.':
+ t = append(t, '_') // Convert . to _.
+ case c == '_' && (i == 0 || s[i-1] == '.'):
+ // Convert initial _ to X so we start with a capital letter.
+ // Do the same for _ after .; not strictly necessary, but matches
+ // historic behavior.
+ t = append(t, 'X')
+ case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
+ // Skip the underscore in s.
+ // The letter after it is capitalized by the default case on the
+ // next iteration.
+ case isASCIIDigit(c):
+ // Digits are copied through unchanged.
+ t = append(t, c)
+ default:
+ // Assume we have a letter now - if not, it's a bogus identifier.
+ // The next word is a sequence of characters that must start upper case.
+ if isASCIILower(c) {
+ c ^= ' ' // Make it a capital letter (clears the 0x20 bit).
+ }
+ t = append(t, c) // Guaranteed not lower case.
+ // Accept lower case sequence that follows.
+ // Advancing i here keeps the outer loop from capitalizing these letters.
+ for i+1 < len(s) && isASCIILower(s[i+1]) {
+ i++
+ t = append(t, s[i])
+ }
+ }
+ }
+ return GoIdent(t)
+}
+
+// isASCIILower reports whether c is one of the ASCII letters 'a' through 'z'.
+func isASCIILower(c byte) bool {
+	return c >= 'a' && c <= 'z'
+}
+
+// isASCIIDigit reports whether c is one of the ASCII digits '0' through '9'.
+func isASCIIDigit(c byte) bool {
+	return c >= '0' && c <= '9'
+}
diff --git a/protogen/names_test.go b/protogen/names_test.go
new file mode 100644
index 0000000..021e71a
--- /dev/null
+++ b/protogen/names_test.go
@@ -0,0 +1,33 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package protogen
+
+import "testing"
+
+// TestCamelCase checks camelCase against a table of names covering
+// underscores, dots, leading underscores and already-capitalized input.
+func TestCamelCase(t *testing.T) {
+	tests := []struct {
+		in   string
+		want GoIdent
+	}{
+		{"one", "One"},
+		{"one_two", "OneTwo"},
+		{"_my_field_name_2", "XMyFieldName_2"},
+		{"Something_Capped", "Something_Capped"},
+		{"my_Name", "My_Name"},
+		{"OneTwo", "OneTwo"},
+		{"_", "X"},
+		{"_a_", "XA_"},
+		{"one.two", "One_Two"},
+		{"one_two.three_four", "OneTwo_ThreeFour"},
+		{"_one._two", "XOne_XTwo"},
+		{"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
+		{"double__underscore", "Double_Underscore"},
+	}
+	for _, tc := range tests {
+		if got := camelCase(tc.in); got != tc.want {
+			// Name the function actually under test in the failure output.
+			t.Errorf("camelCase(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
diff --git a/protogen/protogen.go b/protogen/protogen.go
index b10edad..f499a8d 100644
--- a/protogen/protogen.go
+++ b/protogen/protogen.go
@@ -11,8 +11,12 @@
package protogen
import (
+ "bufio"
"bytes"
"fmt"
+ "go/parser"
+ "go/printer"
+ "go/token"
"io/ioutil"
"os"
"path/filepath"
@@ -157,9 +161,15 @@
return resp
}
for _, gf := range gen.genFiles {
+ content, err := gf.Content()
+ if err != nil {
+ return &pluginpb.CodeGeneratorResponse{
+ Error: proto.String(err.Error()),
+ }
+ }
resp.File = append(resp.File, &pluginpb.CodeGeneratorResponse_File{
Name: proto.String(gf.path),
- Content: proto.String(string(gf.Content())),
+ Content: proto.String(string(content)),
})
}
return resp
@@ -171,19 +181,44 @@
return f, ok
}
-// A File is a .proto source file.
+// A File describes a .proto source file.
type File struct {
- // TODO: Replace with protoreflect.FileDescriptor.
- Desc *descpb.FileDescriptorProto
+ Desc *descpb.FileDescriptorProto // TODO: protoreflect.FileDescriptor
- // Generate is true if the generator should generate code for this file.
- Generate bool
+ Messages []*Message // top-level message declarations
+ Generate bool // true if we should generate code for this file
}
+// newFile returns a File wrapping p, with one Message built by newMessage
+// for each entry of p.MessageType, in order.
func newFile(gen *Plugin, p *descpb.FileDescriptorProto) *File {
- return &File{
+ f := &File{
Desc: p,
}
+ // Top-level messages have no parent, hence the nil argument.
+ for _, d := range p.MessageType {
+ f.Messages = append(f.Messages, newMessage(gen, nil, d))
+ }
+ return f
+}
+
+// A Message describes a message declaration and the Go type generated
+// for it.
+type Message struct {
+ Desc *descpb.DescriptorProto // TODO: protoreflect.MessageDescriptor
+
+ // For a nested message, GoIdent is the parent's GoIdent, an underscore,
+ // and the camel-cased message name.
+ GoIdent GoIdent // name of the generated Go type
+ Messages []*Message // nested message declarations
+}
+
+// newMessage builds the Message for descriptor p, recursing into p's
+// nested types. parent is the enclosing Message, or nil when p is declared
+// at the top level of a file.
+func newMessage(gen *Plugin, parent *Message, p *descpb.DescriptorProto) *Message {
+	ident := camelCase(p.GetName())
+	if parent != nil {
+		// Qualify nested types with the Go name of the enclosing type.
+		ident = parent.GoIdent + "_" + ident
+	}
+	m := &Message{Desc: p, GoIdent: ident}
+	for _, d := range p.GetNestedType() {
+		m.Messages = append(m.Messages, newMessage(gen, m, d))
+	}
+	return m
}
// A GeneratedFile is a generated file.
@@ -219,6 +254,31 @@
}
// Content returns the contents of the generated file.
-func (g *GeneratedFile) Content() []byte {
- return g.buf.Bytes()
+//
+// If the file's path ends in ".go", the buffered source is parsed and
+// reprinted with go/printer before being returned. A parse failure is
+// reported as an error whose text includes the offending source with line
+// numbers. Any other file is returned exactly as written.
+func (g *GeneratedFile) Content() ([]byte, error) {
+	if !strings.HasSuffix(g.path, ".go") {
+		return g.buf.Bytes(), nil
+	}
+
+	// Reformat generated code.
+	original := g.buf.Bytes()
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "", original, parser.ParseComments)
+	if err != nil {
+		// Print out the bad code with line numbers.
+		// This should never happen in practice, but it can while changing generated code
+		// so consider this a debugging aid.
+		var src bytes.Buffer
+		s := bufio.NewScanner(bytes.NewReader(original))
+		// Let a single line be as long as the whole input. With the default
+		// token limit the scanner stops at the first over-long line and the
+		// rest of the listing would be dropped without notice.
+		s.Buffer(nil, len(original)+1)
+		for line := 1; s.Scan(); line++ {
+			fmt.Fprintf(&src, "%5d\t%s\n", line, s.Bytes())
+		}
+		return nil, fmt.Errorf("%v: unparsable Go source: %v\n%v", g.path, err, src.String())
+	}
+	var out bytes.Buffer
+	cfg := printer.Config{Mode: printer.TabIndent | printer.UseSpaces, Tabwidth: 8}
+	if err := cfg.Fprint(&out, fset, file); err != nil {
+		return nil, fmt.Errorf("%v: can not reformat Go source: %v", g.path, err)
+	}
+	// TODO: Patch annotation locations.
+	return out.Bytes(), nil
}
diff --git a/regenerate.bash b/regenerate.bash
new file mode 100755
index 0000000..d6c8652
--- /dev/null
+++ b/regenerate.bash
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Builds protoc-gen-go from the working tree and runs protoc with it over
+# every .proto file under the directories listed in PROTO_DIRS.
+set -e
+
+# Install the working tree's protoc-gen-go in a tempdir.
+tmpdir=$(mktemp -d -t protobuf-regen.XXXXXX)
+trap 'rm -rf "$tmpdir"' EXIT
+mkdir -p "$tmpdir/bin"
+PATH="$tmpdir/bin:$PATH"
+GOBIN="$tmpdir/bin" go install ./cmd/protoc-gen-go
+
+# Public imports require at least Go 1.9.
+supportTypeAliases=""
+if go list -f '{{context.ReleaseTags}}' runtime | grep -qF go1.9; then
+  supportTypeAliases=1
+fi
+
+# Generate various test protos.
+PROTO_DIRS=(
+  cmd/protoc-gen-go/testdata
+)
+for dir in "${PROTO_DIRS[@]}"; do
+  # Read NUL-delimited paths on fd 3 so names containing whitespace stay
+  # intact and protoc's stdin is left alone.
+  while IFS= read -r -d '' p <&3; do
+    if [[ $p == */import_public/* && ! $supportTypeAliases ]]; then
+      echo "# $p (skipped)"
+      continue
+    fi
+    echo "# $p"
+    protoc -I"$dir" --go_out=paths=source_relative:"$dir" "$p"
+  done 3< <(find "$dir" -name "*.proto" -print0)
+done