protogen: generate message skeletons

Copy generator.CamelCase for camel-casing names, with one change: Convert
'.' in names to '_'. This removes the need for the CamelCaseSlice function
which operates on a []string representing a name split along '.'s.

Add protogen.Message.

Reformat generated code.

Add regenerate.bash, largely copied from regenerate.sh.

Change-Id: Iecf0bfc43b552f53e458499a328b933b0c9c5f82
Reviewed-on: https://go-review.googlesource.com/130915
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/cmd/protoc-gen-go/main.go b/cmd/protoc-gen-go/main.go
index 9ea1420..7973895 100644
--- a/cmd/protoc-gen-go/main.go
+++ b/cmd/protoc-gen-go/main.go
@@ -30,6 +30,21 @@
 	g.P("// source: ", f.Desc.GetName())
 	g.P()
 	g.P("package TODO")
+	g.P()
+
+	for _, m := range f.Messages {
+		genMessage(gen, g, m)
+	}
 
 	// TODO: Everything.
 }
+
+func genMessage(gen *protogen.Plugin, g *protogen.GeneratedFile, m *protogen.Message) {
+	g.P("type ", m.GoIdent, " struct {")
+	g.P("}")
+	g.P()
+
+	for _, nested := range m.Messages {
+		genMessage(gen, g, nested)
+	}
+}
diff --git a/cmd/protoc-gen-go/testdata/proto2/nested_messages.pb.go b/cmd/protoc-gen-go/testdata/proto2/nested_messages.pb.go
new file mode 100644
index 0000000..014d3a4
--- /dev/null
+++ b/cmd/protoc-gen-go/testdata/proto2/nested_messages.pb.go
@@ -0,0 +1,13 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// source: proto2/nested_messages.proto
+
+package TODO
+
+type Layer1 struct {
+}
+
+type Layer1_Layer2 struct {
+}
+
+type Layer1_Layer2_Layer3 struct {
+}
diff --git a/cmd/protoc-gen-go/testdata/proto2/nested_messages.proto b/cmd/protoc-gen-go/testdata/proto2/nested_messages.proto
new file mode 100644
index 0000000..85e77b2
--- /dev/null
+++ b/cmd/protoc-gen-go/testdata/proto2/nested_messages.proto
@@ -0,0 +1,19 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+syntax = "proto2";
+
+package goproto.protoc.proto2;
+
+option go_package = "google.golang.org/proto/cmd/protoc-gen-go/testdata/proto2";
+
+message Layer1 {
+  message Layer2 {
+    message Layer3 {
+    }
+    optional Layer3 l3 = 1;
+  }
+  optional Layer2 l2 = 1;
+  optional Layer2.Layer3 l3 = 2;
+}
diff --git a/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go b/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go
index 558363d..771028e 100644
--- a/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go
+++ b/cmd/protoc-gen-go/testdata/proto2/proto2.pb.go
@@ -2,3 +2,6 @@
 // source: proto2/proto2.proto
 
 package TODO
+
+type Message struct {
+}
diff --git a/protogen/names.go b/protogen/names.go
index 1976cfd..b97c47d 100644
--- a/protogen/names.go
+++ b/protogen/names.go
@@ -8,6 +8,9 @@
 	"unicode/utf8"
 )
 
+// A GoIdent is a Go identifier.
+type GoIdent string
+
 // A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
 type GoImportPath string
 
@@ -52,3 +55,62 @@
 	}
 	return name
 }
+
+// camelCase converts a name to CamelCase.
+//
+// If there is an interior underscore followed by a lower case letter,
+// drop the underscore and convert the letter to upper case.
+// There is a remote possibility of this rewrite causing a name collision,
+// but it's so remote we're prepared to pretend it's nonexistent - since the
+// C++ generator lowercases names, it's extremely unlikely to have two fields
+// with different capitalizations.
+func camelCase(s string) GoIdent {
+	if s == "" {
+		return ""
+	}
+	var t []byte
+	i := 0
+	// Invariant: if the next letter is lower case, it must be converted
+	// to upper case.
+	// That is, we process a word at a time, where words are marked by _ or
+	// an upper case letter. Digits are treated as words.
+	for ; i < len(s); i++ {
+		c := s[i]
+		switch {
+		case c == '.':
+			t = append(t, '_') // Convert . to _.
+		case c == '_' && (i == 0 || s[i-1] == '.'):
+			// Convert initial _ to X so we start with a capital letter.
+			// Do the same for _ after .; not strictly necessary, but matches
+			// historic behavior.
+			t = append(t, 'X')
+		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
+			// Skip the underscore in s.
+		case isASCIIDigit(c):
+			t = append(t, c)
+		default:
+			// Assume we have a letter now - if not, it's a bogus identifier.
+			// The next word is a sequence of characters that must start upper case.
+			if isASCIILower(c) {
+				c ^= ' ' // Make it a capital letter.
+			}
+			t = append(t, c) // Guaranteed not lower case.
+			// Accept lower case sequence that follows.
+			for i+1 < len(s) && isASCIILower(s[i+1]) {
+				i++
+				t = append(t, s[i])
+			}
+		}
+	}
+	return GoIdent(t)
+}
+
+// Is c an ASCII lower-case letter?
+func isASCIILower(c byte) bool {
+	return 'a' <= c && c <= 'z'
+}
+
+// Is c an ASCII digit?
+func isASCIIDigit(c byte) bool {
+	return '0' <= c && c <= '9'
+}
diff --git a/protogen/names_test.go b/protogen/names_test.go
new file mode 100644
index 0000000..021e71a
--- /dev/null
+++ b/protogen/names_test.go
@@ -0,0 +1,33 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package protogen
+
+import "testing"
+
+func TestCamelCase(t *testing.T) {
+	tests := []struct {
+		in   string
+		want GoIdent
+	}{
+		{"one", "One"},
+		{"one_two", "OneTwo"},
+		{"_my_field_name_2", "XMyFieldName_2"},
+		{"Something_Capped", "Something_Capped"},
+		{"my_Name", "My_Name"},
+		{"OneTwo", "OneTwo"},
+		{"_", "X"},
+		{"_a_", "XA_"},
+		{"one.two", "One_Two"},
+		{"one_two.three_four", "OneTwo_ThreeFour"},
+		{"_one._two", "XOne_XTwo"},
+		{"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
+		{"double__underscore", "Double_Underscore"},
+	}
+	for _, tc := range tests {
+		if got := camelCase(tc.in); got != tc.want {
+			t.Errorf("CamelCase(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
diff --git a/protogen/protogen.go b/protogen/protogen.go
index b10edad..f499a8d 100644
--- a/protogen/protogen.go
+++ b/protogen/protogen.go
@@ -11,8 +11,12 @@
 package protogen
 
 import (
+	"bufio"
 	"bytes"
 	"fmt"
+	"go/parser"
+	"go/printer"
+	"go/token"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -157,9 +161,15 @@
 		return resp
 	}
 	for _, gf := range gen.genFiles {
+		content, err := gf.Content()
+		if err != nil {
+			return &pluginpb.CodeGeneratorResponse{
+				Error: proto.String(err.Error()),
+			}
+		}
 		resp.File = append(resp.File, &pluginpb.CodeGeneratorResponse_File{
 			Name:    proto.String(gf.path),
-			Content: proto.String(string(gf.Content())),
+			Content: proto.String(string(content)),
 		})
 	}
 	return resp
@@ -171,19 +181,44 @@
 	return f, ok
 }
 
-// A File is a .proto source file.
+// A File describes a .proto source file.
 type File struct {
-	// TODO: Replace with protoreflect.FileDescriptor.
-	Desc *descpb.FileDescriptorProto
+	Desc *descpb.FileDescriptorProto // TODO: protoreflect.FileDescriptor
 
-	// Generate is true if the generator should generate code for this file.
-	Generate bool
+	Messages []*Message // top-level message declarations
+	Generate bool       // true if we should generate code for this file
 }
 
 func newFile(gen *Plugin, p *descpb.FileDescriptorProto) *File {
-	return &File{
+	f := &File{
 		Desc: p,
 	}
+	for _, d := range p.MessageType {
+		f.Messages = append(f.Messages, newMessage(gen, nil, d))
+	}
+	return f
+}
+
+// A Message describes a message.
+type Message struct {
+	Desc *descpb.DescriptorProto // TODO: protoreflect.MessageDescriptor
+
+	GoIdent  GoIdent    // name of the generated Go type
+	Messages []*Message // nested message declarations
+}
+
+func newMessage(gen *Plugin, parent *Message, p *descpb.DescriptorProto) *Message {
+	m := &Message{
+		Desc:    p,
+		GoIdent: camelCase(p.GetName()),
+	}
+	if parent != nil {
+		m.GoIdent = parent.GoIdent + "_" + m.GoIdent
+	}
+	for _, nested := range p.GetNestedType() {
+		m.Messages = append(m.Messages, newMessage(gen, m, nested))
+	}
+	return m
 }
 
 // A GeneratedFile is a generated file.
@@ -219,6 +254,31 @@
 }
 
 // Content returns the contents of the generated file.
-func (g *GeneratedFile) Content() []byte {
-	return g.buf.Bytes()
+func (g *GeneratedFile) Content() ([]byte, error) {
+	if !strings.HasSuffix(g.path, ".go") {
+		return g.buf.Bytes(), nil
+	}
+
+	// Reformat generated code.
+	original := g.buf.Bytes()
+	fset := token.NewFileSet()
+	ast, err := parser.ParseFile(fset, "", original, parser.ParseComments)
+	if err != nil {
+		// Print out the bad code with line numbers.
+		// This should never happen in practice, but it can while changing generated code
+		// so consider this a debugging aid.
+		var src bytes.Buffer
+		s := bufio.NewScanner(bytes.NewReader(original))
+		for line := 1; s.Scan(); line++ {
+			fmt.Fprintf(&src, "%5d\t%s\n", line, s.Bytes())
+		}
+		return nil, fmt.Errorf("%v: unparsable Go source: %v\n%v", g.path, err, src.String())
+	}
+	var out bytes.Buffer
+	if err = (&printer.Config{Mode: printer.TabIndent | printer.UseSpaces, Tabwidth: 8}).Fprint(&out, fset, ast); err != nil {
+		return nil, fmt.Errorf("%v: can not reformat Go source: %v", g.path, err)
+	}
+	// TODO: Patch annotation locations.
+	return out.Bytes(), nil
+
 }
diff --git a/regenerate.bash b/regenerate.bash
new file mode 100755
index 0000000..d6c8652
--- /dev/null
+++ b/regenerate.bash
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -e
+
+# Install the working tree's protoc-gen-go in a tempdir.
+tmpdir=$(mktemp -d -t protobuf-regen.XXXXXX)
+trap 'rm -rf $tmpdir' EXIT
+mkdir -p $tmpdir/bin
+PATH=$tmpdir/bin:$PATH
+GOBIN=$tmpdir/bin go install ./cmd/protoc-gen-go
+
+# Public imports require at least Go 1.9.
+supportTypeAliases=""
+if go list -f '{{context.ReleaseTags}}' runtime | grep -q go1.9; then
+  supportTypeAliases=1
+fi
+
+# Generate various test protos.
+PROTO_DIRS=(
+  cmd/protoc-gen-go/testdata
+)
+for dir in ${PROTO_DIRS[@]}; do
+  for p in `find $dir -name "*.proto"`; do
+    if [[ $p == */import_public/* && ! $supportTypeAliases ]]; then
+      echo "# $p (skipped)"
+      continue;
+    fi
+    echo "# $p"
+    protoc -I$dir --go_out=paths=source_relative:$dir $p
+  done
+done