protogen: include source comments in generated code

Add initial handling of location paths (arrays of integers identifying
an entity in a .proto source file).

Expose path info in protogen; each descriptor has a Path field containing
its location path.

Format comments in protoc-gen-go. This contains one change from
github.com/golang/protobuf: Package comments are now included before the
package statement (but not attached to it) and use // comment syntax
instead of /* */. For example:

Before:

	package test

	/*
	This package contains interesting messages.
	*/

After:

	// This package contains interesting messages.

	package test

Change-Id: Ieee13ae77b3584f7562183100554d3df732348aa
Reviewed-on: https://go-review.googlesource.com/133915
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/cmd/protoc-gen-go/main.go b/cmd/protoc-gen-go/main.go
index b9223b0..d330941 100644
--- a/cmd/protoc-gen-go/main.go
+++ b/cmd/protoc-gen-go/main.go
@@ -13,6 +13,7 @@
 	"encoding/hex"
 	"fmt"
 	"strconv"
+	"strings"
 
 	"github.com/golang/protobuf/proto"
 	descpb "github.com/golang/protobuf/protoc-gen-go/descriptor"
@@ -31,22 +32,39 @@
 	})
 }
 
-func genFile(gen *protogen.Plugin, f *protogen.File) {
+type File struct {
+	*protogen.File
+	locationMap map[string][]*descpb.SourceCodeInfo_Location
+}
+
+func genFile(gen *protogen.Plugin, file *protogen.File) {
+	f := &File{
+		File:        file,
+		locationMap: make(map[string][]*descpb.SourceCodeInfo_Location),
+	}
+	for _, loc := range file.Proto.GetSourceCodeInfo().GetLocation() {
+		key := pathKey(loc.Path)
+		f.locationMap[key] = append(f.locationMap[key], loc)
+	}
+
 	g := gen.NewGeneratedFile(f.GeneratedFilenamePrefix+".pb.go", f.GoImportPath)
 	g.P("// Code generated by protoc-gen-go. DO NOT EDIT.")
 	g.P("// source: ", f.Desc.Path())
 	g.P()
+	const filePackageField = 2 // FileDescriptorProto.package
+	genComment(g, f, []int32{filePackageField})
+	g.P()
 	g.P("package ", f.GoPackageName)
 	g.P()
 
-	for _, m := range f.Messages {
-		genMessage(gen, g, m)
+	for _, message := range f.Messages {
+		genMessage(gen, g, f, message)
 	}
 
 	genFileDescriptor(gen, g, f)
 }
 
-func genFileDescriptor(gen *protogen.Plugin, g *protogen.GeneratedFile, f *protogen.File) {
+func genFileDescriptor(gen *protogen.Plugin, g *protogen.GeneratedFile, f *File) {
 	// Determine the name of the var holding the file descriptor:
 	//
 	//     fileDescriptor_<hash of filename>
@@ -90,12 +108,37 @@
 	g.P()
 }
 
-func genMessage(gen *protogen.Plugin, g *protogen.GeneratedFile, m *protogen.Message) {
-	g.P("type ", m.GoIdent, " struct {")
+func genMessage(gen *protogen.Plugin, g *protogen.GeneratedFile, f *File, message *protogen.Message) {
+	genComment(g, f, message.Path)
+	g.P("type ", message.GoIdent, " struct {")
 	g.P("}")
 	g.P()
 
-	for _, nested := range m.Messages {
-		genMessage(gen, g, nested)
+	for _, nested := range message.Messages {
+		genMessage(gen, g, f, nested)
 	}
 }
+
+func genComment(g *protogen.GeneratedFile, f *File, path []int32) {
+	for _, loc := range f.locationMap[pathKey(path)] {
+		if loc.LeadingComments == nil {
+			continue
+		}
+		for _, line := range strings.Split(strings.TrimSuffix(loc.GetLeadingComments(), "\n"), "\n") {
+			g.P("//", line)
+		}
+		return
+	}
+}
+
+// pathKey converts a location path to a string suitable for use as a map key.
+func pathKey(path []int32) string {
+	var buf []byte
+	for i, x := range path {
+		if i != 0 {
+			buf = append(buf, ',')
+		}
+		buf = strconv.AppendInt(buf, int64(x), 10)
+	}
+	return string(buf)
+}
diff --git a/cmd/protoc-gen-go/testdata/comments/comments.pb.go b/cmd/protoc-gen-go/testdata/comments/comments.pb.go
new file mode 100644
index 0000000..2b870cd
--- /dev/null
+++ b/cmd/protoc-gen-go/testdata/comments/comments.pb.go
@@ -0,0 +1,45 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// source: comments/comments.proto
+
+// COMMENT: package goproto.protoc.proto2
+
+package proto2
+
+// COMMENT: Message1
+type Message1 struct {
+}
+
+// COMMENT: Message1A
+type Message1_Message1A struct {
+}
+
+// COMMENT: Message1B
+type Message1_Message1B struct {
+}
+
+// COMMENT: Message2
+type Message2 struct {
+}
+
+// COMMENT: Message2A
+type Message2_Message2A struct {
+}
+
+// COMMENT: Message2B
+type Message2_Message2B struct {
+}
+
+func init() { proto.RegisterFile("comments/comments.proto", fileDescriptor_885e8293f1fab554) }
+
+var fileDescriptor_885e8293f1fab554 = []byte{
+	// 136 bytes of a gzipped FileDescriptorProto
+	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x12, 0x4f, 0xce, 0xcf, 0xcd,
+	0x4d, 0xcd, 0x2b, 0x29, 0xd6, 0x87, 0x31, 0xf4, 0x0a, 0x8a, 0xf2, 0x4b, 0xf2, 0x85, 0x44, 0xd3,
+	0xf3, 0xc1, 0x0c, 0x08, 0x37, 0x19, 0x42, 0x19, 0x29, 0xa9, 0x70, 0x71, 0xf8, 0xa6, 0x16, 0x17,
+	0x27, 0xa6, 0xa7, 0x1a, 0x4a, 0x71, 0x73, 0x71, 0xc2, 0xd8, 0x8e, 0xc8, 0x1c, 0x27, 0x24, 0x55,
+	0x46, 0x48, 0x12, 0x46, 0xc8, 0xaa, 0x8c, 0x9c, 0x9c, 0xac, 0xa3, 0x2c, 0xd3, 0xf3, 0xf3, 0xd3,
+	0x73, 0x52, 0xf5, 0xd2, 0xf3, 0x73, 0x12, 0xf3, 0xd2, 0xf5, 0xf2, 0x8b, 0xd2, 0xf5, 0xc1, 0xf6,
+	0xe8, 0x27, 0xe7, 0xa6, 0x40, 0x58, 0xc9, 0xba, 0xe9, 0xa9, 0x79, 0xba, 0xe9, 0xf9, 0xfa, 0x25,
+	0xa9, 0xc5, 0x25, 0x29, 0x89, 0x25, 0x89, 0x10, 0x61, 0x23, 0x40, 0x00, 0x00, 0x00, 0xff, 0xff,
+	0x32, 0x8f, 0xcd, 0x4f, 0xb9, 0x00, 0x00, 0x00,
+}
diff --git a/cmd/protoc-gen-go/testdata/comments/comments.proto b/cmd/protoc-gen-go/testdata/comments/comments.proto
new file mode 100644
index 0000000..d97c8dc
--- /dev/null
+++ b/cmd/protoc-gen-go/testdata/comments/comments.proto
@@ -0,0 +1,32 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+syntax = "proto2";
+
+// COMMENT: package goproto.protoc.proto2
+package goproto.protoc.proto2;
+
+option go_package = "google.golang.org/proto/cmd/protoc-gen-go/testdata/proto2";
+
+// COMMENT: Message1
+message Message1 {
+  // COMMENT: Message1A
+  message Message1A {
+  }
+
+  // COMMENT: Message1B
+  message Message1B {
+  }
+}
+
+// COMMENT: Message2
+message Message2 {
+  // COMMENT: Message2A
+  message Message2A {
+  }
+
+  // COMMENT: Message2B
+  message Message2B {
+  }
+}
diff --git a/protogen/protogen.go b/protogen/protogen.go
index be5e503..967f000 100644
--- a/protogen/protogen.go
+++ b/protogen/protogen.go
@@ -349,7 +349,7 @@
 	f.GeneratedFilenamePrefix = prefix
 
 	for i, mdescs := 0, desc.Messages(); i < mdescs.Len(); i++ {
-		f.Messages = append(f.Messages, newMessage(gen, f, nil, mdescs.Get(i), i))
+		f.Messages = append(f.Messages, newMessage(gen, f, nil, mdescs.Get(i)))
 	}
 	return f, nil
 }
@@ -380,15 +380,23 @@
 
 	GoIdent  GoIdent    // name of the generated Go type
 	Messages []*Message // nested message declarations
+	Path     []int32    // location path of this message
 }
 
-func newMessage(gen *Plugin, f *File, parent *Message, desc protoreflect.MessageDescriptor, index int) *Message {
+func newMessage(gen *Plugin, f *File, parent *Message, desc protoreflect.MessageDescriptor) *Message {
+	var path []int32
+	if parent != nil {
+		path = pathAppend(parent.Path, messageMessageField, int32(desc.Index()))
+	} else {
+		path = []int32{fileMessageField, int32(desc.Index())}
+	}
 	m := &Message{
 		Desc:    desc,
 		GoIdent: newGoIdent(f, desc),
+		Path:    path,
 	}
 	for i, mdescs := 0, desc.Messages(); i < mdescs.Len(); i++ {
-		m.Messages = append(m.Messages, newMessage(gen, f, m, mdescs.Get(i), i))
+		m.Messages = append(m.Messages, newMessage(gen, f, m, mdescs.Get(i)))
 	}
 	return m
 }
@@ -501,3 +509,34 @@
 	pathTypeImport pathType = iota
 	pathTypeSourceRelative
 )
+
+// The SourceCodeInfo message describes the location of elements of a parsed
+// .proto file by way of a "path", which is a sequence of integers that
+// describe the route from a FileDescriptorProto to the relevant submessage.
+// The path alternates between a field number of a repeated field, and an index
+// into that repeated field. The constants below define the field numbers that
+// are used.
+//
+// See descriptor.proto for more information about this.
+const (
+	// field numbers in FileDescriptorProto
+	filePackageField = 2 // package
+	fileMessageField = 4 // message_type
+	fileEnumField    = 5 // enum_type
+	// field numbers in DescriptorProto
+	messageFieldField   = 2 // field
+	messageMessageField = 3 // nested_type
+	messageEnumField    = 4 // enum_type
+	messageOneofField   = 8 // oneof_decl
+	// field numbers in EnumDescriptorProto
+	enumValueField = 2 // value
+)
+
+// pathAppend appends elements to a location path.
+// It does not alias the original path.
+func pathAppend(path []int32, a ...int32) []int32 {
+	var n []int32
+	n = append(n, path...)
+	n = append(n, a...)
+	return n
+}