internal/legacy: remove dependency on descriptor.proto

We jump through many hoops to avoid generated protos depending on
internal/legacy. Break the cycle in the other direction: Remove
the dependency on descriptor.proto from internal/legacy by
using a hand-written parser for the few descriptor fields we need.

Still to do: Remove the descriptor.proto dependency from
internal/encoding/tag.

Change-Id: I5fd99a2170470ba8530eb2679b6dde899821bf3e
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171457
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/internal/legacy/enum.go b/internal/legacy/enum.go
index 831b94c..0ee397f 100644
--- a/internal/legacy/enum.go
+++ b/internal/legacy/enum.go
@@ -13,8 +13,6 @@
 	ptype "github.com/golang/protobuf/v2/internal/prototype"
 	pvalue "github.com/golang/protobuf/v2/internal/value"
 	pref "github.com/golang/protobuf/v2/reflect/protoreflect"
-
-	descriptorpb "github.com/golang/protobuf/v2/types/descriptor"
 )
 
 // wrapEnum wraps v as a protoreflect.Enum,
@@ -107,7 +105,7 @@
 	}
 	if ed, ok := ev.(enumV1); ok {
 		b, idxs := ed.EnumDescriptor()
-		fd := LoadFileDesc(b)
+		fd := loadFileDesc(b)
 
 		// Derive syntax.
 		switch fd.GetSyntax() {
@@ -118,7 +116,7 @@
 		}
 
 		// Derive the full name and correct enum descriptor.
-		var ed *descriptorpb.EnumDescriptorProto
+		var ed *enumDescriptorProto
 		e.FullName = pref.FullName(fd.GetPackage())
 		if len(idxs) == 1 {
 			ed = fd.EnumType[idxs[0]]
@@ -135,7 +133,7 @@
 		}
 
 		// Derive the enum values.
-		for _, vd := range ed.GetValue() {
+		for _, vd := range ed.Value {
 			e.Values = append(e.Values, ptype.EnumValue{
 				Name:   pref.Name(vd.GetName()),
 				Number: pref.EnumNumber(vd.GetNumber()),
diff --git a/internal/legacy/extension.go b/internal/legacy/extension.go
index b14b21d..19205ea 100644
--- a/internal/legacy/extension.go
+++ b/internal/legacy/extension.go
@@ -112,7 +112,7 @@
 		}
 		if ed, ok := ev.(enumV1); ok && protoPkg == "" {
 			b, _ := ed.EnumDescriptor()
-			protoPkg = LoadFileDesc(b).GetPackage()
+			protoPkg = loadFileDesc(b).GetPackage()
 		}
 
 		if protoPkg != "" {
diff --git a/internal/legacy/file.go b/internal/legacy/file.go
index f4984c8..0f871a0 100644
--- a/internal/legacy/file.go
+++ b/internal/legacy/file.go
@@ -10,9 +10,7 @@
 	"io/ioutil"
 	"sync"
 
-	"github.com/golang/protobuf/v2/proto"
 	pref "github.com/golang/protobuf/v2/reflect/protoreflect"
-	descriptorpb "github.com/golang/protobuf/v2/types/descriptor"
 )
 
 // Every enum and message type generated by protoc-gen-go since commit 2fc053c5
@@ -34,22 +32,19 @@
 
-var fileDescCache sync.Map // map[*byte]*descriptorpb.FileDescriptorProto
+var fileDescCache sync.Map // map[*byte]*fileDescriptorProto
 
-// LoadFileDesc unmarshals b as a compressed FileDescriptorProto message.
+// loadFileDesc unmarshals b as a compressed FileDescriptorProto message.
 //
 // This assumes that b is immutable and that b does not refer to part of a
 // concatenated series of GZIP files (which would require shenanigans that
 // rely on the concatenation properties of both protobufs and GZIP).
 // File descriptors generated by protoc-gen-go do not rely on that property.
-//
-// This is exported for testing purposes.
-func LoadFileDesc(b []byte) *descriptorpb.FileDescriptorProto {
+func loadFileDesc(b []byte) *fileDescriptorProto {
 	// Fast-path: check whether we already have a cached file descriptor.
 	if fd, ok := fileDescCache.Load(&b[0]); ok {
-		return fd.(*descriptorpb.FileDescriptorProto)
+		return fd.(*fileDescriptorProto)
 	}
 
 	// Slow-path: decompress and unmarshal the file descriptor proto.
-	fd := new(descriptorpb.FileDescriptorProto)
 	zr, err := gzip.NewReader(bytes.NewReader(b))
 	if err != nil {
 		panic(err)
@@ -58,12 +53,9 @@
 	if err != nil {
 		panic(err)
 	}
-	err = proto.UnmarshalOptions{DiscardUnknown: true}.Unmarshal(b, fd)
-	if err != nil {
-		panic(err)
-	}
+	fd := parseFileDescProto(b)
 	if fd, ok := fileDescCache.LoadOrStore(&b[0], fd); ok {
-		return fd.(*descriptorpb.FileDescriptorProto)
+		return fd.(*fileDescriptorProto)
 	}
 	return fd
 }
diff --git a/internal/legacy/file_test.go b/internal/legacy/file_test.go
index 1a8df96..15dca7b 100644
--- a/internal/legacy/file_test.go
+++ b/internal/legacy/file_test.go
@@ -5,13 +5,18 @@
 package legacy_test
 
 import (
+	"bytes"
+	"compress/gzip"
+	"io/ioutil"
 	"reflect"
 	"testing"
 
 	legacy "github.com/golang/protobuf/v2/internal/legacy"
 	pragma "github.com/golang/protobuf/v2/internal/pragma"
+	"github.com/golang/protobuf/v2/proto"
 	pdesc "github.com/golang/protobuf/v2/reflect/protodesc"
 	pref "github.com/golang/protobuf/v2/reflect/protoreflect"
+	descriptorpb "github.com/golang/protobuf/v2/types/descriptor"
 	cmp "github.com/google/go-cmp/cmp"
 
 	proto2_20160225 "github.com/golang/protobuf/v2/internal/testprotos/legacy/proto2.v0.0.0-20160225-2fc053c5"
@@ -29,7 +34,20 @@
 )
 
 func mustLoadFileDesc(b []byte, _ []int) pref.FileDescriptor {
-	fd, err := pdesc.NewFile(legacy.LoadFileDesc(b), nil)
+	zr, err := gzip.NewReader(bytes.NewReader(b))
+	if err != nil {
+		panic(err)
+	}
+	b, err = ioutil.ReadAll(zr)
+	if err != nil {
+		panic(err)
+	}
+	p := new(descriptorpb.FileDescriptorProto)
+	err = proto.UnmarshalOptions{DiscardUnknown: true}.Unmarshal(b, p)
+	if err != nil {
+		panic(err)
+	}
+	fd, err := pdesc.NewFile(p, nil)
 	if err != nil {
 		panic(err)
 	}
diff --git a/internal/legacy/message.go b/internal/legacy/message.go
index 9f9742d..39addc4 100644
--- a/internal/legacy/message.go
+++ b/internal/legacy/message.go
@@ -126,7 +126,7 @@
 	}
 	if md, ok := mv.(messageV1); ok {
 		b, idxs := md.Descriptor()
-		fd := LoadFileDesc(b)
+		fd := loadFileDesc(b)
 
 		// Derive syntax.
 		switch fd.GetSyntax() {
diff --git a/internal/legacy/parse.go b/internal/legacy/parse.go
new file mode 100644
index 0000000..eac1256
--- /dev/null
+++ b/internal/legacy/parse.go
@@ -0,0 +1,196 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package legacy
+
+import (
+	"github.com/golang/protobuf/v2/internal/encoding/wire"
+	"github.com/golang/protobuf/v2/internal/fieldnum"
+)
+
+// To avoid a dependency from legacy to descriptor.proto, use a hand-written parser
+// for the bits of the descriptor we need.
+//
+// TODO: Consider unifying this with the parser in fileinit.
+
+// fileDescriptorProto holds the subset of FileDescriptorProto fields
+// that this package reads.
+type fileDescriptorProto struct {
+	Syntax      string
+	Package     string
+	EnumType    []*enumDescriptorProto
+	MessageType []*descriptorProto
+}
+
+func (fd fileDescriptorProto) GetSyntax() string  { return fd.Syntax }
+func (fd fileDescriptorProto) GetPackage() string { return fd.Package }
+
+// parseFileDescProto decodes the wire-format message b into a
+// fileDescriptorProto. Only the syntax, package, enum_type, and message_type
+// fields are retained; every other field is skipped.
+// It panics with a wire.ParseError if b cannot be parsed.
+func parseFileDescProto(b []byte) *fileDescriptorProto {
+	fd := &fileDescriptorProto{}
+	for len(b) > 0 {
+		num, typ, n := wire.ConsumeTag(b)
+		parseCheck(n)
+		b = b[n:]
+		switch typ {
+		case wire.BytesType:
+			v, n := wire.ConsumeBytes(b)
+			// Validate n before slicing so that malformed input is reported
+			// as a parse error instead of a slice-bounds panic.
+			parseCheck(n)
+			b = b[n:]
+			switch num {
+			case fieldnum.FileDescriptorProto_Syntax:
+				fd.Syntax = string(v)
+			case fieldnum.FileDescriptorProto_Package:
+				fd.Package = string(v)
+			case fieldnum.FileDescriptorProto_EnumType:
+				fd.EnumType = append(fd.EnumType, parseEnumDescProto(v))
+			case fieldnum.FileDescriptorProto_MessageType:
+				fd.MessageType = append(fd.MessageType, parseDescProto(v))
+			}
+		default:
+			// Non-bytes fields carry nothing we need; skip over the value.
+			n := wire.ConsumeFieldValue(num, typ, b)
+			parseCheck(n)
+			b = b[n:]
+		}
+	}
+	return fd
+}
+
+// descriptorProto holds the subset of DescriptorProto (message descriptor)
+// fields that this package reads.
+type descriptorProto struct {
+	Name       string
+	NestedType []*descriptorProto
+	EnumType   []*enumDescriptorProto
+}
+
+func (md descriptorProto) GetName() string { return md.Name }
+
+// parseDescProto decodes the wire-format message b into a descriptorProto,
+// recursing into nested message and enum declarations.
+// It panics with a wire.ParseError if b cannot be parsed.
+func parseDescProto(b []byte) *descriptorProto {
+	md := &descriptorProto{}
+	for len(b) > 0 {
+		num, typ, n := wire.ConsumeTag(b)
+		parseCheck(n)
+		b = b[n:]
+		switch typ {
+		case wire.BytesType:
+			v, n := wire.ConsumeBytes(b)
+			parseCheck(n)
+			b = b[n:]
+			switch num {
+			case fieldnum.DescriptorProto_Name:
+				md.Name = string(v)
+			case fieldnum.DescriptorProto_NestedType:
+				md.NestedType = append(md.NestedType, parseDescProto(v))
+			case fieldnum.DescriptorProto_EnumType:
+				md.EnumType = append(md.EnumType, parseEnumDescProto(v))
+			}
+		default:
+			n := wire.ConsumeFieldValue(num, typ, b)
+			parseCheck(n)
+			b = b[n:]
+		}
+	}
+	return md
+}
+
+// enumDescriptorProto holds the subset of EnumDescriptorProto fields
+// that this package reads.
+type enumDescriptorProto struct {
+	Name  string
+	Value []*enumValueDescriptorProto
+}
+
+func (ed enumDescriptorProto) GetName() string { return ed.Name }
+
+// parseEnumDescProto decodes the wire-format message b into an
+// enumDescriptorProto, including each of its declared values.
+// It panics with a wire.ParseError if b cannot be parsed.
+func parseEnumDescProto(b []byte) *enumDescriptorProto {
+	ed := &enumDescriptorProto{}
+	for len(b) > 0 {
+		num, typ, n := wire.ConsumeTag(b)
+		parseCheck(n)
+		b = b[n:]
+		switch typ {
+		case wire.BytesType:
+			v, n := wire.ConsumeBytes(b)
+			parseCheck(n)
+			b = b[n:]
+			switch num {
+			case fieldnum.EnumDescriptorProto_Name:
+				ed.Name = string(v)
+			case fieldnum.EnumDescriptorProto_Value:
+				ed.Value = append(ed.Value, parseEnumValueDescProto(v))
+			}
+		default:
+			n := wire.ConsumeFieldValue(num, typ, b)
+			parseCheck(n)
+			b = b[n:]
+		}
+	}
+	return ed
+}
+
+// enumValueDescriptorProto holds the subset of EnumValueDescriptorProto
+// fields that this package reads.
+type enumValueDescriptorProto struct {
+	Name   string
+	Number int32
+}
+
+func (vd enumValueDescriptorProto) GetName() string  { return vd.Name }
+func (vd enumValueDescriptorProto) GetNumber() int32 { return vd.Number }
+
+// parseEnumValueDescProto decodes the wire-format message b into an
+// enumValueDescriptorProto.
+// It panics with a wire.ParseError if b cannot be parsed.
+func parseEnumValueDescProto(b []byte) *enumValueDescriptorProto {
+	vd := &enumValueDescriptorProto{}
+	for len(b) > 0 {
+		num, typ, n := wire.ConsumeTag(b)
+		parseCheck(n)
+		b = b[n:]
+		switch typ {
+		case wire.VarintType:
+			v, n := wire.ConsumeVarint(b)
+			parseCheck(n)
+			b = b[n:]
+			switch num {
+			case fieldnum.EnumValueDescriptorProto_Number:
+				vd.Number = int32(v)
+			}
+		case wire.BytesType:
+			v, n := wire.ConsumeBytes(b)
+			parseCheck(n)
+			b = b[n:]
+			switch num {
+			case fieldnum.EnumValueDescriptorProto_Name:
+				vd.Name = string(v)
+			}
+		default:
+			n := wire.ConsumeFieldValue(num, typ, b)
+			parseCheck(n)
+			b = b[n:]
+		}
+	}
+	return vd
+}
+
+// parseCheck panics with a wire.ParseError if n, a length returned by one of
+// the wire.Consume functions, is negative.
+func parseCheck(n int) {
+	if n < 0 {
+		panic(wire.ParseError(n))
+	}
+}