reflect/protoreflect: clarify Get semantics on unpopulated fields

Clearly specify that Get on an unpopulated field:
* returns the default value for scalars
* returns a mutable (but empty) List for repeated fields
* returns a mutable (but empty) Map for map fields
* returns an invalid value for message fields

The difference in semantics between List+Maps and Messages is because
protobuf semantics provide no distinction between an unpopulated and empty list
or map. On the other hand, there is a semantic difference between an unpopulated
message and an empty message.

Default values for scalars is trivial to implement with FieldDescriptor.Default.

A mutable, but empty List and Map is easy to implement for known fields since
known fields are generated as a slice or map field in a struct.
Since struct fields are addressable, the implementation can just return a
reference to the slice or map.

Repeated, extension fields are a little more tricky since extension fields
are implemented under the hood as a map[FieldNumber]Extension.
Rather than allocating an empty list in KnownFields.Get upon first retrieval
(which presents a race), delegate the work to ExtensionFieldTypes.Register,
which must occur before any Get operation. Register is not a concurrent-safe
operation, so that is an excellent time to initilize empty lists.
The implementation of extensions will need to be careful that Clear on a repeated
field simply truncates it zero instead of deleting the object.

For unpopulated messages, we return an invalid value, instead of the prior
behavior of returning a typed nil-pointer to the Go type for the message.
The approach is problematic because it assumes that
1) all messages are always implemented on a pointer reciever
2) a typed nil-pointer is an appropriate "read-only, but empty" message
These assumptions are not true of all message types (e.g., dynamic messages).

Change-Id: Ie96e6744c890308d9de738b6cf01d3b19e7e7c6a
Reviewed-on: https://go-review.googlesource.com/c/150319
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/impl/legacy_extension.go b/internal/impl/legacy_extension.go
index d54da34..a7a28cf 100644
--- a/internal/impl/legacy_extension.go
+++ b/internal/impl/legacy_extension.go
@@ -59,16 +59,9 @@
 	}
 	t := legacyExtensionTypeOf(x.desc)
 	if x.val == nil {
-		if t.Cardinality() == pref.Repeated {
-			// TODO: What is the zero value for Lists?
-			// TODO: This logic is racy.
-			v := t.ValueOf(t.New())
-			x.val = t.InterfaceOf(v)
-			p.x.Set(n, x)
-			return v
-		}
+		// NOTE: x.val is never nil for Lists since they are always populated
+		// during ExtensionFieldTypes.Register.
 		if t.Kind() == pref.MessageKind || t.Kind() == pref.GroupKind {
-			// TODO: What is the zero value for Messages?
 			return pref.Value{}
 		}
 		return t.Default()
@@ -91,6 +84,11 @@
 	if x.desc == nil {
 		return
 	}
+	t := legacyExtensionTypeOf(x.desc)
+	if t.Cardinality() == pref.Repeated {
+		t.ValueOf(x.val).List().Truncate(0)
+		return
+	}
 	x.val = nil
 	p.x.Set(n, x)
 }
@@ -146,6 +144,11 @@
 		panic("extension descriptor already registered")
 	}
 	x.desc = legacyExtensionDescOf(t, p.mi.goType)
+	if t.Cardinality() == pref.Repeated {
+		// If the field is repeated, initialize the entry with an empty list
+		// so that future Get operations can return a mutable and concrete list.
+		x.val = t.InterfaceOf(t.ValueOf(t.New()))
+	}
 	p.x.Set(t.Number(), x)
 }
 
@@ -154,6 +157,13 @@
 		return
 	}
 	x := p.x.Get(t.Number())
+	if t.Cardinality() == pref.Repeated {
+		// Treat an empty repeated field as unpopulated.
+		v := reflect.ValueOf(x.val)
+		if x.val == nil || v.IsNil() || v.Elem().Len() == 0 {
+			x.val = nil
+		}
+	}
 	if x.val != nil {
 		panic("value for extension descriptor still populated")
 	}
diff --git a/internal/impl/legacy_test.go b/internal/impl/legacy_test.go
index 4df07a3..2333fc5 100644
--- a/internal/impl/legacy_test.go
+++ b/internal/impl/legacy_test.go
@@ -831,11 +831,8 @@
 	}
 	for i, xt := range extensions {
 		var got interface{}
-		v := fs.Get(xt.Number())
-		if xt.Cardinality() != pref.Repeated && xt.Kind() == pref.MessageKind {
-			got = v.Interface()
-		} else {
-			got = xt.InterfaceOf(v) // TODO: Simplify this if InterfaceOf allows nil
+		if v := fs.Get(xt.Number()); v.IsValid() {
+			got = xt.InterfaceOf(v)
 		}
 		want := defaultValues[i]
 		if diff := cmp.Diff(want, got, opts); diff != "" {
@@ -924,9 +921,16 @@
 	}
 
 	// Clear the field for all extension types.
-	for _, xt := range extensions {
+	for _, xt := range extensions[:len(extensions)/2] {
 		fs.Clear(xt.Number())
 	}
+	for i, xt := range extensions[len(extensions)/2:] {
+		if i%2 == 0 {
+			fs.Clear(xt.Number())
+		} else {
+			fs.Get(xt.Number()).List().Truncate(0)
+		}
+	}
 	if n := fs.Len(); n != 0 {
 		t.Errorf("KnownFields.Len() = %v, want 0", n)
 	}
diff --git a/internal/impl/message_field.go b/internal/impl/message_field.go
index 2686a97..75cf867 100644
--- a/internal/impl/message_field.go
+++ b/internal/impl/message_field.go
@@ -61,9 +61,7 @@
 			rv := p.apply(fieldOffset).asType(fs.Type).Elem()
 			if rv.IsNil() || rv.Elem().Type().Elem() != ot {
 				if fd.Kind() == pref.MessageKind || fd.Kind() == pref.GroupKind {
-					// TODO: Should this return an invalid protoreflect.Value?
-					rv = reflect.Zero(ot.Field(0).Type)
-					return conv.PBValueOf(rv)
+					return pref.Value{}
 				}
 				return fd.Default()
 			}
@@ -96,7 +94,7 @@
 				pv := pref.ValueOf(conv.MessageType.New().ProtoReflect())
 				rv.Set(conv.GoValueOf(pv))
 			}
-			return rv.Interface().(pref.Message)
+			return conv.PBValueOf(rv).Message()
 		},
 	}
 }
@@ -253,19 +251,18 @@
 			return !rv.IsNil()
 		},
 		get: func(p pointer) pref.Value {
-			// TODO: If rv.IsNil(), should this return a typed-nil pointer or
-			// an invalid protoreflect.Value?
-			//
-			// Returning a typed nil pointer assumes that such values
-			// are valid for all possible custom message types,
-			// which may not be case for dynamic messages.
 			rv := p.apply(fieldOffset).asType(fs.Type).Elem()
+			if rv.IsNil() {
+				return pref.Value{}
+			}
 			return conv.PBValueOf(rv)
 		},
 		set: func(p pointer, v pref.Value) {
-			// TODO: Similarly, is it valid to set this to a typed nil pointer?
 			rv := p.apply(fieldOffset).asType(fs.Type).Elem()
 			rv.Set(conv.GoValueOf(v))
+			if rv.IsNil() {
+				panic("invalid nil pointer")
+			}
 		},
 		clear: func(p pointer) {
 			rv := p.apply(fieldOffset).asType(fs.Type).Elem()
diff --git a/internal/impl/message_test.go b/internal/impl/message_test.go
index b3393d4..999bdd6 100644
--- a/internal/impl/message_test.go
+++ b/internal/impl/message_test.go
@@ -1017,7 +1017,6 @@
 func (*EnumMessages_OneofM3) isEnumMessages_Union() {}
 
 func TestEnumMessages(t *testing.T) {
-	// TODO: Test behavior of Get on unpopulated message.
 	wantL := MessageOf(&proto2_20180125.Message{OptionalFloat: protoV1.Float32(math.E)})
 	wantM := &EnumMessages{EnumP2: EnumProto2(1234).Enum()}
 	wantM2a := &ScalarProto2{Float32: protoV1.Float32(math.Pi)}
@@ -1033,7 +1032,7 @@
 
 	testMessage(t, nil, &EnumMessages{}, messageOps{
 		hasFields{1: false, 2: false, 3: false, 4: false, 5: false, 6: false, 7: false, 8: false, 9: false, 10: false, 11: false, 12: false},
-		getFields{1: VE(0xbeef), 2: VE(1), 9: VE(0xbeef), 10: VE(1)},
+		getFields{1: VE(0xbeef), 2: VE(1), 3: V(nil), 4: V(nil), 9: VE(0xbeef), 10: VE(1)},
 
 		// Test singular enums.
 		setFields{1: VE(0xdead), 2: VE(0)},
diff --git a/internal/value/list.go b/internal/value/list.go
index 2eec007..ba82b30 100644
--- a/internal/value/list.go
+++ b/internal/value/list.go
@@ -35,19 +35,13 @@
 }
 func (ls listReflect) Mutable(i int) pref.Mutable {
 	// Mutable is only valid for messages and panics for other kinds.
-	rv := ls.v.Index(i)
-	if rv.IsNil() {
-		// TODO: Is checking for nil proper behavior for custom messages?
-		pv := pref.ValueOf(ls.conv.MessageType.New().ProtoReflect())
-		rv.Set(ls.conv.GoValueOf(pv))
-	}
-	return rv.Interface().(pref.Message)
+	return ls.conv.PBValueOf(ls.v.Index(i)).Message()
 }
 func (ls listReflect) MutableAppend() pref.Mutable {
 	// MutableAppend is only valid for messages and panics for other kinds.
 	pv := pref.ValueOf(ls.conv.MessageType.New().ProtoReflect())
 	ls.v.Set(reflect.Append(ls.v, ls.conv.GoValueOf(pv)))
-	return ls.v.Index(ls.Len() - 1).Interface().(pref.Message)
+	return pv.Message()
 }
 func (ls listReflect) Truncate(i int) {
 	ls.v.Set(ls.v.Slice(0, i))
diff --git a/internal/value/map.go b/internal/value/map.go
index d2ef659..db9656e 100644
--- a/internal/value/map.go
+++ b/internal/value/map.go
@@ -57,13 +57,12 @@
 	}
 	rk := ms.keyConv.GoValueOf(k.Value())
 	rv := ms.v.MapIndex(rk)
-	if !rv.IsValid() || rv.IsNil() {
-		// TODO: Is checking for nil proper behavior for custom messages?
+	if !rv.IsValid() {
 		pv := pref.ValueOf(ms.valConv.MessageType.New().ProtoReflect())
 		rv = ms.valConv.GoValueOf(pv)
 		ms.v.SetMapIndex(rk, rv)
 	}
-	return rv.Interface().(pref.Message)
+	return ms.valConv.PBValueOf(rv).Message()
 }
 func (ms mapReflect) Range(f func(pref.MapKey, pref.Value) bool) {
 	for _, k := range ms.v.MapKeys() {
diff --git a/reflect/protoreflect/value.go b/reflect/protoreflect/value.go
index 93ff1ff..66cb5da 100644
--- a/reflect/protoreflect/value.go
+++ b/reflect/protoreflect/value.go
@@ -69,8 +69,11 @@
 	Has(FieldNumber) bool
 
 	// Get retrieves the value for a field with the given field number.
-	// It returns the default value for unpopulated fields.
-	// It returns an invalid value for unknown fields.
+	// If the field is unpopulated, it returns the default value for scalars,
+	// a mutable empty List for empty repeated fields, a mutable empty Map for
+	// empty map fields, and an invalid value for message fields.
+	// If the field is unknown (does not appear in MessageDescriptor.Fields
+	// or ExtensionFieldTypes), it returns an invalid value.
 	Get(FieldNumber) Value
 
 	// Set stores the value for a field with the given field number.