internal/catmsg: added encoders and decoders

Change-Id: I168f19c881ae8dcc4163b9ab96e3ea8cc06158f5
Reviewed-on: https://go-review.googlesource.com/41331
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/internal/catmsg/catmsg.go b/internal/catmsg/catmsg.go
new file mode 100644
index 0000000..1013b43
--- /dev/null
+++ b/internal/catmsg/catmsg.go
@@ -0,0 +1,367 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package catmsg contains support types for package x/text/message/catalog.
+//
+// This package contains the low-level implementations of Message used by the
+// catalog package and provides primitives for other packages to implement their
+// own. For instance, the plural package provides functionality for selecting
+// translation strings based on the plural category of substitution arguments.
+//
+//
+// Encoding and Decoding
+//
+// Catalogs store Messages encoded as a single string. Compiling a message into
+// a string results in a more compact representation and speeds up evaluation.
+//
+// A Message must implement a Compile method to convert its arbitrary
+// representation to a string. The Compile method takes an Encoder which
+// facilitates serializing the message. Encoders also provide more context of
+// the message's creation (such as for which language the message is intended),
+// which may not be known at the time of the creation of the message.
+//
+// Each message type must also have an accompanying decoder registered to decode
+// the message. This decoder takes a Decoder argument which provides the
+// counterparts for the decoding.
+//
+//
+// Renderers
+//
+// A Decoder must be initialized with a Renderer implementation. These
+// implementations must be provided by packages that use Catalogs, typically
+// formatting packages such as x/text/message. A typical user will not need to
+// worry about this type; it is only relevant to packages that do string
+// formatting and want to use the catalog package to handle localized strings.
+//
+// A package that uses catalogs for selecting strings receives selection results
+// as a sequence of substrings passed to the Renderer. The following snippet
+// shows how to express the above example using the message package.
+//
+//   message.Set(language.English, "You are %d minute(s) late.",
+//       catalog.Var("minutes", plural.Select(1, "one", "minute")),
+//       catalog.String("You are %[1]d ${minutes} late."))
+//
+//   p := message.NewPrinter(language.English)
+//
+//   p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
+//
+// To evaluate the Printf, package message wraps the arguments in a Renderer
+// that is passed to the catalog for message decoding. The call sequence that
+// results from evaluating the above message, assuming the person is rather
+// tardy, is:
+//
+//   Render("You are %[1]d ")
+//   Arg(1)
+//   Render("minutes")
+//   Render(" late.")
+//
+// The call to Arg is caused by the plural.Select execution, which evaluates
+// the argument to determine whether the singular or plural message form should
+// be selected. The calls to Render report the partial results to the message
+// package for further evaluation.
+package catmsg
+
+import (
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+	"sync"
+
+	"golang.org/x/text/language"
+)
+
+// A Handle refers to a registered message type.
+type Handle int
+
+// First is used as a Handle to EncodeMessageType, followed by a series of calls
+// to EncodeMessage, to implement selecting the first matching Message.
+//
+// TODO: this can be removed once we either can use type aliases or if the
+// internals of this package are merged with the catalog package.
+var First Handle = msgFirst
+
+// A Handler decodes and evaluates data compiled by a Message and sends the
+// result to the Decoder. The output may depend on the value of the substitution
+// arguments, accessible by the Decoder's Arg method. The Handler returns false
+// if there is no translation for the given substitution arguments.
+type Handler func(d *Decoder) bool
+
+// Register records the existence of a message type and returns a Handle that
+// can be used in the Encoder's EncodeMessageType method to create such
+// messages. The prefix of the name should be the package path followed by
+// an optional disambiguating string.
+// Register will panic if a handle for the same name was already registered.
+func Register(name string, handler Handler) Handle {
+	mutex.Lock()
+	defer mutex.Unlock()
+
+	if _, ok := names[name]; ok {
+		panic(fmt.Errorf("catmsg: handler for %q already exists", name))
+	}
+	h := Handle(len(handlers))
+	names[name] = h
+	handlers = append(handlers, handler)
+	return h
+}
+
+// These handlers require fixed positions in the handlers slice.
+const (
+	msgVars Handle = iota
+	msgFirst
+	msgRaw
+	msgString
+	numFixed
+)
+
+const prefix = "golang.org/x/text/internal/catmsg."
+
+var (
+	mutex sync.Mutex
+	names = map[string]Handle{
+		prefix + "Vars":   msgVars,
+		prefix + "First":  msgFirst,
+		prefix + "Raw":    msgRaw,
+		prefix + "String": msgString,
+	}
+	handlers = make([]Handler, numFixed)
+)
+
+func init() {
+	// This handler is a message type wrapper that initializes a decoder
+	// with a variable block. This message type, if present, is always at the
+	// start of an encoded message.
+	handlers[msgVars] = func(d *Decoder) bool {
+		blockSize := int(d.DecodeUint())
+		d.vars = d.data[:blockSize]
+		d.data = d.data[blockSize:]
+		return d.executeMessage()
+	}
+
+	// First takes the first message in a sequence that results in a match for
+	// the given substitution arguments.
+	handlers[msgFirst] = func(d *Decoder) bool {
+		for !d.Done() {
+			if d.ExecuteMessage() {
+				return true
+			}
+		}
+		return false
+	}
+
+	handlers[msgRaw] = func(d *Decoder) bool {
+		d.Render(d.data)
+		return true
+	}
+
+	// A String message alternates between a string constant and a variable
+	// substitution.
+	handlers[msgString] = func(d *Decoder) bool {
+		for !d.Done() {
+			if str := d.DecodeString(); str != "" {
+				d.Render(str)
+			}
+			if d.Done() {
+				break
+			}
+			d.ExecuteSubstitution()
+		}
+		return true
+	}
+}
+
+var (
+	// ErrIncomplete indicates a compiled message does not define translations
+	// for all possible argument values. If this message is returned, evaluating
+	// a message may result in the ErrNoMatch error.
+	ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")
+
+	// ErrNoMatch indicates no translation message matched the given input
+	// parameters when evaluating a message.
+	ErrNoMatch = errors.New("catmsg: no translation for inputs")
+)
+
+// A Message holds a collection of translations for the same phrase that may
+// vary based on the values of substitution arguments.
+type Message interface {
+	// Compile encodes the format string(s) of the message as a string for later
+	// evaluation.
+	//
+	// The first call Compile makes on the encoder must be EncodeMessageType.
+	// The handle passed to this call may either be a handle returned by
+	// Register to encode a single custom message, or First followed by
+	// a sequence of calls to EncodeMessage.
+	//
+	// Compile must return ErrIncomplete if it is possible for evaluation to
+	// not match any translation for a given set of formatting parameters.
+	// For example, selecting a translation based on plural form may not yield
+	// a match if the form "Other" is not one of the selectors.
+	//
+	// Compile may return any other application-specific error. For backwards
+	// compatibility with packages like fmt, which often do not do sanity
+	// checking of format strings ahead of time, Compile should still make an
+	// effort to have some sensible fallback in case of an error.
+	Compile(e *Encoder) error
+}
+
+// Compile converts a Message to a data string that can be stored in a Catalog.
+// The resulting string can subsequently be decoded by passing to the Execute
+// method of a Decoder.
+func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
+	// TODO: pass macros so they can be used for validation.
+	v := &Encoder{inBody: true} // encoder for variables
+	v.root = v
+	e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
+	err = m.Compile(e)
+	// This package serves the message package, which in turn is meant to be a
+	// drop-in replacement for fmt. With the fmt package, format strings are
+	// evaluated lazily and errors are handled by substituting strings in the
+	// result, rather than returning an error. Dealing with multiple languages
+	// makes it more important to check errors ahead of time. We chose to be
+	// consistent and compatible and allow graceful degradation in case of
+	// errors.
+	buf := e.buf[stripPrefix(e.buf):]
+	if len(v.buf) > 0 {
+		// Prepend variable block.
+		b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
+		b[0] = byte(msgVars)
+		b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
+		b = append(b, v.buf...)
+		b = append(b, buf...)
+		buf = b
+	}
+	if err == nil {
+		err = v.err
+	}
+	return string(buf), err
+}
+
+// Var defines a message that can be substituted for a placeholder of the same
+// name. If an expression does not result in a string after evaluation, Name is
+// used as the substitution. For example:
+//    Var{
+//      Name:    "minutes",
+//      Message: plural.Select(1, "one", "minute"),
+//    }
+// will resolve to minute for singular and minutes for plural forms.
+type Var struct {
+	Name    string
+	Message Message
+}
+
+var errIsVar = errors.New("catmsg: variable used as message")
+
+// Compile implements Message.
+//
+// Note that this method merely registers a variable; it does not create an
+// encoded message.
+func (v *Var) Compile(e *Encoder) error {
+	if err := e.addVar(v.Name, v.Message); err != nil {
+		return err
+	}
+	// Using a Var by itself is an error. If it is in a sequence followed by
+	// other messages referring to it, this error will be ignored.
+	return errIsVar
+}
+
+// Raw is a message consisting of a single format string that is passed as is
+// to the Renderer.
+//
+// Note that a Renderer may still do its own variable substitution.
+type Raw string
+
+// Compile implements Message.
+func (r Raw) Compile(e *Encoder) (err error) {
+	e.EncodeMessageType(msgRaw)
+	// Special case: raw strings don't have a size encoding and so don't use
+	// EncodeString.
+	e.buf = append(e.buf, r...)
+	return nil
+}
+
+// String is a message consisting of a single format string which contains
+// placeholders that may be substituted with variables.
+//
+// Variable substitutions are marked with placeholders and a variable name of
+// the form ${name}. Any other substitutions such as Go templates or
+// printf-style substitutions are left to be done by the Renderer.
+//
+// When evaluating a string interpolation, a Renderer will receive separate
+// calls for each placeholder and interstitial string. For example, for the
+// message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
+// is:
+//   d.Render("%[1]v ")
+//   d.Arg(1)
+//   d.Render(resultOfInvites)
+//   d.Render(" %[2]v to ")
+//   d.Arg(1)
+//   d.Render(resultOfTheir)
+//   d.Render(" party.")
+// where the messages for "invites" and "their" both use a plural.Select
+// referring to the first argument.
+//
+// Strings may also invoke macros. Macros are essentially variables that can be
+// reused. Macros may, for instance, be used to make selections between
+// different conjugations of a verb. See the catalog package description for an
+// overview of macros.
+type String string
+
+// Compile implements Message. It parses the placeholder formats and returns
+// any error.
+func (s String) Compile(e *Encoder) (err error) {
+	msg := string(s)
+	const subStart = "${"
+	hasHeader := false
+	p := 0
+	b := []byte{}
+	for {
+		i := strings.Index(msg[p:], subStart)
+		if i == -1 {
+			break
+		}
+		b = append(b, msg[p:p+i]...)
+		p += i + len(subStart)
+		if i = strings.IndexByte(msg[p:], '}'); i == -1 {
+			b = append(b, "$!(MISSINGBRACE)"...)
+			err = fmt.Errorf("catmsg: missing '}'")
+			p = len(msg)
+			break
+		}
+		name := strings.TrimSpace(msg[p : p+i])
+		if q := strings.IndexByte(name, '('); q == -1 {
+			if !hasHeader {
+				hasHeader = true
+				e.EncodeMessageType(msgString)
+			}
+			e.EncodeString(string(b))
+			e.EncodeSubstitution(name)
+			b = b[:0]
+		} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
+			// TODO: what should the error be?
+			b = append(b, "$!(MISSINGPAREN)"...)
+			err = fmt.Errorf("catmsg: missing ')'")
+		} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
+			// TODO: handle more than one argument
+			b = append(b, "$!(BADNUM)"...)
+			err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
+		} else {
+			if !hasHeader {
+				hasHeader = true
+				e.EncodeMessageType(msgString)
+			}
+			e.EncodeString(string(b))
+			e.EncodeSubstitution(name[:q], int(x))
+			b = b[:0]
+		}
+		p += i + 1
+	}
+	b = append(b, msg[p:]...)
+	if !hasHeader {
+		// Simplify string to a raw string.
+		Raw(string(b)).Compile(e)
+	} else if len(b) > 0 {
+		e.EncodeString(string(b))
+	}
+	return err
+}
diff --git a/internal/catmsg/catmsg_test.go b/internal/catmsg/catmsg_test.go
new file mode 100644
index 0000000..d06502b
--- /dev/null
+++ b/internal/catmsg/catmsg_test.go
@@ -0,0 +1,294 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package catmsg
+
+import (
+	"errors"
+	"strings"
+	"testing"
+
+	"golang.org/x/text/language"
+)
+
+type renderer struct {
+	args   []int
+	result string
+}
+
+func (r *renderer) Arg(i int) interface{} {
+	if i >= len(r.args) {
+		return nil
+	}
+	return r.args[i]
+}
+
+func (r *renderer) Render(s string) {
+	if r.result != "" {
+		r.result += "|"
+	}
+	r.result += s
+}
+
+func TestCodec(t *testing.T) {
+	type test struct {
+		args   []int
+		out    string
+		decErr string
+	}
+	single := func(out, err string) []test { return []test{{out: out, decErr: err}} }
+	testCases := []struct {
+		desc   string
+		m      Message
+		enc    string
+		encErr string
+		tests  []test
+	}{{
+		desc:   "unused variable",
+		m:      &Var{"name", String("foo")},
+		encErr: errIsVar.Error(),
+		tests:  single("", ""),
+	}, {
+		desc:  "empty",
+		m:     empty{},
+		tests: single("", ""),
+	}, {
+		desc:  "sequence with empty",
+		m:     seq{empty{}},
+		tests: single("", ""),
+	}, {
+		desc:  "raw string",
+		m:     Raw("foo"),
+		tests: single("foo", ""),
+	}, {
+		desc:  "raw string no sub",
+		m:     Raw("${foo}"),
+		enc:   "\x02${foo}",
+		tests: single("${foo}", ""),
+	}, {
+		desc:  "simple string",
+		m:     String("foo"),
+		tests: single("foo", ""),
+	}, {
+		desc:   "missing var",
+		m:      String("foo${bar}"),
+		enc:    "\x03\x03foo\x02\x03bar",
+		encErr: `unknown var "bar"`,
+		tests:  single("foo|bar", ""),
+	}, {
+		desc: "empty var",
+		m: seq{
+			&Var{"bar", seq{}},
+			String("foo${bar}"),
+		},
+		enc: "\x00\x05\x04\x02bar\x03\x03foo\x00\x00",
+		// TODO: recognize that it is cheaper to substitute bar.
+		tests: single("foo|bar", ""),
+	}, {
+		desc: "var after value",
+		m: seq{
+			String("foo${bar}"),
+			&Var{"bar", String("baz")},
+		},
+		encErr: errIsVar.Error(),
+		tests:  single("foo|bar", ""),
+	}, {
+		desc: "substitution",
+		m: seq{
+			&Var{"bar", String("baz")},
+			String("foo${bar}"),
+		},
+		tests: single("foo|baz", ""),
+	}, {
+		desc: "shadowed variable",
+		m: seq{
+			&Var{"bar", String("baz")},
+			seq{
+				&Var{"bar", String("BAZ")},
+				String("foo${bar}"),
+			},
+		},
+		tests: single("foo|BAZ", ""),
+	}, {
+		desc: "not shadowed variable",
+		m: seq{
+			&Var{"bar", String("baz")},
+			seq{
+				String("foo${bar}"),
+				&Var{"bar", String("BAZ")},
+			},
+		},
+		encErr: errIsVar.Error(),
+		tests:  single("foo|baz", ""),
+	}, {
+		desc: "duplicate variable",
+		m: seq{
+			&Var{"bar", String("baz")},
+			&Var{"bar", String("BAZ")},
+			String("${bar}"),
+		},
+		encErr: "catmsg: duplicate variable \"bar\"",
+		tests:  single("baz", ""),
+	}, {
+		desc: "complete incomplete variable",
+		m: seq{
+			&Var{"bar", incomplete{}},
+			String("${bar}"),
+		},
+		enc: "\x00\t\b\x01\x01\x04\x04\x02bar\x03\x00\x00\x00",
+		// TODO: recognize that it is cheaper to substitute bar.
+		tests: single("bar", ""),
+	}, {
+		desc: "incomplete sequence",
+		m: seq{
+			incomplete{},
+			incomplete{},
+		},
+		encErr: ErrIncomplete.Error(),
+		tests:  single("", ErrNoMatch.Error()),
+	}, {
+		desc: "compile error variable",
+		m: seq{
+			&Var{"bar", errorCompileMsg{}},
+			String("${bar}"),
+		},
+		encErr: errCompileTest.Error(),
+		tests:  single("bar", ""),
+	}, {
+		desc:   "compile error message",
+		m:      errorCompileMsg{},
+		encErr: errCompileTest.Error(),
+		tests:  single("", ""),
+	}, {
+		desc: "compile error sequence",
+		m: seq{
+			errorCompileMsg{},
+			errorCompileMsg{},
+		},
+		encErr: errCompileTest.Error(),
+		tests:  single("", ""),
+	}, {
+		desc:  "macro",
+		m:     String("${exists(1)}"),
+		tests: single("you betya!", ""),
+	}, {
+		desc:  "macro incomplete",
+		m:     String("${incomplete(1)}"),
+		enc:   "\x03\x00\x01\nincomplete\x01",
+		tests: single("incomplete", ""),
+	}, {
+		desc:  "macro undefined at end",
+		m:     String("${undefined(1)}"),
+		enc:   "\x03\x00\x01\tundefined\x01",
+		tests: single("undefined", "catmsg: undefined macro \"undefined\""),
+	}, {
+		desc:  "macro undefined with more text following",
+		m:     String("${undefined(1)}."),
+		enc:   "\x03\x00\x01\tundefined\x01\x01.",
+		tests: single("undefined|.", "catmsg: undefined macro \"undefined\""),
+	}, {
+		desc:   "macro missing paren",
+		m:      String("${missing(1}"),
+		encErr: "catmsg: missing ')'",
+		tests:  single("$!(MISSINGPAREN)", ""),
+	}, {
+		desc:   "macro bad num",
+		m:      String("aa${bad(a)}"),
+		encErr: "catmsg: invalid number \"a\"",
+		tests:  single("aa$!(BADNUM)", ""),
+	}, {
+		desc:   "var missing brace",
+		m:      String("a${missing"),
+		encErr: "catmsg: missing '}'",
+		tests:  single("a$!(MISSINGBRACE)", ""),
+	}}
+	r := &renderer{}
+	dec := NewDecoder(language.Und, r, macros)
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+
+			data, err := Compile(language.Und, macros, tc.m)
+			if failErr(err, tc.encErr) {
+				t.Errorf("encoding error: got %+q; want %+q", err, tc.encErr)
+			}
+			if tc.enc != "" && data != tc.enc {
+				t.Errorf("encoding: got %+q; want %+q", data, tc.enc)
+			}
+			for _, st := range tc.tests {
+				t.Run("", func(t *testing.T) {
+					*r = renderer{args: st.args}
+					if err = dec.Execute(data); failErr(err, st.decErr) {
+						t.Errorf("decoding error: got %+q; want %+q", err, st.decErr)
+					}
+					if r.result != st.out {
+						t.Errorf("decode: got %+q; want %+q", r.result, st.out)
+					}
+				})
+			}
+		})
+	}
+}
+
+func failErr(got error, want string) bool {
+	if got == nil {
+		return want != ""
+	}
+	return want == "" || !strings.Contains(got.Error(), want)
+}
+
+type seq []Message
+
+func (s seq) Compile(e *Encoder) (err error) {
+	err = ErrIncomplete
+	e.EncodeMessageType(First)
+	for _, m := range s {
+		// Pass only the last error, but allow erroneous or complete messages
+		// here to allow testing different scenarios.
+		err = e.EncodeMessage(m)
+	}
+	return err
+}
+
+type empty struct{}
+
+func (empty) Compile(e *Encoder) (err error) { return nil }
+
+var msgIncomplete = Register(
+	"golang.org/x/text/internal/catmsg.incomplete",
+	func(d *Decoder) bool { return false })
+
+type incomplete struct{}
+
+func (incomplete) Compile(e *Encoder) (err error) {
+	e.EncodeMessageType(msgIncomplete)
+	return ErrIncomplete
+}
+
+type errorCompileMsg struct{}
+
+var errCompileTest = errors.New("catmsg: compile error test")
+
+func (errorCompileMsg) Compile(e *Encoder) (err error) {
+	return errCompileTest
+}
+
+type dictionary struct{}
+
+var (
+	macros       = dictionary{}
+	dictMessages = map[string]string{
+		"exists":     compile(String("you betya!")),
+		"incomplete": compile(incomplete{}),
+	}
+)
+
+func (d dictionary) Lookup(key string) (data string, ok bool) {
+	data, ok = dictMessages[key]
+	return
+}
+
+func compile(m Message) (data string) {
+	data, _ = Compile(language.Und, macros, m)
+	return data
+}
diff --git a/internal/catmsg/codec.go b/internal/catmsg/codec.go
new file mode 100755
index 0000000..e959b08
--- /dev/null
+++ b/internal/catmsg/codec.go
@@ -0,0 +1,407 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package catmsg
+
+import (
+	"errors"
+	"fmt"
+
+	"golang.org/x/text/language"
+)
+
+// A Renderer renders a Message.
+type Renderer interface {
+	// Render renders the given string. The given string may be interpreted as a
+	// format string, such as the one used by the fmt package or a template.
+	Render(s string)
+
+	// Arg returns the i-th argument passed to format a message. This method
+	// should return nil if there is no such argument. Messages need access to
+	// arguments to allow selecting a message based on linguistic features of
+	// those arguments.
+	Arg(i int) interface{}
+}
+
+// A Dictionary specifies a source of messages, including variables or macros.
+type Dictionary interface {
+	// Lookup returns the message for the given key. It returns false for ok if
+	// such a message could not be found.
+	Lookup(key string) (data string, ok bool)
+
+	// TODO: consider returning an interface, instead of a string. This will
+	// allow implementations to do their own message type decoding.
+}
+
+// An Encoder serializes a Message to a string.
+type Encoder struct {
+	// The root encoder is used for storing encoded variables.
+	root *Encoder
+	// The parent encoder provides the surrounding scopes for resolving variable
+	// names.
+	parent *Encoder
+
+	tag language.Tag
+
+	// buf holds the encoded message so far. After a message completes encoding,
+	// the contents of buf, prefixed by the encoded length, are flushed to the
+	// parent buffer.
+	buf []byte
+
+	// vars is the lookup table of variables in the current scope.
+	vars []keyVal
+
+	err    error
+	inBody bool // if false next call must be EncodeMessageType
+}
+
+type keyVal struct {
+	key    string
+	offset int
+}
+
+// Language reports the language for which the encoded message will be stored
+// in the Catalog.
+func (e *Encoder) Language() language.Tag { return e.tag }
+
+func (e *Encoder) setError(err error) {
+	if e.root.err == nil {
+		e.root.err = err
+	}
+}
+
+// EncodeUint encodes x.
+func (e *Encoder) EncodeUint(x uint64) {
+	e.checkInBody()
+	var buf [maxVarintBytes]byte
+	n := encodeUint(buf[:], x)
+	e.buf = append(e.buf, buf[:n]...)
+}
+
+// EncodeString encodes s.
+func (e *Encoder) EncodeString(s string) {
+	e.checkInBody()
+	e.EncodeUint(uint64(len(s)))
+	e.buf = append(e.buf, s...)
+}
+
+// EncodeMessageType marks the current message to be of type h.
+//
+// It must be the first call of a Message's Compile method.
+func (e *Encoder) EncodeMessageType(h Handle) {
+	if e.inBody {
+		panic("catmsg: EncodeMessageType not the first method called")
+	}
+	e.inBody = true
+	e.EncodeUint(uint64(h))
+}
+
+// EncodeMessage serializes the given message inline at the current position.
+func (e *Encoder) EncodeMessage(m Message) error {
+	e = &Encoder{root: e.root, parent: e}
+	err := m.Compile(e)
+	if _, ok := m.(*Var); !ok {
+		e.flushTo(e.parent)
+	}
+	return err
+}
+
+func (e *Encoder) checkInBody() {
+	if !e.inBody {
+		panic("catmsg: expected prior call to EncodeMessageType")
+	}
+}
+
+// stripPrefix indicates the number of prefix bytes that must be stripped to
+// turn a single-element sequence into a message that is just this single member
+// without its size prefix. If the message can be stripped, b[1:n] contains the
+// size prefix.
+func stripPrefix(b []byte) (n int) {
+	if len(b) > 0 && Handle(b[0]) == msgFirst {
+		x, n, _ := decodeUint(b[1:])
+		if 1+n+int(x) == len(b) {
+			return 1 + n
+		}
+	}
+	return 0
+}
+
+func (e *Encoder) flushTo(dst *Encoder) {
+	data := e.buf
+	p := stripPrefix(data)
+	if p > 0 {
+		data = data[1:]
+	} else {
+		// Prefix the size.
+		dst.EncodeUint(uint64(len(data)))
+	}
+	dst.buf = append(dst.buf, data...)
+}
+
+func (e *Encoder) addVar(key string, m Message) error {
+	for _, v := range e.parent.vars {
+		if v.key == key {
+			err := fmt.Errorf("catmsg: duplicate variable %q", key)
+			e.setError(err)
+			return err
+		}
+	}
+	scope := e.parent
+	// If a variable message is Incomplete, and does not evaluate to a message
+	// during execution, we fall back to the variable name. We encode this by
+	// appending the variable name if the message reports it's incomplete.
+
+	err := m.Compile(e)
+	if err != ErrIncomplete {
+		e.setError(err)
+	}
+	switch {
+	case len(e.buf) == 1 && Handle(e.buf[0]) == msgFirst: // empty sequence
+		e.buf = e.buf[:0]
+		e.inBody = false
+		fallthrough
+	case len(e.buf) == 0:
+		// Empty message.
+		if err := String(key).Compile(e); err != nil {
+			e.setError(err)
+		}
+	case err == ErrIncomplete:
+		if Handle(e.buf[0]) != msgFirst {
+			seq := &Encoder{root: e.root, parent: e}
+			seq.EncodeMessageType(First)
+			e.flushTo(seq)
+			e = seq
+		}
+		// e contains a sequence; append the fallback string.
+		e.EncodeMessage(String(key))
+	}
+
+	// Flush result to variable heap.
+	offset := len(e.root.buf)
+	e.flushTo(e.root)
+	e.buf = e.buf[:0]
+
+	// Record variable offset in current scope.
+	scope.vars = append(scope.vars, keyVal{key: key, offset: offset})
+	return err
+}
+
+const (
+	substituteVar = iota
+	substituteMacro
+	substituteError
+)
+
+// EncodeSubstitution inserts a resolved reference to a variable or macro.
+//
+// This call must be matched with a call to ExecuteSubstitution at decoding
+// time.
+func (e *Encoder) EncodeSubstitution(name string, arguments ...int) {
+	if arity := len(arguments); arity > 0 {
+		// TODO: also resolve macros.
+		e.EncodeUint(substituteMacro)
+		e.EncodeString(name)
+		for _, a := range arguments {
+			e.EncodeUint(uint64(a))
+		}
+		return
+	}
+	for scope := e; scope != nil; scope = scope.parent {
+		for _, v := range scope.vars {
+			if v.key != name {
+				continue
+			}
+			e.EncodeUint(substituteVar) // TODO: support arity > 0
+			e.EncodeUint(uint64(v.offset))
+			return
+		}
+	}
+	// TODO: refer to dictionary-wide scoped variables.
+	e.EncodeUint(substituteError)
+	e.EncodeString(name)
+	e.setError(fmt.Errorf("catmsg: unknown var %q", name))
+}
+
+// A Decoder deserializes and evaluates messages that are encoded by an encoder.
+type Decoder struct {
+	tag    language.Tag
+	dst    Renderer
+	macros Dictionary
+
+	err  error
+	vars string
+	data string
+
+	macroArg int // TODO: allow more than one argument
+}
+
+// NewDecoder returns a new Decoder.
+//
+// Decoders are designed to be reused for multiple invocations of Execute.
+// Only one goroutine may call Execute concurrently.
+func NewDecoder(tag language.Tag, r Renderer, macros Dictionary) *Decoder {
+	return &Decoder{
+		tag:    tag,
+		dst:    r,
+		macros: macros,
+	}
+}
+
+func (d *Decoder) setError(err error) {
+	if d.err == nil {
+		d.err = err
+	}
+}
+
+// Language returns the language in which the message is being rendered.
+//
+// The destination language may be a child language of the language used for
+// encoding. For instance, a decoding language of "pt-PT" is consistent with an
+// encoding language of "pt".
+func (d *Decoder) Language() language.Tag { return d.tag }
+
+// Done reports whether there are no more bytes to process in this message.
+func (d *Decoder) Done() bool { return len(d.data) == 0 }
+
+// Render implements Renderer.
+func (d *Decoder) Render(s string) { d.dst.Render(s) }
+
+// Arg implements Renderer.
+//
+// During evaluation of macros, the argument positions may be mapped to
+// arguments that differ from the original call.
+func (d *Decoder) Arg(i int) interface{} {
+	if d.macroArg != 0 {
+		if i != 1 {
+			panic("catmsg: only macros with single argument supported")
+		}
+		i = d.macroArg
+	}
+	return d.dst.Arg(i)
+}
+
+// DecodeUint decodes a number that was encoded with EncodeUint and advances the
+// position.
+func (d *Decoder) DecodeUint() uint64 {
+	x, n, err := decodeUintString(d.data)
+	d.data = d.data[n:]
+	if err != nil {
+		d.setError(err)
+	}
+	return x
+}
+
+// DecodeString decodes a string that was encoded with EncodeString and advances
+// the position.
+func (d *Decoder) DecodeString() string {
+	size := d.DecodeUint()
+	s := d.data[:size]
+	d.data = d.data[size:]
+	return s
+}
+
+// SkipMessage skips the message at the current location and advances the
+// position.
+func (d *Decoder) SkipMessage() {
+	n := int(d.DecodeUint())
+	d.data = d.data[n:]
+}
+
+// Execute decodes and evaluates msg.
+//
+// Only one goroutine may call Execute at a time.
+func (d *Decoder) Execute(msg string) error {
+	d.err = nil
+	if !d.execute(msg) {
+		return ErrNoMatch
+	}
+	return d.err
+}
+
+func (d *Decoder) execute(msg string) bool {
+	saved := d.data
+	d.data = msg
+	ok := d.executeMessage()
+	d.data = saved
+	return ok
+}
+
+// executeMessageFromData is like execute, but also decodes a leading message
+// size and clips the given string accordingly.
+//
+// It reports the number of bytes consumed and whether a message was selected.
+func (d *Decoder) executeMessageFromData(s string) (n int, ok bool) {
+	saved := d.data
+	d.data = s
+	size := int(d.DecodeUint())
+	n = len(s) - len(d.data)
+	// Sanitize the setting. This allows skipping a size argument for
+	// Raw messages and method Done.
+	d.data = d.data[:size]
+	ok = d.executeMessage()
+	n += size - len(d.data)
+	d.data = saved
+	return n, ok
+}
+
+var errUnknownHandler = errors.New("catmsg: string contains unsupported handler")
+
+// executeMessage reads the handle id, initializes the decoder and executes the
+// message. It is assumed that all of d.data is the single message.
+func (d *Decoder) executeMessage() bool {
+	if d.Done() {
+		// We interpret no data as a valid empty message.
+		return true
+	}
+	handle := d.DecodeUint()
+
+	var fn Handler
+	mutex.Lock()
+	if int(handle) < len(handlers) {
+		fn = handlers[handle]
+	}
+	mutex.Unlock()
+	if fn == nil {
+		d.setError(errUnknownHandler)
+		d.execute(fmt.Sprintf("\x02$!(UNKNOWNMSGHANDLER=%#x)", handle))
+		return true
+	}
+	return fn(d)
+}
+
+// ExecuteMessage decodes and executes the message at the current position.
+func (d *Decoder) ExecuteMessage() bool {
+	n, ok := d.executeMessageFromData(d.data)
+	d.data = d.data[n:]
+	return ok
+}
+
+// ExecuteSubstitution executes the message corresponding to the substitution
+// as encoded by EncodeSubstitution.
+func (d *Decoder) ExecuteSubstitution() {
+	switch x := d.DecodeUint(); x {
+	case substituteVar:
+		offset := d.DecodeUint()
+		d.executeMessageFromData(d.vars[offset:])
+	case substituteMacro:
+		name := d.DecodeString()
+		data, ok := d.macros.Lookup(name)
+		old := d.macroArg
+		// TODO: support macros of arity other than 1.
+		d.macroArg = int(d.DecodeUint())
+		switch {
+		case !ok:
+			// TODO: detect this at creation time.
+			d.setError(fmt.Errorf("catmsg: undefined macro %q", name))
+			fallthrough
+		case !d.execute(data):
+			d.dst.Render(name) // fall back to macro name.
+		}
+		d.macroArg = old
+	case substituteError:
+		d.dst.Render(d.DecodeString())
+	default:
+		panic("catmsg: unreachable")
+	}
+}