message/pipeline: further restructuring

- prepares to move more code out of gotext
- accumulating results of operations in State
  makes it easier to add generic pipeline actions
  and reuse state between operations

Change-Id: I8a109075348c03bfd05359635a20e668fcaf09e0
Reviewed-on: https://go-review.googlesource.com/83655
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/cmd/gotext/extract.go b/cmd/gotext/extract.go
index f1f700b..20b94d3 100644
--- a/cmd/gotext/extract.go
+++ b/cmd/gotext/extract.go
@@ -46,7 +46,11 @@
 		SourceLanguage: tag,
 		Packages:       args,
 	}
-	out, err := pipeline.Extract(config)
+	state, err := pipeline.Extract(config)
+	if err != nil {
+		return wrap(err, "extract failed")
+	}
+	out := state.Extracted
 
 	data, err := json.MarshalIndent(out, "", "    ")
 	if err != nil {
diff --git a/cmd/gotext/generate.go b/cmd/gotext/generate.go
index 2d34465..a108dee 100644
--- a/cmd/gotext/generate.go
+++ b/cmd/gotext/generate.go
@@ -50,8 +50,8 @@
 	// manually created and stored in the textdata directory.
 
 	// Build up index of translations and original messages.
-	extracted := pipeline.Locale{}
-	translations := []*pipeline.Locale{}
+	extracted := pipeline.Messages{}
+	translations := []pipeline.Messages{}
 
 	err = filepath.Walk(*dir, func(path string, f os.FileInfo, err error) error {
 		if err != nil {
@@ -80,11 +80,11 @@
 		if err != nil {
 			return wrap(err, "read file failed")
 		}
-		var locale pipeline.Locale
+		var locale pipeline.Messages
 		if err := json.Unmarshal(b, &locale); err != nil {
 			return wrap(err, "parsing translation file failed")
 		}
-		translations = append(translations, &locale)
+		translations = append(translations, locale)
 		return nil
 	})
 	if err != nil {
diff --git a/message/pipeline/extract.go b/message/pipeline/extract.go
index 20ad914..e202fe4 100644
--- a/message/pipeline/extract.go
+++ b/message/pipeline/extract.go
@@ -28,8 +28,12 @@
 // - handle features (gender, plural)
 // - message rewriting
 
+// - %m substitutions
+// - `msg:"etc"` tags
+// - msg/Msg top-level vars and strings.
+
 // Extract extracts all strings form the package defined in Config.
-func Extract(c *Config) (*Locale, error) {
+func Extract(c *Config) (*State, error) {
 	conf := loader.Config{}
 	prog, err := loadPackages(&conf, c.Packages)
 	if err != nil {
@@ -189,11 +193,14 @@
 		}
 	}
 
-	out := &Locale{
-		Language: c.SourceLanguage,
-		Messages: messages,
-	}
-	return out, nil
+	return &State{
+		Config:  *c,
+		program: prog,
+		Extracted: Messages{
+			Language: c.SourceLanguage,
+			Messages: messages,
+		},
+	}, nil
 }
 
 func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string {
diff --git a/message/pipeline/generate.go b/message/pipeline/generate.go
index c5e51bd..2b1c875 100644
--- a/message/pipeline/generate.go
+++ b/message/pipeline/generate.go
@@ -18,13 +18,57 @@
 	"golang.org/x/text/internal/catmsg"
 	"golang.org/x/text/internal/gen"
 	"golang.org/x/text/language"
+	"golang.org/x/tools/go/loader"
 )
 
 var transRe = regexp.MustCompile(`messages\.(.*)\.json`)
 
-// Generate writes a Go file with the given package name to w, which defines a
+// Generate writes a Go file that defines a Catalog with translated messages.
+func (s *State) Generate() error {
+	filename := s.Config.CatalogFile
+	prog, err := loadPackages(&loader.Config{}, []string{filename})
+	if err != nil {
+		return wrap(err, "could not load package")
+	}
+	pkgs := prog.InitialPackages()
+	if len(pkgs) != 1 {
+		return errorf("more than one package selected: %v", pkgs)
+	}
+	pkg := pkgs[0].Pkg.Name()
+
+	cw, err := s.generate()
+	if err != nil {
+		return err
+	}
+	cw.WriteGoFile(filename, pkg) // TODO: WriteGoFile should return error.
+	return err
+}
+
+// WriteGen writes a Go file with the given package name to w that defines a
 // Catalog with translated messages.
-func Generate(w io.Writer, pkg string, extracted *Locale, trans ...*Locale) (n int, err error) {
+func (s *State) WriteGen(w io.Writer, pkg string) error {
+	cw, err := s.generate()
+	if err != nil {
+		return err
+	}
+	_, err = cw.WriteGo(w, pkg, "")
+	return err
+}
+
+// Generate is deprecated; use (*State).Generate().
+func Generate(w io.Writer, pkg string, extracted *Messages, trans ...Messages) (n int, err error) {
+	s := State{
+		Extracted:    *extracted,
+		Translations: trans,
+	}
+	cw, err := s.generate()
+	if err != nil {
+		return 0, err
+	}
+	return cw.WriteGo(w, pkg, "")
+}
+
+func (s *State) generate() (*gen.CodeWriter, error) {
 	// TODO: add in external input. Right now we assume that all files are
 	// manually created and stored in the textdata directory.
 
@@ -34,7 +78,7 @@
 	langVars := []string{}
 	usedKeys := map[string]int{}
 
-	for _, loc := range trans {
+	for _, loc := range s.Translations {
 		tag := loc.Language
 		if _, ok := translations[tag]; !ok {
 			translations[tag] = map[string]Message{}
@@ -44,7 +88,7 @@
 			if !m.Translation.IsEmpty() {
 				for _, id := range m.ID {
 					if _, ok := translations[tag][id]; ok {
-						logf("Duplicate translation in locale %q for message %q", tag, id)
+						warnf("Duplicate translation in locale %q for message %q", tag, id)
 					}
 					translations[tag][id] = m
 				}
@@ -58,7 +102,7 @@
 	for _, tag := range languages {
 		langVars = append(langVars, strings.Replace(tag.String(), "-", "_", -1))
 		dict := translations[tag]
-		for _, msg := range extracted.Messages {
+		for _, msg := range s.Extracted.Messages {
 			for _, id := range msg.ID {
 				if trans, ok := dict[id]; ok && !trans.Translation.IsEmpty() {
 					if _, ok := usedKeys[msg.Key]; !ok {
@@ -67,7 +111,7 @@
 					break
 				}
 				// TODO: log missing entry.
-				logf("%s: Missing entry for %q.", tag, id)
+				warnf("%s: Missing entry for %q.", tag, id)
 			}
 		}
 	}
@@ -78,12 +122,12 @@
 		Fallback  language.Tag
 		Languages []string
 	}{
-		Fallback:  extracted.Language,
+		Fallback:  s.Extracted.Language,
 		Languages: langVars,
 	}
 
 	if err := lookup.Execute(cw, x); err != nil {
-		return 0, wrap(err, "error")
+		return nil, wrap(err, "error")
 	}
 
 	keyToIndex := []string{}
@@ -100,21 +144,21 @@
 	for i, tag := range languages {
 		dict := translations[tag]
 		a := make([]string, len(usedKeys))
-		for _, msg := range extracted.Messages {
+		for _, msg := range s.Extracted.Messages {
 			for _, id := range msg.ID {
 				if trans, ok := dict[id]; ok && !trans.Translation.IsEmpty() {
 					m, err := assemble(&msg, &trans.Translation)
 					if err != nil {
-						return 0, wrap(err, "error")
+						return nil, wrap(err, "error")
 					}
 					// TODO: support macros.
 					data, err := catmsg.Compile(tag, nil, m)
 					if err != nil {
-						return 0, wrap(err, "error")
+						return nil, wrap(err, "error")
 					}
 					key := usedKeys[msg.Key]
 					if d := a[key]; d != "" && d != data {
-						logf("Duplicate non-consistent translation for key %q, picking the one for message %q", msg.Key, id)
+						warnf("Duplicate non-consistent translation for key %q, picking the one for message %q", msg.Key, id)
 					}
 					a[key] = string(data)
 					break
@@ -131,7 +175,7 @@
 		cw.WriteVar(langVars[i]+"Index", index)
 		cw.WriteConst(langVars[i]+"Data", strings.Join(a, ""))
 	}
-	return cw.WriteGo(w, pkg, "")
+	return cw, nil
 }
 
 func assemble(m *Message, t *Text) (msg catmsg.Message, err error) {
diff --git a/message/pipeline/message.go b/message/pipeline/message.go
index 8e54700..b9b0b3a 100644
--- a/message/pipeline/message.go
+++ b/message/pipeline/message.go
@@ -23,28 +23,8 @@
 // the format string "%d file(s) remaining".
 // See the examples directory for examples of extracted messages.
 
-// Config contains configuration for the translation pipeline.
-type Config struct {
-	SourceLanguage language.Tag
-
-	// Supported indicates the languages for which data should be generated.
-	// If unspecified, it will attempt to derive the set of supported languages
-	// from the context.
-	Supported []language.Tag
-
-	Packages []string
-
-	// TODO:
-	// - Printf-style configuration
-	// - Template-style configuration
-	// - Extraction options
-	// - Rewrite options
-	// - Generation options
-}
-
-// A Locale is used to store all information for a single locale. This type is
-// used both for extraction and injection.
-type Locale struct {
+// Messages is used to store translations for a single language.
+type Messages struct {
 	Language language.Tag    `json:"language"`
 	Messages []Message       `json:"messages"`
 	Macros   map[string]Text `json:"macros,omitempty"`
diff --git a/message/pipeline/pipeline.go b/message/pipeline/pipeline.go
index 733a50c..848a47a 100644
--- a/message/pipeline/pipeline.go
+++ b/message/pipeline/pipeline.go
@@ -13,6 +13,7 @@
 	"go/parser"
 	"log"
 
+	"golang.org/x/text/language"
 	"golang.org/x/tools/go/loader"
 )
 
@@ -22,6 +23,140 @@
 	gotextSuffix = ".gotext.json"
 )
 
+// Config contains configuration for the translation pipeline.
+type Config struct {
+	// Supported indicates the languages for which data should be generated.
+	// The default is to support all locales for which there are matching
+	// translation files.
+	Supported []language.Tag
+
+	// --- Extraction
+
+	SourceLanguage language.Tag
+
+	Packages []string
+
+	// --- File structure
+
+	// Dir is the root dir for all operations.
+	Dir string
+
+	// TranslationsPattern is a regular expression for input translation files
+	// that match anywhere in the directory structure rooted at Dir.
+	TranslationsPattern string
+
+	// OutPattern defines the location for translation files for a certain
+	// language. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
+	OutPattern string
+
+	// Format defines the file format for generated translation files.
+	// The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText.
+	Format string
+
+	Ext string
+
+	// TODO:
+	// Actions are additional actions to be performed after the initial extract
+	// and merge.
+	// Actions []struct {
+	// 	Name    string
+	// 	Options map[string]string
+	// }
+
+	// --- Generation
+
+	// CatalogFile may be in a different package. If it is not defined, it will
+	// be written to stdout.
+	CatalogFile string
+
+	// DeclareVar defines a variable to which to assign the generated Catalog.
+	DeclareVar string
+
+	// SetDefault determines whether to assign the generated Catalog to
+	// message.DefaultCatalog. The default for this is true if DeclareVar is
+	// not defined, false otherwise.
+	SetDefault bool
+
+	// TODO:
+	// - Printf-style configuration
+	// - Template-style configuration
+	// - Extraction options
+	// - Rewrite options
+	// - Generation options
+}
+
+// Operations:
+// - extract:       get the strings
+// - disambiguate:  find messages with the same key, but possibly different meaning.
+// - create out:    create a list of messages that need translations
+// - load trans:    load the list of current translations
+// - merge:         assign list of translations as done
+// - (action)expand:    analyze features and create example sentences for each version.
+// - (action)googletrans:   pre-populate messages with automatic translations.
+// - (action)export:    send out messages somewhere non-standard
+// - (action)import:    load messages from somewhere non-standard
+// - vet program:   don't pass "foo" + var + "bar" strings. Not using funcs for translated strings.
+// - vet trans:     coverage: all translations/ all features.
+// - generate:      generate Go code
+
+// State holds all accumulated information on translations during processing.
+type State struct {
+	Config Config
+
+	Package string
+	program *loader.Program
+
+	Extracted Messages `json:"messages"`
+
+	// Messages includes all messages for which there need to be translations.
+	// Duplicates may be eliminated. Generation will be done from these messages
+	// (usually after merging).
+	Messages []Messages
+
+	// Translations are incoming translations for the application messages.
+	Translations []Messages
+}
+
+// A full-cycle pipeline example:
+//
+//  func updateAll(c *Config) error {
+//  	s := Extract(c)
+//  	s.Import()
+//  	s.Merge()
+//  	for range s.Config.Actions {
+//  		if s.Err != nil {
+//  			return s.Err
+//  		}
+//  		//  TODO: do the actions.
+//  	}
+//  	if err := s.Export(); err != nil {
+//  		return err
+//  	}
+//  	if err := s.Generate(); err != nil {
+//  		return err
+//  	}
+//  	return nil
+//  }
+
+// Import loads existing translation files.
+func (s *State) Import() error {
+	panic("unimplemented")
+	return nil
+}
+
+// Merge merges the extracted messages with the existing translations.
+func (s *State) Merge() error {
+	panic("unimplemented")
+	return nil
+
+}
+
+// Export writes out the messages to translation out files.
+func (s *State) Export() error {
+	panic("unimplemented")
+	return nil
+}
+
 // NOTE: The command line tool already prefixes with "gotext:".
 var (
 	wrap = func(err error, msg string) error {
@@ -33,8 +168,8 @@
 	errorf = fmt.Errorf
 )
 
-// TODO: don't log.
-func logf(format string, args ...interface{}) {
+func warnf(format string, args ...interface{}) {
+	// TODO: don't log.
 	log.Printf(format, args...)
 }
 
diff --git a/message/pipeline/rewrite.go b/message/pipeline/rewrite.go
index fa78324..cf1511f 100644
--- a/message/pipeline/rewrite.go
+++ b/message/pipeline/rewrite.go
@@ -24,11 +24,11 @@
 // machinery and rewrites strings to adopt best practices when possible.
 // If w is not nil the generated files are written to it, each files with a
 // "--- <filename>" header. Otherwise the files are overwritten.
-func Rewrite(w io.Writer, goPackage string) error {
+func Rewrite(w io.Writer, args ...string) error {
 	conf := &loader.Config{
 		AllowErrors: true, // Allow unused instances of message.Printer.
 	}
-	prog, err := loadPackages(conf, []string{goPackage})
+	prog, err := loadPackages(conf, args)
 	if err != nil {
 		return wrap(err, "")
 	}