cmd/gotext: first stab at message extraction

Lots of TODOs, but it's a start.

The command infrastructure in main.go has been largely copied from the go tool code.

Change-Id: Iec26504b1a9a834ab7156a988417946b187cbcd2
Reviewed-on: https://go-review.googlesource.com/28594
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/cmd/gotext/doc.go b/cmd/gotext/doc.go
new file mode 100644
index 0000000..54eb485
--- /dev/null
+++ b/cmd/gotext/doc.go
@@ -0,0 +1,35 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// DO NOT EDIT THIS FILE. GENERATED BY go generate.
+// Edit the documentation in other files and rerun go generate to generate this one.
+
+// gotext is a tool for managing text in Go source code.
+//
+// Usage:
+//
+// 	gotext command [arguments]
+//
+// The commands are:
+//
+// 	extract     extract strings to be translated from code
+//
+// Use "gotext help [command]" for more information about a command.
+//
+// Additional help topics:
+//
+//
+// Use "gotext help [topic]" for more information about that topic.
+//
+//
+// Extract strings to be translated from code
+//
+// Usage:
+//
+// 	gotext extract <package>*
+//
+//
+//
+//
+package main
diff --git a/cmd/gotext/extract.go b/cmd/gotext/extract.go
new file mode 100644
index 0000000..79a9b59
--- /dev/null
+++ b/cmd/gotext/extract.go
@@ -0,0 +1,195 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"go/ast"
+	"go/build"
+	"go/constant"
+	"go/format"
+	"go/parser"
+	"go/types"
+	"io/ioutil"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/tools/go/loader"
+)
+
+// TODO:
+// - merge information into existing files
+// - handle different file formats (PO, XLIFF)
+// - handle features (gender, plural)
+// - message rewriting
+
+var cmdExtract = &Command{
+	Run:       runExtract,
+	UsageLine: "extract <package>*",
+	Short:     "extract strings to be translated from code",
+}
+
+// runExtract extracts message.Printer format strings from the named packages into JSON files.
+func runExtract(cmd *Command, args []string) error {
+	if len(args) == 0 {
+		args = []string{"."}
+	}
+
+	conf := loader.Config{
+		Build:      &build.Default,
+		ParserMode: parser.ParseComments,
+	}
+
+	// Use the initial packages from the command line.
+	args, err := conf.FromArgs(args, false)
+	if err != nil {
+		return err
+	}
+
+	// Load, parse and type-check the whole program.
+	iprog, err := conf.Load()
+	if err != nil {
+		return err
+	}
+
+	// print returns Go syntax for the specified node.
+	print := func(n ast.Node) string {
+		var buf bytes.Buffer
+		format.Node(&buf, conf.Fset, n)
+		return buf.String()
+	}
+
+	var translations []Translation
+	for _, info := range iprog.InitialPackages() {
+		for _, f := range info.Files {
+			// Associate comments with nodes.
+			cmap := ast.NewCommentMap(iprog.Fset, f, f.Comments)
+			getComment := func(n ast.Node) string {
+				if cs := cmap.Filter(n).Comments(); len(cs) > 0 {
+					return strings.TrimSpace(cs[0].Text())
+				}
+				return ""
+			}
+			// Find function calls.
+			ast.Inspect(f, func(n ast.Node) bool {
+				call, ok := n.(*ast.CallExpr)
+				if !ok {
+					return true
+				}
+
+				// Skip calls of functions other than
+				// (*message.Printer).{Sp,Fp,P}rintf.
+				sel, ok := call.Fun.(*ast.SelectorExpr)
+				if !ok {
+					return true
+				}
+				meth := info.Selections[sel]
+				if meth == nil || meth.Kind() != types.MethodVal {
+					return true
+				}
+				// TODO: remove cheap hack and check if the type either
+				// implements some interface or is specifically of type
+				// "golang.org/x/text/message".Printer.
+				m, ok := extractFuncs[path.Base(meth.Recv().String())]
+				if !ok {
+					return true
+				}
+
+				// argn is the index of the format string.
+				argn, ok := m[meth.Obj().Name()]
+				if !ok || argn >= len(call.Args) {
+					return true
+				}
+
+				// Skip calls with non-constant format string.
+				fmtstr := info.Types[call.Args[argn]].Value
+				if fmtstr == nil || fmtstr.Kind() != constant.String {
+					return true
+				}
+
+				posn := conf.Fset.Position(call.Lparen)
+				filepos := fmt.Sprintf("%s:%d:%d", filepath.Base(posn.Filename), posn.Line, posn.Column)
+
+				// TODO: identify the type of the format argument. If it is not
+				// a string, multiple keys may be defined.
+				var key []string
+
+				// TODO: replace substitutions (%v) with a translator friendly
+				// notation. For instance:
+				//     "%d files remaining" -> "{numFiles} files remaining", or
+				//     "%d files remaining" -> "{arg1} files remaining"
+				// Alternatively, this could be done at a later stage.
+				msg := constant.StringVal(fmtstr)
+
+				// Construct a Translation unit.
+				c := Translation{
+					Key:              key,
+					Position:         filepath.Join(info.Pkg.Path(), filepos),
+					Original:         Text{Msg: msg},
+					ExtractedComment: getComment(call.Args[argn]),
+					// TODO(fix): this doesn't get the before comment.
+					// Comment: getComment(call),
+				}
+
+				for i, arg := range call.Args[argn+1:] {
+					var val string
+					if v := info.Types[arg].Value; v != nil {
+						val = v.ExactString()
+					}
+					posn := conf.Fset.Position(arg.Pos())
+					filepos := fmt.Sprintf("%s:%d:%d", filepath.Base(posn.Filename), posn.Line, posn.Column)
+					c.Args = append(c.Args, Argument{
+						ID:             i + 1,
+						Type:           info.Types[arg].Type.String(),
+						UnderlyingType: info.Types[arg].Type.Underlying().String(),
+						Expr:           print(arg),
+						Value:          val,
+						Comment:        getComment(arg),
+						Position:       filepath.Join(info.Pkg.Path(), filepos),
+						// TODO report whether it implements
+						// interfaces plural.Interface,
+						// gender.Interface.
+					})
+				}
+
+				translations = append(translations, c)
+				return true
+			})
+		}
+	}
+
+	data, err := json.MarshalIndent(translations, "", "    ")
+	if err != nil {
+		return err
+	}
+	for _, tag := range getLangs() {
+		// TODO: merge with existing files, don't overwrite.
+		if err := os.MkdirAll(*dir, 0755); err != nil {
+			return fmt.Errorf("could not create directory: %v", err)
+		}
+		file := filepath.Join(*dir, fmt.Sprintf("gotext_%v.out.json", tag))
+		if err := ioutil.WriteFile(file, data, 0644); err != nil {
+			return fmt.Errorf("could not create file: %v", err)
+		}
+	}
+	return nil
+}
+
+// extractFuncs indicates the types and methods for which to extract strings,
+// and which argument to extract.
+// TODO: use the types in conf.Import("golang.org/x/text/message") to extract
+// the correct instances.
+var extractFuncs = map[string]map[string]int{
+	// TODO: Printer -> *golang.org/x/text/message.Printer
+	"message.Printer": {
+		"Printf":  0,
+		"Sprintf": 0,
+		"Fprintf": 1,
+	},
+}
diff --git a/cmd/gotext/main.go b/cmd/gotext/main.go
new file mode 100644
index 0000000..b03eb55
--- /dev/null
+++ b/cmd/gotext/main.go
@@ -0,0 +1,356 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go build -o gotext.latest
+//go:generate ./gotext.latest help gendocumentation
+//go:generate rm gotext.latest
+
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"flag"
+	"fmt"
+	"go/build"
+	"go/format"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"strings"
+	"sync"
+	"text/template"
+	"unicode"
+	"unicode/utf8"
+
+	"golang.org/x/text/language"
+	"golang.org/x/tools/go/buildutil"
+)
+
+func init() {
+	flag.Var((*buildutil.TagsFlag)(&build.Default.BuildTags), "tags", buildutil.TagsFlagDoc)
+}
+
+var (
+	dir   = flag.String("dir", "textdata", "default subdirectory to store translation files")
+	langs = flag.String("lang", "en", "comma-separated list of languages to process")
+)
+
+// NOTE: the Command struct is copied from the go tool in core.
+
+// A Command is an implementation of a go command
+// like go build or go fix.
+type Command struct {
+	// Run runs the command.
+	// The args are the arguments after the command name.
+	Run func(cmd *Command, args []string) error
+
+	// UsageLine is the one-line usage message.
+	// The first word in the line is taken to be the command name.
+	UsageLine string
+
+	// Short is the short description shown in the 'go help' output.
+	Short string
+
+	// Long is the long message shown in the 'go help <this-command>' output.
+	Long string
+
+	// Flag is a set of flags specific to this command.
+	Flag flag.FlagSet
+}
+
+// Name returns the command's name: the first word in the usage line.
+func (c *Command) Name() string {
+	name := c.UsageLine
+	i := strings.Index(name, " ")
+	if i >= 0 {
+		name = name[:i]
+	}
+	return name
+}
+
+func (c *Command) Usage() {
+	fmt.Fprintf(os.Stderr, "usage: %s\n\n", c.UsageLine)
+	fmt.Fprintf(os.Stderr, "%s\n", strings.TrimSpace(c.Long))
+	os.Exit(2)
+}
+
+// Runnable reports whether the command can be run; otherwise
+// it is a documentation pseudo-command such as importpath.
+func (c *Command) Runnable() bool {
+	return c.Run != nil
+}
+
+// commands lists the available commands and help topics.
+// The order here is the order in which they are printed by 'gotext help'.
+var commands = []*Command{
+	cmdExtract,
+	// TODO:
+	// - generate code from translations.
+	// - update: full-cycle update of extraction, sending, and integration
+	// - report: report of freshness of translations
+}
+
+var exitStatus = 0
+var exitMu sync.Mutex
+
+func setExitStatus(n int) {
+	exitMu.Lock()
+	if exitStatus < n {
+		exitStatus = n
+	}
+	exitMu.Unlock()
+}
+
+var origEnv []string
+
+// main parses the global flags and dispatches to "help" or to the named subcommand.
+func main() {
+	flag.Usage = usage
+	flag.Parse()
+	log.SetFlags(0)
+
+	args := flag.Args()
+	if len(args) < 1 {
+		usage()
+	}
+
+	if args[0] == "help" {
+		help(args[1:])
+		exit() // not a bare return: a failure recorded by errorf in help must set the exit status
+	}
+
+	for _, cmd := range commands {
+		if cmd.Name() == args[0] && cmd.Runnable() {
+			cmd.Flag.Usage = func() { cmd.Usage() }
+			cmd.Flag.Parse(args[1:])
+			if err := cmd.Run(cmd, cmd.Flag.Args()); err != nil {
+				fatalf("gotext: %v", err)
+			}
+			exit()
+			return
+		}
+	}
+
+	fmt.Fprintf(os.Stderr, "gotext: unknown subcommand %q\nRun 'gotext help' for usage.\n", args[0])
+	setExitStatus(2)
+	exit()
+}
+
+var usageTemplate = `gotext is a tool for managing text in Go source code.
+
+Usage:
+
+	gotext command [arguments]
+
+The commands are:
+{{range .}}{{if .Runnable}}
+	{{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
+
+Use "gotext help [command]" for more information about a command.
+
+Additional help topics:
+{{range .}}{{if not .Runnable}}
+	{{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
+
+Use "gotext help [topic]" for more information about that topic.
+
+`
+
+var helpTemplate = `{{if .Runnable}}usage: gotext {{.UsageLine}}
+
+{{end}}{{.Long | trim}}
+`
+
+var documentationTemplate = `{{range .}}{{if .Short}}{{.Short | capitalize}}
+
+{{end}}{{if .Runnable}}Usage:
+
+	gotext {{.UsageLine}}
+
+{{end}}{{.Long | trim}}
+
+
+{{end}}`
+
+// commentWriter writes a Go comment to the underlying io.Writer,
+// using line comment form (//).
+type commentWriter struct {
+	W            io.Writer
+	wroteSlashes bool // Wrote "//" at the beginning of the current line.
+}
+
+func (c *commentWriter) Write(p []byte) (int, error) {
+	var n int
+	for i, b := range p {
+		if !c.wroteSlashes {
+			s := "//"
+			if b != '\n' {
+				s = "// "
+			}
+			if _, err := io.WriteString(c.W, s); err != nil {
+				return n, err
+			}
+			c.wroteSlashes = true
+		}
+		n0, err := c.W.Write(p[i : i+1])
+		n += n0
+		if err != nil {
+			return n, err
+		}
+		if b == '\n' {
+			c.wroteSlashes = false
+		}
+	}
+	return len(p), nil
+}
+
+// An errWriter wraps a writer, recording whether a write error occurred.
+type errWriter struct {
+	w   io.Writer
+	err error
+}
+
+func (w *errWriter) Write(b []byte) (int, error) {
+	n, err := w.w.Write(b)
+	if err != nil {
+		w.err = err
+	}
+	return n, err
+}
+
+// tmpl executes the given template text on data, writing the result to w.
+func tmpl(w io.Writer, text string, data interface{}) {
+	t := template.New("top")
+	t.Funcs(template.FuncMap{"trim": strings.TrimSpace, "capitalize": capitalize})
+	template.Must(t.Parse(text))
+	ew := &errWriter{w: w}
+	err := t.Execute(ew, data)
+	if ew.err != nil {
+		// I/O error writing. Ignore write on closed pipe.
+		if strings.Contains(ew.err.Error(), "pipe") {
+			os.Exit(1)
+		}
+		fatalf("writing output: %v", ew.err)
+	}
+	if err != nil {
+		panic(err)
+	}
+}
+
+func capitalize(s string) string {
+	if s == "" {
+		return s
+	}
+	r, n := utf8.DecodeRuneInString(s)
+	return string(unicode.ToTitle(r)) + s[n:]
+}
+
+func printUsage(w io.Writer) {
+	bw := bufio.NewWriter(w)
+	tmpl(bw, usageTemplate, commands)
+	bw.Flush()
+}
+
+func usage() {
+	printUsage(os.Stderr)
+	os.Exit(2)
+}
+
+// help implements the 'help' command.
+func help(args []string) {
+	if len(args) == 0 {
+		printUsage(os.Stdout)
+		// not exit 2: succeeded at 'gotext help'.
+		return
+	}
+	if len(args) != 1 {
+		fmt.Fprintf(os.Stderr, "usage: gotext help command\n\nToo many arguments given.\n")
+		os.Exit(2) // failed at 'gotext help'
+	}
+
+	arg := args[0]
+
+	// An argument ending in "documentation" produces the contents of doc.go:
+	// "gendocumentation" writes the file, any other such argument prints it.
+	if strings.HasSuffix(arg, "documentation") {
+		w := &bytes.Buffer{}
+
+		fmt.Fprintln(w, "// Copyright 2016 The Go Authors. All rights reserved.")
+		fmt.Fprintln(w, "// Use of this source code is governed by a BSD-style")
+		fmt.Fprintln(w, "// license that can be found in the LICENSE file.")
+		fmt.Fprintln(w)
+		fmt.Fprintln(w, "// DO NOT EDIT THIS FILE. GENERATED BY go generate.")
+		fmt.Fprintln(w, "// Edit the documentation in other files and rerun go generate to generate this one.")
+		fmt.Fprintln(w)
+		buf := new(bytes.Buffer)
+		printUsage(buf)
+		usage := &Command{Long: buf.String()}
+		tmpl(&commentWriter{W: w}, documentationTemplate, append([]*Command{usage}, commands...))
+		fmt.Fprintln(w, "package main")
+		if arg == "gendocumentation" {
+			// Write doc.go only if formatting succeeded; otherwise the file would be truncated.
+			if b, err := format.Source(w.Bytes()); err != nil {
+				errorf("Could not format generated docs: %v\n", err)
+			} else if err := ioutil.WriteFile("doc.go", b, 0666); err != nil {
+				errorf("Could not create file doc.go: %v\n", err)
+			}
+		} else {
+			fmt.Println(w.String())
+		}
+		return
+	}
+
+	for _, cmd := range commands {
+		if cmd.Name() == arg {
+			tmpl(os.Stdout, helpTemplate, cmd)
+			// not exit 2: succeeded at 'gotext help cmd'.
+			return
+		}
+	}
+
+	fmt.Fprintf(os.Stderr, "Unknown help topic %#q.  Run 'gotext help'.\n", arg)
+	os.Exit(2) // failed at 'gotext help cmd'
+}
+
+func getLangs() (tags []language.Tag) {
+	for _, t := range strings.Split(*langs, ",") {
+		tag, err := language.Parse(t)
+		if err != nil {
+			fatalf("gotext: could not parse language %q: %v", t, err)
+		}
+		tags = append(tags, tag)
+	}
+	return tags
+}
+
+var atexitFuncs []func()
+
+func atexit(f func()) {
+	atexitFuncs = append(atexitFuncs, f)
+}
+
+func exit() {
+	for _, f := range atexitFuncs {
+		f()
+	}
+	os.Exit(exitStatus)
+}
+
+func fatalf(format string, args ...interface{}) {
+	errorf(format, args...)
+	exit()
+}
+
+func errorf(format string, args ...interface{}) {
+	log.Printf(format, args...)
+	setExitStatus(1)
+}
+
+func exitIfErrors() {
+	if exitStatus != 0 {
+		exit()
+	}
+}
diff --git a/cmd/gotext/message.go b/cmd/gotext/message.go
new file mode 100644
index 0000000..67a622f
--- /dev/null
+++ b/cmd/gotext/message.go
@@ -0,0 +1,127 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// TODO: these definitions should be moved to a package so that they can be used
+// by other tools.
+
+// The file contains the structures used to define translations of certain
+// messages.
+//
+// A translation may have multiple translation strings, or messages, depending
+// on the feature values of the various arguments. For instance, consider
+// a hypothetical translation from English to English, where the source defines
+// the format string "%d file(s) remaining". A completed translation, expressed
+// in JSON, for this format string could look like:
+//
+// {
+//     "Key": [
+//         "\"%d file(s) remaining\""
+//     ],
+//     "Original": {
+//         "Msg": "\"%d file(s) remaining\""
+//     },
+//     "Translation": {
+// 	       "Select": {
+// 	           "Feature": "plural",
+//             "Arg": 1,
+//             "Cases": {
+//                 "one":   { "Msg": "1 file remaining" },
+//                 "other": { "Msg": "%d files remaining" }
+//             },
+//         },
+//     },
+//     "Args": [
+//         {
+//             "ID": 1,
+//             "Type": "int",
+//             "UnderlyingType": "int",
+//             "Expr": "nFiles",
+//             "Comment": "number of files remaining",
+//             "Position": "golang.org/x/text/cmd/gotext/demo.go:34:3"
+//         }
+//     ],
+//     "Position": "golang.org/x/text/cmd/gotext/demo.go:33:10",
+// }
+//
+// Alternatively, the Translation section could be written as:
+//
+//     "Translation": {
+// 	       "Msg": "%d %[files]s remaining",
+//         "Var": {
+//             "files" : {
+//                 "Select": {
+//         	           "Feature": "plural",
+//                     "Arg": 1,
+//                     "Cases": {
+//                         "one":   { "Msg": "file" },
+//                         "other": { "Msg": "files" }
+//                     }
+//                 }
+//             }
+//         }
+//     }
+
+// A Translation describes a translation for a single language for a single
+// message.
+type Translation struct {
+	// Key contains a list of identifiers for the message. If this list is empty
+	// Original is used as the key.
+	Key               []string `json:"key,omitempty"`
+	Original          Text     `json:"original"`
+	Translation       Text     `json:"translation"`
+	ExtractedComment  string   `json:"extractedComment,omitempty"`
+	TranslatorComment string   `json:"translatorComment,omitempty"`
+
+	Args []Argument `json:"args,omitempty"`
+
+	// Extraction information.
+	Position string `json:"position,omitempty"` // filePosition:line
+}
+
+// An Argument contains information about the arguments passed to a message.
+type Argument struct {
+	ID             interface{} `json:"id"` // An int for printf-style calls, but could be a string.
+	Type           string      `json:"type"`
+	UnderlyingType string      `json:"underlyingType"`
+	Expr           string      `json:"expr"`
+	Value          string      `json:"value,omitempty"`
+	Comment        string      `json:"comment,omitempty"`
+	Position       string      `json:"position,omitempty"`
+
+	// Features contains the features that are available for the implementation
+	// of this argument.
+	Features []Feature `json:"features,omitempty"`
+}
+
+// Feature holds information about a feature that can be implemented by
+// an Argument.
+type Feature struct {
+	Type string `json:"type"` // Right now this is only gender and plural.
+
+	// TODO: possible values and examples for the language under consideration.
+
+}
+
+// Text defines a message to be displayed.
+type Text struct {
+	// Msg and Select contain the message to be displayed. Within a Text value
+	// either Msg or Select is defined.
+	Msg    string  `json:"msg,omitempty"`
+	Select *Select `json:"select,omitempty"`
+	// Var defines a map of variables that may be substituted in the selected
+	// message.
+	Var map[string]Text `json:"var,omitempty"`
+	// Example contains an example message formatted with default values.
+	Example string `json:"example,omitempty"`
+}
+
+// A Select selects a Text based on the feature value associated with
+// a feature of a certain argument.
+type Select struct {
+	Feature string          `json:"feature"` // Name of variable or Feature type
+	Arg     interface{}     `json:"arg"`     // The argument ID.
+	Cases   map[string]Text `json:"cases"`
+}