message/pipeline: handle globals

Include global variables that are assigned a constant value
and that are
1) explicitly passed to a printer method
2) prefixed with Msg or msg.

Note that comments are not yet properly extracted.

Change-Id: Id7b4aaa1f4d4516adf2fbcc7bb529235a57db56a
Reviewed-on: https://go-review.googlesource.com/105016
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ross Light <light@google.com>
diff --git a/internal/format/parser.go b/internal/format/parser.go
index f4f37f4..855aed7 100644
--- a/internal/format/parser.go
+++ b/internal/format/parser.go
@@ -244,6 +244,7 @@
 		p.Status = StatusBadArgNum
 	case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
 		p.Status = StatusMissingArg
+		p.ArgNum++
 	case verb == 'v':
 		// Go syntax
 		p.SharpV = p.Sharp
diff --git a/message/pipeline/extract.go b/message/pipeline/extract.go
index 327670f..3db4172 100644
--- a/message/pipeline/extract.go
+++ b/message/pipeline/extract.go
@@ -33,9 +33,7 @@
 // - handle features (gender, plural)
 // - message rewriting
 
-// - %m substitutions
 // - `msg:"etc"` tags
-// - msg/Msg top-level vars and strings.
 
 // Extract extracts all strings form the package defined in Config.
 func Extract(c *Config) (*State, error) {
@@ -64,16 +62,16 @@
 	callGraph *callgraph.Graph
 
 	// Calls and other expressions to collect.
-	exprs    map[token.Pos]ast.Expr
+	globals  map[token.Pos]*constData
 	funcs    map[token.Pos]*callData
 	messages []Message
 }
 
 func newExtracter(c *Config) (x *extracter, err error) {
 	x = &extracter{
-		conf:  loader.Config{},
-		exprs: map[token.Pos]ast.Expr{},
-		funcs: map[token.Pos]*callData{},
+		conf:    loader.Config{},
+		globals: map[token.Pos]*constData{},
+		funcs:   map[token.Pos]*callData{},
 	}
 
 	x.iprog, err = loadPackages(&x.conf, c.Packages)
@@ -81,7 +79,7 @@
 		return nil, wrap(err, "")
 	}
 
-	x.prog = ssautil.CreateProgram(x.iprog, 0)
+	x.prog = ssautil.CreateProgram(x.iprog, ssa.GlobalDebug|ssa.BareInits)
 	x.prog.Build()
 
 	x.callGraph = cha.CallGraph(x.prog)
@@ -89,10 +87,21 @@
 	return x, nil
 }
 
+func (x *extracter) globalData(pos token.Pos) *constData {
+	cd := x.globals[pos]
+	if cd == nil {
+		cd = &constData{}
+		x.globals[pos] = cd
+	}
+	return cd
+}
+
 func (x *extracter) seedEndpoints() {
 	pkg := x.prog.Package(x.iprog.Package("golang.org/x/text/message").Pkg)
 	typ := types.NewPointer(pkg.Type("Printer").Type())
 
+	x.processGlobalVars()
+
 	x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Printf"), &callData{
 		formatPos: 1,
 		argPos:    2,
@@ -110,8 +119,66 @@
 	})
 }
 
+// processGlobalVars finds string constants that are assigned to global
+// variables.
+func (x *extracter) processGlobalVars() {
+	for _, p := range x.prog.AllPackages() {
+		m, ok := p.Members["init"]
+		if !ok {
+			continue
+		}
+		for _, b := range m.(*ssa.Function).Blocks {
+			for _, i := range b.Instrs {
+				s, ok := i.(*ssa.Store)
+				if !ok {
+					continue
+				}
+				a, ok := s.Addr.(*ssa.Global)
+				if !ok {
+					continue
+				}
+				t := a.Type()
+				for {
+					p, ok := t.(*types.Pointer)
+					if !ok {
+						break
+					}
+					t = p.Elem()
+				}
+				if b, ok := t.(*types.Basic); !ok || b.Kind() != types.String {
+					continue
+				}
+				x.visitInit(a, s.Val)
+			}
+		}
+	}
+}
+
+type constData struct {
+	call   *callData // to provide a signature for the constants
+	values []constVal
+	others []token.Pos // Assigned to other global data.
+}
+
+func (d *constData) visit(x *extracter, f func(c constant.Value)) {
+	for _, v := range d.values {
+		f(v.value)
+	}
+	for _, p := range d.others {
+		if od, ok := x.globals[p]; ok {
+			od.visit(x, f)
+		}
+	}
+}
+
+type constVal struct {
+	value constant.Value
+	pos   token.Pos
+}
+
 type callData struct {
 	call    ssa.CallInstruction
+	expr    *ast.CallExpr
 	formats []constant.Value
 
 	callee    *callData
@@ -196,6 +263,68 @@
 	}
 }
 
+// visitInit evaluates and collects values assigned to global variables in an
+// init function.
+func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) {
+	if v == nil {
+		return
+	}
+	x.debug(v, "GLOBAL", v)
+
+	switch v := v.(type) {
+	case *ssa.Phi:
+		for _, e := range v.Edges {
+			x.visitInit(global, e)
+		}
+
+	case *ssa.Const:
+		// Only record strings with letters.
+		if str := constant.StringVal(v.Value); isMsg(str) {
+			cd := x.globalData(global.Pos())
+			cd.values = append(cd.values, constVal{v.Value, v.Pos()})
+		}
+		// TODO: handle %m-directive.
+
+	case *ssa.Global:
+		cd := x.globalData(global.Pos())
+		cd.others = append(cd.others, v.Pos())
+
+	case *ssa.FieldAddr, *ssa.Field:
+		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
+		// an example args as to give parameters for the translator.
+
+	case *ssa.Slice:
+		if v.Low == nil && v.High == nil && v.Max == nil {
+			x.visitInit(global, v.X)
+		}
+
+	case *ssa.Alloc:
+		if ref := v.Referrers(); ref == nil {
+			for _, r := range *ref {
+				values := []ssa.Value{}
+				for _, o := range r.Operands(nil) {
+					if o == nil || *o == v {
+						continue
+					}
+					values = append(values, *o)
+				}
+				// TODO: return something different if we care about multiple
+				// values as well.
+				if len(values) == 1 {
+					x.visitInit(global, values[0])
+				}
+			}
+		}
+
+	case ssa.Instruction:
+		rands := v.Operands(nil)
+		if len(rands) == 1 && rands[0] != nil {
+			x.visitInit(global, *rands[0])
+		}
+	}
+	return
+}
+
 // visitFormats finds the original source of the value. The returned index is
 // position of the argument if originated from a function argument or -1
 // otherwise.
@@ -220,8 +349,7 @@
 		// TODO: handle %m-directive.
 
 	case *ssa.Global:
-		// TODO: record value if a string and try to determine a possible
-		// constant value from the ast data.
+		x.globalData(v.Pos()).call = call
 
 	case *ssa.FieldAddr, *ssa.Field:
 		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
@@ -356,6 +484,7 @@
 }
 
 type packageExtracter struct {
+	f    *ast.File
 	x    *extracter
 	info *loader.PackageInfo
 	cmap ast.CommentMap
@@ -371,33 +500,73 @@
 
 func (x *extracter) extractMessages() {
 	prog := x.iprog
+	files := []packageExtracter{}
 	for _, info := range x.iprog.AllPackages {
 		for _, f := range info.Files {
 			// Associate comments with nodes.
 			px := packageExtracter{
-				x, info,
+				f, x, info,
 				ast.NewCommentMap(prog.Fset, f, f.Comments),
 			}
-
-			// Find function calls.
-			ast.Inspect(f, func(n ast.Node) bool {
-				switch v := n.(type) {
-				case *ast.CallExpr:
-					return px.handleCall(v)
-				}
-				return true
-			})
+			files = append(files, px)
 		}
 	}
+	for _, px := range files {
+		ast.Inspect(px.f, func(n ast.Node) bool {
+			switch v := n.(type) {
+			case *ast.CallExpr:
+				if d := x.funcs[v.Lparen]; d != nil {
+					d.expr = v
+				}
+			}
+			return true
+		})
+	}
+	for _, px := range files {
+		ast.Inspect(px.f, func(n ast.Node) bool {
+			switch v := n.(type) {
+			case *ast.CallExpr:
+				return px.handleCall(v)
+			case *ast.ValueSpec:
+				return px.handleGlobal(v)
+			}
+			return true
+		})
+	}
+}
+
+func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool {
+	comment := px.getComment(spec)
+
+	for _, ident := range spec.Names {
+		data, ok := px.x.globals[ident.Pos()]
+		if !ok {
+			continue
+		}
+		name := ident.Name
+		var arguments []argument
+		if data.call != nil {
+			arguments = px.getArguments(data.call)
+		} else if !strings.HasPrefix(name, "msg") && !strings.HasPrefix(name, "Msg") {
+			continue
+		}
+		data.visit(px.x, func(c constant.Value) {
+			px.addMessage(spec.Pos(), []string{name}, c, comment, arguments)
+		})
+	}
+
+	return true
 }
 
 func (px packageExtracter) handleCall(call *ast.CallExpr) bool {
 	x := px.x
-	info := px.info
 	data := x.funcs[call.Lparen]
 	if data == nil || len(data.formats) == 0 {
 		return true
 	}
+	if data.expr != call {
+		panic("invariant `data.call != call` failed")
+	}
 	x.debug(data.call, "INSERT", data.formats)
 
 	argn := data.callFormatPos()
@@ -406,6 +575,8 @@
 	}
 	format := call.Args[argn]
 
+	arguments := px.getArguments(data)
+
 	comment := ""
 	key := []string{}
 	if ident, ok := format.(*ast.Ident); ok {
@@ -415,18 +586,28 @@
 			comment = v.Comment.Text()
 		}
 	}
+	if c := px.getComment(call.Args[0]); c != "" {
+		comment = c
+	}
 
+	formats := data.formats
+	for _, c := range formats {
+		px.addMessage(call.Lparen, key, c, comment, arguments)
+	}
+	return true
+}
+
+func (px packageExtracter) getArguments(data *callData) []argument {
 	arguments := []argument{}
-	simArgs := []interface{}{}
+	x := px.x
+	info := px.info
 	if data.callArgsStart() >= 0 {
-		args := call.Args[data.callArgsStart():]
-		simArgs = make([]interface{}, len(args))
+		args := data.expr.Args[data.callArgsStart():]
 		for i, arg := range args {
 			expr := x.print(arg)
 			val := ""
 			if v := info.Types[arg].Value; v != nil {
 				val = v.ExactString()
-				simArgs[i] = val
 				switch arg.(type) {
 				case *ast.BinaryExpr, *ast.UnaryExpr:
 					expr = val
@@ -446,62 +627,76 @@
 			})
 		}
 	}
+	return arguments
+}
 
-	formats := data.formats
-	for _, c := range formats {
-		key := append([]string{}, key...)
-		fmtMsg := constant.StringVal(c)
-		msg := ""
+func (px packageExtracter) addMessage(
+	pos token.Pos,
+	key []string,
+	c constant.Value,
+	comment string,
+	arguments []argument) {
+	x := px.x
+	fmtMsg := constant.StringVal(c)
 
-		ph := placeholders{index: map[string]string{}}
+	ph := placeholders{index: map[string]string{}}
 
-		trimmed, _, _ := trimWS(fmtMsg)
+	trimmed, _, _ := trimWS(fmtMsg)
 
-		p := fmtparser.Parser{}
-		p.Reset(simArgs)
-		for p.SetFormat(trimmed); p.Scan(); {
-			switch p.Status {
-			case fmtparser.StatusText:
-				msg += p.Text()
-			case fmtparser.StatusSubstitution,
-				fmtparser.StatusBadWidthSubstitution,
-				fmtparser.StatusBadPrecSubstitution:
-				arguments[p.ArgNum-1].used = true
-				arg := arguments[p.ArgNum-1]
-				sub := p.Text()
-				if !p.HasIndex {
-					r, sz := utf8.DecodeLastRuneInString(sub)
-					sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
-				}
-				msg += fmt.Sprintf("{%s}", ph.addArg(&arg, sub))
-			}
-		}
-		key = append(key, msg)
-
-		// Add additional Placeholders that can be used in translations
-		// that are not present in the string.
-		for _, arg := range arguments {
-			if arg.used {
-				continue
-			}
-			ph.addArg(&arg, fmt.Sprintf("%%[%d]v", arg.ArgNum))
-		}
-
-		if c := px.getComment(call.Args[0]); c != "" {
-			comment = c
-		}
-
-		x.messages = append(x.messages, Message{
-			ID:      key,
-			Key:     fmtMsg,
-			Message: Text{Msg: msg},
-			// TODO(fix): this doesn't get the before comment.
-			Comment:      comment,
-			Placeholders: ph.slice,
-			Position:     posString(&x.conf, info.Pkg, call.Lparen),
-		})
+	p := fmtparser.Parser{}
+	simArgs := make([]interface{}, len(arguments))
+	for i, v := range arguments {
+		simArgs[i] = v
 	}
-	return true
+	msg := ""
+	p.Reset(simArgs)
+	for p.SetFormat(trimmed); p.Scan(); {
+		name := ""
+		var arg *argument
+		switch p.Status {
+		case fmtparser.StatusText:
+			msg += p.Text()
+			continue
+		case fmtparser.StatusSubstitution,
+			fmtparser.StatusBadWidthSubstitution,
+			fmtparser.StatusBadPrecSubstitution:
+			arguments[p.ArgNum-1].used = true
+			arg = &arguments[p.ArgNum-1]
+			name = getID(arg)
+		case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg:
+			arg = &argument{
+				ArgNum:   p.ArgNum,
+				Position: posString(&x.conf, px.info.Pkg, pos),
+			}
+			name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum)
+		}
+		sub := p.Text()
+		if !p.HasIndex {
+			r, sz := utf8.DecodeLastRuneInString(sub)
+			sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
+		}
+		msg += fmt.Sprintf("{%s}", ph.addArg(arg, name, sub))
+	}
+	key = append(key, msg)
+
+	// Add additional Placeholders that can be used in translations
+	// that are not present in the string.
+	for _, arg := range arguments {
+		if arg.used {
+			continue
+		}
+		ph.addArg(&arg, getID(&arg), fmt.Sprintf("%%[%d]v", arg.ArgNum))
+	}
+
+	x.messages = append(x.messages, Message{
+		ID:      key,
+		Key:     fmtMsg,
+		Message: Text{Msg: msg},
+		// TODO(fix): this doesn't get the before comment.
+		Comment:      comment,
+		Placeholders: ph.slice,
+		Position:     posString(&x.conf, px.info.Pkg, pos),
+	})
 }
 
 func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string {
@@ -544,22 +739,45 @@
 	return s
 }
 
+// verbToPlaceholder gives a name for a placeholder based on the substitution
+// verb. This is only to be used if there is otherwise no other type information
+// available.
+func verbToPlaceholder(sub string, pos int) (name, underlying string) {
+	r, _ := utf8.DecodeLastRuneInString(sub)
+	name = fmt.Sprintf("Arg_%d", pos)
+	switch r {
+	case 's', 'q':
+		underlying = "string"
+	case 'd':
+		name = "Integer"
+		underlying = "int"
+	case 'e', 'f', 'g':
+		name = "Number"
+		underlying = "float64"
+	case 'm':
+		name = "Message"
+		underlying = "string"
+	default:
+		underlying = "interface{}"
+	}
+	return name, underlying
+}
+
 type placeholders struct {
 	index map[string]string
 	slice []Placeholder
 }
 
-func (p *placeholders) addArg(arg *argument, sub string) (id string) {
-	id = getID(arg)
-	id1 := id
-	alt, ok := p.index[id1]
+func (p *placeholders) addArg(arg *argument, name, sub string) (id string) {
+	id = name
+	alt, ok := p.index[id]
 	for i := 1; ok && alt != sub; i++ {
-		id1 = fmt.Sprintf("%s_%d", id, i)
-		alt, ok = p.index[id1]
+		id = fmt.Sprintf("%s_%d", name, i)
+		alt, ok = p.index[id]
 	}
-	p.index[id1] = sub
+	p.index[id] = sub
 	p.slice = append(p.slice, Placeholder{
-		ID:             id1,
+		ID:             id,
 		String:         sub,
 		Type:           arg.Type,
 		UnderlyingType: arg.UnderlyingType,
@@ -567,7 +785,7 @@
 		Expr:           arg.Expr,
 		Comment:        arg.Comment,
 	})
-	return id1
+	return id
 }
 
 func getLastComponent(s string) string {
diff --git a/message/pipeline/testdata/ssa/extracted.gotext.json b/message/pipeline/testdata/ssa/extracted.gotext.json
index cd24934..0d886b3 100644
--- a/message/pipeline/testdata/ssa/extracted.gotext.json
+++ b/message/pipeline/testdata/ssa/extracted.gotext.json
@@ -66,28 +66,28 @@
                     "expr": "true"
                 }
             ],
-            "position": "testdata/ssa/ssa.go:20:9"
+            "position": "testdata/ssa/ssa.go:22:9"
         },
         {
             "id": "empty string",
             "key": "empty string",
             "message": "empty string",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:21:9"
+            "position": "testdata/ssa/ssa.go:23:9"
         },
         {
             "id": "Lovely weather today!",
             "key": "Lovely weather today!",
             "message": "Lovely weather today!",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:22:8"
+            "position": "testdata/ssa/ssa.go:24:8"
         },
         {
             "id": "number one",
             "key": "number one",
             "message": "number one",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:30:8"
+            "position": "testdata/ssa/ssa.go:32:8"
         },
         {
             "id": [
@@ -107,7 +107,7 @@
                     "expr": "c"
                 }
             ],
-            "position": "testdata/ssa/ssa.go:77:10"
+            "position": "testdata/ssa/ssa.go:79:10"
         },
         {
             "id": [
@@ -127,7 +127,7 @@
                     "expr": "args"
                 }
             ],
-            "position": "testdata/ssa/ssa.go:86:11"
+            "position": "testdata/ssa/ssa.go:88:11"
         },
         {
             "id": [
@@ -155,7 +155,7 @@
                     "expr": "b"
                 }
             ],
-            "position": "testdata/ssa/ssa.go:137:7"
+            "position": "testdata/ssa/ssa.go:139:7"
         },
         {
             "id": [
@@ -183,7 +183,7 @@
                     "expr": "b"
                 }
             ],
-            "position": "testdata/ssa/ssa.go:137:7"
+            "position": "testdata/ssa/ssa.go:139:7"
         },
         {
             "id": [
@@ -193,7 +193,7 @@
             "key": "foo",
             "message": "foo",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:151:8"
+            "position": "testdata/ssa/ssa.go:153:8"
         },
         {
             "id": [
@@ -203,7 +203,7 @@
             "key": "bar",
             "message": "bar",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:151:8"
+            "position": "testdata/ssa/ssa.go:153:8"
         },
         {
             "id": [
@@ -213,7 +213,7 @@
             "key": "baz",
             "message": "baz",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:151:8"
+            "position": "testdata/ssa/ssa.go:153:8"
         },
         {
             "id": [
@@ -223,7 +223,76 @@
             "key": "const str",
             "message": "const str",
             "translation": "",
-            "position": "testdata/ssa/ssa.go:165:11"
+            "position": "testdata/ssa/ssa.go:168:11"
+        },
+        {
+            "id": [
+                "globalStr",
+                "See you around in {City}!"
+            ],
+            "key": "See you around in %s!",
+            "message": "See you around in {City}!",
+            "translation": "",
+            "placeholders": [
+                {
+                    "id": "City",
+                    "string": "%[1]s",
+                    "type": "string",
+                    "underlyingType": "string",
+                    "argNum": 1,
+                    "expr": "city"
+                }
+            ],
+            "position": "testdata/ssa/ssa.go:181:5"
+        },
+        {
+            "id": [
+                "constFood",
+                "Please eat your {Food}!"
+            ],
+            "key": "Please eat your %s!",
+            "message": "Please eat your {Food}!",
+            "translation": "",
+            "comment": "Ho ho ho",
+            "placeholders": [
+                {
+                    "id": "Food",
+                    "string": "%[1]s",
+                    "type": "string",
+                    "underlyingType": "string",
+                    "argNum": 1,
+                    "expr": "food",
+                    "comment": "the food to be consumed by the subject"
+                }
+            ],
+            "position": "testdata/ssa/ssa.go:193:2"
+        },
+        {
+            "id": [
+                "msgHello",
+                "Hello, {Integer} and {Arg_2}!"
+            ],
+            "key": "Hello, %d and %s!",
+            "message": "Hello, {Integer} and {Arg_2}!",
+            "translation": "",
+            "comment": "Ho ho ho",
+            "placeholders": [
+                {
+                    "id": "Integer",
+                    "string": "%[1]d",
+                    "type": "",
+                    "underlyingType": "int",
+                    "argNum": 1
+                },
+                {
+                    "id": "Arg_2",
+                    "string": "%[2]s",
+                    "type": "",
+                    "underlyingType": "string",
+                    "argNum": 2
+                }
+            ],
+            "position": "testdata/ssa/ssa.go:193:2"
         }
     ]
 }
\ No newline at end of file
diff --git a/message/pipeline/testdata/ssa/ssa.go b/message/pipeline/testdata/ssa/ssa.go
index aae2e8d..1591242 100644
--- a/message/pipeline/testdata/ssa/ssa.go
+++ b/message/pipeline/testdata/ssa/ssa.go
@@ -17,6 +17,8 @@
 	gwrapf("global printer used %s", "ARG1")
 
 	w := wrapped{p}
+
+	// Comment about wrapf.
 	w.wrapf("number: %d, string: %s, bool: %v", 2, "STRING ARG", true)
 	w.wrapf("empty string")
 	w.wrap("Lovely weather today!")
@@ -160,6 +162,7 @@
 }
 
 func freeConst(p *message.Printer) {
+	// str is a message
 	const str = "const str"
 	fn := func(p *message.Printer) {
 		p.Printf(str)
@@ -168,8 +171,32 @@
 }
 
 func global(p *message.Printer) {
-	// TODO: pick up evaluations of globals to string.
-	p.Printf(globalStr)
+	// city describes the expected next meeting place
+	city := "Amsterdam"
+	// See a person around.
+	p.Printf(globalStr, city)
 }
 
-var globalStr = "global string"
+// globalStr is a global variable with a string constant assigned to it.
+var globalStr = "See you around in %s!"
+
+func global2(p *message.Printer) {
+	const food = "Pastrami"
+	wrapf(p, constFood,
+		food, // the food to be consumed by the subject
+	)
+}
+
+// Comment applying to all constants in a block are ignored.
+var (
+	// Ho ho ho
+	notAMessage, constFood, msgHello = "NOPE!", consume, hello
+)
+
+// A block comment.
+var (
+	// This comment takes precedence.
+	hello = "Hello, %d and %s!"
+
+	consume = "Please eat your %s!"
+)