// blob: 3db41725cfd61803faf265f629bd90edeaebe69e
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pipeline
import (
"bytes"
"fmt"
"go/ast"
"go/constant"
"go/format"
"go/token"
"go/types"
"path/filepath"
"strings"
"unicode"
"unicode/utf8"
fmtparser "golang.org/x/text/internal/format"
"golang.org/x/tools/go/callgraph"
"golang.org/x/tools/go/callgraph/cha"
"golang.org/x/tools/go/loader"
"golang.org/x/tools/go/ssa"
"golang.org/x/tools/go/ssa/ssautil"
)
// debug enables the diagnostic trace printed by (*extracter).debug and the
// warning emitted by visitFormats when a format string reaches more than one
// parameter of the same function.
const debug = false
// TODO:
// - merge information into existing files
// - handle different file formats (PO, XLIFF)
// - handle features (gender, plural)
// - message rewriting
// - `msg:"etc"` tags
// Extract extracts all strings from the package defined in Config.
//
// It builds an extracter for c, seeds it with the known formatting
// endpoints, collects the messages, and returns them wrapped in a State
// together with a copy of c and the loaded program. An error from building
// the extracter is returned wrapped.
func Extract(c *Config) (*State, error) {
	x, err := newExtracter(c)
	if err != nil {
		return nil, wrap(err, "")
	}

	// Endpoints must be seeded before the ASTs are walked, because the walk
	// only reports call sites and globals recorded during seeding.
	x.seedEndpoints()
	x.extractMessages()

	state := &State{
		Config:  *c,
		program: x.iprog,
	}
	state.Extracted = Messages{
		Language: c.SourceLanguage,
		Messages: x.messages,
	}
	return state, nil
}
// extracter holds the loaded program and the state accumulated while
// searching it for translatable strings.
type extracter struct {
	conf      loader.Config    // loader configuration handed to loadPackages
	iprog     *loader.Program  // loaded packages, as returned by loadPackages
	prog      *ssa.Program     // SSA form created from iprog
	callGraph *callgraph.Graph // call graph computed from prog by cha.CallGraph

	// Calls and other expressions to collect.
	globals map[token.Pos]*constData // keyed by the position of a global
	funcs   map[token.Pos]*callData  // keyed by the position of a call site

	messages []Message // messages collected so far, appended by addMessage
}
// newExtracter loads the packages named in c.Packages, builds their SSA form
// and computes a call graph over it. A load failure is returned wrapped, with
// a nil extracter.
func newExtracter(c *Config) (x *extracter, err error) {
	x = &extracter{
		conf:    loader.Config{},
		globals: make(map[token.Pos]*constData),
		funcs:   make(map[token.Pos]*callData),
	}

	if x.iprog, err = loadPackages(&x.conf, c.Packages); err != nil {
		return nil, wrap(err, "")
	}

	const mode = ssa.GlobalDebug | ssa.BareInits
	x.prog = ssautil.CreateProgram(x.iprog, mode)
	x.prog.Build()
	x.callGraph = cha.CallGraph(x.prog)
	return x, nil
}
// globalData returns the constData recorded for the global declared at pos,
// creating and registering an empty record when none exists yet.
func (x *extracter) globalData(pos token.Pos) *constData {
	if existing := x.globals[pos]; existing != nil {
		return existing
	}
	fresh := new(constData)
	x.globals[pos] = fresh
	return fresh
}
// seedEndpoints collects string constants assigned to globals and then
// registers the formatting methods of *message.Printer (Printf, Sprintf and
// Fprintf) as the roots from which call sites are traced.
//
// When golang.org/x/text/message is not part of the loaded program there can
// be no Printer calls to trace, so only the globals are processed instead of
// dereferencing a nil package.
func (x *extracter) seedEndpoints() {
	x.processGlobalVars()

	info := x.iprog.Package("golang.org/x/text/message")
	if info == nil {
		return
	}
	pkg := x.prog.Package(info.Pkg)
	if pkg == nil {
		return
	}
	printer := pkg.Type("Printer")
	if printer == nil {
		return
	}
	typ := types.NewPointer(printer.Type())

	// Positions count the receiver as parameter 0, hence isMethod is set.
	endpoints := []struct {
		name      string
		formatPos int
		argPos    int
	}{
		{"Printf", 1, 2},
		{"Sprintf", 1, 2},
		{"Fprintf", 2, 3},
	}
	for _, e := range endpoints {
		x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, e.name), &callData{
			formatPos: e.formatPos,
			argPos:    e.argPos,
			isMethod:  true,
		})
	}
}
// processGlobalVars finds string constants that are assigned to global
// variables. It scans the "init" member of every package for stores whose
// destination is a global of (possibly multiply pointed-to) string type and
// hands the stored value to visitInit.
func (x *extracter) processGlobalVars() {
	for _, pkg := range x.prog.AllPackages() {
		member, found := pkg.Members["init"]
		if !found {
			continue
		}
		initFn := member.(*ssa.Function)
		for _, block := range initFn.Blocks {
			for _, instr := range block.Instrs {
				store, isStore := instr.(*ssa.Store)
				if !isStore {
					continue
				}
				global, isGlobal := store.Addr.(*ssa.Global)
				if !isGlobal {
					continue
				}

				// Peel off every pointer layer to reach the stored type.
				elem := global.Type()
				for ptr, isPtr := elem.(*types.Pointer); isPtr; ptr, isPtr = elem.(*types.Pointer) {
					elem = ptr.Elem()
				}

				basic, isBasic := elem.(*types.Basic)
				if isBasic && basic.Kind() == types.String {
					x.visitInit(global, store.Val)
				}
			}
		}
	}
}
// constData records what is known about one global variable: the constants
// assigned to it, other globals assigned to it, and the call it was passed
// to as a format string, if any.
type constData struct {
	call   *callData // to provide a signature for the constants
	values []constVal
	others []token.Pos // Assigned to other global data.
}
// visit calls f for every constant recorded directly on d and then, in the
// same way, for every global listed in d.others that has a record in
// x.globals.
func (d *constData) visit(x *extracter, f func(c constant.Value)) {
	for _, cv := range d.values {
		f(cv.value)
	}
	for _, pos := range d.others {
		linked, found := x.globals[pos]
		if !found {
			continue
		}
		linked.visit(x, f)
	}
}
// constVal is a constant value together with the source position it was
// found at.
type constVal struct {
	value constant.Value
	pos   token.Pos
}
// callData describes either a function that takes a format string (the
// seeded endpoints, which have no call or callee) or one call site of such a
// function.
type callData struct {
	call    ssa.CallInstruction // SSA instruction of the call site
	expr    *ast.CallExpr       // matching AST call, filled in by extractMessages
	formats []constant.Value    // constant format strings found for this call

	callee    *callData // description of the function being called
	isMethod  bool      // positions below count the receiver as parameter 0
	formatPos int       // parameter index of the format string, -1 if unknown
	argPos    int       // varargs at this position in the call
	argTypes  []int     // arguments extractable from this position
}
// callFormatPos returns the index of the format string among the explicit
// arguments of the call. It is the callee's formatPos, reduced by one when
// the callee is a method so that the receiver is not counted.
func (c *callData) callFormatPos() int {
	callee := c.callee
	pos := callee.formatPos
	if callee.isMethod {
		pos--
	}
	return pos
}
// callArgsStart returns the index of the first variadic argument among the
// explicit arguments of the call. It is the callee's argPos, reduced by one
// when the callee is a method so that the receiver is not counted.
func (c *callData) callArgsStart() int {
	callee := c.callee
	start := callee.argPos
	if callee.isMethod {
		start--
	}
	return start
}
// Pos returns the source position of the underlying call instruction.
func (c *callData) Pos() token.Pos { return c.call.Pos() }
// Pkg returns the type-checker package of the function containing the call.
func (c *callData) Pkg() *types.Package { return c.call.Parent().Pkg.Pkg }
// handleFunc records every call site of f found in the call graph and traces
// the values passed at that site. fd describes f itself: fd.formatPos and
// fd.argPos are the parameter indices of the format string and of the
// variadic arguments.
func (x *extracter) handleFunc(f *ssa.Function, fd *callData) {
	for _, e := range x.callGraph.Nodes[f].In {
		// Skip edges that carry no source position.
		if e.Pos() == 0 {
			continue
		}

		call := e.Site
		caller := x.funcs[call.Pos()]
		if caller != nil {
			// TODO: theoretically a format string could be passed to multiple
			// arguments of a function. Support this eventually.
			continue
		}
		x.debug(call, "CALL", f.String())

		// formatPos and argPos stay -1 until visitFormats/visitArgs find that
		// the value comes from a parameter of the enclosing function.
		caller = &callData{
			call:      call,
			callee:    fd,
			formatPos: -1,
			argPos:    -1,
		}
		// Offset by one if we are invoking an interface method.
		offset := 0
		if call.Common().IsInvoke() {
			offset = -1
		}
		// Register the site before visiting its operands: visitFormats may
		// call handleFunc again, and the lookup above then skips this site.
		x.funcs[call.Pos()] = caller
		if fd.argPos >= 0 {
			x.visitArgs(caller, call.Common().Args[fd.argPos+offset])
		}
		x.visitFormats(caller, call.Common().Args[fd.formatPos+offset])
	}
}
// posser is the subset of SSA values and instructions that debug needs: a
// source position and the enclosing function.
type posser interface {
	Pos() token.Pos
	Parent() *ssa.Function
}
// debug prints one trace line for v when the debug constant is set: the
// position of v, the header, the dynamic type of v and then each of args.
// Headers other than "CALL" and "INSERT" are prefixed with a space.
func (x *extracter) debug(v posser, header string, args ...interface{}) {
	if !debug {
		return
	}

	var pos string
	if fn := v.Parent(); fn != nil {
		pos = posString(&x.conf, fn.Package().Pkg, v.Pos())
	}

	switch header {
	case "CALL", "INSERT":
		// Top-level events keep their header as is.
	default:
		header = " " + header
	}

	fmt.Printf("%-32s%-10s%-15T ", pos+fmt.Sprintf("@%d", v.Pos()), header, v)
	for _, a := range args {
		fmt.Printf(" %v", a)
	}
	fmt.Println()
}
// visitInit evaluates and collects values assigned to global variables in an
// init function. It follows v back through phi nodes, full slices and
// single-operand instructions; string constants containing a letter are
// recorded on the constData of global, and other globals are recorded as
// aliases.
func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) {
	if v == nil {
		return
	}
	x.debug(v, "GLOBAL", v)

	switch v := v.(type) {
	case *ssa.Phi:
		// Any incoming edge may supply the value.
		for _, e := range v.Edges {
			x.visitInit(global, e)
		}

	case *ssa.Const:
		// Only record strings with letters.
		if str := constant.StringVal(v.Value); isMsg(str) {
			cd := x.globalData(global.Pos())
			cd.values = append(cd.values, constVal{v.Value, v.Pos()})
		}
		// TODO: handle %m-directive.

	case *ssa.Global:
		// Remember the other global so that its constants are reported for
		// this one as well (see constData.visit).
		cd := x.globalData(global.Pos())
		cd.others = append(cd.others, v.Pos())

	case *ssa.FieldAddr, *ssa.Field:
		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
		// an example args as to give parameters for the translator.

	case *ssa.Slice:
		// Only a full slice (x[:]) is followed.
		if v.Low == nil && v.High == nil && v.Max == nil {
			x.visitInit(global, v.X)
		}

	case *ssa.Alloc:
		// NOTE(review): this condition looks inverted. The body dereferences
		// ref, which can only be reached here when ref is nil, so as written
		// the loop either never runs or panics. Confirm whether `ref != nil`
		// was intended before changing it: enabling the traversal changes
		// what is extracted.
		if ref := v.Referrers(); ref == nil {
			for _, r := range *ref {
				values := []ssa.Value{}
				for _, o := range r.Operands(nil) {
					if o == nil || *o == v {
						continue
					}
					values = append(values, *o)
				}
				// TODO: return something different if we care about multiple
				// values as well.
				if len(values) == 1 {
					x.visitInit(global, values[0])
				}
			}
		}

	case ssa.Instruction:
		// Any other instruction is followed only when it has exactly one
		// operand.
		rands := v.Operands(nil)
		if len(rands) == 1 && rands[0] != nil {
			x.visitInit(global, *rands[0])
		}
	}
	return
}
// visitFormats finds the original source of the format value v passed at
// call. It returns nothing; results are recorded as side effects:
// constant strings are appended to call.formats, a global is linked to call
// through its constData, and when v is a parameter of the enclosing function
// its index is stored in call.formatPos and that function is traced in turn.
func (x *extracter) visitFormats(call *callData, v ssa.Value) {
	if v == nil {
		return
	}
	x.debug(v, "VALUE", v)

	switch v := v.(type) {
	case *ssa.Phi:
		// Any incoming edge may supply the format.
		for _, e := range v.Edges {
			x.visitFormats(call, e)
		}

	case *ssa.Const:
		// Only record strings with letters.
		if isMsg(constant.StringVal(v.Value)) {
			x.debug(call.call, "FORMAT", v.Value.ExactString())
			call.formats = append(call.formats, v.Value)
		}
		// TODO: handle %m-directive.

	case *ssa.Global:
		// The constants of the global are reported later by handleGlobal,
		// which uses this call to describe the arguments.
		x.globalData(v.Pos()).call = call

	case *ssa.FieldAddr, *ssa.Field:
		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
		// an example args as to give parameters for the translator.

	case *ssa.Slice:
		// Only a full slice (x[:]) is followed.
		if v.Low == nil && v.High == nil && v.Max == nil {
			x.visitFormats(call, v.X)
		}

	case *ssa.Parameter:
		// The enclosing function forwards one of its own parameters as the
		// format, so it is itself treated as a formatting function.
		// TODO: handle the function for the index parameter.
		f := v.Parent()
		for i, p := range f.Params {
			if p == v {
				if call.formatPos < 0 {
					call.formatPos = i
					// TODO: is there a better way to detect this is calling
					// a method rather than a function?
					call.isMethod = len(f.Params) > f.Signature.Params().Len()
					x.handleFunc(v.Parent(), call)
				} else if debug && i != call.formatPos {
					// TODO: support this.
					fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n",
						posString(&x.conf, call.Pkg(), call.Pos()),
						call.formatPos, i)
				}
			}
		}

	case *ssa.Alloc:
		// NOTE(review): this condition looks inverted. The body dereferences
		// ref, which can only be reached here when ref is nil, so as written
		// the loop either never runs or panics. Confirm whether `ref != nil`
		// was intended before changing it: enabling the traversal changes
		// what is extracted.
		if ref := v.Referrers(); ref == nil {
			for _, r := range *ref {
				values := []ssa.Value{}
				for _, o := range r.Operands(nil) {
					if o == nil || *o == v {
						continue
					}
					values = append(values, *o)
				}
				// TODO: return something different if we care about multiple
				// values as well.
				if len(values) == 1 {
					x.visitFormats(call, values[0])
				}
			}
		}

		// TODO:
	// case *ssa.Index:
	// 	// Get all values in the array if applicable
	// case *ssa.IndexAddr:
	// 	// Get all values in the slice or *array if applicable.
	// case *ssa.Lookup:
	// 	// Get all values in the map if applicable.

	case *ssa.FreeVar:
		// TODO: find the link between free variables and parameters:
		//
		// func freeVar(p *message.Printer, str string) {
		// 	fn := func(p *message.Printer) {
		// 		p.Printf(str)
		// 	}
		// 	fn(p)
		// }

	case ssa.Instruction:
		// Any other instruction is followed only when it has exactly one
		// operand.
		rands := v.Operands(nil)
		if len(rands) == 1 && rands[0] != nil {
			x.visitFormats(call, *rands[0])
		}
	// NOTE(review): presumably *ssa.Call also satisfies ssa.Instruction, in
	// which case the case above matches first and this one is never
	// selected; confirm and reorder if calls need separate handling.
	case *ssa.Call:
	}
}
// Note: a function may have an argument marked as both format and passthrough.

// visitArgs collects information on arguments. For wrapped functions it will
// just determine the position of the variable args slice: when v traces back
// to a parameter of the enclosing function, the index of that parameter is
// stored in fd.argPos.
func (x *extracter) visitArgs(fd *callData, v ssa.Value) {
	if v == nil {
		return
	}
	x.debug(v, "ARGV", v)
	switch v := v.(type) {

	case *ssa.Slice:
		// Only a full slice (x[:]) is followed.
		if v.Low == nil && v.High == nil && v.Max == nil {
			x.visitArgs(fd, v.X)
		}

	case *ssa.Parameter:
		// TODO: handle the function for the index parameter.
		f := v.Parent()
		for i, p := range f.Params {
			if p == v {
				fd.argPos = i
			}
		}

	case *ssa.Alloc:
		// NOTE(review): this condition looks inverted. The body dereferences
		// ref, which can only be reached here when ref is nil, so as written
		// the loop either never runs or panics. Confirm whether `ref != nil`
		// was intended before changing it: enabling the traversal changes
		// what is extracted.
		if ref := v.Referrers(); ref == nil {
			for _, r := range *ref {
				values := []ssa.Value{}
				for _, o := range r.Operands(nil) {
					if o == nil || *o == v {
						continue
					}
					values = append(values, *o)
				}
				// TODO: return something different if we care about
				// multiple values as well.
				if len(values) == 1 {
					x.visitArgs(fd, values[0])
				}
			}
		}

	case ssa.Instruction:
		// Any other instruction is followed only when it has exactly one
		// operand.
		rands := v.Operands(nil)
		if len(rands) == 1 && rands[0] != nil {
			x.visitArgs(fd, *rands[0])
		}
	}
}
// print returns Go syntax for the specified node, formatted with the file
// set of the loader configuration.
func (x *extracter) print(n ast.Node) string {
	out := new(bytes.Buffer)
	// Best effort: a formatting error is ignored and whatever was written to
	// the buffer up to that point is returned.
	_ = format.Node(out, x.conf.Fset, n)
	return out.String()
}
// packageExtracter bundles what is needed to extract messages from a single
// source file.
type packageExtracter struct {
	f    *ast.File           // the file being inspected
	x    *extracter          // shared extraction state
	info *loader.PackageInfo // package that f belongs to
	cmap ast.CommentMap      // comments of f associated with its nodes
}
// getComment returns the whitespace-trimmed text of the first comment group
// associated with n in the file's comment map, or "" when there is none.
func (px packageExtracter) getComment(n ast.Node) string {
	groups := px.cmap.Filter(n).Comments()
	if len(groups) == 0 {
		return ""
	}
	return strings.TrimSpace(groups[0].Text())
}
// extractMessages walks the AST of every loaded file twice. The first pass
// attaches each *ast.CallExpr to the call site recorded under its Lparen
// position. The second pass turns recorded calls and global value
// specifications into messages.
func (x *extracter) extractMessages() {
	fset := x.iprog.Fset

	var files []packageExtracter
	for _, info := range x.iprog.AllPackages {
		for _, file := range info.Files {
			files = append(files, packageExtracter{
				f:    file,
				x:    x,
				info: info,
				// Associate comments with nodes.
				cmap: ast.NewCommentMap(fset, file, file.Comments),
			})
		}
	}

	// Pass 1: link AST call expressions to the recorded SSA call sites.
	for _, px := range files {
		ast.Inspect(px.f, func(n ast.Node) bool {
			if call, ok := n.(*ast.CallExpr); ok {
				if d := x.funcs[call.Lparen]; d != nil {
					d.expr = call
				}
			}
			return true
		})
	}

	// Pass 2: emit messages for calls and for global declarations.
	for _, px := range files {
		ast.Inspect(px.f, func(n ast.Node) bool {
			if call, ok := n.(*ast.CallExpr); ok {
				return px.handleCall(call)
			}
			if spec, ok := n.(*ast.ValueSpec); ok {
				return px.handleGlobal(spec)
			}
			return true
		})
	}
}
// handleGlobal emits a message for every constant recorded for the names
// declared by spec. A name is reported when it was passed to a formatting
// call, in which case that call supplies the arguments, or when it starts
// with "msg" or "Msg". It always returns true so that inspection continues.
func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool {
	comment := px.getComment(spec)

	for _, ident := range spec.Names {
		data, known := px.x.globals[ident.Pos()]
		if !known {
			continue
		}
		name := ident.Name

		var arguments []argument
		switch {
		case data.call != nil:
			arguments = px.getArguments(data.call)
		case strings.HasPrefix(name, "msg"), strings.HasPrefix(name, "Msg"):
			// Reported by naming convention, without arguments.
		default:
			continue
		}

		id := []string{name}
		data.visit(px.x, func(c constant.Value) {
			px.addMessage(spec.Pos(), id, c, comment, arguments)
		})
	}
	return true
}
// handleCall emits one message for every constant format string recorded
// for call. Calls that were not recorded, or for which no format was found,
// are ignored. It always returns true so that inspection continues.
//
// When the format argument is a plain identifier, its name becomes the first
// component of the message key and the line comment of its declaration, if
// any, is used as the message comment. A comment attached to the first call
// argument takes precedence.
func (px packageExtracter) handleCall(call *ast.CallExpr) bool {
	x := px.x
	data := x.funcs[call.Lparen]
	if data == nil || len(data.formats) == 0 {
		return true
	}
	if data.expr != call {
		panic("invariant `data.expr == call` failed")
	}
	x.debug(data.call, "INSERT", data.formats)

	// Guard both ends: a negative index would panic just like one that is
	// too large.
	argn := data.callFormatPos()
	if argn < 0 || argn >= len(call.Args) {
		return true
	}
	format := call.Args[argn]

	arguments := px.getArguments(data)

	comment := ""
	key := []string{}
	if ident, ok := format.(*ast.Ident); ok {
		key = append(key, ident.Name)
		// Obj is nil for identifiers the parser could not resolve within
		// the file, for instance names declared in another file.
		if ident.Obj != nil {
			if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
				// TODO: get comment above ValueSpec as well
				comment = v.Comment.Text()
			}
		}
	}

	if c := px.getComment(call.Args[0]); c != "" {
		comment = c
	}

	for _, c := range data.formats {
		px.addMessage(call.Lparen, key, c, comment, arguments)
	}
	return true
}
// getArguments describes the variadic arguments of the call in data: their
// 1-based number, type, source expression, constant value if any, attached
// comment and position. The result is never nil.
//
// An empty result is returned when the argument position is unknown, when no
// AST expression was linked to the call, or when the call has fewer
// arguments than the start position.
func (px packageExtracter) getArguments(data *callData) []argument {
	arguments := []argument{}
	x := px.x
	info := px.info

	start := data.callArgsStart()
	if start < 0 || data.expr == nil || start > len(data.expr.Args) {
		return arguments
	}

	for i, arg := range data.expr.Args[start:] {
		expr := x.print(arg)
		tv := info.Types[arg]

		val := ""
		if tv.Value != nil {
			val = tv.Value.ExactString()
			// For constant operator expressions, show the folded value
			// rather than the expression.
			switch arg.(type) {
			case *ast.BinaryExpr, *ast.UnaryExpr:
				expr = val
			}
		}

		// The type is missing when arg is not recorded in info, which can
		// happen when the call is in a different package than px (see
		// handleGlobal). Leave the type names empty in that case.
		typ, underlying := "", ""
		if tv.Type != nil {
			typ = tv.Type.String()
			underlying = tv.Type.Underlying().String()
		}

		arguments = append(arguments, argument{
			ArgNum:         i + 1,
			Type:           typ,
			UnderlyingType: underlying,
			Expr:           expr,
			Value:          val,
			Comment:        px.getComment(arg),
			Position:       posString(&x.conf, info.Pkg, arg.Pos()),
			// TODO report whether it implements
			// interfaces plural.Interface,
			// gender.Interface.
		})
	}
	return arguments
}
// addMessage converts the constant format string c into a Message and
// appends it to the extracter's message list.
//
// pos is the position reported for the message, key the leading components
// of its ID (the generated message text is appended to it), comment the
// translator comment, and arguments the call arguments available for
// substitution. Every substitution in the format is replaced by a named
// placeholder in braces; arguments the format does not use are added as
// extra placeholders.
func (px packageExtracter) addMessage(
	pos token.Pos,
	key []string,
	c constant.Value,
	comment string,
	arguments []argument) {
	x := px.x
	fmtMsg := constant.StringVal(c)

	ph := placeholders{index: map[string]string{}}

	trimmed, _, _ := trimWS(fmtMsg)

	// Work on a private copy: the same slice is passed for every format of
	// a call, and the used flags set below must not leak from one message
	// into the next.
	arguments = append([]argument(nil), arguments...)

	p := fmtparser.Parser{}
	simArgs := make([]interface{}, len(arguments))
	for i, v := range arguments {
		simArgs[i] = v
	}

	var msg strings.Builder
	p.Reset(simArgs)
	for p.SetFormat(trimmed); p.Scan(); {
		name := ""
		var arg *argument
		switch p.Status {
		case fmtparser.StatusText:
			msg.WriteString(p.Text())
			continue
		case fmtparser.StatusSubstitution,
			fmtparser.StatusBadWidthSubstitution,
			fmtparser.StatusBadPrecSubstitution:
			arg = &arguments[p.ArgNum-1]
			arg.used = true
			name = getID(arg)
		case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg:
			// No real argument is available; derive a placeholder from the
			// verb alone.
			arg = &argument{
				ArgNum:   p.ArgNum,
				Position: posString(&x.conf, px.info.Pkg, pos),
			}
			name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum)
		default:
			// Any other status has no argument to attach a placeholder to.
			// Keep the text verbatim instead of dereferencing a nil
			// argument below.
			msg.WriteString(p.Text())
			continue
		}
		sub := p.Text()
		if !p.HasIndex {
			// Make the argument index explicit so that translations can
			// reorder substitutions.
			r, sz := utf8.DecodeLastRuneInString(sub)
			sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
		}
		fmt.Fprintf(&msg, "{%s}", ph.addArg(arg, name, sub))
	}
	// Cap the slice so that append cannot write into the caller's backing
	// array.
	key = append(key[:len(key):len(key)], msg.String())

	// Add additional Placeholders that can be used in translations
	// that are not present in the string.
	for i := range arguments {
		arg := &arguments[i]
		if arg.used {
			continue
		}
		ph.addArg(arg, getID(arg), fmt.Sprintf("%%[%d]v", arg.ArgNum))
	}

	x.messages = append(x.messages, Message{
		ID:      key,
		Key:     fmtMsg,
		Message: Text{Msg: msg.String()},
		// TODO(fix): this doesn't get the before comment.
		Comment:      comment,
		Placeholders: ph.slice,
		Position:     posString(&x.conf, px.info.Pkg, pos),
	})
}
// posString formats pos as "<package path>/<file base name>:<line>:<column>",
// resolving the position through the file set of conf.
func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string {
	position := conf.Fset.Position(pos)
	base := filepath.Base(position.Filename)
	location := fmt.Sprintf("%s:%d:%d", base, position.Line, position.Column)
	return filepath.Join(pkg.Path(), location)
}
// getID derives a placeholder ID from an argument: the last dot-separated
// component of its expression, stripped of non-identifier characters and
// title-cased. When that leaves at most two bytes and the argument has a
// named type distinct from its underlying type, the last component of the
// type name is used instead.
func getID(arg *argument) string {
	id := strip(getLastComponent(arg.Expr))
	id = strings.Replace(id, " ", "", -1)

	// For small variable names, use user-defined types for more info.
	tooShort := len(id) <= 2
	if tooShort && arg.UnderlyingType != arg.Type {
		id = getLastComponent(arg.Type)
	}
	return strings.Title(id)
}
// strip is a dirty hack to convert function calls to placeholder IDs.
//
// Whitespace and '-' become '_', and every other rune that is not a letter,
// mark or number is dropped. A leading "Get" or "get" is then removed when
// it is followed by something that does not continue a lowercase word, so
// "GetName" and "getName" become "Name" while "Getter" is left alone.
func strip(s string) string {
	s = strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) || r == '-' {
			return '_'
		}
		if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) {
			return -1
		}
		return r
	}, s)
	// Strip "Get" from getter functions.
	if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") {
		// Look at the rune that follows the prefix, not at the prefix
		// itself, to tell a getter from a word that starts with "get".
		if rest := s[len("get"):]; rest != "" {
			r, _ := utf8.DecodeRuneInString(rest)
			if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark
				s = rest
			}
		}
	}
	return s
}
// verbToPlaceholder gives a name for a placeholder based on the substitution
// verb. This is only to be used if there is otherwise no other type information
// available.
//
// The verb is the last rune of sub. Verbs without a dedicated name get
// "Arg_<pos>"; the second result is the Go type assumed for the verb.
func verbToPlaceholder(sub string, pos int) (name, underlying string) {
	verb, _ := utf8.DecodeLastRuneInString(sub)

	// Verbs with a dedicated placeholder name.
	switch verb {
	case 'd':
		return "Integer", "int"
	case 'e', 'f', 'g':
		return "Number", "float64"
	case 'm':
		return "Message", "string"
	}

	// Everything else is named after its argument position.
	name = fmt.Sprintf("Arg_%d", pos)
	if verb == 's' || verb == 'q' {
		return name, "string"
	}
	return name, "interface{}"
}
// placeholders accumulates the placeholders of a single message.
type placeholders struct {
	index map[string]string // placeholder ID -> substitution string
	slice []Placeholder     // placeholders in the order they were added
}
// addArg records a placeholder for arg and returns the ID chosen for it.
//
// The ID is name, unless name is already bound to a different substitution
// string; in that case name_1, name_2, ... are tried until an ID is found
// that is free or already bound to sub. A Placeholder is appended on every
// call, including when the ID was already present.
func (p *placeholders) addArg(arg *argument, name, sub string) (id string) {
	id = name
	for n := 1; ; n++ {
		existing, taken := p.index[id]
		if !taken || existing == sub {
			break
		}
		id = fmt.Sprintf("%s_%d", name, n)
	}

	p.index[id] = sub
	entry := Placeholder{
		ID:             id,
		String:         sub,
		Type:           arg.Type,
		UnderlyingType: arg.UnderlyingType,
		ArgNum:         arg.ArgNum,
		Expr:           arg.Expr,
		Comment:        arg.Comment,
	}
	p.slice = append(p.slice, entry)
	return id
}
// getLastComponent returns the part of s after its last '.', or all of s
// when it contains no '.'.
func getLastComponent(s string) string {
	if dot := strings.LastIndexByte(s, '.'); dot >= 0 {
		return s[dot+1:]
	}
	return s
}
// isMsg returns whether s should be translated, which is the case when it
// contains at least one letter.
func isMsg(s string) bool {
	// TODO: parse as format string and omit strings that contain letters
	// coming from format verbs.
	return strings.IndexFunc(s, unicode.IsLetter) >= 0
}