blob: cafd6f29b7f87c92e0f1136bdd3c0583c784de49 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pipeline provides tools for creating translation pipelines.
//
// NOTE: UNDER DEVELOPMENT. API MAY CHANGE.
package pipeline
import (
"bytes"
"encoding/json"
"fmt"
"go/build"
"go/parser"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"strings"
"text/template"
"unicode"
"golang.org/x/text/internal"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/tools/go/loader"
)
const (
extractFile = "extracted.gotext.json"
outFile = "out.gotext.json"
gotextSuffix = "gotext.json"
)
// Config contains configuration for the translation pipeline.
type Config struct {
// Supported indicates the languages for which data should be generated.
// The default is to support all locales for which there are matching
// translation files.
Supported []language.Tag
// --- Extraction
SourceLanguage language.Tag
Packages []string
// --- File structure
// Dir is the root dir for all operations.
Dir string
// TranslationsPattern is a regular expression to match incoming translation
// files. These files may appear in any directory rooted at Dir.
// language for the translation files is determined as follows:
// 1. From the Language field in the file.
// 2. If not present, from a valid language tag in the filename, separated
// by dots (e.g. "en-US.json" or "incoming.pt_PT.xmb").
// 3. If not present, from a the closest subdirectory in which the file
// is contained that parses as a valid language tag.
TranslationsPattern string
// OutPattern defines the location for translation files for a certain
// language. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
OutPattern string
// Format defines the file format for generated translation files.
// The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText.
Format string
Ext string
// TODO:
// Actions are additional actions to be performed after the initial extract
// and merge.
// Actions []struct {
// Name string
// Options map[string]string
// }
// --- Generation
// GenFile may be in a different package. It is not defined, it will
// be written to stdout.
GenFile string
// GenPackage is the package or relative path into which to generate the
// file. If not specified it is relative to the current directory.
GenPackage string
// DeclareVar defines a variable to which to assing the generated Catalog.
DeclareVar string
// SetDefault determines whether to assign the generated Catalog to
// message.DefaultCatalog. The default for this is true if DeclareVar is
// not defined, false otherwise.
SetDefault bool
// TODO:
// - Printf-style configuration
// - Template-style configuration
// - Extraction options
// - Rewrite options
// - Generation options
}
// Operations:
// - extract: get the strings
// - disambiguate: find messages with the same key, but possible different meaning.
// - create out: create a list of messages that need translations
// - load trans: load the list of current translations
// - merge: assign list of translations as done
// - (action)expand: analyze features and create example sentences for each version.
// - (action)googletrans: pre-populate messages with automatic translations.
// - (action)export: send out messages somewhere non-standard
// - (action)import: load messages from somewhere non-standard
// - vet program: don't pass "foo" + var + "bar" strings. Not using funcs for translated strings.
// - vet trans: coverage: all translations/ all features.
// - generate: generate Go code
// State holds all accumulated information on translations during processing.
type State struct {
Config Config
Package string
program *loader.Program
Extracted Messages `json:"messages"`
// Messages includes all messages for which there need to be translations.
// Duplicates may be eliminated. Generation will be done from these messages
// (usually after merging).
Messages []Messages
// Translations are incoming translations for the application messages.
Translations []Messages
}
func (s *State) dir() string {
if d := s.Config.Dir; d != "" {
return d
}
return "./locales"
}
func outPattern(s *State) (string, error) {
c := s.Config
pat := c.OutPattern
if pat == "" {
pat = "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
}
ext := c.Ext
if ext == "" {
ext = c.Format
}
if ext == "" {
ext = gotextSuffix
}
t, err := template.New("").Parse(pat)
if err != nil {
return "", wrap(err, "error parsing template")
}
buf := bytes.Buffer{}
err = t.Execute(&buf, map[string]string{
"Dir": s.dir(),
"Language": "%s",
"Ext": ext,
})
return filepath.FromSlash(buf.String()), wrap(err, "incorrect OutPattern")
}
var transRE = regexp.MustCompile(`.*\.` + gotextSuffix)
// Import loads existing translation files.
func (s *State) Import() error {
outPattern, err := outPattern(s)
if err != nil {
return err
}
re := transRE
if pat := s.Config.TranslationsPattern; pat != "" {
if re, err = regexp.Compile(pat); err != nil {
return wrapf(err, "error parsing regexp %q", s.Config.TranslationsPattern)
}
}
x := importer{s, outPattern, re}
return x.walkImport(s.dir(), s.Config.SourceLanguage)
}
type importer struct {
state *State
outPattern string
transFile *regexp.Regexp
}
func (i *importer) walkImport(path string, tag language.Tag) error {
files, err := ioutil.ReadDir(path)
if err != nil {
return nil
}
for _, f := range files {
name := f.Name()
tag := tag
if f.IsDir() {
if t, err := language.Parse(name); err == nil {
tag = t
}
// We ignore errors
if err := i.walkImport(filepath.Join(path, name), tag); err != nil {
return err
}
continue
}
for _, l := range strings.Split(name, ".") {
if t, err := language.Parse(l); err == nil {
tag = t
}
}
file := filepath.Join(path, name)
// TODO: Should we skip files that match output files?
if fmt.Sprintf(i.outPattern, tag) == file {
continue
}
// TODO: handle different file formats.
if !i.transFile.MatchString(name) {
continue
}
b, err := ioutil.ReadFile(file)
if err != nil {
return wrap(err, "read file failed")
}
var translations Messages
if err := json.Unmarshal(b, &translations); err != nil {
return wrap(err, "parsing translation file failed")
}
i.state.Translations = append(i.state.Translations, translations)
}
return nil
}
// Merge merges the extracted messages with the existing translations.
func (s *State) Merge() error {
if s.Messages != nil {
panic("already merged")
}
// Create an index for each unique message.
// Duplicates are okay as long as the substitution arguments are okay as
// well.
// Top-level messages are okay to appear in multiple substitution points.
// Collect key equivalence.
msgs := []*Message{}
keyToIDs := map[string]*Message{}
for _, m := range s.Extracted.Messages {
m := m
if prev, ok := keyToIDs[m.Key]; ok {
if err := checkEquivalence(&m, prev); err != nil {
warnf("Key %q matches conflicting messages: %v and %v", m.Key, prev.ID, m.ID)
// TODO: track enough information so that the rewriter can
// suggest/disambiguate messages.
}
// TODO: add position to message.
continue
}
i := len(msgs)
msgs = append(msgs, &m)
keyToIDs[m.Key] = msgs[i]
}
// Messages with different keys may still refer to the same translated
// message (e.g. different whitespace). Filter these.
idMap := map[string]bool{}
filtered := []*Message{}
for _, m := range msgs {
found := false
for _, id := range m.ID {
found = found || idMap[id]
}
if !found {
filtered = append(filtered, m)
}
for _, id := range m.ID {
idMap[id] = true
}
}
// Build index of translations.
translations := map[language.Tag]map[string]Message{}
languages := append([]language.Tag{}, s.Config.Supported...)
for _, t := range s.Translations {
tag := t.Language
if _, ok := translations[tag]; !ok {
translations[tag] = map[string]Message{}
languages = append(languages, tag)
}
for _, m := range t.Messages {
if !m.Translation.IsEmpty() {
for _, id := range m.ID {
if _, ok := translations[tag][id]; ok {
warnf("Duplicate translation in locale %q for message %q", tag, id)
}
translations[tag][id] = m
}
}
}
}
languages = internal.UniqueTags(languages)
for _, tag := range languages {
ms := Messages{Language: tag}
for _, orig := range filtered {
m := *orig
m.Key = ""
m.Position = ""
for _, id := range m.ID {
if t, ok := translations[tag][id]; ok {
m.Translation = t.Translation
if t.TranslatorComment != "" {
m.TranslatorComment = t.TranslatorComment
m.Fuzzy = t.Fuzzy
}
break
}
}
if tag == s.Config.SourceLanguage && m.Translation.IsEmpty() {
m.Translation = m.Message
if m.TranslatorComment == "" {
m.TranslatorComment = "Copied from source."
m.Fuzzy = true
}
}
// TODO: if translation is empty: pre-expand based on available
// linguistic features. This may also be done as a plugin.
ms.Messages = append(ms.Messages, m)
}
s.Messages = append(s.Messages, ms)
}
return nil
}
// Export writes out the messages to translation out files.
func (s *State) Export() error {
path, err := outPattern(s)
if err != nil {
return wrap(err, "export failed")
}
for _, out := range s.Messages {
// TODO: inject translations from existing files to avoid retranslation.
data, err := json.MarshalIndent(out, "", " ")
if err != nil {
return wrap(err, "JSON marshal failed")
}
file := fmt.Sprintf(path, out.Language)
if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil {
return wrap(err, "dir create failed")
}
if err := ioutil.WriteFile(file, data, 0644); err != nil {
return wrap(err, "write failed")
}
}
return nil
}
var (
ws = runes.In(unicode.White_Space).Contains
notWS = runes.NotIn(unicode.White_Space).Contains
)
func trimWS(s string) (trimmed, leadWS, trailWS string) {
trimmed = strings.TrimRightFunc(s, ws)
trailWS = s[len(trimmed):]
if i := strings.IndexFunc(trimmed, notWS); i > 0 {
leadWS = trimmed[:i]
trimmed = trimmed[i:]
}
return trimmed, leadWS, trailWS
}
// NOTE: The command line tool already prefixes with "gotext:".
var (
wrap = func(err error, msg string) error {
if err == nil {
return nil
}
return fmt.Errorf("%s: %v", msg, err)
}
wrapf = func(err error, msg string, args ...interface{}) error {
if err == nil {
return nil
}
return wrap(err, fmt.Sprintf(msg, args...))
}
errorf = fmt.Errorf
)
func warnf(format string, args ...interface{}) {
// TODO: don't log.
log.Printf(format, args...)
}
func loadPackages(conf *loader.Config, args []string) (*loader.Program, error) {
if len(args) == 0 {
args = []string{"."}
}
conf.Build = &build.Default
conf.ParserMode = parser.ParseComments
// Use the initial packages from the command line.
args, err := conf.FromArgs(args, false)
if err != nil {
return nil, wrap(err, "loading packages failed")
}
// Load, parse and type-check the whole program.
return conf.Load()
}