// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ignore

// This tool generates types for the various XML formats of CLDR.
package main

import (
	"archive/zip"
	"bytes"
	"encoding/xml"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"regexp"
	"strings"

	"golang.org/x/text/internal/gen"
)

// outputFile is the -output flag: the name of the Go file that main passes to
// gen.WriteGoFile. It defaults to "xml.go".
var outputFile = flag.String("output", "xml.go", "output file name")

// main generates the Go type definitions for the CLDR XML formats.
//
// It loads the CLDR core zip archive into memory and, for every entry of
// files, processes the first archive member whose name ends in
// "<file>.dtd": the DTD is parsed, resolved starting at the first element
// listed in top, and the resulting types are appended to a shared buffer.
// A Version constant is appended last and everything is written to the file
// named by the -output flag as package cldr.
//
// Any failure terminates the program.
func main() {
	flag.Parse()

	r := gen.OpenCLDRCoreZip()
	buffer, err := io.ReadAll(r)
	if err != nil {
		log.Fatalf("Could not read zip file: %v", err)
	}
	r.Close()
	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
	if err != nil {
		log.Fatalf("Could not read zip archive: %v", err)
	}

	var buf bytes.Buffer

	version := gen.CLDRVersion()

	for _, dtd := range files {
		for _, f := range z.File {
			if !strings.HasSuffix(f.Name, dtd.file+".dtd") {
				continue
			}
			rc, err := f.Open()
			failOnError(err)

			b := makeBuilder(&buf, dtd)
			b.parseDTD(rc)
			// Close explicitly instead of deferring: a defer would keep
			// every DTD open until main returns.
			rc.Close()

			// Without this check a DTD lacking its root element would
			// surface as a nil dereference inside resolve.
			root := b.index[dtd.top[0]]
			if root == nil {
				log.Fatalf("main: %s: root element %q is not defined", f.Name, dtd.top[0])
			}
			b.resolve(root)
			b.write()
			// b.version is only set when the DTD carries a #FIXED
			// attribute whose value parseDTD could capture.
			if b.version != "" && version != b.version {
				log.Fatalf("main: %s: inconsistent versions: found %s; want %s", f.Name, b.version, version)
			}
			// Only the first matching archive member is used.
			break
		}
	}
	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
	fmt.Fprintf(&buf, "const Version = %q\n", version)

	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}

// failOnError is a no-op for a nil error. For any other error it prints the
// error text to standard error, prefixed with the file name and line number
// of the function that called failOnError, and exits with status 1.
func failOnError(err error) {
	if err == nil {
		return
	}
	stderr := log.New(os.Stderr, "", log.Lshortfile)
	// Call depth 2 attributes the message to failOnError's caller.
	stderr.Output(2, err.Error())
	os.Exit(1)
}

// dtd holds the per-DTD configuration that steers code generation.
//
// main looks for an archive member whose name ends in file+".dtd" and starts
// resolution at top[0]. write emits one named Go type per entry of top; the
// first one is named root, the others are named after their element.
// The remaining fields list element and attribute names that receive special
// treatment in resolve and writeElem.
type dtd struct {
	file string   // base file name
	root string   // Go name of the root XML element
	top  []string // create a different type for this section

	skipElem    []string // hard-coded or deprecated elements; never added as sub-elements
	skipAttr    []string // attributes to exclude from the generated structs
	predefined  []string // hard-coded elements exist of the form <name>Elem; embedded as such
	forceRepeat []string // elements to make slices despite DTD
}

// files lists the DTDs to process, in the order in which main generates
// their types. For each entry the first name in top is the element at which
// resolution starts and whose type is named root.
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}

// comments maps an XML element name to the text that write emits directly
// before the Go type generated for that element. Each value starts with a
// newline, which yields a blank line before the doc comment in the output.
//
// write looks up only the names listed in a dtd's top field; an element
// without an entry gets no comment.
// NOTE(review): "dates" is not in any top list in files, so its entry looks
// unused by the code in this file — confirm before removing.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order. 
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}

// element describes one element declared in a DTD.
//
// parseDTD creates it from an ELEMENT directive, storing the unparsed content
// model in category, and extends attr and signature for each retained
// ATTLIST directive. resolve later turns category into the sub list.
type element struct {
	name      string // XML element name
	category  string // elements contained by this element (raw content model from the DTD)
	signature string // category + attrKey*

	attr []*attribute // attributes supported by this element.
	sub  []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}

	resolved bool // prevent multiple resolutions of this element.
}

// attribute describes one attribute taken from an ATTLIST directive.
//
// parseDTD fills in name, key and list. Nothing in this file assigns tag.
type attribute struct {
	name string   // attribute name as written in the DTD
	key  string   // text of the ATTLIST directive following the keyword and element name
	list []string // reToken matches found in the attribute's enumerated value list

	tag string // Go tag
}

// Regular expressions used by parseDTD to take DTD directives apart.
var (
	// reHead matches the start of a directive: group 1 is the keyword
	// (such as ELEMENT or ATTLIST), group 2 the element name.
	reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
	// reAttr matches the rest of an ATTLIST directive: group 1 is the
	// attribute name, group 2 a single-word type, group 3 the contents of a
	// parenthesized value list, group 4 the keyword following '#', group 5 a
	// quoted value following that keyword and group 6 a trailing quoted
	// string (quotes included).
	// NOTE(review): `[\.\d+]` is a character class matching exactly one
	// character, so group 5 stays empty for longer values. `[\.\d]+` may have
	// been intended — confirm before changing, because parseDTD stores
	// group 5 as the builder's version and main compares it.
	reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
	// reElem matches a complete ELEMENT content model: EMPTY, ANY, or a
	// parenthesized group with an optional occurrence indicator.
	reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
	// reToken is applied to an attribute's enumerated value list.
	// NOTE(review): as written it matches one word character followed by
	// '-'; `[\w\-]+` may have been intended — confirm. The resulting list is
	// not read anywhere in this file.
	reToken = regexp.MustCompile(`\w\-`)
)

// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
//
// A builder handles a single DTD: parseDTD fills index and version, resolve
// fills elem, and write emits the generated code to w.
type builder struct {
	w       io.Writer           // destination of the generated Go code
	index   map[string]*element // all declared elements, keyed by XML name
	elem    []*element          // elements in the order in which resolve first reached them
	info    dtd                 // configuration for the DTD being processed
	version string              // value captured from a #FIXED attribute; may be empty
}

// makeBuilder returns a builder for the DTD configured by d that writes its
// generated code to w. The element index and the element list start out
// empty; the version is left unset.
func makeBuilder(w io.Writer, d dtd) builder {
	var b builder
	b.w = w
	b.info = d
	b.index = map[string]*element{}
	b.elem = make([]*element, 0)
	return b
}

// parseDTD reads the DTD in r and records its declarations in b.
//
// Only XML directives are considered; all other tokens are skipped. Each
// directive must start with a keyword followed by an element name:
//
//   - ELEMENT adds a new element, with its unparsed content model as the
//     category, to b.index. Declaring an element twice is fatal.
//   - ATTLIST describes an attribute of a previously declared element. For
//     an attribute marked #FIXED the captured value is stored in b.version
//     and the attribute itself is not recorded. The attributes draft,
//     references, alt, validSubLocales, standard, type and choice are
//     dropped; every other attribute is appended to the element.
//
// Directives with any other keyword are ignored. Malformed input, including
// an error reported by the XML decoder, terminates the program.
func (b *builder) parseDTD(r io.Reader) {
	for d := xml.NewDecoder(r); ; {
		t, err := d.Token()
		// Token returns a nil token both at the end of the input and on
		// failure, so the two cases must be told apart by the error.
		if err == io.EOF {
			break
		}
		failOnError(err)
		dir, ok := t.(xml.Directive)
		if !ok {
			continue
		}
		m := reHead.FindSubmatch(dir)
		if m == nil {
			log.Fatalf("parseDTD: invalid directive %q", string(dir))
		}
		dir = dir[len(m[0]):]
		ename := string(m[2])
		el, elementFound := b.index[ename]
		switch string(m[1]) {
		case "ELEMENT":
			if elementFound {
				log.Fatalf("parseDTD: duplicate entry for element %q", ename)
			}
			// reElem is anchored at both ends, so a match always covers
			// the whole remainder of the directive.
			m := reElem.FindSubmatch(dir)
			if m == nil {
				log.Fatalf("parseDTD: invalid element %q", string(dir))
			}
			el = &element{
				name:     ename,
				category: string(m[1]),
			}
			b.index[ename] = el
		case "ATTLIST":
			if !elementFound {
				log.Fatalf("parseDTD: unknown element %q", ename)
			}
			s := string(dir)
			m := reAttr.FindStringSubmatch(s)
			if m == nil {
				log.Fatalf("parseDTD: invalid attribute %q", s)
			}
			if m[4] == "FIXED" {
				b.version = m[5]
			} else {
				switch m[1] {
				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
				case "type", "choice":
				default:
					el.attr = append(el.attr, &attribute{
						name: m[1],
						key:  s,
						list: reToken.FindAllString(m[3], -1),
					})
					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
				}
			}
		}
	}
}

// reCat matches the next token of a DTD content model after skipping
// spaces, commas and vertical bars. Group 1 is "(", ")" or a name with an
// optional leading '#'; group 2 is an optional occurrence indicator
// ('*', '+' or '?'). Every part is optional, so the expression also matches
// the empty string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)

// resolve takes a parsed element and converts it into structured data
// that can be used to generate the XML code.
//
// It walks the content model stored in e.category and appends each
// referenced element to e.sub once, resolving that element recursively.
// A sub-element is marked as repeated when it carries a '*' or '+'
// indicator, when it is listed in forceRepeat, or when a parenthesized group
// containing it is closed with '*' or '+'. Elements listed in skipElem and
// the names #PCDATA, ANY and EMPTY are ignored; any other name that is not
// in the index is fatal. An element that was resolved before is left alone.
func (b *builder) resolve(e *element) {
	if e.resolved {
		return
	}
	b.elem = append(b.elem, e)
	e.resolved = true
	s := e.category
	found := make(map[string]bool)
	// sequenceStart is a stack holding, for each open parenthesis, the
	// length of e.sub at the time it was opened.
	var sequenceStart []int
	for len(s) > 0 {
		m := reCat.FindStringSubmatch(s)
		// reCat can match the empty string, which happens when s starts
		// with a character it does not know. Treat that as an error, as no
		// input would be consumed and the loop would never end.
		if m == nil || len(m[0]) == 0 {
			log.Fatalf("%s: invalid category string %q", e.name, s)
		}
		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
		switch m[1] {
		case "":
			// Only separators were consumed.
		case "(":
			sequenceStart = append(sequenceStart, len(e.sub))
		case ")":
			if len(sequenceStart) == 0 {
				log.Fatalf("%s: unmatched closing parenthesis", e.name)
			}
			// A repeated group makes every element added since its
			// opening parenthesis repeated as well.
			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
				e.sub[i].repeat = e.sub[i].repeat || repeat
			}
			sequenceStart = sequenceStart[:len(sequenceStart)-1]
		default:
			if in(b.info.skipElem, m[1]) {
			} else if sub, ok := b.index[m[1]]; ok {
				if !found[sub.name] {
					e.sub = append(e.sub, struct {
						e      *element
						repeat bool
					}{sub, repeat})
					found[sub.name] = true
					b.resolve(sub)
				}
			} else if m[1] == "#PCDATA" || m[1] == "ANY" {
			} else if m[1] != "EMPTY" {
				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
			}
		}
		s = s[len(m[0]):]
	}
}

// in reports whether s is equal to one of the strings in set.
// It returns false for a nil or empty set.
func in(set []string, s string) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == s {
			return true
		}
	}
	return false
}

// repl turns the separators '-' and '_' into spaces, so that strings.Title
// sees the parts they delimit as separate words.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title converts an XML name into a Go identifier: the first character and
// every character following a '-', '_' or space are put in title case, and
// the separators themselves are removed. All other characters are kept as
// they are.
func title(s string) string {
	words := repl.Replace(s)
	capitalized := strings.Title(words)
	return strings.ReplaceAll(capitalized, " ", "")
}

// writeElem generates Go code for a single element, recursively.
//
// It writes the type expression for e to b.w. An element without
// sub-elements and without recorded attributes is written as Common. Every
// other element becomes an anonymous struct that embeds Common and has:
//   - a string field for each attribute not listed in skipAttr;
//   - an embedded <name>Elem for each sub-element listed in predefined;
//   - a pointer field, or a slice of pointers if the sub-element repeats,
//     for each remaining sub-element. The field's type is the named type of
//     the sub-element if it is listed in top, and an inline expansion
//     produced by a recursive call otherwise.
//
// tab is the indentation depth, in tabs, of the line on which the type
// expression starts.
func (b *builder) writeElem(tab int, e *element) {
	// p formats to b.w, indenting after every newline in the format string.
	// It reads tab when called, so the changes to tab below take effect
	// for all later calls.
	p := func(f string, x ...interface{}) {
		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
		fmt.Fprintf(b.w, f, x...)
	}
	if len(e.sub) == 0 && len(e.attr) == 0 {
		p("Common")
		return
	}
	p("struct {")
	tab++
	p("\nCommon")
	for _, attr := range e.attr {
		if !in(b.info.skipAttr, attr.name) {
			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
		}
	}
	for _, sub := range e.sub {
		if in(b.info.predefined, sub.e.name) {
			p("\n%sElem", sub.e.name)
			continue
		}
		if in(b.info.skipElem, sub.e.name) {
			continue
		}
		p("\n%s ", title(sub.e.name))
		if sub.repeat {
			p("[]")
		}
		p("*")
		if in(b.info.top, sub.e.name) {
			// Pass the name as an argument: it is data and must not be
			// interpreted as a format string.
			p("%s", title(sub.e.name))
		} else {
			b.writeElem(tab, sub.e)
		}
		p(" `xml:\"%s\"`", sub.e.name)
	}
	tab--
	p("\n}")
}

// write generates the Go XML code.
//
// For each name in the top list of the DTD configuration that was declared
// in the DTD, it writes the text registered for that name in comments,
// followed by a type declaration whose body is produced by writeElem. The
// type for the first name is called after the configured root; the others
// are called after their element. Names missing from the DTD are skipped.
func (b *builder) write() {
	for i, name := range b.info.top {
		e := b.index[name]
		if e == nil {
			continue
		}
		// The comment is data, not a format string, so it is written
		// verbatim.
		fmt.Fprint(b.w, comments[name])
		typeName := title(e.name)
		if i == 0 {
			typeName = b.info.root
		}
		fmt.Fprintf(b.w, "type %s ", typeName)
		b.writeElem(0, e)
		fmt.Fprint(b.w, "\n")
	}
}
