locale/maketables.go - exp - Git at Google

 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // +build ignore

 // Locale identifier table generator.
 // Data read from the web.

 package main

 import (
 	"bufio"
 	"code.google.com/p/go.exp/locale/cldr"
 	"flag"
 	"fmt"
 	"hash"
 	"hash/fnv"
 	"io"
 	"log"
 	"math"
 	"net/http"
 	"os"
 	"path"
 	"reflect"
 	"sort"
 	"strconv"
 	"strings"
 )

 // Command-line flags controlling where the input data comes from and how
 // the generator runs.
 var (
 	// url is the location of the CLDR core archive. It is registered under
 	// the flag name "cldr" and defaults to the release matching cldr.Version.
 	url = flag.String("cldr",
 		"http://www.unicode.org/Public/cldr/"+cldr.Version+"/core.zip",
 		"URL of CLDR archive.")
 	// iana is the location of the IANA language subtag registry.
 	iana = flag.String("iana",
 		"http://www.iana.org/assignments/language-subtag-registry",
 		"URL of IANA language subtag registry.")
 	// test is declared for comparing web data against package data.
 	// NOTE(review): nothing in this file reads it — confirm whether it is
 	// still needed.
 	test = flag.Bool("test", false,
 		"test existing tables; can be used to compare web data with package data.")
 	// localFiles makes openReader load the data files from the current
 	// directory (using the base name of each URL) instead of the network.
 	localFiles = flag.Bool("local", false,
 		"data files have been copied to the current directory; for debugging only.")
 )

 // comment holds the documentation that is emitted above each generated
 // table or constant. The first word of every entry is the name of the
 // identifier it documents; init uses that word as the key of commentIndex.
 var comment = []string{
 	`
 lang holds an alphabetically sorted list of BCP 47 language identifiers.
 All entries are 4 bytes. The index of the identifier (divided by 4) is the language ID.
 For 2-byte language identifiers, the two successive bytes have the following meaning:
     - if the first letter of the 2- and 3-letter ISO codes are the same:
       the second and third letter of the 3-letter ISO code.
     - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
 For 3-byte language identifiers the 4th byte is 0.`,
 	`
 langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
 in lookup tables. The language ids for these language codes are derived directly
 from the letters and are not consecutive.`,
 	`
 altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
 to 2-letter language codes that cannot be derived using the method described above.
 Each 3-letter code is followed by its 1-byte langID.`,
 	`
 tagAlias holds a mapping from legacy and grandfathered tags to their locale ID.`,
 	`
 langOldMap maps deprecated langIDs to their suggested replacements.`,
 	`
 langMacroMap maps languages to their macro language replacement, if applicable.`,
 	`
 script is an alphabetically sorted list of ISO 15924 codes. The index
 of the script in the string, divided by 4, is the internal script ID.`,
 	`
 isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
 for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
 the UN.M49 codes used for groups.)`,
 	`
 regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
 Each 2-letter code is followed by two bytes with the following meaning:
     - [A-Z]{2}: the first letter of the 2-letter code plus these two
                 letters form the 3-letter ISO code.
     - 0, n:     index into altRegionISO3.`,
 	`
 m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
 codes indicating collections of regions.`,
 	`
 altRegionISO3 holds a list of 3-letter region codes that cannot be
 mapped to 2-letter codes using the default algorithm. This is a short list.`,
 	`
 altRegionIDs holds a list of regionIDs the positions of which match those
 of the 3-letter ISO codes in altRegionISO3.`,
 	`
 currency holds an alphabetically sorted list of canonical 3-letter currency identifiers.
 Each identifier is followed by a byte of which the 6 most significant bits
 indicate the rounding and the 2 least significant bits indicate the
 number of decimal positions.`,
 	`
 suppressScript is an index from langID to the dominant script for that language,
 if it exists.  If a script is given, it should be suppressed from the language tag.`,
 	`
 nRegionGroups is the number of region groups.  All regionIDs < nRegionGroups
 are groups.`,
 	`
 regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
 where each set holds all groupings that are directly connected in a region
 containment graph.`,
 	`
 regionInclusionBits is an array of bit vectors where every vector represents
 a set of region groupings.  These sets are used to compute the distance
 between two regions for the purpose of locale matching.`,
 	`
 regionInclusionNext marks, for each entry in regionInclusionBits, the set of
 all groups that are reachable from the groups set in the respective entry.`,
 }

 // TODO: consider changing some of these structures to tries. This can reduce
 // memory, but may increase the need for memory allocations. This could be
 // mitigated if we can piggyback on locale strings for common cases.

 // failOnError logs e and panics when e is non-nil; a nil error is a no-op.
 func failOnError(e error) {
 	if e == nil {
 		return
 	}
 	log.Panic(e)
 }

 // setType identifies how a stringSet is used. The zero value means that no
 // usage has been recorded.
 type setType int

 const (
 	// Indexed sets are addressed by element position (see stringSet.index
 	// and stringSet.join).
 	Indexed setType = 1 + iota // all elements must be of same size
 	// Linear is the alternative usage type.
 	// NOTE(review): not referenced elsewhere in this file.
 	Linear
 )

 // stringSet is a collection of strings that is sorted and de-duplicated on
 // demand (see compact).
 type stringSet struct {
 	// s holds the elements. Callers in this file also modify it directly.
 	s []string
 	// sorted caches that s is sorted and de-duplicated; the methods only
 	// ever set it to the value of frozen. frozen marks the set as
 	// read-only for add and remove.
 	sorted, frozen bool

 	// We often need to update values after the creation of an index is completed.
 	// We include a convenience map for keeping track of this.
 	update map[string]string
 	typ    setType // used for checking.
 }

 // clone returns a copy of ss with its own element slice. All other fields,
 // including the update map, are copied as-is, so the map is shared with ss.
 func (ss *stringSet) clone() stringSet {
 	return stringSet{
 		s:      append([]string(nil), ss.s...),
 		sorted: ss.sorted,
 		frozen: ss.frozen,
 		update: ss.update,
 		typ:    ss.typ,
 	}
 }

 // setType records t as the usage type of ss. It panics if a different type
 // was recorded earlier. The type is stored on first use so that a later
 // conflicting use can be detected.
 func (ss *stringSet) setType(t setType) {
 	if ss.typ != 0 && ss.typ != t {
 		log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
 	}
 	ss.typ = t
 }

 // parse splits s into whitespace-separated words and adds each word to ss.
 func (ss *stringSet) parse(s string) {
 	words := bufio.NewScanner(strings.NewReader(s))
 	words.Split(bufio.ScanWords)
 	for more := words.Scan(); more; more = words.Scan() {
 		word := words.Text()
 		ss.add(word)
 	}
 }

 // assertChangeable panics if ss has been frozen and does nothing otherwise.
 func (ss *stringSet) assertChangeable() {
 	if !ss.frozen {
 		return
 	}
 	log.Panic("attempt to modify a frozen stringSet")
 }

 // add appends s to the set. It panics if the set is frozen. The element is
 // appended as-is; sorting and de-duplication happen later in compact.
 func (ss *stringSet) add(s string) {
 	ss.assertChangeable()
 	ss.s, ss.sorted = append(ss.s, s), ss.frozen
 }

 // freeze sorts and de-duplicates the set and then marks it as frozen, after
 // which add and remove panic.
 func (ss *stringSet) freeze() {
 	ss.compact()
 	ss.frozen = true
 }

 // compact sorts the elements of ss and removes duplicates in place. It is a
 // no-op when the set is already marked as sorted. An empty set is left
 // untouched instead of being sliced out of range.
 func (ss *stringSet) compact() {
 	if ss.sorted {
 		return
 	}
 	if a := ss.s; len(a) > 0 {
 		sort.Strings(a)
 		// k is the index of the last unique element kept so far.
 		k := 0
 		for i := 1; i < len(a); i++ {
 			if a[k] != a[i] {
 				a[k+1] = a[i]
 				k++
 			}
 		}
 		ss.s = a[:k+1]
 	}
 	// The sorted state is only cached for frozen sets: the elements of an
 	// unfrozen set may still be modified directly through ss.s.
 	ss.sorted = ss.frozen
 }

 // funcSorter sorts a string slice using a caller-supplied ordering. Len and
 // Swap come from the embedded sort.StringSlice; Less is overridden.
 type funcSorter struct {
 	fn func(a, b string) bool
 	sort.StringSlice
 }

 // Less reports whether element i orders before element j according to fn.
 func (s funcSorter) Less(i, j int) bool {
 	left, right := s.StringSlice[i], s.StringSlice[j]
 	return s.fn(left, right)
 }

 // sortFunc de-duplicates the set and then reorders its elements so that f
 // acts as the "less than" relation.
 func (ss *stringSet) sortFunc(f func(a, b string) bool) {
 	ss.compact()
 	byFunc := funcSorter{fn: f, StringSlice: sort.StringSlice(ss.s)}
 	sort.Sort(byFunc)
 }

 // remove deletes s from the set if it is present. It panics if the set is
 // frozen.
 func (ss *stringSet) remove(s string) {
 	ss.assertChangeable()
 	pos, found := ss.find(s)
 	if !found {
 		return
 	}
 	// Shift the tail one position to the left over the removed element.
 	ss.s = append(ss.s[:pos], ss.s[pos+1:]...)
 }

 // replace overwrites the element equal to ol with nu. The lookup goes
 // through index, which panics if ol is not in the set.
 func (ss *stringSet) replace(ol, nu string) {
 	pos := ss.index(ol)
 	ss.s[pos] = nu
 	ss.sorted = ss.frozen
 }

 // index returns the position of s in the sorted set and marks the set as
 // Indexed. It panics if s is not an element; when there is a neighbouring
 // element, the panic message names it.
 func (ss *stringSet) index(s string) int {
 	ss.setType(Indexed)
 	pos, found := ss.find(s)
 	if found {
 		return pos
 	}
 	if pos < len(ss.s) {
 		log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[pos])
 	}
 	log.Panicf("find: item %q is not in list", s)
 	return pos
 }

 // find compacts the set and binary-searches it for s. It returns the
 // position at which s is, or would be inserted, and whether s is present.
 func (ss *stringSet) find(s string) (int, bool) {
 	ss.compact()
 	pos := sort.SearchStrings(ss.s, s)
 	if pos == len(ss.s) {
 		return pos, false
 	}
 	return pos, ss.s[pos] == s
 }

 // slice returns the sorted, de-duplicated elements of the set. The returned
 // slice is the set's own storage, not a copy.
 func (ss *stringSet) slice() []string {
 	ss.compact()
 	return ss.s
 }

 // updateLater stores key in the update map under v, creating the map on
 // first use. Note the direction: v is the map key and key is the value.
 func (ss *stringSet) updateLater(v, key string) {
 	if ss.update == nil {
 		ss.update = make(map[string]string)
 	}
 	ss.update[v] = key
 }

 // join concatenates all elements, which must all have the same length n,
 // and terminates the result with a sentinel of n 0xff bytes. It marks the
 // set as Indexed.
 //
 // The set itself is not modified, so repeated calls return the same string.
 // join panics if the set is empty (the sentinel length would be undefined)
 // or if the elements differ in length.
 func (ss *stringSet) join() string {
 	ss.setType(Indexed)
 	if len(ss.s) == 0 {
 		log.Panic("join: cannot join an empty set")
 	}
 	n := len(ss.s[0])
 	for _, s := range ss.s {
 		if len(s) != n {
 			log.Panic("join: not all entries are of the same length")
 		}
 	}
 	return strings.Join(ss.s, "") + strings.Repeat("\xff", n)
 }

 // ianaEntry holds information for an entry in the IANA Language Subtag Registry.
 // All types use the same entry.
 // See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
 // fields.
 //
 // Each field is filled by parseRegistry from the registry field named in
 // its comment.
 type ianaEntry struct {
 	typ            string   // "Type:"
 	tag            string   // "Subtag:" or "Tag:"
 	description    []string // "Description:", one element per occurrence
 	scope          string   // "Scope:"
 	added          string   // "Added:"
 	preferred      string   // "Preferred-Value:"
 	deprecated     string   // "Deprecated:"
 	suppressScript string   // "Suppress-Script:"
 	macro          string   // "Macrolanguage:"
 	prefix         []string // "Prefix:", one element per occurrence
 }

 // builder holds the parsed input data and the state needed to write the
 // generated tables.
 type builder struct {
 	w      io.Writer   // multi writer
 	out    io.Writer   // set to Stdout
 	hash32 hash.Hash32 // for checking whether tables have changed.
 	size   int         // running total of the sizes reported by addSize and addArraySize
 	data   *cldr.CLDR
 	supp   *cldr.SupplementalData // result of data.Supplemental()

 	// indices
 	locale      stringSet // common locales
 	lang        stringSet // canonical language ids (2 or 3 letter ISO codes) with data
 	langNoIndex stringSet // 3-letter ISO codes with no associated data
 	script      stringSet // 4-letter ISO codes
 	region      stringSet // 2-letter ISO or 3-digit UN M49 codes
 	currency    stringSet // 3-letter ISO currency codes

 	// langInfo
 	registry map[string]*ianaEntry // IANA registry entries keyed by tag, see parseRegistry
 }

 // openReader fetches *url and returns the response body; the caller must
 // close it.
 //
 // When the -local flag is set, *url is first rewritten in place to a
 // file:// URL naming the file with the same base name in the current
 // directory. A transport or working-directory error panics through
 // failOnError; a status other than 200 terminates the program.
 func openReader(url *string) io.ReadCloser {
 	if *localFiles {
 		pwd, err := os.Getwd()
 		failOnError(err)
 		*url = "file://" + path.Join(pwd, path.Base(*url))
 	}
 	// Register a file transport so that file:// URLs are served from disk.
 	t := &http.Transport{}
 	t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
 	c := &http.Client{Transport: t}
 	resp, err := c.Get(*url)
 	failOnError(err)
 	if resp.StatusCode != 200 {
 		resp.Body.Close()
 		log.Fatalf(`bad GET status for "%s": %s`, *url, resp.Status)
 	}
 	return resp.Body
 }

 // newBuilder fetches and decodes the "supplemental" directory of the CLDR
 // archive, sets up the output writers (stdout plus a hash of everything
 // written through b.w) and parses the IANA registry.
 func newBuilder() *builder {
 	src := openReader(url)
 	defer src.Close()

 	dec := &cldr.Decoder{}
 	dec.SetDirFilter("supplemental")
 	data, err := dec.DecodeZip(src)
 	failOnError(err)

 	b := &builder{
 		out:    os.Stdout,
 		data:   data,
 		supp:   data.Supplemental(),
 		hash32: fnv.New32(),
 	}
 	b.w = io.MultiWriter(b.out, b.hash32)
 	b.parseRegistry()
 	return b
 }

 // parseRegistry reads the IANA language subtag registry from the location
 // given by the iana flag and fills b.registry with one ianaEntry per tag.
 //
 // The input is consumed as a stream of whitespace-separated words. A word
 // is treated as a field name when it matches one of the cases below and
 // the word after it is taken as the value. It panics on a scanner error
 // and terminates the program when a tag occurs twice, except for an
 // extlang record repeating a language tag.
 func (b *builder) parseRegistry() {
 	r := openReader(iana)
 	defer r.Close()
 	b.registry = make(map[string]*ianaEntry)

 	scan := bufio.NewScanner(r)
 	scan.Split(bufio.ScanWords)
 	var record *ianaEntry
 	// The loop has no post statement: each path through the body is
 	// responsible for leaving the scanner on the word that should be
 	// interpreted as the next key.
 	for more := scan.Scan(); more; {
 		key := scan.Text()
 		more = scan.Scan()
 		value := scan.Text()
 		switch key {
 		case "Type:":
 			// A Type field starts a new record.
 			record = &ianaEntry{typ: value}
 		case "Subtag:", "Tag:":
 			record.tag = value
 			if info, ok := b.registry[value]; ok {
 				// The only duplicate tolerated is an extlang record
 				// repeating a language tag; the first entry is kept.
 				if info.typ != "language" || record.typ != "extlang" {
 					log.Fatalf("parseRegistry: tag %q already exists", value)
 				}
 			} else {
 				b.registry[value] = record
 			}
 		case "Suppress-Script:":
 			record.suppressScript = value
 		case "Added:":
 			record.added = value
 		case "Deprecated:":
 			record.deprecated = value
 		case "Macrolanguage:":
 			record.macro = value
 		case "Preferred-Value:":
 			record.preferred = value
 		case "Prefix:":
 			record.prefix = append(record.prefix, value)
 		case "Scope:":
 			record.scope = value
 		case "Description:":
 			// A description may span several words. Collect words until
 			// one starts with '%' or ends in ':'.
 			buf := []byte(value)
 			for more = scan.Scan(); more; more = scan.Scan() {
 				b := scan.Bytes() // shadows the receiver within this loop
 				if b[0] == '%' || b[len(b)-1] == ':' {
 					break
 				}
 				buf = append(buf, ' ')
 				buf = append(buf, b...)
 			}
 			record.description = append(record.description, string(buf))
 			// The scanner already sits on the terminating word, which is
 			// interpreted as the next key.
 			continue
 		default:
 			// Unknown word: the word read as value is re-interpreted as
 			// the key in the next iteration.
 			continue
 		}
 		// Key and value were both consumed; advance to the next key.
 		more = scan.Scan()
 	}
 	if scan.Err() != nil {
 		log.Panic(scan.Err())
 	}
 }

 // commentIndex maps the name of a generated table or constant to its
 // documentation, formatted as a block of Go line comments.
 var commentIndex = make(map[string]string)

 // init fills commentIndex from comment. The key of an entry is its first
 // whitespace-delimited word, so the lookup does not depend on how the text
 // is indented or on which whitespace character follows the name. Entries
 // without any word are skipped.
 func init() {
 	for _, s := range comment {
 		words := strings.Fields(s)
 		if len(words) == 0 {
 			continue
 		}
 		// Prefix every line of the text with a comment marker.
 		commentIndex[words[0]] = strings.Replace(s, "\n", "\n// ", -1)
 	}
 }

 // comment writes the documentation registered for name in commentIndex,
 // followed by a newline, to b.out. It bypasses b.w, so comments are not
 // part of the hash.
 func (b *builder) comment(name string) {
 	doc := commentIndex[name]
 	fmt.Fprintln(b.out, doc)
 }

 // pf formats x according to f and writes the result, followed by a
 // newline, to b.w.
 func (b *builder) pf(f string, x ...interface{}) {
 	line := fmt.Sprintf(f, x...)
 	fmt.Fprintln(b.w, line)
 }

 // p writes its operands to b.w in the manner of fmt.Println: separated by
 // spaces and followed by a newline.
 func (b *builder) p(x ...interface{}) {
 	fmt.Fprintln(b.w, x...)
 }

 // addSize emits a "// Size" comment for a value of s bytes and adds s to
 // the running total in b.size.
 func (b *builder) addSize(s int) {
 	b.pf("// Size: %d bytes", s)
 	b.size += s
 }

 // addArraySize emits a "// Size" comment for an array of n elements that
 // occupies s bytes and adds s to the running total in b.size.
 func (b *builder) addArraySize(s, n int) {
 	b.pf("// Size: %d bytes, %d elements", s, n)
 	b.size += s
 }

 // writeConst writes the documentation registered for name followed by a
 // constant declaration "const name = x", with x formatted using %v.
 func (b *builder) writeConst(name string, x interface{}) {
 	b.comment(name)
 	b.pf("const %s = %v", name, x)
 }

 // writeSlice writes ss, which must be a slice or array, as a Go array
 // variable called name, preceded by its documentation and a size comment.
 // Struct elements are written one per line using %#v; all other elements
 // are written with %d, twelve per line.
 func (b *builder) writeSlice(name string, ss interface{}) {
 	b.comment(name)
 	v := reflect.ValueOf(ss)
 	elem := v.Type().Elem()
 	n := v.Len()
 	b.addArraySize(n*int(elem.Size()), n)
 	fmt.Fprintf(b.w, `var %s = [%d]%s{`, name, n, elem)
 	isStruct := elem.Kind() == reflect.Struct
 	for i := 0; i < n; i++ {
 		item := v.Index(i).Interface()
 		if isStruct {
 			fmt.Fprintf(b.w, "\n\t%#v, ", item)
 			continue
 		}
 		if i%12 == 0 {
 			fmt.Fprintf(b.w, "\n\t")
 		}
 		fmt.Fprintf(b.w, "%d, ", item)
 	}
 	b.p("\n}")
 }

 // writeStringSlice writes ss as an array of strings called name, one quoted
 // element per line. The reported size is the string headers plus the bytes
 // of every element. This produces a lot of overhead and should typically
 // only be used for debugging.
 // TODO: remove
 func (b *builder) writeStringSlice(name string, ss []string) {
 	b.comment(name)
 	elem := reflect.TypeOf(ss).Elem()
 	total := len(ss) * int(elem.Size())
 	for i := range ss {
 		total += len(ss[i])
 	}
 	b.addArraySize(total, len(ss))
 	b.pf(`var %s = [%d]%s{`, name, len(ss), elem)
 	for _, s := range ss {
 		b.pf("\t%q,", s)
 	}
 	b.p("}")
 }

 // writeString writes s as a string variable called name, preceded by its
 // documentation and a size comment. Strings shorter than 40 bytes are
 // written on one line. Longer strings are written as a concatenation of
 // quoted chunks, each taken from at most cpl bytes of s and shortened
 // until its quoted form is at most cpl+2 characters long.
 func (b *builder) writeString(name, s string) {
 	b.comment(name)
 	b.addSize(len(s) + int(reflect.TypeOf(s).Size()))
 	if len(s) < 40 {
 		b.pf(`var %s string = %q`, name, s)
 		return
 	}
 	const cpl = 60
 	b.pf(`var %s string = "" +`, name)
 	for rest := s; ; {
 		n := len(rest)
 		if n > cpl {
 			n = cpl
 		}
 		// Escapes make the quoted form longer than the raw bytes; drop
 		// trailing bytes until the chunk fits.
 		q := strconv.Quote(rest[:n])
 		for len(q) > cpl+2 {
 			n--
 			q = strconv.Quote(rest[:n])
 		}
 		if n == len(rest) {
 			// Last chunk: no trailing concatenation operator.
 			b.pf(`	%s`, q)
 			return
 		}
 		b.pf(`	%s +`, q)
 		rest = rest[n:]
 	}
 }

 // base is the number of letters in the range 'a' to 'z'.
 const base = 'z' - 'a' + 1

 // strToInt interprets s as a number in the given base, with each byte
 // contributing its offset from 'a' as a digit, most significant first.
 func strToInt(s string) uint {
 	var v uint
 	for _, c := range []byte(s) {
 		v = v*base + uint(c-'a')
 	}
 	return v
 }

 // writeBitVector writes a bit vector called name in which the bit at
 // position strToInt(s) is set for every s in ss. The vector is sized to
 // hold one bit for every string of the same length as ss[0].
 func (b *builder) writeBitVector(name string, ss []string) {
 	width := float64(len(ss[0]))
 	nbytes := int(math.Ceil(math.Pow(base, width) / 8))
 	vec := make([]uint8, nbytes)
 	for _, s := range ss {
 		bit := strToInt(s)
 		vec[bit/8] |= 1 << (bit % 8)
 	}
 	b.writeSlice(name, vec)
 }

 // writeMapFunc writes m as a map[string]uint16 variable called name, with
 // the entries in sorted key order and every value converted by f.
 //
 // The reported size counts, per entry, a 2-byte value and a string header,
 // plus the bytes of every key, since the keys are what the generated map
 // stores.
 // TODO: convert this type into a list or two-stage trie.
 func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
 	b.comment(name)
 	sz := len(m) * (2 + int(reflect.TypeOf(m).Key().Size()))
 	keys := make([]string, 0, len(m))
 	for k := range m {
 		sz += len(k)
 		keys = append(keys, k)
 	}
 	b.addSize(sz)
 	b.pf(`var %s = map[string]uint16{`, name)
 	// Map iteration order is random; sort for reproducible output.
 	sort.Strings(keys)
 	for _, k := range keys {
 		b.pf("\t%q: %v,", k, f(m[k]))
 	}
 	b.p("}")
 }

 // langIndex returns the language ID for s: its position in b.lang if it is
 // listed there, and otherwise strToInt(s) offset by the number of entries
 // in b.lang.
 func (b *builder) langIndex(s string) uint16 {
 	i, ok := b.lang.find(s)
 	if !ok {
 		return uint16(strToInt(s)) + uint16(len(b.lang.s))
 	}
 	return uint16(i)
 }

 // inc advances the string to its lexicographical successor of the same
 // length: the last byte is incremented, and a 'z' or 'Z' wraps around to
 // 'a' or 'A' with a carry into the byte before it. For example,
 // inc("qaz") is "qba".
 //
 // If every byte wraps (for example "zz"), no successor of the same length
 // exists; inc then returns a string that still sorts after s, so that
 // range loops of the form "for a := lo; a <= hi; a = inc(a)" terminate.
 func inc(s string) string {
 	buf := []byte(s)
 	for i := len(buf) - 1; i >= 0; i-- {
 		switch buf[i] {
 		case 'z':
 			buf[i] = 'a' // wrap and carry into the previous byte
 		case 'Z':
 			buf[i] = 'A'
 		default:
 			buf[i]++
 			return string(buf)
 		}
 	}
 	if len(s) == 0 {
 		return "a"
 	}
 	return s[:len(s)-1] + string(s[len(s)-1]+1)
 }

 // parseIndices fills the language, script, region, currency and locale
 // sets of b from the IANA registry and the CLDR supplemental data.
 func (b *builder) parseIndices() {
 	meta := b.supp.Metadata

 	for k, v := range b.registry {
 		var ss *stringSet
 		switch v.typ {
 		case "language":
 			// 2-letter codes, codes with a suppress-script and special
 			// codes go into the indexed language list.
 			if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
 				b.lang.add(k)
 				continue
 			}
 			ss = &b.langNoIndex
 		case "region":
 			ss = &b.region
 		case "script":
 			ss = &b.script
 		default:
 			continue
 		}
 		bounds := strings.SplitN(k, "..", 2)
 		if len(bounds) == 1 {
 			ss.add(k)
 			continue
 		}
 		// A tag of the form "lo..hi" stands for a range of codes.
 		for a := bounds[0]; a <= bounds[1]; a = inc(a) {
 			ss.add(a)
 		}
 	}

 	// currency codes
 	for _, reg := range b.supp.CurrencyData.Region {
 		for _, cur := range reg.Currency {
 			b.currency.add(cur.Iso4217)
 		}
 	}

 	// common locales
 	b.locale.parse(meta.DefaultContent.Locales)
 }

 // writeLanguage generates all tables needed for language canonicalization:
 // unknownLang, lang, langNoIndexOffset, langNoIndex, altLangISO3,
 // langOldMap, langMacroMap and tagAlias.
 func (b *builder) writeLanguage() {
 	meta := b.supp.Metadata

 	b.writeConst("unknownLang", b.lang.index("und"))

 	// Get language codes that need to be mapped (overlong 3-letter codes, deprecated
 	// 2-letter codes and grandfathered tags).
 	langOldMap := stringSet{}

 	// Mappings for macro languages
 	langMacroMap := stringSet{}

 	// altLangISO3 gets the alternative ISO3 names that need to be mapped.
 	altLangISO3 := stringSet{}

 	// legacyTag maps from tag to language code.
 	legacyTag := make(map[string]string)

 	// Work on a copy so that the suffixes appended below do not end up in
 	// b.lang.
 	lang := b.lang.clone()
 	for _, a := range meta.Alias.LanguageAlias {
 		if a.Replacement == "" {
 			a.Replacement = "und"
 		}
 		// TODO: support mapping to tags
 		// Only the part of the replacement before the first '_' is used.
 		repl := strings.SplitN(a.Replacement, "_", 2)[0]
 		if a.Reason == "overlong" {
 			// Record the 3-letter form of a 2-letter code.
 			if len(a.Replacement) == 2 && len(a.Type) == 3 {
 				lang.updateLater(a.Replacement, a.Type)
 			}
 		} else if len(a.Type) <= 3 {
 			// Aliases with any reason other than "deprecated" are
 			// recorded as macro language mappings.
 			if a.Reason != "deprecated" {
 				langMacroMap.add(a.Type)
 				langMacroMap.updateLater(a.Type, repl)
 			}
 		} else {
 			// Longer alias types are stored as legacy tags, with '_'
 			// replaced by '-'.
 			legacyTag[strings.Replace(a.Type, "_", "-", -1)] = repl
 		}
 	}
 	for k, v := range b.registry {
 		// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
 		if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
 			langOldMap.add(k)
 			langOldMap.updateLater(k, v.preferred)
 		}
 	}
 	// Fix CLDR mappings.
 	lang.updateLater("tl", "tgl")
 	lang.updateLater("sh", "hbs")
 	lang.updateLater("mo", "mol")
 	lang.updateLater("no", "nor")
 	lang.updateLater("tw", "twi")
 	lang.updateLater("nb", "nob")
 	lang.updateLater("ak", "aka")

 	// Ensure that each 2-letter code is matched with a 3-letter code.
 	for _, v := range lang.s {
 		s, ok := lang.update[v]
 		if !ok {
 			// Fall back to the 3-letter code of the preferred replacement
 			// of a deprecated code.
 			if s, ok = lang.update[langOldMap.update[v]]; !ok {
 				continue
 			}
 			lang.update[v] = s
 		}
 		// A 3-letter code with a different first letter cannot be stored
 		// as a 2-byte suffix and goes into altLangISO3 instead.
 		if v[0] != s[0] {
 			altLangISO3.add(s)
 			altLangISO3.updateLater(s, v)
 		}
 	}

 	// Complete canonicalized language tags.
 	lang.freeze()
 	for i, v := range lang.s {
 		// We can avoid these manual entries by using the IANA registry directly.
 		// Seems easier to update the list manually, as changes are rare.
 		// The panic in this loop will trigger if we miss an entry.
 		add := ""
 		if s, ok := lang.update[v]; ok {
 			if s[0] == v[0] {
 				// Same first letter: store the remaining two letters.
 				add = s[1:]
 			} else {
 				// Otherwise store a 0 byte and the position in altLangISO3.
 				add = string([]byte{0, byte(altLangISO3.index(s))})
 			}
 		} else if len(v) == 3 {
 			// 3-letter codes are padded to 4 bytes with a 0 byte.
 			add = "\x00"
 		} else {
 			log.Panicf("no data for long form of %q", v)
 		}
 		lang.s[i] += add
 	}
 	b.writeString("lang", lang.join())

 	b.writeConst("langNoIndexOffset", len(b.lang.s))

 	// space of all valid 3-letter language identifiers.
 	b.writeBitVector("langNoIndex", b.langNoIndex.slice())

 	// Append to every alternative 3-letter code the language ID of the
 	// 2-letter code it belongs to.
 	for i, s := range altLangISO3.slice() {
 		idx := b.lang.index(altLangISO3.update[s])
 		altLangISO3.s[i] += string([]byte{byte(idx)})
 	}
 	b.writeString("altLangISO3", altLangISO3.join())

 	// makeMap writes the mappings recorded in ss.update as a slice of
 	// {from, to} language ID pairs ordered by the from ID.
 	makeMap := func(name string, ss *stringSet) {
 		ss.sortFunc(func(i, j string) bool {
 			return b.langIndex(i) < b.langIndex(j)
 		})
 		m := []struct{ from, to uint16 }{}
 		for _, s := range ss.s {
 			m = append(m, struct{ from, to uint16 }{
 				b.langIndex(s),
 				b.langIndex(ss.update[s]),
 			})
 		}
 		b.writeSlice(name, m)
 	}
 	makeMap("langOldMap", &langOldMap)
 	makeMap("langMacroMap", &langMacroMap)

 	b.writeMapFunc("tagAlias", legacyTag, func(s string) uint16 {
 		return uint16(b.langIndex(s))
 	})
 }

 // writeScript writes the unknownScript constant, the script table and the
 // suppressScript table. The latter holds, for every language in b.lang,
 // the ID of its suppress-script from the registry, or the ID of "Zzzz" if
 // it has none.
 func (b *builder) writeScript() {
 	unknown := uint8(b.script.index("Zzzz"))
 	b.writeConst("unknownScript", unknown)
 	b.writeString("script", b.script.join())

 	langs := b.lang.slice()
 	supp := make([]uint8, len(langs))
 	for i, code := range langs {
 		id := unknown
 		if sc := b.registry[code].suppressScript; sc != "" {
 			id = uint8(b.script.index(sc))
 		}
 		supp[i] = id
 	}
 	b.writeSlice("suppressScript", supp)
 }

 // parseM49 converts a decimal UN.M49 code to its numeric value. The empty
 // string yields 0. Input that is not a decimal number fitting in 10 bits
 // is reported through failOnError.
 func parseM49(s string) uint16 {
 	if s == "" {
 		return 0
 	}
 	code, err := strconv.ParseUint(s, 10, 10)
 	failOnError(err)
 	return uint16(code)
 }

 // writeRegion writes the region tables: unknownRegion, isoRegionOffset,
 // regionISO, altRegionISO3, altRegionIDs and m49.
 func (b *builder) writeRegion() {
 	b.writeConst("unknownRegion", b.region.index("ZZ"))

 	// isoOffset is the position of "AA" in the sorted region list; the
 	// loop further down treats all entries before it as UN.M49 codes.
 	isoOffset := b.region.index("AA")
 	m49map := make([]uint16, len(b.region.slice()))
 	altRegionISO3 := ""
 	altRegionIDs := []uint16{}

 	b.writeConst("isoRegionOffset", isoOffset)

 	// 2-letter region lookup and mapping to numeric codes.
 	regionISO := b.region.clone()
 	regionISO.s = regionISO.s[isoOffset:]
 	regionISO.sorted = false
 	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
 		i := regionISO.index(tc.Type)
 		if len(tc.Alpha3) == 3 {
 			if tc.Alpha3[0] == tc.Type[0] {
 				// Same first letter: store the remaining two letters.
 				regionISO.s[i] += tc.Alpha3[1:]
 			} else {
 				// Otherwise store a 0 byte and the byte offset of the
 				// code within altRegionISO3.
 				regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
 				altRegionISO3 += tc.Alpha3
 				altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
 			}
 		}
 		// Each region may be assigned a numeric code only once.
 		if d := m49map[isoOffset+i]; d != 0 {
 			log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
 		}
 		m49map[isoOffset+i] = parseM49(tc.Numeric)
 	}
 	// Pad entries that did not receive a 2-byte suffix above, as join
 	// requires all entries to have the same length.
 	for i, s := range regionISO.s {
 		if len(s) != 4 {
 			regionISO.s[i] = s + "  "
 		}
 	}
 	b.writeString("regionISO", regionISO.join())
 	b.writeString("altRegionISO3", altRegionISO3)
 	b.writeSlice("altRegionIDs", altRegionIDs)

 	// 3-digit region lookup, groupings.
 	for i := 0; i < isoOffset; i++ {
 		m49map[i] = parseM49(b.region.s[i])
 	}
 	b.writeSlice("m49", m49map)
 }

 // writeLocale writes the sorted list of common locales as a string array
 // called "locale".
 func (b *builder) writeLocale() {
 	names := b.locale.slice()
 	b.writeStringSlice("locale", names)
 }

 // writeLanguageInfo currently writes nothing; its body is empty and main
 // does not call it.
 func (b *builder) writeLanguageInfo() {
 }

 // writeCurrencies writes the currency table and the unknownCurrency
 // constant. Every currency code is followed by one byte that holds the
 // rounding increment in its upper 6 bits and the number of decimal
 // positions in its lower 2 bits.
 func (b *builder) writeCurrencies() {
 	unknown := b.currency.index("XXX")
 	digits := map[string]uint64{}
 	rounding := map[string]uint64{}
 	for _, info := range b.supp.CurrencyData.Fractions[0].Info {
 		d, err := strconv.ParseUint(info.Digits, 10, 2)
 		digits[info.Iso4217] = d
 		failOnError(err)
 		r, err := strconv.ParseUint(info.Rounding, 10, 6)
 		rounding[info.Iso4217] = r
 		failOnError(err)
 	}
 	for i, cur := range b.currency.slice() {
 		d, ok := digits[cur]
 		if !ok {
 			d = 2 // default number of decimal positions
 		}
 		r := rounding[cur]
 		if r == 0 {
 			r = 1 // default rounding increment in units 10^{-digits}
 		}
 		b.currency.s[i] += string([]byte{byte(r<<2 + d)})
 	}
 	b.writeString("currency", b.currency.join())
 	// Hack alert: gofmt indents a trailing comment after an indented string.
 	// Write this constant after currency to force a proper indentation of
 	// the final comment.
 	b.writeConst("unknownCurrency", unknown)
 }

 // writeRegionInclusionData writes nRegionGroups, regionInclusion,
 // regionInclusionBits and regionInclusionNext, which are derived from the
 // territory containment data in the CLDR supplemental data.
 func (b *builder) writeRegionInclusionData() {
 	// index is the position of a group or bit vector in the tables.
 	type index uint
 	// groups maps the region ID of a group to its group index.
 	groups := make(map[int]index)
 	// Create group indices.
 	for i := 0; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
 		groups[i] = index(i)
 	}
 	// Assign the next free index to containment groups that did not get
 	// one in the loop above.
 	for _, g := range b.supp.TerritoryContainment.Group {
 		group := b.region.index(g.Type)
 		if _, ok := groups[group]; !ok {
 			groups[group] = index(len(groups))
 		}
 	}
 	// Group sets are stored in the bits of a uint32.
 	if len(groups) > 32 {
 		log.Fatalf("only 32 groups supported, found %d", len(groups))
 	}
 	b.writeConst("nRegionGroups", len(groups))
 	// mm maps a region ID to the indices of the groups it is directly
 	// connected to: the groups containing it and, for a group, the groups
 	// it contains.
 	mm := make(map[int][]index)
 	for _, g := range b.supp.TerritoryContainment.Group {
 		group := b.region.index(g.Type)
 		for _, mem := range strings.Split(g.Contains, " ") {
 			r := b.region.index(mem)
 			mm[r] = append(mm[r], groups[group])
 			if g, ok := groups[r]; ok {
 				mm[group] = append(mm[group], g)
 			}
 		}
 	}
 	regionInclusion := make([]uint8, len(b.region.s))
 	// bvs maps each distinct bit vector to its position in
 	// regionInclusionBits.
 	bvs := make(map[uint32]index)
 	// Make the first bitvector positions correspond with the groups.
 	for r, i := range groups {
 		// A group's vector always has the group's own bit set.
 		bv := uint32(1 << i)
 		for _, g := range mm[r] {
 			bv |= 1 << g
 		}
 		bvs[bv] = i
 		regionInclusion[r] = uint8(bvs[bv])
 	}
 	// All remaining regions share bit vectors where possible.
 	for r := 0; r < len(b.region.s); r++ {
 		if _, ok := groups[r]; !ok {
 			bv := uint32(0)
 			for _, g := range mm[r] {
 				bv |= 1 << g
 			}
 			if bv == 0 {
 				// Pick the world for unspecified regions.
 				bv = 1 << groups[b.region.index("001")]
 			}
 			if _, ok := bvs[bv]; !ok {
 				bvs[bv] = index(len(bvs))
 			}
 			regionInclusion[r] = uint8(bvs[bv])
 		}
 	}
 	b.writeSlice("regionInclusion", regionInclusion)
 	regionInclusionBits := make([]uint32, len(bvs))
 	for k, v := range bvs {
 		regionInclusionBits[v] = uint32(k)
 	}
 	// Add bit vectors for increasingly large distances until a fixed point is reached.
 	regionInclusionNext := []uint8{}
 	// The slice grows inside the loop; the loop ends once no new vector
 	// is added.
 	for i := 0; i < len(regionInclusionBits); i++ {
 		bits := regionInclusionBits[i]
 		next := bits
 		// Union the vectors of all groups set in bits. The inner i
 		// shadows the outer loop variable.
 		for i := uint(0); i < uint(len(groups)); i++ {
 			if bits&(1<<i) != 0 {
 				next |= regionInclusionBits[i]
 			}
 		}
 		if _, ok := bvs[next]; !ok {
 			bvs[next] = index(len(bvs))
 			regionInclusionBits = append(regionInclusionBits, next)
 		}
 		regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
 	}
 	b.writeSlice("regionInclusionBits", regionInclusionBits)
 	b.writeSlice("regionInclusionNext", regionInclusionNext)
 }

 // header is the format string for the preamble of the generated file. Its
 // two verbs receive the CLDR URL and the IANA registry URL, shown under the
 // names of the flags that set them ("cldr" and "iana").
 var header = `// Generated by running
 //		maketables -cldr=%s -iana=%s
 // DO NOT EDIT

 package locale
 `

 // main parses the flags, loads the input data and writes the generated
 // source, followed by a summary line with the total table size and a
 // checksum of the output written through b.w.
 func main() {
 	flag.Parse()
 	b := newBuilder()
 	fmt.Fprintf(b.out, header, *url, *iana)

 	b.parseIndices()
 	// TODO: add b.writeLocale
 	steps := []func(){
 		b.writeLanguage,
 		b.writeScript,
 		b.writeRegion,
 		b.writeCurrencies,
 		b.writeRegionInclusionData,
 	}
 	for _, step := range steps {
 		step()
 	}

 	kib := float32(b.size) / 1024
 	fmt.Fprintf(b.out, "\n// Size: %.1fK (%d bytes); Check: %X\n", kib, b.size, b.hash32.Sum32())
 }