language: clean up gen

Mostly switch to using CodeWriter methods directly.

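Remove many old helper functions and move main to the
top of the file.

The generated tables in tables.go are now emitted as
slices instead of fixed-size arrays, so the reported
table sizes and the checksum change accordingly.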

Change-Id: I957408391c26d49e1db13265dc8ebb7829e267eb
Reviewed-on: https://go-review.googlesource.com/95823
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/gen.go b/language/gen.go
index 5a0b393..a850a4e 100644
--- a/language/gen.go
+++ b/language/gen.go
@@ -14,7 +14,6 @@
 	"fmt"
 	"io"
 	"log"
-	"reflect"
 	"sort"
 	"strconv"
 	"strings"
@@ -33,27 +32,17 @@
 		"output file for generated tables")
 )
 
-var comment = []string{
-	`
-matchLang holds pairs of langIDs of base languages that are typically
-mutually intelligible. Each pair is associated with a confidence and
-whether the intelligibility goes one or both ways.`,
-	`
-matchScript holds pairs of scriptIDs where readers of one script
-can typically also read the other. Each is associated with a confidence.`,
-	`
-nRegionGroups is the number of region groups.`,
-	`
-regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
-where each set holds all groupings that are directly connected in a region
-containment graph.`,
-	`
-regionInclusionBits is an array of bit vectors where every vector represents
-a set of region groupings.  These sets are used to compute the distance
-between two regions for the purpose of language matching.`,
-	`
-regionInclusionNext marks, for each entry in regionInclusionBits, the set of
-all groups that are reachable from the groups set in the respective entry.`,
+func main() {
+	gen.Init()
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("tables.go", "language")
+
+	b := newBuilder(w)
+	gen.WriteCLDRVersion(w)
+
+	b.writeConstants()
+	b.writeMatchData()
 }
 
 type builder struct {
@@ -61,20 +50,18 @@
 	hw   io.Writer // MultiWriter for w and w.Hash
 	data *cldr.CLDR
 	supp *cldr.SupplementalData
-
-	// lang   index
-	region index
-	script index
 }
 
 func (b *builder) langIndex(s string) uint16 {
 	return uint16(language.MustParseBase(s))
 }
 
-type index func(s string) int
+func (b *builder) regionIndex(s string) int {
+	return int(language.MustParseRegion(s))
+}
 
-func (i index) index(s string) int {
-	return i(s)
+func (b *builder) scriptIndex(s string) int {
+	return int(language.MustParseScript(s))
 }
 
 func newBuilder(w *gen.CodeWriter) *builder {
@@ -90,84 +77,18 @@
 		hw:   io.MultiWriter(w, w.Hash),
 		data: data,
 		supp: data.Supplemental(),
-
-		script: func(s string) int {
-			return int(language.MustParseScript(s))
-		},
-
-		region: func(s string) int {
-			return int(language.MustParseRegion(s))
-		},
 	}
 	return &b
 }
 
-var commentIndex = make(map[string]string)
-
-func init() {
-	for _, s := range comment {
-		key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
-		commentIndex[key] = s
-	}
-}
-
-func (b *builder) comment(name string) {
-	if s := commentIndex[name]; len(s) > 0 {
-		b.w.WriteComment(s)
-	} else {
-		fmt.Fprintln(b.w)
-	}
-}
-
-func (b *builder) pf(f string, x ...interface{}) {
-	fmt.Fprintf(b.hw, f, x...)
-	fmt.Fprint(b.hw, "\n")
-}
-
-func (b *builder) p(x ...interface{}) {
-	fmt.Fprintln(b.hw, x...)
-}
-
-func (b *builder) addSize(s int) {
-	b.w.Size += s
-	b.pf("// Size: %d bytes", s)
-}
-
-func (b *builder) writeConst(name string, x interface{}) {
-	b.comment(name)
-	b.w.WriteConst(name, x)
-}
-
 // writeConsts computes f(v) for all v in values and writes the results
 // as constants named _v to a single constant block.
 func (b *builder) writeConsts(f func(string) int, values ...string) {
-	b.pf("const (")
+	fmt.Fprintln(b.w, "const (")
 	for _, v := range values {
-		b.pf("\t_%s = %v", v, f(v))
+		fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
 	}
-	b.pf(")")
-}
-
-// writeType writes the type of the given value, which must be a struct.
-func (b *builder) writeType(value interface{}) {
-	b.comment(reflect.TypeOf(value).Name())
-	b.w.WriteType(value)
-}
-
-func (b *builder) writeSlice(name string, ss interface{}) {
-	b.writeSliceAddSize(name, 0, ss)
-}
-
-func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) {
-	b.comment(name)
-	b.w.Size += extraSize
-	v := reflect.ValueOf(ss)
-	t := v.Type().Elem()
-	b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len())
-
-	fmt.Fprintf(b.w, "var %s = ", name)
-	b.w.WriteArray(ss)
-	b.p()
+	fmt.Fprintln(b.w, ")")
 }
 
 // TODO: region inclusion data will probably not be use used in future matchers.
@@ -197,8 +118,8 @@
 
 func (b *builder) writeConstants() {
 	b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
-	b.writeConsts(b.region, regionConsts...)
-	b.writeConsts(b.script, scriptConsts...)
+	b.writeConsts(b.regionIndex, regionConsts...)
+	b.writeConsts(b.scriptIndex, scriptConsts...)
 }
 
 type mutualIntelligibility struct {
@@ -250,12 +171,12 @@
 			todo := []string{r}
 			for k := 0; k < len(todo); k++ {
 				r := todo[k]
-				regionToGroups[b.region.index(r)] |= 1 << uint8(i)
+				regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
 				todo = append(todo, regionHierarchy[r]...)
 			}
 		}
 	}
-	b.writeSlice("regionToGroups", regionToGroups)
+	b.w.WriteVar("regionToGroups", regionToGroups)
 
 	// maps language id to in- and out-of-group region.
 	paradigmLocales := [][3]uint16{}
@@ -266,16 +187,16 @@
 			pc := strings.SplitN(locales[i+j], "-", 2)
 			x[0] = b.langIndex(pc[0])
 			if len(pc) == 2 {
-				x[1+j] = uint16(b.region.index(pc[1]))
+				x[1+j] = uint16(b.regionIndex(pc[1]))
 			}
 		}
 		paradigmLocales = append(paradigmLocales, x)
 	}
-	b.writeSlice("paradigmLocales", paradigmLocales)
+	b.w.WriteVar("paradigmLocales", paradigmLocales)
 
-	b.writeType(mutualIntelligibility{})
-	b.writeType(scriptIntelligibility{})
-	b.writeType(regionIntelligibility{})
+	b.w.WriteType(mutualIntelligibility{})
+	b.w.WriteType(scriptIntelligibility{})
+	b.w.WriteType(regionIntelligibility{})
 
 	matchLang := []mutualIntelligibility{}
 	matchScript := []scriptIntelligibility{}
@@ -301,16 +222,16 @@
 			matchScript = append(matchScript, scriptIntelligibility{
 				wantLang:   uint16(b.langIndex(d[0])),
 				haveLang:   uint16(b.langIndex(s[0])),
-				wantScript: uint8(b.script.index(d[1])),
-				haveScript: uint8(b.script.index(s[1])),
+				wantScript: uint8(b.scriptIndex(d[1])),
+				haveScript: uint8(b.scriptIndex(s[1])),
 				distance:   uint8(distance),
 			})
 			if m.Oneway != "true" {
 				matchScript = append(matchScript, scriptIntelligibility{
 					wantLang:   uint16(b.langIndex(s[0])),
 					haveLang:   uint16(b.langIndex(d[0])),
-					wantScript: uint8(b.script.index(s[1])),
-					haveScript: uint8(b.script.index(d[1])),
+					wantScript: uint8(b.scriptIndex(s[1])),
+					haveScript: uint8(b.scriptIndex(d[1])),
 					distance:   uint8(distance),
 				})
 			}
@@ -352,7 +273,7 @@
 				distance: uint8(distance),
 			}
 			if d[1] != "*" {
-				ri.script = uint8(b.script.index(d[1]))
+				ri.script = uint8(b.scriptIndex(d[1]))
 			}
 			switch {
 			case d[2] == "*":
@@ -372,28 +293,22 @@
 	sort.SliceStable(matchLang, func(i, j int) bool {
 		return matchLang[i].distance < matchLang[j].distance
 	})
-	b.writeSlice("matchLang", matchLang)
+	b.w.WriteComment(`
+		matchLang holds pairs of langIDs of base languages that are typically
+		mutually intelligible. Each pair is associated with a confidence and
+		whether the intelligibility goes one or both ways.`)
+	b.w.WriteVar("matchLang", matchLang)
 
+	b.w.WriteComment(`
+		matchScript holds pairs of scriptIDs where readers of one script
+		can typically also read the other. Each is associated with a confidence.`)
 	sort.SliceStable(matchScript, func(i, j int) bool {
 		return matchScript[i].distance < matchScript[j].distance
 	})
-	b.writeSlice("matchScript", matchScript)
+	b.w.WriteVar("matchScript", matchScript)
 
 	sort.SliceStable(matchRegion, func(i, j int) bool {
 		return matchRegion[i].distance < matchRegion[j].distance
 	})
-	b.writeSlice("matchRegion", matchRegion)
-}
-
-func main() {
-	gen.Init()
-
-	w := gen.NewCodeWriter()
-	defer w.WriteGoFile("tables.go", "language")
-
-	b := newBuilder(w)
-	gen.WriteCLDRVersion(w)
-
-	b.writeConstants()
-	b.writeMatchData()
+	b.w.WriteVar("matchRegion", matchRegion)
 }
diff --git a/language/tables.go b/language/tables.go
index cb7e1f8..5552ab3 100644
--- a/language/tables.go
+++ b/language/tables.go
@@ -128,8 +128,7 @@
 	_Zzzz = 242
 )
 
-// Size: 357 bytes, 357 elements
-var regionToGroups = [357]uint8{
+var regionToGroups = []uint8{ // 357 elements
 	// Entry 0 - 3F
 	0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
 	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
@@ -181,14 +180,13 @@
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00,
-}
+} // Size: 381 bytes
 
-// Size: 18 bytes, 3 elements
-var paradigmLocales = [3][3]uint16{
+var paradigmLocales = [][3]uint16{ // 3 elements
 	0: [3]uint16{0x139, 0x0, 0x7b},
 	1: [3]uint16{0x13e, 0x0, 0x1f},
 	2: [3]uint16{0x3c0, 0x41, 0xee},
-}
+} // Size: 42 bytes
 
 type mutualIntelligibility struct {
 	want     uint16
@@ -196,7 +194,6 @@
 	distance uint8
 	oneway   bool
 }
-
 type scriptIntelligibility struct {
 	wantLang   uint16
 	haveLang   uint16
@@ -204,7 +201,6 @@
 	haveScript uint8
 	distance   uint8
 }
-
 type regionIntelligibility struct {
 	lang     uint16
 	script   uint8
@@ -215,8 +211,7 @@
 // matchLang holds pairs of langIDs of base languages that are typically
 // mutually intelligible. Each pair is associated with a confidence and
 // whether the intelligibility goes one or both ways.
-// Size: 678 bytes, 113 elements
-var matchLang = [113]mutualIntelligibility{
+var matchLang = []mutualIntelligibility{ // 113 elements
 	0:   {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
 	1:   {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
 	2:   {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
@@ -330,12 +325,11 @@
 	110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
 	111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
 	112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
-}
+} // Size: 702 bytes
 
 // matchScript holds pairs of scriptIDs where readers of one script
 // can typically also read the other. Each is associated with a confidence.
-// Size: 208 bytes, 26 elements
-var matchScript = [26]scriptIntelligibility{
+var matchScript = []scriptIntelligibility{ // 26 elements
 	0:  {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
 	1:  {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
 	2:  {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
@@ -362,10 +356,9 @@
 	23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
 	24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
 	25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
-}
+} // Size: 232 bytes
 
-// Size: 90 bytes, 15 elements
-var matchRegion = [15]regionIntelligibility{
+var matchRegion = []regionIntelligibility{ // 15 elements
 	0:  {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
 	1:  {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
 	2:  {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
@@ -381,6 +374,6 @@
 	12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
 	13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
 	14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
-}
+} // Size: 114 bytes
 
-// Total table size 1351 bytes (1KiB); checksum: B5A6F1BC
+// Total table size 1471 bytes (1KiB); checksum: 5E04E5F6