Marcel van Lohuizen | b8e57db | 2015-12-05 12:05:49 +0100 | [diff] [blame] | 1 | // Copyright 2014 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Russ Cox | 8f690f2 | 2021-02-19 18:54:44 -0500 | [diff] [blame] | 5 | //go:build ignore |
Marcel van Lohuizen | b8e57db | 2015-12-05 12:05:49 +0100 | [diff] [blame] | 6 | // +build ignore |
| 7 | |
| 8 | // Generator for display name tables. |
| 9 | |
| 10 | package main |
| 11 | |
| 12 | import ( |
| 13 | "bytes" |
| 14 | "flag" |
| 15 | "fmt" |
| 16 | "log" |
| 17 | "reflect" |
| 18 | "sort" |
| 19 | "strings" |
| 20 | |
Marcel van Lohuizen | b8e57db | 2015-12-05 12:05:49 +0100 | [diff] [blame] | 21 | "golang.org/x/text/internal/gen" |
| 22 | "golang.org/x/text/language" |
Marcel van Lohuizen | 51beaed | 2015-12-07 10:14:37 +0100 | [diff] [blame] | 23 | "golang.org/x/text/unicode/cldr" |
Marcel van Lohuizen | b8e57db | 2015-12-05 12:05:49 +0100 | [diff] [blame] | 24 | ) |
| 25 | |
// Command-line flags of the generator. The flag.Bool/flag.String values are
// pointers that are only meaningful once the flags have been parsed.
var (
	// test is the -test flag.
	// NOTE(review): not referenced elsewhere in the visible source — confirm
	// whether it is still needed.
	test = flag.Bool("test", false,
		"test existing tables; can be used to compare web data with package data.")
	// outputFile is the -output flag: the name of the generated Go file.
	outputFile = flag.String("output", "tables.go", "output file")

	// stats is the -stats flag.
	// NOTE(review): not referenced elsewhere in the visible source — confirm
	// whether it is still needed.
	stats = flag.Bool("stats", false, "prints statistics to stderr")

	// short selects the "short" alt variants instead of "stand-alone" ones
	// when the CLDR data is filtered.
	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
	// draft is the name of the minimal draft level; it is handed to
	// cldr.ParseDraft when the CLDR data is filtered.
	draft = flag.String("draft",
		"contributed",
		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
	// pkg is the -package flag: the package name for the generated file.
	pkg = flag.String("package",
		"display",
		"the name of the package in which the generated file is to be included")

	// tags restricts the locales and language tags that are processed. It
	// starts out empty, which tagSet.contains treats as "match everything".
	tags = newTagSet("tags",
		[]language.Tag{},
		"space-separated list of tags to include or empty for all")
	// dict is the set of tags for which Dictionary values are generated. It
	// is pre-populated with the tags returned by dictTags.
	dict = newTagSet("dict",
		dictTags(),
		"space-separated list or tags for which to include a Dictionary. "+
			`"" means the common list from go.text/language.`)
)
| 49 | |
| 50 | func dictTags() (tag []language.Tag) { |
| 51 | // TODO: replace with language.Common.Tags() once supported. |
| 52 | const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " + |
| 53 | "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " + |
| 54 | "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " + |
| 55 | "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " + |
| 56 | "zh zh-Hans zh-Hant zu" |
| 57 | |
| 58 | for _, s := range strings.Split(str, " ") { |
| 59 | tag = append(tag, language.MustParse(s)) |
| 60 | } |
| 61 | return tag |
| 62 | } |
| 63 | |
| 64 | func main() { |
| 65 | gen.Init() |
| 66 | |
| 67 | // Read the CLDR zip file. |
| 68 | r := gen.OpenCLDRCoreZip() |
| 69 | defer r.Close() |
| 70 | |
| 71 | d := &cldr.Decoder{} |
| 72 | d.SetDirFilter("main", "supplemental") |
| 73 | d.SetSectionFilter("localeDisplayNames") |
| 74 | data, err := d.DecodeZip(r) |
| 75 | if err != nil { |
| 76 | log.Fatalf("DecodeZip: %v", err) |
| 77 | } |
| 78 | |
| 79 | w := gen.NewCodeWriter() |
| 80 | defer w.WriteGoFile(*outputFile, "display") |
| 81 | |
| 82 | gen.WriteCLDRVersion(w) |
| 83 | |
| 84 | b := builder{ |
| 85 | w: w, |
| 86 | data: data, |
| 87 | group: make(map[string]*group), |
| 88 | } |
| 89 | b.generate() |
| 90 | } |
| 91 | |
// tagForm is the value whose Parse and MustParse methods are used throughout
// this file to turn locale and language strings into language.Tag values.
const tagForm = language.All

// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface. A tag is a member of the set when it maps to true.
type tagSet map[language.Tag]bool
| 97 | |
| 98 | func newTagSet(name string, tags []language.Tag, usage string) tagSet { |
| 99 | f := tagSet(make(map[language.Tag]bool)) |
| 100 | for _, t := range tags { |
| 101 | f[t] = true |
| 102 | } |
| 103 | flag.Var(f, name, usage) |
| 104 | return f |
| 105 | } |
| 106 | |
| 107 | // String implements the String method of the flag.Value interface. |
| 108 | func (f tagSet) String() string { |
| 109 | tags := []string{} |
| 110 | for t := range f { |
| 111 | tags = append(tags, t.String()) |
| 112 | } |
| 113 | sort.Strings(tags) |
| 114 | return strings.Join(tags, " ") |
| 115 | } |
| 116 | |
| 117 | // Set implements Set from the flag.Value interface. |
| 118 | func (f tagSet) Set(s string) error { |
| 119 | if s != "" { |
| 120 | for _, s := range strings.Split(s, " ") { |
| 121 | if s != "" { |
| 122 | tag, err := tagForm.Parse(s) |
| 123 | if err != nil { |
| 124 | return err |
| 125 | } |
| 126 | f[tag] = true |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | return nil |
| 131 | } |
| 132 | |
| 133 | func (f tagSet) contains(t language.Tag) bool { |
| 134 | if len(f) == 0 { |
| 135 | return true |
| 136 | } |
| 137 | return f[t] |
| 138 | } |
| 139 | |
// builder is used to create all tables with display name information.
type builder struct {
	// w receives all generated code.
	w *gen.CodeWriter

	// data is the decoded CLDR data the tables are derived from.
	data *cldr.CLDR

	// fromLocs is reset by setData.
	// NOTE(review): not otherwise used in the visible source.
	fromLocs []string

	// destination tags for the current locale.
	// NOTE(review): both fields are only reset by setData in the visible
	// source; writeGroup uses group.toTags instead.
	toTags     []string
	toTagIndex map[string]int

	// list of supported tags
	supported []language.Tag

	// key-value pairs per group
	group map[string]*group

	// statistics
	sizeIndex int // total size of all indexes of headers
	sizeData  int // total size of all data of headers
	totalSize int
}
| 163 | |
// group holds the names collected for one kind of table. The builder keeps
// one group per name passed to setData ("lang", "script", "region", "self").
type group struct {
	// Maps from a given language to the Namer data for this language.
	lang map[language.Tag]keyValues
	// headers holds one entry per supported tag; it is filled in by
	// builder.writeGroup.
	headers []header

	// toTags is the sorted, de-duplicated list of all keys that occur in any
	// keyValues of lang; it is computed by builder.writeGroup.
	toTags []string
	// NOTE(review): threeStart and fourPlusStart are not used in the visible
	// source.
	threeStart    int
	fourPlusStart int
}
| 173 | |
| 174 | // set sets the typ to the name for locale loc. |
| 175 | func (g *group) set(t language.Tag, typ, name string) { |
| 176 | kv := g.lang[t] |
| 177 | if kv == nil { |
| 178 | kv = make(keyValues) |
| 179 | g.lang[t] = kv |
| 180 | } |
| 181 | if kv[typ] == "" { |
| 182 | kv[typ] = name |
| 183 | } |
| 184 | } |
| 185 | |
// keyValues maps a key (the string form of a language tag, script or region)
// to its display name.
type keyValues map[string]string

// header holds the table data of a single supported tag.
type header struct {
	// tag is the supported tag this header belongs to.
	tag language.Tag
	// data is the concatenation of all display names, in key order.
	data string
	// index holds byte offsets into data: the name for key j occupies
	// data[index[j]:index[j+1]]. Trailing repeated offsets are trimmed by
	// builder.writeGroup.
	index []uint16
}
| 193 | |
// versionInfo is the format string for the deprecated Version constant that
// generate writes at the top of the generated code; its single verb receives
// cldr.Version.
var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v

`

// self is the tag ("mul") under which the names of languages in their own
// language are stored in the "self" group.
var self = language.MustParse("mul")
| 200 | |
// generate builds and writes all tables. It emits, in this order: the
// deprecated Version constant, the parents table, the "lang", "script" and
// "region" groups, the list of supported tags, the dictionaries and finally
// the "self" group. The order of the calls determines the layout of the
// generated file.
func (b *builder) generate() {
	fmt.Fprintf(b.w, versionInfo, cldr.Version)

	// Apply the alt and draft selection before any data is collected.
	b.filter()
	b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Languages != nil {
			for _, v := range ldn.Languages.Language {
				lang := v.Type
				if lang == "root" {
					// We prefer the data from "und"
					// TODO: allow both the data for root and und somehow.
					continue
				}
				tag := tagForm.MustParse(lang)
				// Only keep names of languages selected by the -tags flag.
				if tags.contains(tag) {
					g.set(loc, tag.String(), v.Data())
				}
			}
		}
	})
	b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Scripts != nil {
			for _, v := range ldn.Scripts.Script {
				code := language.MustParseScript(v.Type)
				if code.IsPrivateUse() { // Qaaa..Qabx
					// TODO: data currently appears to be very meager.
					// Reconsider if we have data for English.
					if loc == language.English {
						log.Fatal("Consider including data for private use scripts.")
					}
					continue
				}
				g.set(loc, code.String(), v.Data())
			}
		}
	})
	b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Territories != nil {
			for _, v := range ldn.Territories.Territory {
				g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
			}
		}
	})

	// The supported list is derived from the locales that contributed data
	// to any of the groups collected so far.
	b.makeSupported()

	b.writeParents()

	b.writeGroup("lang")
	b.writeGroup("script")
	b.writeGroup("region")

	// The supported tags are written as one string in which every tag is
	// followed by a "|".
	b.w.WriteConst("numSupported", len(b.supported))
	buf := bytes.Buffer{}
	for _, tag := range b.supported {
		fmt.Fprint(&buf, tag.String(), "|")
	}
	b.w.WriteConst("supported", buf.String())

	b.writeDictionaries()

	// From here on the only supported tag is self, so that the "self" group
	// is written with a single header.
	b.supported = []language.Tag{self}

	// Compute the names of locales in their own language. Some of these names
	// may be specified in their parent locales. We make four passes
	// (i = 0..3); in pass i every key is replaced by its i-th parent before
	// it is compared with the tag of the locale, so that successive parents
	// of tags are matched until a possible match is found.
	for i := 0; i < 4; i++ {
		b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
			parent := tag
			// Note that b shadows the builder here; it is the base language
			// of tag. After the first pass, a tag with a script but without
			// a region is also matched against its bare base language.
			if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
				parent, _ = language.Raw.Compose(b)
			}
			if ldn.Languages != nil {
				for _, v := range ldn.Languages.Language {
					key := tagForm.MustParse(v.Type)
					saved := key
					if key == parent {
						g.set(self, tag.String(), v.Data())
					}
					for k := 0; k < i; k++ {
						key = key.Parent()
					}
					if key == tag {
						g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
					}
				}
			}
		})
	}

	b.writeGroup("self")
}
| 295 | |
| 296 | func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) { |
| 297 | b.sizeIndex = 0 |
| 298 | b.sizeData = 0 |
| 299 | b.toTags = nil |
| 300 | b.fromLocs = nil |
| 301 | b.toTagIndex = make(map[string]int) |
| 302 | |
| 303 | g := b.group[name] |
| 304 | if g == nil { |
| 305 | g = &group{lang: make(map[language.Tag]keyValues)} |
| 306 | b.group[name] = g |
| 307 | } |
| 308 | for _, loc := range b.data.Locales() { |
| 309 | // We use RawLDML instead of LDML as we are managing our own inheritance |
| 310 | // in this implementation. |
| 311 | ldml := b.data.RawLDML(loc) |
| 312 | |
| 313 | // We do not support the POSIX variant (it is not a supported BCP 47 |
| 314 | // variant). This locale also doesn't happen to contain any data, so |
| 315 | // we'll skip it by checking for this. |
| 316 | tag, err := tagForm.Parse(loc) |
| 317 | if err != nil { |
| 318 | if ldml.LocaleDisplayNames != nil { |
| 319 | log.Fatalf("setData: %v", err) |
| 320 | } |
| 321 | continue |
| 322 | } |
| 323 | if ldml.LocaleDisplayNames != nil && tags.contains(tag) { |
| 324 | f(g, tag, ldml.LocaleDisplayNames) |
| 325 | } |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | func (b *builder) filter() { |
| 330 | filter := func(s *cldr.Slice) { |
| 331 | if *short { |
| 332 | s.SelectOnePerGroup("alt", []string{"short", ""}) |
| 333 | } else { |
| 334 | s.SelectOnePerGroup("alt", []string{"stand-alone", ""}) |
| 335 | } |
| 336 | d, err := cldr.ParseDraft(*draft) |
| 337 | if err != nil { |
| 338 | log.Fatalf("filter: %v", err) |
| 339 | } |
| 340 | s.SelectDraft(d) |
| 341 | } |
| 342 | for _, loc := range b.data.Locales() { |
| 343 | if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil { |
| 344 | if ldn.Languages != nil { |
| 345 | s := cldr.MakeSlice(&ldn.Languages.Language) |
| 346 | if filter(&s); len(ldn.Languages.Language) == 0 { |
| 347 | ldn.Languages = nil |
| 348 | } |
| 349 | } |
| 350 | if ldn.Scripts != nil { |
| 351 | s := cldr.MakeSlice(&ldn.Scripts.Script) |
| 352 | if filter(&s); len(ldn.Scripts.Script) == 0 { |
| 353 | ldn.Scripts = nil |
| 354 | } |
| 355 | } |
| 356 | if ldn.Territories != nil { |
| 357 | s := cldr.MakeSlice(&ldn.Territories.Territory) |
| 358 | if filter(&s); len(ldn.Territories.Territory) == 0 { |
| 359 | ldn.Territories = nil |
| 360 | } |
| 361 | } |
| 362 | } |
| 363 | } |
| 364 | } |
| 365 | |
| 366 | // makeSupported creates a list of all supported locales. |
| 367 | func (b *builder) makeSupported() { |
| 368 | // tags across groups |
| 369 | for _, g := range b.group { |
| 370 | for t, _ := range g.lang { |
| 371 | b.supported = append(b.supported, t) |
| 372 | } |
| 373 | } |
| 374 | b.supported = b.supported[:unique(tagsSorter(b.supported))] |
| 375 | |
| 376 | } |
| 377 | |
| 378 | type tagsSorter []language.Tag |
| 379 | |
| 380 | func (a tagsSorter) Len() int { return len(a) } |
| 381 | func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
| 382 | func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() } |
| 383 | |
| 384 | func (b *builder) writeGroup(name string) { |
| 385 | g := b.group[name] |
| 386 | |
| 387 | for _, kv := range g.lang { |
| 388 | for t, _ := range kv { |
| 389 | g.toTags = append(g.toTags, t) |
| 390 | } |
| 391 | } |
| 392 | g.toTags = g.toTags[:unique(tagsBySize(g.toTags))] |
| 393 | |
| 394 | // Allocate header per supported value. |
| 395 | g.headers = make([]header, len(b.supported)) |
| 396 | for i, sup := range b.supported { |
| 397 | kv, ok := g.lang[sup] |
| 398 | if !ok { |
| 399 | g.headers[i].tag = sup |
| 400 | continue |
| 401 | } |
| 402 | data := []byte{} |
| 403 | index := make([]uint16, len(g.toTags), len(g.toTags)+1) |
| 404 | for j, t := range g.toTags { |
| 405 | index[j] = uint16(len(data)) |
| 406 | data = append(data, kv[t]...) |
| 407 | } |
| 408 | index = append(index, uint16(len(data))) |
| 409 | |
| 410 | // Trim the tail of the index. |
| 411 | // TODO: indexes can be reduced in size quite a bit more. |
| 412 | n := len(index) |
| 413 | for ; n >= 2 && index[n-2] == index[n-1]; n-- { |
| 414 | } |
| 415 | index = index[:n] |
| 416 | |
| 417 | // Workaround for a bug in CLDR 26. |
Kevin Burke | 647d7ef | 2018-08-04 08:55:54 -0700 | [diff] [blame] | 418 | // See https://unicode.org/cldr/trac/ticket/8042. |
Marcel van Lohuizen | b8e57db | 2015-12-05 12:05:49 +0100 | [diff] [blame] | 419 | if cldr.Version == "26" && sup.String() == "hsb" { |
| 420 | data = bytes.Replace(data, []byte{'"'}, nil, 1) |
| 421 | } |
| 422 | g.headers[i] = header{sup, string(data), index} |
| 423 | } |
| 424 | g.writeTable(b.w, name) |
| 425 | } |
| 426 | |
// tagsBySize implements sort.Interface for a list of keys. Keys of at most
// four bytes are ordered by length first and alphabetically within the same
// length, and they sort before all longer keys. Keys of more than four bytes
// are ordered alphabetically, regardless of their length.
type tagsBySize []string

// Len returns the number of keys.
func (l tagsBySize) Len() int {
	return len(l)
}

// Swap exchanges the keys at positions i and j.
func (l tagsBySize) Swap(i, j int) {
	l[i], l[j] = l[j], l[i]
}

// Less reports whether key i sorts before key j.
func (l tagsBySize) Less(i, j int) bool {
	x, y := l[i], l[j]
	nx, ny := len(x), len(y)
	// Length only decides when the lengths differ and at least one of the
	// keys is a short (single-subtag sized) key.
	if nx == ny || (nx > 4 && ny > 4) {
		return x < y
	}
	return nx < ny
}
| 439 | |
| 440 | // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent |
| 441 | // of tags[i]. |
| 442 | func parentIndices(tags []language.Tag) []int16 { |
| 443 | index := make(map[language.Tag]int16) |
| 444 | for i, t := range tags { |
| 445 | index[t] = int16(i) |
| 446 | } |
| 447 | |
| 448 | // Construct default parents. |
| 449 | parents := make([]int16, len(tags)) |
| 450 | for i, t := range tags { |
| 451 | parents[i] = -1 |
| 452 | for t = t.Parent(); t != language.Und; t = t.Parent() { |
| 453 | if j, ok := index[t]; ok { |
| 454 | parents[i] = j |
| 455 | break |
| 456 | } |
| 457 | } |
| 458 | } |
| 459 | return parents |
| 460 | } |
| 461 | |
| 462 | func (b *builder) writeParents() { |
| 463 | parents := parentIndices(b.supported) |
| 464 | fmt.Fprintf(b.w, "var parents = ") |
| 465 | b.w.WriteArray(parents) |
| 466 | } |
| 467 | |
| 468 | // writeKeys writes keys to a special index used by the display package. |
| 469 | // tags are assumed to be sorted by length. |
| 470 | func writeKeys(w *gen.CodeWriter, name string, keys []string) { |
| 471 | w.Size += int(3 * reflect.TypeOf("").Size()) |
| 472 | w.WriteComment("Number of keys: %d", len(keys)) |
| 473 | fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name) |
| 474 | for i := 2; i <= 4; i++ { |
| 475 | sub := []string{} |
| 476 | for _, t := range keys { |
| 477 | if len(t) != i { |
| 478 | break |
| 479 | } |
| 480 | sub = append(sub, t) |
| 481 | } |
| 482 | s := strings.Join(sub, "") |
| 483 | w.WriteString(s) |
| 484 | fmt.Fprintf(w, ",\n") |
| 485 | keys = keys[len(sub):] |
| 486 | } |
| 487 | fmt.Fprintln(w, "\t}") |
| 488 | if len(keys) > 0 { |
| 489 | w.Size += int(reflect.TypeOf([]string{}).Size()) |
| 490 | fmt.Fprintf(w, "\t%sTagsLong = ", name) |
| 491 | w.WriteSlice(keys) |
| 492 | } |
| 493 | fmt.Fprintln(w, ")\n") |
| 494 | } |
| 495 | |
| 496 | // identifier creates an identifier from the given tag. |
| 497 | func identifier(t language.Tag) string { |
| 498 | return strings.Replace(t.String(), "-", "", -1) |
| 499 | } |
| 500 | |
| 501 | func (h *header) writeEntry(w *gen.CodeWriter, name string) { |
| 502 | if len(dict) > 0 && dict.contains(h.tag) { |
| 503 | fmt.Fprintf(w, "\t{ // %s\n", h.tag) |
| 504 | fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name) |
| 505 | fmt.Fprintln(w, "\t},") |
| 506 | } else if len(h.data) == 0 { |
| 507 | fmt.Fprintln(w, "\t\t{}, //", h.tag) |
| 508 | } else { |
| 509 | fmt.Fprintf(w, "\t{ // %s\n", h.tag) |
| 510 | w.WriteString(h.data) |
| 511 | fmt.Fprintln(w, ",") |
| 512 | w.WriteSlice(h.index) |
| 513 | fmt.Fprintln(w, ",\n\t},") |
| 514 | } |
| 515 | } |
| 516 | |
| 517 | // write the data for the given header as single entries. The size for this data |
| 518 | // was already accounted for in writeEntry. |
| 519 | func (h *header) writeSingle(w *gen.CodeWriter, name string) { |
| 520 | if len(dict) > 0 && dict.contains(h.tag) { |
| 521 | tag := identifier(h.tag) |
| 522 | w.WriteConst(tag+name+"Str", h.data) |
| 523 | |
| 524 | // Note that we create a slice instead of an array. If we use an array |
| 525 | // we need to refer to it as a[:] in other tables, which will cause the |
| 526 | // array to always be included by the linker. See Issue 7651. |
| 527 | w.WriteVar(tag+name+"Idx", h.index) |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | // WriteTable writes an entry for a single Namer. |
| 532 | func (g *group) writeTable(w *gen.CodeWriter, name string) { |
| 533 | start := w.Size |
| 534 | writeKeys(w, name, g.toTags) |
| 535 | w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size()) |
| 536 | |
| 537 | fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers)) |
| 538 | |
| 539 | title := strings.Title(name) |
| 540 | for _, h := range g.headers { |
| 541 | h.writeEntry(w, title) |
| 542 | } |
| 543 | fmt.Fprintln(w, "}\n") |
| 544 | |
| 545 | for _, h := range g.headers { |
| 546 | h.writeSingle(w, title) |
| 547 | } |
| 548 | n := w.Size - start |
| 549 | fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000) |
| 550 | } |
| 551 | |
| 552 | func (b *builder) writeDictionaries() { |
| 553 | fmt.Fprintln(b.w, "// Dictionary entries of frequent languages") |
| 554 | fmt.Fprintln(b.w, "var (") |
| 555 | parents := parentIndices(b.supported) |
| 556 | |
| 557 | for i, t := range b.supported { |
| 558 | if dict.contains(t) { |
| 559 | ident := identifier(t) |
| 560 | fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t) |
| 561 | if p := parents[i]; p == -1 { |
| 562 | fmt.Fprintln(b.w, "\t\tnil,") |
| 563 | } else { |
| 564 | fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p])) |
| 565 | } |
| 566 | fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident) |
| 567 | fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident) |
| 568 | fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident) |
| 569 | fmt.Fprintln(b.w, "\t}") |
| 570 | } |
| 571 | } |
| 572 | fmt.Fprintln(b.w, ")") |
| 573 | |
| 574 | var s string |
| 575 | var a []uint16 |
| 576 | sz := reflect.TypeOf(s).Size() |
| 577 | sz += reflect.TypeOf(a).Size() |
| 578 | sz *= 3 |
| 579 | sz += reflect.TypeOf(&a).Size() |
| 580 | n := int(sz) * len(dict) |
| 581 | fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000) |
| 582 | |
| 583 | b.w.Size += n |
| 584 | } |
| 585 | |
// unique sorts a and moves one representative of every distinct value to the
// front, swapping duplicates past position k, where k is the number of
// distinct values. It returns k. Two elements count as duplicates when, after
// sorting, neither is Less than the other.
func unique(a sort.Interface) int {
	n := a.Len()
	if n == 0 {
		return 0
	}
	sort.Sort(a)

	last := 0 // position of the most recently kept distinct element
	for next := 1; next < n; next++ {
		if !a.Less(last, next) {
			// Same value as the last kept element: leave it behind.
			continue
		}
		last++
		if last != next {
			a.Swap(last, next)
		}
	}
	return last + 1
}