language: add Go test data in standard form
The test data is generated from data_test.go.
The generation code is transitional and will
be removed with the next change.
Change-Id: I50e0a91e9d3a5e7cede617714619a3b7be031b8c
Reviewed-on: https://go-review.googlesource.com/55390
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/data_test.go b/language/data_test.go
index dbbf94a..e1b9606 100644
--- a/language/data_test.go
+++ b/language/data_test.go
@@ -4,6 +4,35 @@
package language
+import (
+ "flag"
+ "fmt"
+ "os"
+ "testing"
+)
+
+var outfile = flag.String("genucd", "", "generate UCD file from test data") // output path read by TestGenerate; empty (the default) makes it return immediately
+
+func TestGenerate(t *testing.T) { // dumps matchTests to the -genucd file; does nothing when the flag is unset
+	if *outfile == "" {
+		return
+	}
+	// Without an output file nothing can be written, so stop the test right away.
+	w, err := os.Create(*outfile)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer w.Close()
+	// Each group is a "# comment" line, one "supported ; desired ; match" line per case, then a blank line.
+	for _, tc := range matchTests {
+		fmt.Fprintln(w, "# "+tc.comment)
+		for _, tt := range tc.test { // tt, not t: keep *testing.T reachable inside the loop
+			fmt.Fprintf(w, "%s ; %s ; %s\n", tc.supported, tt.desired, tt.match)
+		}
+		fmt.Fprintln(w)
+	}
+}
+
type matchTest struct {
comment string
supported string
diff --git a/language/testdata/GoLocaleMatcherTest.txt b/language/testdata/GoLocaleMatcherTest.txt
new file mode 100644
index 0000000..cc59e9b
--- /dev/null
+++ b/language/testdata/GoLocaleMatcherTest.txt
@@ -0,0 +1,224 @@
+# basics
+fr, en-GB, en ; en-GB ; en-GB
+fr, en-GB, en ; en-US ; en
+fr, en-GB, en ; fr-FR ; fr
+fr, en-GB, en ; ja-JP ; fr
+
+# script fallbacks
+zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
+zh-CN, zh-TW, iw ; zh ; zh-CN
+zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
+zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
+zh-CN, zh-TW, iw ; he-IT ; iw
+
+# language-specific script fallbacks 1
+en, sr, nl ; sr-Latn ; sr
+en, sr, nl ; sh ; en
+en, sr, nl ; hr ; en
+en, sr, nl ; bs ; en
+en, sr, nl ; nl-Cyrl ; sr
+
+# language-specific script fallbacks 2
+en, sh ; sr ; sh
+en, sh ; sr-Cyrl ; sh
+en, sh ; hr ; sh
+
+# don't match hr to sr-Latn
+en, sr-Latn ; hr ; en
+
+# both deprecated and not
+fil, tl, iw, he ; he-IT ; he
+fil, tl, iw, he ; he ; he
+fil, tl, iw, he ; iw ; iw
+fil, tl, iw, he ; fil-IT ; fil
+fil, tl, iw, he ; fil ; fil
+fil, tl, iw, he ; tl ; tl
+
+# nearby languages
+en, fil, ro, nn ; tl ; fil
+en, fil, ro, nn ; mo ; ro
+en, fil, ro, nn ; nb ; nn
+en, fil, ro, nn ; ja ; en
+
+# nearby languages: Nynorsk to Bokmål
+en, nb ; nn ; nb
+
+# nearby languages: Danish does not match nn
+en, nn ; da ; en
+
+# nearby languages: Danish matches no
+en, no ; da ; no
+
+# nearby languages: Danish matches nb
+en, nb ; da ; nb
+
+# prefer matching languages over language variants.
+nn, en-GB ; no, en-US ; en-GB
+nn, en-GB ; nb, en-US ; en-GB
+
+# deprecated version is closer than same language with other differences
+nl, he, en-GB ; iw, en-US ; he
+
+# macro equivalent is closer than same language with other differences
+nl, zh, en-GB, no ; cmn, en-US ; zh
+nl, zh, en-GB, no ; nb, en-US ; no
+
+# legacy equivalent is closer than same language with other differences
+nl, fil, en-GB ; tl, en-US ; fil
+
+# exact over equivalent
+en, ro, mo, ro-MD ; ro ; ro
+en, ro, mo, ro-MD ; mo ; mo
+en, ro, mo, ro-MD ; ro-MD ; ro-MD
+
+# maximization of legacy
+sr-Cyrl, sr-Latn, ro, ro-MD ; sh ; sr-Latn
+sr-Cyrl, sr-Latn, ro, ro-MD ; mo ; ro-MD
+
+# empty
+ ; fr ; und
+ ; en ; und
+
+# private use subtags
+fr, en-GB, x-bork, es-ES, es-419 ; x-piglatin ; fr
+fr, en-GB, x-bork, es-ES, es-419 ; x-bork ; x-bork
+
+# grandfathered codes
+fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
+fr, i-klingon, en-Latn-US ; i-klingon ; tlh
+
+# exact match
+fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
+
+# simple variant match
+fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB
+fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
+
+# best match for traditional Chinese
+fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
+fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
+fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
+fr, zh-Hans-CN, en-US ; zh-Hant-CN, en ; en-US
+fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
+
+# more specific script should win in case regions are identical
+af, af-Latn, af-Arab ; af ; af
+af, af-Latn, af-Arab ; af-ZA ; af
+af, af-Latn, af-Arab ; af-Latn-ZA ; af-Latn
+af, af-Latn, af-Arab ; af-Latn ; af-Latn
+
+# more specific region should win
+nl, nl-NL, nl-BE ; nl ; nl
+nl, nl-NL, nl-BE ; nl-Latn ; nl
+nl, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
+nl, nl-NL, nl-BE ; nl-NL ; nl-NL
+
+# region may replace matched if matched is enclosing
+es-419,es ; es-MX ; es-MX
+es-419,es ; es-SG ; es
+
+# more specific region wins over more specific script
+nl, nl-Latn, nl-NL, nl-BE ; nl ; nl
+nl, nl-Latn, nl-NL, nl-BE ; nl-Latn ; nl-Latn
+nl, nl-Latn, nl-NL, nl-BE ; nl-NL ; nl-NL
+nl, nl-Latn, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
+
+# region distance Portuguese
+pt, pt-PT ; pt-ES ; pt-PT
+
+# region distance French
+en, fr, fr-CA, fr-CH ; fr-US ; fr-CA
+
+# region distance German
+de-AT, de-DE, de-CH ; de ; de-DE
+
+# en-AU is closer to en-GB than to en (which is en-US)
+en, en-GB, es-ES, es-419 ; en-AU ; en-GB
+en, en-GB, es-ES, es-419 ; es-MX ; es-MX
+en, en-GB, es-ES, es-419 ; es-PT ; es-ES
+
+# undefined
+it, fr ; und ; it
+
+# und does not match en
+it, en ; und ; it
+
+# undefined in priority list
+it, und ; und ; und
+it, und ; en ; it
+
+# undefined
+it, fr, zh ; und-FR ; fr
+it, fr, zh ; und-CN ; zh
+it, fr, zh ; und-Hans ; zh
+it, fr, zh ; und-Hant ; zh
+it, fr, zh ; und-Latn ; it
+
+# match on maximized tag
+fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja
+fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja
+
+# pick best maximized tag
+ja, ja-Jpan-US, ja-JP, en, ru ; ja-Jpan, ru ; ja
+ja, ja-Jpan-US, ja-JP, en, ru ; ja-JP, ru ; ja-JP
+ja, ja-Jpan-US, ja-JP, en, ru ; ja-US, ru ; ja-Jpan-US
+
+# termination: pick best maximized match
+ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan-JP, ru ; ja-JP
+ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan, ru ; ja-Jpan
+
+# no match on maximized
+en, de, fr, ja ; de-CH, fr ; fr
+
+# parent relation preserved
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-150 ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-AU ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-BE ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GG ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GI ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-HK ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IE ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IM ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IN ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-JE ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-NZ ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-PK ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-SG ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-DE ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-AR ; es-AR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-BO ; es-BO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CL ; es-CL
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CO ; es-CO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CR ; es-CR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CU ; es-CU
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-DO ; es-DO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-EC ; es-EC
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-GT ; es-GT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-HN ; es-HN
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-MX ; es-MX
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-NI ; es-NI
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PA ; es-PA
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PE ; es-PE
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PR ; es-PR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PT ; es
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PY ; es-PY
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-SV ; es-SV
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-US ; es-419
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-UY ; es-UY
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-VE ; es-VE
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-AO ; pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-CV ; pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-GW ; pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MO ; pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MZ ; pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-ST ; pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-TL ; pt-PT
+
+# preserve Unicode extension
+en, de, sl-nedis ; de-FR-u-co-phonebk ; de-u-co-phonebk
+en, de, sl-nedis ; sl-nedis-u-cu-eur ; sl-nedis-u-cu-eur
+en, de, sl-nedis ; sl-u-cu-eur ; sl-nedis-u-cu-eur
+en, de, sl-nedis ; sl-HR-nedis-u-cu-eur ; sl-nedis-u-cu-eur
+