language: add Go test data in standard form

The test data is generated from data_test.go.

The generation code is transitional and will
be removed with the next change.

Change-Id: I50e0a91e9d3a5e7cede617714619a3b7be031b8c
Reviewed-on: https://go-review.googlesource.com/55390
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/data_test.go b/language/data_test.go
index dbbf94a..e1b9606 100644
--- a/language/data_test.go
+++ b/language/data_test.go
@@ -4,6 +4,35 @@
 
 package language
 
+import (
+	"flag"
+	"fmt"
+	"os"
+	"testing"
+)
+
+var outfile = flag.String("genucd", "", "generate UCD file from test data")
+
+// TestGenerate writes matchTests to the -genucd file: a "# comment" header per group,
+// then one "supported ; desired ; match" line per case. It is a no-op if the flag is unset.
+func TestGenerate(t *testing.T) {
+	if *outfile == "" {
+		return
+	}
+	w, err := os.Create(*outfile)
+	if err != nil {
+		t.Fatal(err) // no output file to write to, so stop instead of continuing
+	}
+	defer w.Close()
+	for _, tc := range matchTests {
+		fmt.Fprintln(w, "# "+tc.comment)
+		for _, m := range tc.test { // m, not t: avoid shadowing the *testing.T parameter
+			fmt.Fprintf(w, "%s ; 	%s ; 	%s\n", tc.supported, m.desired, m.match)
+		}
+		fmt.Fprintln(w)
+	}
+}
+
 type matchTest struct {
 	comment   string
 	supported string
diff --git a/language/testdata/GoLocaleMatcherTest.txt b/language/testdata/GoLocaleMatcherTest.txt
new file mode 100644
index 0000000..cc59e9b
--- /dev/null
+++ b/language/testdata/GoLocaleMatcherTest.txt
@@ -0,0 +1,224 @@
+# basics
+fr, en-GB, en ; 	en-GB ; 	en-GB
+fr, en-GB, en ; 	en-US ; 	en
+fr, en-GB, en ; 	fr-FR ; 	fr
+fr, en-GB, en ; 	ja-JP ; 	fr
+
+# script fallbacks
+zh-CN, zh-TW, iw ; 	zh-Hant ; 	zh-TW
+zh-CN, zh-TW, iw ; 	zh ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-Hans-CN ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-Hant-HK ; 	zh-TW
+zh-CN, zh-TW, iw ; 	he-IT ; 	iw
+
+# language-specific script fallbacks 1
+en, sr, nl ; 	sr-Latn ; 	sr
+en, sr, nl ; 	sh ; 	en
+en, sr, nl ; 	hr ; 	en
+en, sr, nl ; 	bs ; 	en
+en, sr, nl ; 	nl-Cyrl ; 	sr
+
+# language-specific script fallbacks 2
+en, sh ; 	sr ; 	sh
+en, sh ; 	sr-Cyrl ; 	sh
+en, sh ; 	hr ; 	sh
+
+# don't match hr to sr-Latn
+en, sr-Latn ; 	hr ; 	en
+
+# both deprecated and not
+fil, tl, iw, he ; 	he-IT ; 	he
+fil, tl, iw, he ; 	he ; 	he
+fil, tl, iw, he ; 	iw ; 	iw
+fil, tl, iw, he ; 	fil-IT ; 	fil
+fil, tl, iw, he ; 	fil ; 	fil
+fil, tl, iw, he ; 	tl ; 	tl
+
+# nearby languages
+en, fil, ro, nn ; 	tl ; 	fil
+en, fil, ro, nn ; 	mo ; 	ro
+en, fil, ro, nn ; 	nb ; 	nn
+en, fil, ro, nn ; 	ja ; 	en
+
+# nearby languages: Nynorsk to Bokmål
+en, nb ; 	nn ; 	nb
+
+# nearby languages: Danish does not match nn
+en, nn ; 	da ; 	en
+
+# nearby languages: Danish matches no
+en, no ; 	da ; 	no
+
+# nearby languages: Danish matches nb
+en, nb ; 	da ; 	nb
+
+# prefer matching languages over language variants.
+nn, en-GB ; 	no, en-US ; 	en-GB
+nn, en-GB ; 	nb, en-US ; 	en-GB
+
+# deprecated version is closer than same language with other differences
+nl, he, en-GB ; 	iw, en-US ; 	he
+
+# macro equivalent is closer than same language with other differences
+nl, zh, en-GB, no ; 	cmn, en-US ; 	zh
+nl, zh, en-GB, no ; 	nb, en-US ; 	no
+
+# legacy equivalent is closer than same language with other differences
+nl, fil, en-GB ; 	tl, en-US ; 	fil
+
+# exact over equivalent
+en, ro, mo, ro-MD ; 	ro ; 	ro
+en, ro, mo, ro-MD ; 	mo ; 	mo
+en, ro, mo, ro-MD ; 	ro-MD ; 	ro-MD
+
+# maximization of legacy
+sr-Cyrl, sr-Latn, ro, ro-MD ; 	sh ; 	sr-Latn
+sr-Cyrl, sr-Latn, ro, ro-MD ; 	mo ; 	ro-MD
+
+# empty
+ ; 	fr ; 	und
+ ; 	en ; 	und
+
+# private use subtags
+fr, en-GB, x-bork, es-ES, es-419 ; 	x-piglatin ; 	fr
+fr, en-GB, x-bork, es-ES, es-419 ; 	x-bork ; 	x-bork
+
+# grandfathered codes
+fr, i-klingon, en-Latn-US ; 	en-GB-oed ; 	en-Latn-US
+fr, i-klingon, en-Latn-US ; 	i-klingon ; 	tlh
+
+# exact match
+fr, en-GB, ja, es-ES, es-MX ; 	ja, de ; 	ja
+
+# simple variant match
+fr, en-GB, ja, es-ES, es-MX ; 	de, en-US ; 	en-GB
+fr, en-GB, ja, es-ES, es-MX ; 	de, zh ; 	fr
+
+# best match for traditional Chinese
+fr, zh-Hans-CN, en-US ; 	zh-TW ; 	zh-Hans-CN
+fr, zh-Hans-CN, en-US ; 	zh-Hant ; 	zh-Hans-CN
+fr, zh-Hans-CN, en-US ; 	zh-TW, en ; 	en-US
+fr, zh-Hans-CN, en-US ; 	zh-Hant-CN, en ; 	en-US
+fr, zh-Hans-CN, en-US ; 	zh-Hans, en ; 	zh-Hans-CN
+
+# more specific script should win in case regions are identical
+af, af-Latn, af-Arab ; 	af ; 	af
+af, af-Latn, af-Arab ; 	af-ZA ; 	af
+af, af-Latn, af-Arab ; 	af-Latn-ZA ; 	af-Latn
+af, af-Latn, af-Arab ; 	af-Latn ; 	af-Latn
+
+# more specific region should win
+nl, nl-NL, nl-BE ; 	nl ; 	nl
+nl, nl-NL, nl-BE ; 	nl-Latn ; 	nl
+nl, nl-NL, nl-BE ; 	nl-Latn-NL ; 	nl-NL
+nl, nl-NL, nl-BE ; 	nl-NL ; 	nl-NL
+
+# region may replace matched if matched is enclosing
+es-419,es ; 	es-MX ; 	es-MX
+es-419,es ; 	es-SG ; 	es
+
+# more specific region wins over more specific script
+nl, nl-Latn, nl-NL, nl-BE ; 	nl ; 	nl
+nl, nl-Latn, nl-NL, nl-BE ; 	nl-Latn ; 	nl-Latn
+nl, nl-Latn, nl-NL, nl-BE ; 	nl-NL ; 	nl-NL
+nl, nl-Latn, nl-NL, nl-BE ; 	nl-Latn-NL ; 	nl-NL
+
+# region distance Portuguese
+pt, pt-PT ; 	pt-ES ; 	pt-PT
+
+# region distance French
+en, fr, fr-CA, fr-CH ; 	fr-US ; 	fr-CA
+
+# region distance German
+de-AT, de-DE, de-CH ; 	de ; 	de-DE
+
+# en-AU is closer to en-GB than to en (which is en-US)
+en, en-GB, es-ES, es-419 ; 	en-AU ; 	en-GB
+en, en-GB, es-ES, es-419 ; 	es-MX ; 	es-MX
+en, en-GB, es-ES, es-419 ; 	es-PT ; 	es-ES
+
+# undefined
+it, fr ; 	und ; 	it
+
+# und does not match en
+it, en ; 	und ; 	it
+
+# undefined in priority list
+it, und ; 	und ; 	und
+it, und ; 	en ; 	it
+
+# undefined
+it, fr, zh ; 	und-FR ; 	fr
+it, fr, zh ; 	und-CN ; 	zh
+it, fr, zh ; 	und-Hans ; 	zh
+it, fr, zh ; 	und-Hant ; 	zh
+it, fr, zh ; 	und-Latn ; 	it
+
+# match on maximized tag
+fr, en-GB, ja, es-ES, es-MX ; 	ja-JP, en-GB ; 	ja
+fr, en-GB, ja, es-ES, es-MX ; 	ja-Jpan-JP, en-GB ; 	ja
+
+# pick best maximized tag
+ja, ja-Jpan-US, ja-JP, en, ru ; 	ja-Jpan, ru ; 	ja
+ja, ja-Jpan-US, ja-JP, en, ru ; 	ja-JP, ru ; 	ja-JP
+ja, ja-Jpan-US, ja-JP, en, ru ; 	ja-US, ru ; 	ja-Jpan-US
+
+# termination: pick best maximized match
+ja, ja-Jpan, ja-JP, en, ru ; 	ja-Jpan-JP, ru ; 	ja-JP
+ja, ja-Jpan, ja-JP, en, ru ; 	ja-Jpan, ru ; 	ja-Jpan
+
+# no match on maximized
+en, de, fr, ja ; 	de-CH, fr ; 	fr
+
+# parent relation preserved
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-150 ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-AU ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-BE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-GG ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-GI ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-HK ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-IE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-IM ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-IN ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-JE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-MT ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-NZ ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-PK ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-SG ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-DE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-MT ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-AR ; 	es-AR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-BO ; 	es-BO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CL ; 	es-CL
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CO ; 	es-CO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CR ; 	es-CR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CU ; 	es-CU
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-DO ; 	es-DO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-EC ; 	es-EC
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-GT ; 	es-GT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-HN ; 	es-HN
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-MX ; 	es-MX
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-NI ; 	es-NI
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PA ; 	es-PA
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PE ; 	es-PE
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PR ; 	es-PR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PT ; 	es
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PY ; 	es-PY
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-SV ; 	es-SV
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-US ; 	es-419
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-UY ; 	es-UY
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-VE ; 	es-VE
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-AO ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-CV ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-GW ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-MO ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-MZ ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-ST ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-TL ; 	pt-PT
+
+# preserve Unicode extension
+en, de, sl-nedis ; 	de-FR-u-co-phonebk ; 	de-u-co-phonebk
+en, de, sl-nedis ; 	sl-nedis-u-cu-eur ; 	sl-nedis-u-cu-eur
+en, de, sl-nedis ; 	sl-u-cu-eur ; 	sl-nedis-u-cu-eur
+en, de, sl-nedis ; 	sl-HR-nedis-u-cu-eur ; 	sl-nedis-u-cu-eur
+