language: turn parsing panics into ErrSyntax

We keep finding new panics in the language parser.
Limit the damage by reporting those inputs as syntax errors.

Change-Id: I786fe127c3df7e4c8e042d15095d3acf3c4e4a50
Reviewed-on: https://go-review.googlesource.com/c/text/+/340830
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
diff --git a/internal/language/language.go b/internal/language/language.go
index f41aedc..6105bc7 100644
--- a/internal/language/language.go
+++ b/internal/language/language.go
@@ -251,6 +251,13 @@
 
 // ParseExtension parses s as an extension and returns it on success.
 func ParseExtension(s string) (ext string, err error) {
+	defer func() {
+		if recover() != nil {
+			ext = ""
+			err = ErrSyntax
+		}
+	}()
+
 	scan := makeScannerString(s)
 	var end int
 	if n := len(scan.token); n != 1 {
@@ -461,7 +468,14 @@
 // ParseBase parses a 2- or 3-letter ISO 639 code.
 // It returns a ValueError if s is a well-formed but unknown language identifier
 // or another error if another error occurred.
-func ParseBase(s string) (Language, error) {
+func ParseBase(s string) (l Language, err error) {
+	defer func() {
+		if recover() != nil {
+			l = 0
+			err = ErrSyntax
+		}
+	}()
+
 	if n := len(s); n < 2 || 3 < n {
 		return 0, ErrSyntax
 	}
@@ -472,7 +486,14 @@
 // ParseScript parses a 4-letter ISO 15924 code.
 // It returns a ValueError if s is a well-formed but unknown script identifier
 // or another error if another error occurred.
-func ParseScript(s string) (Script, error) {
+func ParseScript(s string) (scr Script, err error) {
+	defer func() {
+		if recover() != nil {
+			scr = 0
+			err = ErrSyntax
+		}
+	}()
+
 	if len(s) != 4 {
 		return 0, ErrSyntax
 	}
@@ -489,7 +510,14 @@
 // ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
 // It returns a ValueError if s is a well-formed but unknown region identifier
 // or another error if another error occurred.
-func ParseRegion(s string) (Region, error) {
+func ParseRegion(s string) (r Region, err error) {
+	defer func() {
+		if recover() != nil {
+			r = 0
+			err = ErrSyntax
+		}
+	}()
+
 	if n := len(s); n < 2 || 3 < n {
 		return 0, ErrSyntax
 	}
@@ -578,7 +606,14 @@
 
 // ParseVariant parses and returns a Variant. An error is returned if s is not
 // a valid variant.
-func ParseVariant(s string) (Variant, error) {
+func ParseVariant(s string) (v Variant, err error) {
+	defer func() {
+		if recover() != nil {
+			v = Variant{}
+			err = ErrSyntax
+		}
+	}()
+
 	s = strings.ToLower(s)
 	if id, ok := variantIndex[s]; ok {
 		return Variant{id, s}, nil
diff --git a/internal/language/parse.go b/internal/language/parse.go
index c696fd0..47ee0fe 100644
--- a/internal/language/parse.go
+++ b/internal/language/parse.go
@@ -232,6 +232,13 @@
 	if s == "" {
 		return Und, ErrSyntax
 	}
+	defer func() {
+		if recover() != nil {
+			t = Und
+			err = ErrSyntax
+			return
+		}
+	}()
 	if len(s) <= maxAltTaglen {
 		b := [maxAltTaglen]byte{}
 		for i, c := range s {
diff --git a/language/parse.go b/language/parse.go
index 11acfd8..59b0410 100644
--- a/language/parse.go
+++ b/language/parse.go
@@ -43,6 +43,13 @@
 // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
 // The resulting tag is canonicalized using the canonicalization type c.
 func (c CanonType) Parse(s string) (t Tag, err error) {
+	defer func() {
+		if recover() != nil {
+			t = Tag{}
+			err = language.ErrSyntax
+		}
+	}()
+
 	tt, err := language.Parse(s)
 	if err != nil {
 		return makeTag(tt), err
@@ -79,6 +86,13 @@
 // tag is returned after canonicalizing using CanonType c. If one or more errors
 // are encountered, one of the errors is returned.
 func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
+	defer func() {
+		if recover() != nil {
+			t = Tag{}
+			err = language.ErrSyntax
+		}
+	}()
+
 	var b language.Builder
 	if err = update(&b, part...); err != nil {
 		return und, err
@@ -142,6 +156,14 @@
 // Tags with a weight of zero will be dropped. An error will be returned if the
 // input could not be parsed.
 func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
+	defer func() {
+		if recover() != nil {
+			tag = nil
+			q = nil
+			err = language.ErrSyntax
+		}
+	}()
+
 	var entry string
 	for s != "" {
 		if entry, s = split(s, ','); entry == "" {