encoding/ianaindex: add ASCII, document Index.Encoding

Index.Encoding returns a nil Encoding in case the charset is valid but
unsupported by the library. Document this behavior.

US-ASCII currently falls into this category, so looking it up yields a nil
Encoding. Register it as a regular encoding. The decoder replaces non-ASCII
bytes with the Unicode replacement character. The encoder returns a
RepertoireError when a non-ASCII rune is encountered.

Fixes golang/go#19421

Change-Id: I4c24ba2114a5012be88488e63aa6e57df955eb96
GitHub-Last-Rev: 418ee6dd3fda047db01bb087a3a77360f60624a8
GitHub-Pull-Request: golang/text#10
Reviewed-on: https://go-review.googlesource.com/c/text/+/212077
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/encoding/ianaindex/ascii.go b/encoding/ianaindex/ascii.go
new file mode 100644
index 0000000..9792f81
--- /dev/null
+++ b/encoding/ianaindex/ascii.go
@@ -0,0 +1,74 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ianaindex
+
+import (
+	"unicode"
+	"unicode/utf8"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/internal"
+	"golang.org/x/text/encoding/internal/identifier"
+	"golang.org/x/text/transform"
+)
+
+type asciiDecoder struct {
+	transform.NopResetter
+}
+
+// Transform decodes US-ASCII from src into dst: ASCII bytes are copied as is and
+// every other byte becomes U+FFFD. It reports transform.ErrShortDst when dst
+// cannot hold the next output; atEOF is unused because each byte stands alone.
+func (d asciiDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for _, c := range src {
+		if c <= unicode.MaxASCII {
+			if nDst >= len(dst) {
+				return nDst, nSrc, transform.ErrShortDst
+			}
+			dst[nDst] = c
+			nDst++
+			nSrc++
+			continue
+		}
+		// Non-ASCII byte: emit the (multi-byte) replacement character instead.
+		if nDst+utf8.RuneLen(unicode.ReplacementChar) > len(dst) {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		nDst += utf8.EncodeRune(dst[nDst:], unicode.ReplacementChar)
+		nSrc++
+	}
+	return nDst, nSrc, nil
+}
+
+type asciiEncoder struct {
+	transform.NopResetter
+}
+
+// Transform copies src to dst, stopping with an error at the first non-ASCII byte.
+func (d asciiEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for _, c := range src {
+		switch {
+		case c > unicode.MaxASCII && !atEOF && !utf8.FullRune(src[nSrc:]):
+			// Wait for the rest of a split rune so it is reported (and replaced) once.
+			return nDst, nSrc, transform.ErrShortSrc
+		case c > unicode.MaxASCII: // rune is not representable in US-ASCII
+			return nDst, nSrc, internal.RepertoireError(encoding.ASCIISub)
+		case nDst >= len(dst): // no room left for the next output byte
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		dst[nDst] = c
+		nDst, nSrc = nDst+1, nSrc+1
+	}
+	return nDst, nSrc, nil
+}
+
+// asciiEnc is the US-ASCII encoding built from asciiDecoder and asciiEncoder.
+// SimpleEncoding is given keyed fields so the two transformers cannot be
+// swapped silently (go vet's composites check).
+var asciiEnc = &internal.Encoding{
+	Encoding: &internal.SimpleEncoding{Decoder: asciiDecoder{}, Encoder: asciiEncoder{}},
+	Name:     "US-ASCII",
+	MIB:      identifier.ASCII,
+}
diff --git a/encoding/ianaindex/ascii_test.go b/encoding/ianaindex/ascii_test.go
new file mode 100644
index 0000000..a184ab9
--- /dev/null
+++ b/encoding/ianaindex/ascii_test.go
@@ -0,0 +1,38 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ianaindex
+
+import (
+	"unicode"
+	"testing"
+
+	"golang.org/x/text/encoding"
+)
+
+// TestASCIIDecoder verifies that every non-ASCII input byte decodes to U+FFFD.
+func TestASCIIDecoder(t *testing.T) {
+	// Each accented letter is two UTF-8 bytes, hence two replacement characters.
+	bad := string([]rune{unicode.ReplacementChar, unicode.ReplacementChar})
+	want := "Comment Candide fut " + bad + "lev" + bad + " dans un beau ch" + bad + "teau"
+	got, err := asciiEnc.NewDecoder().String("Comment Candide fut élevé dans un beau château")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	} else if got != want {
+		t.Fatalf("asciiEnc.NewDecoder().String() = %q, want %q", got, want)
+	}
+}
+
+// TestASCIIEncoder verifies that ReplaceUnsupported maps each non-ASCII rune to ASCIISub.
+func TestASCIIEncoder(t *testing.T) {
+	sub := string(encoding.ASCIISub)
+	want := "Comment Candide fut " + sub + "lev" + sub + " dans un beau ch" + sub + "teau"
+	enc := encoding.ReplaceUnsupported(asciiEnc.NewEncoder())
+	got, err := enc.String("Comment Candide fut élevé dans un beau château")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	} else if got != want {
+		t.Fatalf("ReplaceUnsupported(asciiEnc.NewEncoder()).String() = %q, want %q", got, want)
+	}
+}
diff --git a/encoding/ianaindex/ianaindex.go b/encoding/ianaindex/ianaindex.go
index 49b3070..f4b1887 100644
--- a/encoding/ianaindex/ianaindex.go
+++ b/encoding/ianaindex/ianaindex.go
@@ -69,6 +69,10 @@
 
 // Encoding returns an Encoding for IANA-registered names. Matching is
 // case-insensitive.
+//
+// If the provided name doesn't match an IANA-registered charset, an error is
+// returned. If the name matches an IANA-registered charset but isn't supported,
+// a nil encoding and a nil error are returned.
 func (x *Index) Encoding(name string) (encoding.Encoding, error) {
 	name = strings.TrimSpace(name)
 	// First try without lowercasing (possibly creating an allocation).
@@ -150,6 +154,7 @@
 }
 
 var encodings = [numIANA]encoding.Encoding{
+	enc3:    asciiEnc,
 	enc106:  unicode.UTF8,
 	enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
 	enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
diff --git a/encoding/ianaindex/ianaindex_test.go b/encoding/ianaindex/ianaindex_test.go
index 20a2131..d545fcf 100644
--- a/encoding/ianaindex/ianaindex_test.go
+++ b/encoding/ianaindex/ianaindex_test.go
@@ -74,6 +74,7 @@
 		{MIME, "  l5  ", "ISO-8859-9", nil},
 		{MIME, "latin5 ", "ISO-8859-9", nil},
 		{MIME, "LATIN5 ", "ISO-8859-9", nil},
+		{MIME, "us-ascii", "US-ASCII", nil},
 		{MIME, "latin 5", "", errInvalidName},
 		{MIME, "latin-5", "", errInvalidName},