encoding/ianaindex: add ASCII, document Index.Encoding
Index.Encoding returns a nil Encoding in case the charset is valid but
unsupported by the library. Document this behavior.
US-ASCII had no registered Encoding, so Index.Encoding reported it as
unsupported in this way.
Register it as a regular encoding. The decoder replaces non-ASCII bytes
with the Unicode replacement character (U+FFFD). The encoder returns a
RepertoireError when a non-ASCII rune is encountered.
Fixes golang/go#19421
Change-Id: I4c24ba2114a5012be88488e63aa6e57df955eb96
GitHub-Last-Rev: 418ee6dd3fda047db01bb087a3a77360f60624a8
GitHub-Pull-Request: golang/text#10
Reviewed-on: https://go-review.googlesource.com/c/text/+/212077
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/encoding/ianaindex/ascii.go b/encoding/ianaindex/ascii.go
new file mode 100644
index 0000000..9792f81
--- /dev/null
+++ b/encoding/ianaindex/ascii.go
@@ -0,0 +1,74 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ianaindex
+
+import (
+ "unicode"
+ "unicode/utf8"
+
+ "golang.org/x/text/encoding"
+ "golang.org/x/text/encoding/internal"
+ "golang.org/x/text/transform"
+ "golang.org/x/text/encoding/internal/identifier"
+)
+
+type asciiDecoder struct {
+ transform.NopResetter
+}
+
+// Transform decodes src as US-ASCII into UTF-8 in dst.
+//
+// Bytes up to unicode.MaxASCII are copied unchanged. Every byte above that
+// value is written as the UTF-8 encoding of unicode.ReplacementChar. When dst
+// cannot hold the next output, Transform stops and returns
+// transform.ErrShortDst together with the counts of bytes written and
+// consumed so far. atEOF is not consulted.
+func (d asciiDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for _, b := range src {
+		if b <= unicode.MaxASCII {
+			// ASCII byte: needs exactly one byte of room in dst.
+			if nDst >= len(dst) {
+				return nDst, nSrc, transform.ErrShortDst
+			}
+			dst[nDst] = b
+			nDst++
+			nSrc++
+			continue
+		}
+
+		// Non-ASCII byte: the whole replacement rune must fit before any
+		// part of it is written.
+		if nDst+utf8.RuneLen(unicode.ReplacementChar) > len(dst) {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		nDst += utf8.EncodeRune(dst[nDst:], unicode.ReplacementChar)
+		nSrc++
+	}
+	return nDst, nSrc, nil
+}
+
+type asciiEncoder struct {
+ transform.NopResetter
+}
+
+// Transform copies src to dst for as long as src holds ASCII bytes.
+//
+// It stops at the first byte above unicode.MaxASCII, leaving nSrc pointing at
+// that byte, and reports one of:
+//   - transform.ErrShortSrc when atEOF is false and src[nSrc:] holds only the
+//     beginning of a UTF-8 sequence, so the caller can present the rune
+//     again once the rest of it is available;
+//   - internal.RepertoireError(encoding.ASCIISub) otherwise.
+//
+// transform.ErrShortDst is returned when dst has no room for the next byte.
+func (d asciiEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for _, c := range src {
+		if c > unicode.MaxASCII {
+			// Without this check a rune split across two calls would be
+			// reported as a separate repertoire error for each fragment
+			// instead of once for the whole rune.
+			if !atEOF && !utf8.FullRune(src[nSrc:]) {
+				return nDst, nSrc, transform.ErrShortSrc
+			}
+			return nDst, nSrc, internal.RepertoireError(encoding.ASCIISub)
+		}
+
+		if nDst >= len(dst) {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		dst[nDst] = c
+		nDst++
+		nSrc++
+	}
+	return nDst, nSrc, nil
+}
+
+// asciiEnc is the US-ASCII encoding (MIB identifier.ASCII). It pairs
+// asciiDecoder with asciiEncoder.
+var asciiEnc = &internal.Encoding{
+	Encoding: &internal.SimpleEncoding{
+		// Keyed fields: SimpleEncoding belongs to another package, so
+		// positional fields would depend on its field order.
+		Decoder: asciiDecoder{},
+		Encoder: asciiEncoder{},
+	},
+	Name: "US-ASCII",
+	MIB:  identifier.ASCII,
+}
diff --git a/encoding/ianaindex/ascii_test.go b/encoding/ianaindex/ascii_test.go
new file mode 100644
index 0000000..a184ab9
--- /dev/null
+++ b/encoding/ianaindex/ascii_test.go
@@ -0,0 +1,38 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ianaindex
+
+import (
+ "unicode"
+ "testing"
+
+ "golang.org/x/text/encoding"
+)
+
+func TestASCIIDecoder(t *testing.T) {
+ repl := string(unicode.ReplacementChar)
+ input := "Comment Candide fut élevé dans un beau château"
+ want := "Comment Candide fut " + repl + repl + "lev" + repl + repl + " dans un beau ch" + repl + repl + "teau"
+ got, err := asciiEnc.NewDecoder().String(input)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if got != want {
+ t.Fatalf("asciiEnc.NewDecoder().String() = %q, want %q", got, want)
+ }
+}
+
+// TestASCIIEncoder checks that, when the US-ASCII encoder is wrapped with
+// encoding.ReplaceUnsupported, each non-ASCII rune of the input comes out as
+// a single encoding.ASCIISub and ASCII text is left unchanged.
+func TestASCIIEncoder(t *testing.T) {
+	repl := string(encoding.ASCIISub)
+	input := "Comment Candide fut élevé dans un beau château"
+	want := "Comment Candide fut " + repl + "lev" + repl + " dans un beau ch" + repl + "teau"
+	got, err := encoding.ReplaceUnsupported(asciiEnc.NewEncoder()).String(input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got != want {
+		// Name the call that was actually made, wrapper included.
+		t.Fatalf("encoding.ReplaceUnsupported(asciiEnc.NewEncoder()).String() = %q, want %q", got, want)
+	}
+}
diff --git a/encoding/ianaindex/ianaindex.go b/encoding/ianaindex/ianaindex.go
index 49b3070..f4b1887 100644
--- a/encoding/ianaindex/ianaindex.go
+++ b/encoding/ianaindex/ianaindex.go
@@ -69,6 +69,10 @@
// Encoding returns an Encoding for IANA-registered names. Matching is
// case-insensitive.
+//
+// If the provided name doesn't match an IANA-registered charset, an error is
+// returned. If the name matches an IANA-registered charset but isn't supported,
+// a nil encoding and a nil error are returned.
func (x *Index) Encoding(name string) (encoding.Encoding, error) {
name = strings.TrimSpace(name)
// First try without lowercasing (possibly creating an allocation).
@@ -150,6 +154,7 @@
}
var encodings = [numIANA]encoding.Encoding{
+ enc3: asciiEnc,
enc106: unicode.UTF8,
enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
diff --git a/encoding/ianaindex/ianaindex_test.go b/encoding/ianaindex/ianaindex_test.go
index 20a2131..d545fcf 100644
--- a/encoding/ianaindex/ianaindex_test.go
+++ b/encoding/ianaindex/ianaindex_test.go
@@ -74,6 +74,7 @@
{MIME, " l5 ", "ISO-8859-9", nil},
{MIME, "latin5 ", "ISO-8859-9", nil},
{MIME, "LATIN5 ", "ISO-8859-9", nil},
+ {MIME, "us-ascii", "US-ASCII", nil},
{MIME, "latin 5", "", errInvalidName},
{MIME, "latin-5", "", errInvalidName},