| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package charmap |
| |
| import ( |
| "testing" |
| |
| "golang.org/x/text/encoding" |
| "golang.org/x/text/encoding/internal" |
| "golang.org/x/text/encoding/internal/enctest" |
| "golang.org/x/text/transform" |
| ) |
| |
| func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) { |
| return "Decode", e.NewDecoder(), nil |
| } |
| |
| func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) { |
| return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement |
| } |
| |
| func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) { |
| return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f) |
| } |
| |
| func TestNonRepertoire(t *testing.T) { |
| testCases := []struct { |
| init func(e encoding.Encoding) (string, transform.Transformer, error) |
| e encoding.Encoding |
| src, want string |
| }{ |
| {dec, Windows1252, "\x81", "\ufffd"}, |
| |
| {encEBCDIC, CodePage037, "갂", ""}, |
| |
| {encEBCDIC, CodePage1047, "갂", ""}, |
| {encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"}, |
| |
| {encEBCDIC, CodePage1140, "갂", ""}, |
| {encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"}, |
| |
| {encASCIISuperset, Windows1252, "갂", ""}, |
| {encASCIISuperset, Windows1252, "a갂", "a"}, |
| {encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"}, |
| } |
| for _, tc := range testCases { |
| dir, tr, wantErr := tc.init(tc.e) |
| |
| dst, _, err := transform.String(tr, tc.src) |
| if err != wantErr { |
| t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr) |
| } |
| if got := string(dst); got != tc.want { |
| t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want) |
| } |
| } |
| } |
| |
| func TestBasics(t *testing.T) { |
| testCases := []struct { |
| e encoding.Encoding |
| encoded string |
| utf8 string |
| }{{ |
| e: CodePage037, |
| encoded: "\xc8\x51\xba\x93\xcf", |
| utf8: "Hé[lõ", |
| }, { |
| e: CodePage437, |
| encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe", |
| utf8: "Héllô ¥º⌠£╛", |
| }, { |
| e: CodePage866, |
| encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1", |
| utf8: "Hє╙o Ш¤Я▌б", |
| }, { |
| e: CodePage1047, |
| encoded: "\xc8\x54\x93\x93\x9f", |
| utf8: "Hèll¤", |
| }, { |
| e: CodePage1140, |
| encoded: "\xc8\x9f\x93\x93\xcf", |
| utf8: "H€llõ", |
| }, { |
| e: ISO8859_2, |
| encoded: "Hel\xe5\xf5", |
| utf8: "Helĺő", |
| }, { |
| e: ISO8859_3, |
| encoded: "He\xbd\xd4", |
| utf8: "He½Ô", |
| }, { |
| e: ISO8859_4, |
| encoded: "Hel\xb6\xf8", |
| utf8: "Helļø", |
| }, { |
| e: ISO8859_5, |
| encoded: "H\xd7\xc6o", |
| utf8: "HзЦo", |
| }, { |
| e: ISO8859_6, |
| encoded: "Hel\xc2\xc9", |
| utf8: "Helآة", |
| }, { |
| e: ISO8859_7, |
| encoded: "H\xeel\xebo", |
| utf8: "Hξlλo", |
| }, { |
| e: ISO8859_8, |
| encoded: "Hel\xf5\xed", |
| utf8: "Helץם", |
| }, { |
| e: ISO8859_9, |
| encoded: "\xdeayet", |
| utf8: "Şayet", |
| }, { |
| e: ISO8859_10, |
| encoded: "H\xea\xbfo", |
| utf8: "Hęŋo", |
| }, { |
| e: ISO8859_13, |
| encoded: "H\xe6l\xf9o", |
| utf8: "Hęlło", |
| }, { |
| e: ISO8859_14, |
| encoded: "He\xfe\xd0o", |
| utf8: "HeŷŴo", |
| }, { |
| e: ISO8859_15, |
| encoded: "H\xa4ll\xd8", |
| utf8: "H€llØ", |
| }, { |
| e: ISO8859_16, |
| encoded: "H\xe6ll\xbd", |
| utf8: "Hællœ", |
| }, { |
| e: KOI8R, |
| encoded: "He\x93\xad\x9c", |
| utf8: "He⌠╜°", |
| }, { |
| e: KOI8U, |
| encoded: "He\x93\xad\x9c", |
| utf8: "He⌠ґ°", |
| }, { |
| e: Macintosh, |
| encoded: "He\xdf\xd7", |
| utf8: "Hefl◊", |
| }, { |
| e: MacintoshCyrillic, |
| encoded: "He\xbe\x94", |
| utf8: "HeЊФ", |
| }, { |
| e: Windows874, |
| encoded: "He\xb7\xf0", |
| utf8: "Heท๐", |
| }, { |
| e: Windows1250, |
| encoded: "He\xe5\xe5o", |
| utf8: "Heĺĺo", |
| }, { |
| e: Windows1251, |
| encoded: "H\xball\xfe", |
| utf8: "Hєllю", |
| }, { |
| e: Windows1252, |
| encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0", |
| utf8: "Héllô ¥º®£Ð", |
| }, { |
| e: Windows1253, |
| encoded: "H\xe5ll\xd6", |
| utf8: "HεllΦ", |
| }, { |
| e: Windows1254, |
| encoded: "\xd0ello", |
| utf8: "Ğello", |
| }, { |
| e: Windows1255, |
| encoded: "He\xd4o", |
| utf8: "Heװo", |
| }, { |
| e: Windows1256, |
| encoded: "H\xdbllo", |
| utf8: "Hغllo", |
| }, { |
| e: Windows1257, |
| encoded: "He\xeflo", |
| utf8: "Heļlo", |
| }, { |
| e: Windows1258, |
| encoded: "Hell\xf5", |
| utf8: "Hellơ", |
| }, { |
| e: XUserDefined, |
| encoded: "\x00\x40\x7f\x80\xab\xff", |
| utf8: "\u0000\u0040\u007f\uf780\uf7ab\uf7ff", |
| }} |
| |
| for _, tc := range testCases { |
| enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "") |
| } |
| } |
| |
| var windows1255TestCases = []struct { |
| b byte |
| ok bool |
| r rune |
| }{ |
| {'\x00', true, '\u0000'}, |
| {'\x1a', true, '\u001a'}, |
| {'\x61', true, '\u0061'}, |
| {'\x7f', true, '\u007f'}, |
| {'\x80', true, '\u20ac'}, |
| {'\x95', true, '\u2022'}, |
| {'\xa0', true, '\u00a0'}, |
| {'\xc0', true, '\u05b0'}, |
| {'\xfc', true, '\ufffd'}, |
| {'\xfd', true, '\u200e'}, |
| {'\xfe', true, '\u200f'}, |
| {'\xff', true, '\ufffd'}, |
| {encoding.ASCIISub, false, '\u0400'}, |
| {encoding.ASCIISub, false, '\u2603'}, |
| {encoding.ASCIISub, false, '\U0001f4a9'}, |
| } |
| |
| func TestDecodeByte(t *testing.T) { |
| for _, tc := range windows1255TestCases { |
| if !tc.ok { |
| continue |
| } |
| |
| got := Windows1255.DecodeByte(tc.b) |
| want := tc.r |
| if got != want { |
| t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, want) |
| } |
| } |
| } |
| |
| func TestEncodeRune(t *testing.T) { |
| for _, tc := range windows1255TestCases { |
| // There can be multiple tc.b values that map to tc.r = '\ufffd'. |
| if tc.r == '\ufffd' { |
| continue |
| } |
| |
| gotB, gotOK := Windows1255.EncodeRune(tc.r) |
| wantB, wantOK := tc.b, tc.ok |
| if gotB != wantB || gotOK != wantOK { |
| t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, gotB, gotOK, wantB, wantOK) |
| } |
| } |
| } |
| |
| func TestFiles(t *testing.T) { enctest.TestFile(t, Windows1252) } |
| |
| func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, Windows1252) } |