// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
| 5 | package idna |
| 6 | |
| 7 | import ( |
| 8 | "strings" |
| 9 | "testing" |
| 10 | ) |
| 11 | |
// punycodeTestCases is the round-trip table used by TestPunycode: for each
// entry, decoding encoded is expected to yield s, and encoding s is expected
// to yield encoded.
var punycodeTestCases = [...]struct {
	s, encoded string
}{
	// Short hand-written cases, including strings made only of ASCII letters
	// and hyphens.
	{s: "", encoded: ""},
	{s: "-", encoded: "--"},
	{s: "-a", encoded: "-a-"},
	{s: "-a-", encoded: "-a--"},
	{s: "a", encoded: "a-"},
	{s: "a-", encoded: "a--"},
	{s: "a-b", encoded: "a-b-"},
	{s: "books", encoded: "books-"},
	{s: "bücher", encoded: "bcher-kva"},
	{s: "Hello世界", encoded: "Hello-ck1hg65u"},
	{s: "ü", encoded: "tda"},
	{s: "üý", encoded: "tdac"},

	// The test cases below come from RFC 3492 section 7.1 with Errata 3026.

	// (A) Arabic (Egyptian).
	{
		s: "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" +
			"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
		encoded: "egbpdaj6bu4bxfgehfvwxn",
	},
	// (B) Chinese (simplified).
	{
		s:       "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
		encoded: "ihqwcrb4cv8a8dqg056pqjye",
	},
	// (C) Chinese (traditional).
	{
		s:       "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
		encoded: "ihqwctvzc91f659drss3x8bo0yb",
	},
	// (D) Czech.
	{
		s: "\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" +
			"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" +
			"\u0065\u0073\u006B\u0079",
		encoded: "Proprostnemluvesky-uyb24dma41a",
	},
	// (E) Hebrew.
	{
		s: "\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" +
			"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" +
			"\u05D1\u05E8\u05D9\u05EA",
		encoded: "4dbcagdahymbxekheh6e0a7fei0b",
	},
	// (F) Hindi (Devanagari).
	{
		s: "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" +
			"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" +
			"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" +
			"\u0939\u0948\u0902",
		encoded: "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
	},
	// (G) Japanese (kanji and hiragana).
	{
		s: "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" +
			"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
		encoded: "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
	},
	// (H) Korean (Hangul syllables).
	{
		s: "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" +
			"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" +
			"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
		encoded: "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" +
			"psd879ccm6fea98c",
	},
	// (I) Russian (Cyrillic).
	{
		s: "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" +
			"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" +
			"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" +
			"\u0438",
		encoded: "b1abfaaepdrnnbgefbadotcwatmq2g4l",
	},
	// (J) Spanish.
	{
		s: "\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" +
			"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" +
			"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" +
			"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" +
			"\u0061\u00F1\u006F\u006C",
		encoded: "PorqunopuedensimplementehablarenEspaol-fmd56a",
	},
	// (K) Vietnamese.
	{
		s: "\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" +
			"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" +
			"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" +
			"\u0056\u0069\u1EC7\u0074",
		encoded: "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
	},
	// (L) 3<nen>B<gumi><kinpachi><sensei>.
	{
		s:       "\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
		encoded: "3B-ww4c5e180e575a65lsy2b",
	},
	// (M) <amuro><namie>-with-SUPER-MONKEYS.
	{
		s: "\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" +
			"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" +
			"\u004F\u004E\u004B\u0045\u0059\u0053",
		encoded: "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
	},
	// (N) Hello-Another-Way-<sorezore><no><basho>.
	{
		s: "\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" +
			"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" +
			"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
		encoded: "Hello-Another-Way--fc4qua05auwb3674vfr0b",
	},
	// (O) <hitotsu><yane><no><shita>2.
	{
		s:       "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
		encoded: "2-u9tlzr9756bt3uc0v",
	},
	// (P) Maji<de>Koi<suru>5<byou><mae>
	{
		s: "\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" +
			"\u308B\u0035\u79D2\u524D",
		encoded: "MajiKoi5-783gue6qz075azm5e",
	},
	// (Q) <pafii>de<runba>
	{
		s:       "\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
		encoded: "de-jg4avhby1noc0d",
	},
	// (R) <sono><supiido><de>
	{
		s:       "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
		encoded: "d9juau41awczczp",
	},
	// (S) -> $1.00 <-
	{
		s: "\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" +
			"\u003C\u002D",
		encoded: "-> $1.00 <--",
	},
}
| 153 | |
| 154 | func TestPunycode(t *testing.T) { |
| 155 | for _, tc := range punycodeTestCases { |
| 156 | if got, err := decode(tc.encoded); err != nil { |
| 157 | t.Errorf("decode(%q): %v", tc.encoded, err) |
| 158 | } else if got != tc.s { |
| 159 | t.Errorf("decode(%q): got %q, want %q", tc.encoded, got, tc.s) |
| 160 | } |
| 161 | |
| 162 | if got, err := encode("", tc.s); err != nil { |
| 163 | t.Errorf(`encode("", %q): %v`, tc.s, err) |
| 164 | } else if got != tc.encoded { |
| 165 | t.Errorf(`encode("", %q): got %q, want %q`, tc.s, got, tc.encoded) |
| 166 | } |
| 167 | } |
| 168 | } |
| 169 | |
// punycodeErrorTestCases lists inputs for which an error is expected. Each
// entry starts with either "decode " or "encode "; TestPunycodeErrors uses
// that prefix to pick the function and passes it the remainder of the string.
var punycodeErrorTestCases = [...]string{
	// A sole '-' is invalid.
	"decode -",
	// '\x00' is not in [0-9A-Za-z].
	"decode foo\x00bar",
	// '#' is not in [0-9A-Za-z].
	"decode foo#bar",
	// '\u00A3' is not in [0-9A-Za-z].
	"decode foo\u00A3bar",
	// "9a" decodes to codepoint \u00A3; "9" is truncated.
	"decode 9",
	// "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF.
	"decode 99999a",
	// "9999999999a" overflows the int32 calculation.
	"decode 9999999999a",

	// int32 overflow.
	"encode " + strings.Repeat("x", 65536) + "\uff00",
}
| 181 | |
| 182 | func TestPunycodeErrors(t *testing.T) { |
| 183 | for _, tc := range punycodeErrorTestCases { |
| 184 | var err error |
| 185 | switch { |
| 186 | case strings.HasPrefix(tc, "decode "): |
| 187 | _, err = decode(tc[7:]) |
| 188 | case strings.HasPrefix(tc, "encode "): |
| 189 | _, err = encode("", tc[7:]) |
| 190 | } |
| 191 | if err == nil { |
| 192 | if len(tc) > 256 { |
| 193 | tc = tc[:100] + "..." + tc[len(tc)-100:] |
| 194 | } |
| 195 | t.Errorf("no error for %s", tc) |
| 196 | } |
| 197 | } |
| 198 | } |