| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package idna |
| |
| import ( |
| "encoding/hex" |
| "fmt" |
| "regexp" |
| "strconv" |
| "strings" |
| "testing" |
| |
| "golang.org/x/text/internal/testtext" |
| ) |
| |
| func TestAllocToUnicode(t *testing.T) { |
| avg := testtext.AllocsPerRun(1000, func() { |
| ToUnicode("www.golang.org") |
| }) |
| if avg > 0 { |
| t.Errorf("got %f; want 0", avg) |
| } |
| } |
| |
| func TestAllocToASCII(t *testing.T) { |
| avg := testtext.AllocsPerRun(1000, func() { |
| ToASCII("www.golang.org") |
| }) |
| if avg > 0 { |
| t.Errorf("got %f; want 0", avg) |
| } |
| } |
| |
| func TestProfiles(t *testing.T) { |
| testCases := []struct { |
| name string |
| want, got *Profile |
| }{ |
| {"Punycode", punycode, New()}, |
| {"Registration", registration, New(ValidateForRegistration())}, |
| {"Registration", registration, New( |
| ValidateForRegistration(), |
| VerifyDNSLength(true), |
| BidiRule(), |
| )}, |
| {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(transitionalLookup))}, |
| {"Display", display, New(MapForLookup(), BidiRule())}, |
| } |
| for _, tc := range testCases { |
| // Functions are not comparable, but the printed version will include |
| // their pointers. |
| got := fmt.Sprintf("%#v", tc.got) |
| want := fmt.Sprintf("%#v", tc.want) |
| if got != want { |
| t.Errorf("%s: \ngot %#v,\nwant %#v", tc.name, got, want) |
| } |
| } |
| } |
| |
// doTest runs f(input) as a subtest and checks both the returned string and
// the returned error.
//
// The subtest is named name/<escaped input>/<ok|err:codes>. If want is
// non-empty, the output of f must equal it; an empty want leaves the output
// unchecked. errors lists all allowed error codes, optionally wrapped in
// square brackets: when it is empty f must succeed, and when it is non-empty
// f must fail with a code that appears in it. The error codes are categorized
// as in https://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
//
//	P: Processing
//	V: Validity
//	A: to ASCII
//	B: Bidi
//	C: Context J
func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
	errors = strings.Trim(errors, "[]")
	test := "ok"
	if errors != "" {
		test = "err:" + errors
	}
	// Replace some of the escape sequences to make it easier to single out
	// tests on the command line.
	in := strings.Trim(strconv.QuoteToASCII(input), `"`)
	in = strings.Replace(in, `\u`, "#", -1)
	in = strings.Replace(in, `\U`, "#", -1)
	name = fmt.Sprintf("%s/%s/%s", name, in, test)

	t.Run(name, func(t *testing.T) {
		got, err := f(input)

		if err != nil {
			// The error is expected to expose its code through a code
			// method. Use a checked assertion so that an unexpected error
			// type fails this subtest with a clear message instead of
			// panicking and aborting the whole test binary.
			coder, ok := err.(interface {
				code() string
			})
			if !ok {
				t.Fatalf("input=%+q: error %v (%T) does not provide an error code", input, err, err)
			}
			if code := coder.code(); !strings.Contains(errors, code) {
				t.Errorf("error %q not in set of expected errors {%v}", code, errors)
			}
		} else if errors != "" {
			t.Errorf("got %+q, no errors; want error in {%v}", got, errors)
		}

		if want != "" && got != want {
			t.Errorf(`input=%+q string: got %+q; want %+q`, input, got, want)
		}
	})
}
| |
// unescapeRE matches a \u escape followed by exactly four hexadecimal digits.
var unescapeRE = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)

// unescape replaces every \uXXXX escape sequence in s, where XXXX is four
// hexadecimal digits, with the rune it denotes. All other text, including
// malformed escapes such as \uzzzz, is returned unchanged.
func unescape(s string) string {
	return unescapeRE.ReplaceAllStringFunc(s, func(v string) string {
		// v is the full match: `\u` followed by the four hex digits.
		var d [2]byte
		if _, err := hex.Decode(d[:], []byte(v[2:])); err != nil {
			// Cannot happen for text matched by unescapeRE, but never
			// substitute a partially decoded value.
			return v
		}
		return string(rune(d[0])<<8 | rune(d[1]))
	})
}
| |
| func BenchmarkProfile(b *testing.B) { |
| for i := 0; i < b.N; i++ { |
| Lookup.ToASCII("www.yahoogle.com") |
| } |
| } |
| |
| // TestLabelErrors tests strings returned in case of error. All results should |
| // be identical to the reference implementation and can be verified at |
| // https://unicode.org/cldr/utility/idna.jsp. The reference implementation, |
| // however, seems to not display Bidi and ContextJ errors. |
| // |
| // In some cases the behavior of browsers is added as a comment. In all cases, |
| // whenever a resolve search returns an error here, Chrome will treat the input |
| // string as a search string (including those for Bidi and Context J errors), |
| // unless noted otherwise. |
| func TestLabelErrors(t *testing.T) { |
| encode := func(s string) string { s, _ = encode(acePrefix, s); return s } |
| type kind struct { |
| name string |
| f func(string) (string, error) |
| } |
| punyA := kind{"PunycodeA", punycode.ToASCII} |
| resolve := kind{"ResolveA", Lookup.ToASCII} |
| display := kind{"ToUnicode", Display.ToUnicode} |
| p := New(VerifyDNSLength(true), MapForLookup(), BidiRule()) |
| lengthU := kind{"CheckLengthU", p.ToUnicode} |
| lengthA := kind{"CheckLengthA", p.ToASCII} |
| p = New(MapForLookup(), StrictDomainName(false)) |
| std3 := kind{"STD3", p.ToASCII} |
| p = New(MapForLookup(), CheckHyphens(false)) |
| hyphens := kind{"CheckHyphens", p.ToASCII} |
| p = New(MapForLookup(), Transitional(true)) |
| transitional := kind{"Transitional", p.ToASCII} |
| p = New(MapForLookup(), Transitional(false)) |
| nontransitional := kind{"Nontransitional", p.ToASCII} |
| |
| testCases := []struct { |
| kind |
| input string |
| want string |
| wantErr string |
| }{ |
| {lengthU, "", "", code16("A4", "X4_2")}, // From UTS 46 conformance test. |
| {lengthA, "", "", "A4"}, |
| |
| {lengthU, "xn--", "", code16("A4", "X4_2")}, |
| {lengthU, "foo.xn--", "foo.", code16("A4", "X4_2")}, // TODO: is dropping xn-- correct? |
| {lengthU, "xn--.foo", ".foo", code16("A4", "X4_2")}, |
| {lengthU, "foo.xn--.bar", "foo..bar", code16("A4", "X4_2")}, |
| |
| {display, "xn--", "", ""}, |
| {display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct? |
| {display, "xn--.foo", ".foo", ""}, |
| {display, "foo.xn--.bar", "foo..bar", ""}, |
| |
| {lengthA, "a..b", "a..b", "A4"}, |
| {punyA, ".b", ".b", ""}, |
| // For backwards compatibility, the Punycode profile does not map runes. |
| {punyA, "\u3002b", "xn--b-83t", ""}, |
| {punyA, "..b", "..b", ""}, |
| |
| {lengthA, ".b", ".b", "A4"}, |
| {lengthA, "\u3002b", ".b", "A4"}, |
| {lengthA, "..b", "..b", "A4"}, |
| {lengthA, "b..", "b..", code16("", "A4")}, |
| |
| // Sharpened Bidi rules for Unicode 10.0.0. Apply for ALL labels in ANY |
| // of the labels is RTL. |
| {lengthA, "\ufe05\u3002\u3002\U0002603e\u1ce0", "..xn--t6f5138v", "A4"}, |
| {lengthA, "FAX\u2a77\U0001d186\u3002\U0001e942\U000e0181\u180c", "", "B6"}, |
| |
| {resolve, "a..b", "a..b", ""}, |
| // Note that leading dots are not stripped. This is to be consistent |
| // with the Punycode profile as well as the conformance test. |
| {resolve, ".b", ".b", ""}, |
| {resolve, "\u3002b", ".b", ""}, |
| {resolve, "..b", "..b", ""}, |
| {resolve, "b..", "b..", ""}, |
| {resolve, "\xed", "", "P1"}, |
| |
| // Raw punycode |
| {punyA, "", "", ""}, |
| {punyA, "*.foo.com", "*.foo.com", ""}, |
| {punyA, "Foo.com", "Foo.com", ""}, |
| |
| // STD3 rules |
| {display, "*.foo.com", "*.foo.com", code16("P1", "U1")}, |
| {std3, "*.foo.com", "*.foo.com", ""}, |
| |
| // Hyphens |
| {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"}, |
| {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""}, |
| {display, "-label-.com", "-label-.com", "V3"}, |
| {hyphens, "-label-.com", "-label-.com", ""}, |
| |
| // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of |
| // Chrome, modern Firefox, Safari, and IE. |
| {resolve, "lab⒐be", "xn--labbe-zh9b", code16("P1", "V7")}, // encode("lab⒐be") |
| {display, "lab⒐be", "lab⒐be", code16("P1", "V7")}, |
| {transitional, "plan⒐faß.de", "xn--planfass-c31e.de", code16("P1", "V7")}, // encode("plan⒐fass") + ".de" |
| {display, "Plan⒐faß.de", "plan⒐faß.de", code16("P1", "V7")}, |
| |
| // Transitional vs Nontransitional processing |
| {transitional, "Plan9faß.de", "plan9fass.de", ""}, |
| {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, |
| |
| // Chrome 54.0 recognizes the error and treats this input verbatim as a |
| // search string. |
| // Safari 10.0 (non-conform spec) decomposes "⒈" and computes the |
| // punycode on the result using transitional mapping. |
| // Firefox 49.0.1 goes haywire on this string and prints a bunch of what |
| // seems to be nested punycode encodings. |
| {transitional, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", code16("P1", "V7")}, |
| {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", code16("P1", "V7")}, |
| |
| {transitional, "a\u200Cb", "ab", ""}, |
| {display, "a\u200Cb", "a\u200Cb", "C"}, |
| |
| {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"}, |
| {display, "a\u200Cb", "a\u200Cb", "C"}, |
| |
| {resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"}, |
| { |
| // Notice how the string gets transformed, even with an error. |
| // Chrome will use the original string if it finds an error, so not |
| // the transformed one. |
| display, |
| "gr\ufecb\ufeae\ufe91\ufef2.de", |
| "gr\u0639\u0631\u0628\u064a.de", |
| "B", |
| }, |
| |
| {resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ |
| {display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"}, |
| |
| // normalize input |
| {resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢ |
| {display, "a\u0323\u0322", "\u1ea1\u0322", ""}, |
| |
| // Non-normalized strings are not normalized when they originate from |
| // punycode. Despite the error, Chrome, Safari and Firefox will attempt |
| // to look up the input punycode. |
| {resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"}, |
| {display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"}, |
| } |
| |
| for _, tc := range testCases { |
| doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr) |
| } |
| } |
| |
| func TestTransitionalDefault(t *testing.T) { |
| want := "xn--strae-oqa.de" |
| if transitionalLookup { |
| want = "strasse.de" |
| } |
| doTest(t, Lookup.ToASCII, "Lookup", "straße.de", want, "") |
| } |