| // Copyright 2012 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package publicsuffix |
| |
| import ( |
| "sort" |
| "strings" |
| "testing" |
| ) |
| |
| func TestNodeLabel(t *testing.T) { |
| for i, want := range nodeLabels { |
| got := nodeLabel(uint32(i)) |
| if got != want { |
| t.Errorf("%d: got %q, want %q", i, got, want) |
| } |
| } |
| } |
| |
| func TestFind(t *testing.T) { |
| testCases := []string{ |
| "", |
| "a", |
| "a0", |
| "aaaa", |
| "ao", |
| "ap", |
| "ar", |
| "aro", |
| "arp", |
| "arpa", |
| "arpaa", |
| "arpb", |
| "az", |
| "b", |
| "b0", |
| "ba", |
| "z", |
| "zu", |
| "zv", |
| "zw", |
| "zx", |
| "zy", |
| "zz", |
| "zzzz", |
| } |
| for _, tc := range testCases { |
| got := find(tc, 0, numTLD) |
| want := notFound |
| for i := uint32(0); i < numTLD; i++ { |
| if tc == nodeLabel(i) { |
| want = i |
| break |
| } |
| } |
| if got != want { |
| t.Errorf("%q: got %d, want %d", tc, got, want) |
| } |
| } |
| } |
| |
| func TestICANN(t *testing.T) { |
| testCases := map[string]bool{ |
| "foo.org": true, |
| "foo.co.uk": true, |
| "foo.dyndns.org": false, |
| "foo.go.dyndns.org": false, |
| "foo.blogspot.co.uk": false, |
| "foo.intranet": false, |
| } |
| for domain, want := range testCases { |
| _, got := PublicSuffix(domain) |
| if got != want { |
| t.Errorf("%q: got %v, want %v", domain, got, want) |
| } |
| } |
| } |
| |
// publicSuffixTestCases is the golden table shared by TestPublicSuffix,
// TestSlowPublicSuffix and BenchmarkPublicSuffix. Each entry gives a domain
// together with the public suffix (wantPS) and ICANN flag (wantICANN) that
// both PublicSuffix and slowPublicSuffix are expected to return for it.
var publicSuffixTestCases = []struct {
	domain    string
	wantPS    string
	wantICANN bool
}{
	// Empty string.
	{"", "", false},

	// The .ao rules are:
	//	ao
	//	ed.ao
	//	gv.ao
	//	og.ao
	//	co.ao
	//	pb.ao
	//	it.ao
	{"ao", "ao", true},
	{"www.ao", "ao", true},
	{"pb.ao", "pb.ao", true},
	{"www.pb.ao", "pb.ao", true},
	{"www.xxx.yyy.zzz.pb.ao", "pb.ao", true},

	// The .ar rules are:
	//	ar
	//	com.ar
	//	edu.ar
	//	gob.ar
	//	gov.ar
	//	int.ar
	//	mil.ar
	//	net.ar
	//	org.ar
	//	tur.ar
	//	blogspot.com.ar (in the PRIVATE DOMAIN section).
	{"ar", "ar", true},
	{"www.ar", "ar", true},
	{"nic.ar", "ar", true},
	{"www.nic.ar", "ar", true},
	{"com.ar", "com.ar", true},
	{"www.com.ar", "com.ar", true},
	{"blogspot.com.ar", "blogspot.com.ar", false},                 // PRIVATE DOMAIN.
	{"www.blogspot.com.ar", "blogspot.com.ar", false},             // PRIVATE DOMAIN.
	{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN.
	{"logspot.com.ar", "com.ar", true},
	{"zlogspot.com.ar", "com.ar", true},
	{"zblogspot.com.ar", "com.ar", true},

	// The .arpa rules are:
	//	arpa
	//	e164.arpa
	//	in-addr.arpa
	//	ip6.arpa
	//	iris.arpa
	//	uri.arpa
	//	urn.arpa
	{"arpa", "arpa", true},
	{"www.arpa", "arpa", true},
	{"urn.arpa", "urn.arpa", true},
	{"www.urn.arpa", "urn.arpa", true},
	{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa", true},

	// The relevant {kobe,kyoto}.jp rules are:
	//	jp
	//	*.kobe.jp
	//	!city.kobe.jp
	//	kyoto.jp
	//	ide.kyoto.jp
	{"jp", "jp", true},
	{"kobe.jp", "jp", true},
	{"c.kobe.jp", "c.kobe.jp", true},
	{"b.c.kobe.jp", "c.kobe.jp", true},
	{"a.b.c.kobe.jp", "c.kobe.jp", true},
	{"city.kobe.jp", "kobe.jp", true},
	{"www.city.kobe.jp", "kobe.jp", true},
	{"kyoto.jp", "kyoto.jp", true},
	{"test.kyoto.jp", "kyoto.jp", true},
	{"ide.kyoto.jp", "ide.kyoto.jp", true},
	{"b.ide.kyoto.jp", "ide.kyoto.jp", true},
	{"a.b.ide.kyoto.jp", "ide.kyoto.jp", true},

	// The .tw rules are:
	//	tw
	//	edu.tw
	//	gov.tw
	//	mil.tw
	//	com.tw
	//	net.tw
	//	org.tw
	//	idv.tw
	//	game.tw
	//	ebiz.tw
	//	club.tw
	//	網路.tw (xn--zf0ao64a.tw)
	//	組織.tw (xn--uc0atv.tw)
	//	商業.tw (xn--czrw28b.tw)
	//	blogspot.tw
	{"tw", "tw", true},
	{"aaa.tw", "tw", true},
	{"www.aaa.tw", "tw", true},
	{"xn--czrw28b.aaa.tw", "tw", true},
	{"edu.tw", "edu.tw", true},
	{"www.edu.tw", "edu.tw", true},
	{"xn--czrw28b.edu.tw", "edu.tw", true},
	{"xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"www.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--kpry57d.tw", "tw", true},

	// The .uk rules are:
	//	uk
	//	ac.uk
	//	co.uk
	//	gov.uk
	//	ltd.uk
	//	me.uk
	//	net.uk
	//	nhs.uk
	//	org.uk
	//	plc.uk
	//	police.uk
	//	*.sch.uk
	//	blogspot.co.uk (in the PRIVATE DOMAIN section).
	{"uk", "uk", true},
	{"aaa.uk", "uk", true},
	{"www.aaa.uk", "uk", true},
	{"mod.uk", "uk", true},
	{"www.mod.uk", "uk", true},
	{"sch.uk", "uk", true},
	{"mod.sch.uk", "mod.sch.uk", true},
	{"www.sch.uk", "www.sch.uk", true},
	{"co.uk", "co.uk", true},
	{"www.co.uk", "co.uk", true},
	{"blogspot.co.uk", "blogspot.co.uk", false}, // PRIVATE DOMAIN.
	{"blogspot.nic.uk", "uk", true},
	{"blogspot.sch.uk", "blogspot.sch.uk", true},

	// The .рф rules are:
	//	рф (xn--p1ai)
	{"xn--p1ai", "xn--p1ai", true},
	{"aaa.xn--p1ai", "xn--p1ai", true},
	{"www.xxx.yyy.xn--p1ai", "xn--p1ai", true},

	// The .bd rules are:
	//	*.bd
	{"bd", "bd", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.bd", "www.bd", true},
	{"xxx.www.bd", "www.bd", true},
	{"zzz.bd", "zzz.bd", true},
	{"www.zzz.bd", "zzz.bd", true},
	{"www.xxx.yyy.zzz.bd", "zzz.bd", true},

	// The .ck rules are:
	//	*.ck
	//	!www.ck
	{"ck", "ck", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.ck", "ck", true},
	{"xxx.www.ck", "ck", true},
	{"zzz.ck", "zzz.ck", true},
	{"www.zzz.ck", "zzz.ck", true},
	{"www.xxx.yyy.zzz.ck", "zzz.ck", true},

	// The .myjino.ru rules (in the PRIVATE DOMAIN section) are:
	//	myjino.ru
	//	*.hosting.myjino.ru
	//	*.landing.myjino.ru
	//	*.spectrum.myjino.ru
	//	*.vps.myjino.ru
	{"myjino.ru", "myjino.ru", false},
	{"aaa.myjino.ru", "myjino.ru", false},
	{"bbb.ccc.myjino.ru", "myjino.ru", false},
	{"hosting.ddd.myjino.ru", "myjino.ru", false},
	{"landing.myjino.ru", "myjino.ru", false},
	{"www.landing.myjino.ru", "www.landing.myjino.ru", false},
	{"spectrum.vps.myjino.ru", "spectrum.vps.myjino.ru", false},

	// The .uberspace.de rules (in the PRIVATE DOMAIN section) are:
	//	*.uberspace.de
	{"uberspace.de", "de", true}, // "de" is in the ICANN DOMAIN section. See footnote (†).
	{"aaa.uberspace.de", "aaa.uberspace.de", false},
	{"bbb.ccc.uberspace.de", "ccc.uberspace.de", false},

	// There are no .nosuchtld rules.
	{"nosuchtld", "nosuchtld", false},
	{"foo.nosuchtld", "nosuchtld", false},
	{"bar.foo.nosuchtld", "nosuchtld", false},

	// (†) There is some disagreement on how wildcards behave: what should the
	// public suffix of "platform.sh" be when both "*.platform.sh" and "sh" are
	// in the PSL, but "platform.sh" is not? Two possible answers are
	// "platform.sh" and "sh"; there are valid arguments for either behavior,
	// and different browsers have implemented different behaviors.
	//
	// This implementation, Go's golang.org/x/net/publicsuffix, returns "sh",
	// the same as a literal interpretation of the "Formal Algorithm" section
	// of https://publicsuffix.org/list/
	//
	// Together, the TestPublicSuffix and TestSlowPublicSuffix tests check that
	// the Go implementation (func PublicSuffix in list.go) and the literal
	// interpretation (func slowPublicSuffix in list_test.go) produce the same
	// (golden) results on every test case in this publicSuffixTestCases slice,
	// including some "platform.sh" style cases.
	//
	// More discussion of "the platform.sh problem" is at:
	//  - https://github.com/publicsuffix/list/issues/694
	//  - https://bugzilla.mozilla.org/show_bug.cgi?id=1124625#c6
	//  - https://wiki.mozilla.org/Public_Suffix_List/platform.sh_Problem
}
| |
| func BenchmarkPublicSuffix(b *testing.B) { |
| for i := 0; i < b.N; i++ { |
| for _, tc := range publicSuffixTestCases { |
| List.PublicSuffix(tc.domain) |
| } |
| } |
| } |
| |
| func TestPublicSuffix(t *testing.T) { |
| for _, tc := range publicSuffixTestCases { |
| gotPS, gotICANN := PublicSuffix(tc.domain) |
| if gotPS != tc.wantPS || gotICANN != tc.wantICANN { |
| t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN) |
| } |
| } |
| } |
| |
| func TestSlowPublicSuffix(t *testing.T) { |
| for _, tc := range publicSuffixTestCases { |
| gotPS, gotICANN := slowPublicSuffix(tc.domain) |
| if gotPS != tc.wantPS || gotICANN != tc.wantICANN { |
| t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN) |
| } |
| } |
| } |
| |
| func TestNumICANNRules(t *testing.T) { |
| if numICANNRules <= 0 { |
| t.Fatal("no ICANN rules") |
| } |
| if numICANNRules >= len(rules) { |
| t.Fatal("no Private rules") |
| } |
| // Check the last ICANN and first Private rules. If the underlying public |
| // suffix list changes, we may need to update these hard-coded checks. |
| if got, want := rules[numICANNRules-1], "zuerich"; got != want { |
| t.Errorf("last ICANN rule: got %q, wawnt %q", got, want) |
| } |
| if got, want := rules[numICANNRules], "cc.ua"; got != want { |
| t.Errorf("first Private rule: got %q, wawnt %q", got, want) |
| } |
| } |
| |
// slowPublicSuffixRule is one candidate rule collected by slowPublicSuffix
// while it looks for the prevailing rule for a domain.
type slowPublicSuffixRule struct {
	// ruleParts holds the rule's labels, i.e. the rule split on ".". The
	// first label may start with '*' (wildcard) or '!' (exception).
	ruleParts []string
	// icann is true when the rule's index in rules is below numICANNRules.
	// The synthetic "*" fallback rule sets it to false.
	icann bool
}
| |
| // slowPublicSuffix implements the canonical (but O(number of rules)) public |
| // suffix algorithm described at http://publicsuffix.org/list/. |
| // |
| // 1. Match domain against all rules and take note of the matching ones. |
| // 2. If no rules match, the prevailing rule is "*". |
| // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule. |
| // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels. |
| // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label. |
| // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots). |
| // 7. The registered or registrable domain is the public suffix plus one additional label. |
| // |
| // This function returns the public suffix, not the registrable domain, and so |
| // it stops after step 6. |
| func slowPublicSuffix(domain string) (string, bool) { |
| match := func(rulePart, domainPart string) bool { |
| switch rulePart[0] { |
| case '*': |
| return true |
| case '!': |
| return rulePart[1:] == domainPart |
| } |
| return rulePart == domainPart |
| } |
| |
| domainParts := strings.Split(domain, ".") |
| var matchingRules []slowPublicSuffixRule |
| |
| loop: |
| for i, rule := range rules { |
| ruleParts := strings.Split(rule, ".") |
| if len(domainParts) < len(ruleParts) { |
| continue |
| } |
| for i := range ruleParts { |
| rulePart := ruleParts[len(ruleParts)-1-i] |
| domainPart := domainParts[len(domainParts)-1-i] |
| if !match(rulePart, domainPart) { |
| continue loop |
| } |
| } |
| matchingRules = append(matchingRules, slowPublicSuffixRule{ |
| ruleParts: ruleParts, |
| icann: i < numICANNRules, |
| }) |
| } |
| if len(matchingRules) == 0 { |
| matchingRules = append(matchingRules, slowPublicSuffixRule{ |
| ruleParts: []string{"*"}, |
| icann: false, |
| }) |
| } else { |
| sort.Sort(byPriority(matchingRules)) |
| } |
| |
| prevailing := matchingRules[0] |
| if prevailing.ruleParts[0][0] == '!' { |
| prevailing.ruleParts = prevailing.ruleParts[1:] |
| } |
| if prevailing.ruleParts[0][0] == '*' { |
| replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)] |
| prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...) |
| } |
| return strings.Join(prevailing.ruleParts, "."), prevailing.icann |
| } |
| |
| type byPriority []slowPublicSuffixRule |
| |
| func (b byPriority) Len() int { return len(b) } |
| func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] } |
| func (b byPriority) Less(i, j int) bool { |
| if b[i].ruleParts[0][0] == '!' { |
| return true |
| } |
| if b[j].ruleParts[0][0] == '!' { |
| return false |
| } |
| return len(b[i].ruleParts) > len(b[j].ruleParts) |
| } |
| |
// eTLDPlusOneTestCases come from
// https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
//
// Each entry pairs a domain with the string that TestEffectiveTLDPlusOne
// expects as the first result of EffectiveTLDPlusOne. The test ignores the
// second result, so an empty want only pins the first result to "".
var eTLDPlusOneTestCases = []struct {
	domain, want string
}{
	// Empty input.
	{"", ""},
	// Unlisted TLD.
	{"example", ""},
	{"example.example", "example.example"},
	{"b.example.example", "example.example"},
	{"a.b.example.example", "example.example"},
	// TLD with only 1 rule.
	{"biz", ""},
	{"domain.biz", "domain.biz"},
	{"b.domain.biz", "domain.biz"},
	{"a.b.domain.biz", "domain.biz"},
	// TLD with some 2-level rules.
	{"com", ""},
	{"example.com", "example.com"},
	{"b.example.com", "example.com"},
	{"a.b.example.com", "example.com"},
	{"uk.com", ""},
	{"example.uk.com", "example.uk.com"},
	{"b.example.uk.com", "example.uk.com"},
	{"a.b.example.uk.com", "example.uk.com"},
	{"test.ac", "test.ac"},
	// TLD with only 1 (wildcard) rule.
	{"mm", ""},
	{"c.mm", ""},
	{"b.c.mm", "b.c.mm"},
	{"a.b.c.mm", "b.c.mm"},
	// More complex TLD.
	{"jp", ""},
	{"test.jp", "test.jp"},
	{"www.test.jp", "test.jp"},
	{"ac.jp", ""},
	{"test.ac.jp", "test.ac.jp"},
	{"www.test.ac.jp", "test.ac.jp"},
	{"kyoto.jp", ""},
	{"test.kyoto.jp", "test.kyoto.jp"},
	{"ide.kyoto.jp", ""},
	{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"c.kobe.jp", ""},
	{"b.c.kobe.jp", "b.c.kobe.jp"},
	{"a.b.c.kobe.jp", "b.c.kobe.jp"},
	{"city.kobe.jp", "city.kobe.jp"},
	{"www.city.kobe.jp", "city.kobe.jp"},
	// TLD with a wildcard rule and exceptions.
	{"ck", ""},
	{"test.ck", ""},
	{"b.test.ck", "b.test.ck"},
	{"a.b.test.ck", "b.test.ck"},
	{"www.ck", "www.ck"},
	{"www.www.ck", "www.ck"},
	// US K12.
	{"us", ""},
	{"test.us", "test.us"},
	{"www.test.us", "test.us"},
	{"ak.us", ""},
	{"test.ak.us", "test.ak.us"},
	{"www.test.ak.us", "test.ak.us"},
	{"k12.ak.us", ""},
	{"test.k12.ak.us", "test.k12.ak.us"},
	{"www.test.k12.ak.us", "test.k12.ak.us"},
	// Punycoded IDN labels.
	{"xn--85x722f.com.cn", "xn--85x722f.com.cn"},
	{"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
	{"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
	{"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"},
	{"xn--55qx5d.cn", ""},
	{"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
	{"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
	{"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"},
	{"xn--fiqs8s", ""},
}
| |
| func TestEffectiveTLDPlusOne(t *testing.T) { |
| for _, tc := range eTLDPlusOneTestCases { |
| got, _ := EffectiveTLDPlusOne(tc.domain) |
| if got != tc.want { |
| t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want) |
| } |
| } |
| } |