blob: c61b998480744ea4ff9fe2a15ac6f3f7a7b5f9d2 [file] [log] [blame]
Nigel Tao67a30482012-12-12 15:58:52 +11001// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package publicsuffix
6
7import (
8 "sort"
9 "strings"
10 "testing"
11)
12
13func TestNodeLabel(t *testing.T) {
14 for i, want := range nodeLabels {
15 got := nodeLabel(uint32(i))
16 if got != want {
17 t.Errorf("%d: got %q, want %q", i, got, want)
18 }
19 }
20}
21
22func TestFind(t *testing.T) {
23 testCases := []string{
24 "",
25 "a",
26 "a0",
27 "aaaa",
28 "ao",
29 "ap",
30 "ar",
31 "aro",
32 "arp",
33 "arpa",
34 "arpaa",
35 "arpb",
36 "az",
37 "b",
38 "b0",
39 "ba",
40 "z",
41 "zu",
42 "zv",
43 "zw",
44 "zx",
45 "zy",
46 "zz",
47 "zzzz",
48 }
49 for _, tc := range testCases {
50 got := find(tc, 0, numTLD)
51 want := notFound
52 for i := uint32(0); i < numTLD; i++ {
53 if tc == nodeLabel(i) {
54 want = i
55 break
56 }
57 }
58 if got != want {
59 t.Errorf("%q: got %d, want %d", tc, got, want)
60 }
61 }
62}
63
Nigel Taob8ab5102013-01-09 22:10:50 +110064func TestICANN(t *testing.T) {
65 testCases := map[string]bool{
66 "foo.org": true,
67 "foo.co.uk": true,
68 "foo.dyndns.org": false,
69 "foo.go.dyndns.org": false,
70 "foo.blogspot.co.uk": false,
71 "foo.intranet": false,
72 }
73 for domain, want := range testCases {
74 _, got := PublicSuffix(domain)
75 if got != want {
76 t.Errorf("%q: got %v, want %v", domain, got, want)
77 }
78 }
79}
80
// publicSuffixTestCases pairs each input domain with the public suffix that
// is expected for it. Each group of cases is preceded by the list rules that
// are relevant to it.
var publicSuffixTestCases = []struct {
	domain string
	want   string
}{
	// The empty string.
	{"", ""},

	// Rules for .ao:
	//	ao
	//	ed.ao
	//	gv.ao
	//	og.ao
	//	co.ao
	//	pb.ao
	//	it.ao
	{"ao", "ao"},
	{"www.ao", "ao"},
	{"pb.ao", "pb.ao"},
	{"www.pb.ao", "pb.ao"},
	{"www.xxx.yyy.zzz.pb.ao", "pb.ao"},

	// Rules for .ar:
	//	*.ar
	//	!congresodelalengua3.ar
	//	!educ.ar
	//	!gobiernoelectronico.ar
	//	!mecon.ar
	//	!nacion.ar
	//	!nic.ar
	//	!promocion.ar
	//	!retina.ar
	//	!uba.ar
	//	blogspot.com.ar
	{"ar", "ar"},
	{"www.ar", "www.ar"},
	{"nic.ar", "ar"},
	{"www.nic.ar", "ar"},
	{"com.ar", "com.ar"},
	{"www.com.ar", "com.ar"},
	{"blogspot.com.ar", "blogspot.com.ar"},
	{"www.blogspot.com.ar", "blogspot.com.ar"},
	{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar"},
	{"logspot.com.ar", "com.ar"},
	{"zlogspot.com.ar", "com.ar"},
	{"zblogspot.com.ar", "com.ar"},

	// Rules for .arpa:
	//	e164.arpa
	//	in-addr.arpa
	//	ip6.arpa
	//	iris.arpa
	//	uri.arpa
	//	urn.arpa
	{"arpa", "arpa"},
	{"www.arpa", "arpa"},
	{"urn.arpa", "urn.arpa"},
	{"www.urn.arpa", "urn.arpa"},
	{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa"},

	// Relevant rules for {kobe,kyoto}.jp:
	//	jp
	//	*.kobe.jp
	//	!city.kobe.jp
	//	kyoto.jp
	//	ide.kyoto.jp
	{"jp", "jp"},
	{"kobe.jp", "jp"},
	{"c.kobe.jp", "c.kobe.jp"},
	{"b.c.kobe.jp", "c.kobe.jp"},
	{"a.b.c.kobe.jp", "c.kobe.jp"},
	{"city.kobe.jp", "kobe.jp"},
	{"www.city.kobe.jp", "kobe.jp"},
	{"kyoto.jp", "kyoto.jp"},
	{"test.kyoto.jp", "kyoto.jp"},
	{"ide.kyoto.jp", "ide.kyoto.jp"},
	{"b.ide.kyoto.jp", "ide.kyoto.jp"},
	{"a.b.ide.kyoto.jp", "ide.kyoto.jp"},

	// Rules for .tw:
	//	tw
	//	edu.tw
	//	gov.tw
	//	mil.tw
	//	com.tw
	//	net.tw
	//	org.tw
	//	idv.tw
	//	game.tw
	//	ebiz.tw
	//	club.tw
	//	網路.tw (xn--zf0ao64a.tw)
	//	組織.tw (xn--uc0atv.tw)
	//	商業.tw (xn--czrw28b.tw)
	//	blogspot.tw
	{"tw", "tw"},
	{"aaa.tw", "tw"},
	{"www.aaa.tw", "tw"},
	{"xn--czrw28b.aaa.tw", "tw"},
	{"edu.tw", "edu.tw"},
	{"www.edu.tw", "edu.tw"},
	{"xn--czrw28b.edu.tw", "edu.tw"},
	{"xn--czrw28b.tw", "xn--czrw28b.tw"},
	{"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
	{"xn--kpry57d.tw", "tw"},

	// Rules for .uk:
	//	*.uk
	//	*.sch.uk
	//	!bl.uk
	//	!british-library.uk
	//	!jet.uk
	//	!mod.uk
	//	!national-library-scotland.uk
	//	!nel.uk
	//	!nic.uk
	//	!nls.uk
	//	!parliament.uk
	//	blogspot.co.uk
	{"uk", "uk"},
	{"aaa.uk", "aaa.uk"},
	{"www.aaa.uk", "aaa.uk"},
	{"mod.uk", "uk"},
	{"www.mod.uk", "uk"},
	{"sch.uk", "sch.uk"},
	{"mod.sch.uk", "mod.sch.uk"},
	{"www.sch.uk", "www.sch.uk"},
	{"blogspot.co.uk", "blogspot.co.uk"},
	{"blogspot.nic.uk", "uk"},
	{"blogspot.sch.uk", "blogspot.sch.uk"},

	// Rules for .рф:
	//	рф (xn--p1ai)
	{"xn--p1ai", "xn--p1ai"},
	{"aaa.xn--p1ai", "xn--p1ai"},
	{"www.xxx.yyy.xn--p1ai", "xn--p1ai"},

	// Rules for .zw:
	//	*.zw
	{"zw", "zw"},
	{"www.zw", "www.zw"},
	{"zzz.zw", "zzz.zw"},
	{"www.zzz.zw", "zzz.zw"},
	{"www.xxx.yyy.zzz.zw", "zzz.zw"},

	// No rules exist for .nosuchtld.
	{"nosuchtld", "nosuchtld"},
	{"foo.nosuchtld", "nosuchtld"},
	{"bar.foo.nosuchtld", "nosuchtld"},
}
230
Nigel Tao0f34b772012-12-22 12:09:13 +1100231func BenchmarkPublicSuffix(b *testing.B) {
232 for i := 0; i < b.N; i++ {
233 for _, tc := range publicSuffixTestCases {
234 List.PublicSuffix(tc.domain)
235 }
236 }
237}
238
Nigel Tao67a30482012-12-12 15:58:52 +1100239func TestPublicSuffix(t *testing.T) {
240 for _, tc := range publicSuffixTestCases {
241 got := List.PublicSuffix(tc.domain)
242 if got != tc.want {
243 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
244 }
245 }
246}
247
248func TestSlowPublicSuffix(t *testing.T) {
249 for _, tc := range publicSuffixTestCases {
250 got := slowPublicSuffix(tc.domain)
251 if got != tc.want {
252 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
253 }
254 }
255}
256
257// slowPublicSuffix implements the canonical (but O(number of rules)) public
258// suffix algorithm described at http://publicsuffix.org/list/.
259//
260// 1. Match domain against all rules and take note of the matching ones.
261// 2. If no rules match, the prevailing rule is "*".
262// 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
263// 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
264// 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
265// 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
266// 7. The registered or registrable domain is the public suffix plus one additional label.
267//
268// This function returns the public suffix, not the registrable domain, and so
269// it stops after step 6.
270func slowPublicSuffix(domain string) string {
271 match := func(rulePart, domainPart string) bool {
272 switch rulePart[0] {
273 case '*':
274 return true
275 case '!':
276 return rulePart[1:] == domainPart
277 }
278 return rulePart == domainPart
279 }
280
281 domainParts := strings.Split(domain, ".")
282 var matchingRules [][]string
283
284loop:
285 for _, rule := range rules {
286 ruleParts := strings.Split(rule, ".")
287 if len(domainParts) < len(ruleParts) {
288 continue
289 }
290 for i := range ruleParts {
291 rulePart := ruleParts[len(ruleParts)-1-i]
292 domainPart := domainParts[len(domainParts)-1-i]
293 if !match(rulePart, domainPart) {
294 continue loop
295 }
296 }
297 matchingRules = append(matchingRules, ruleParts)
298 }
299 if len(matchingRules) == 0 {
300 matchingRules = append(matchingRules, []string{"*"})
301 } else {
302 sort.Sort(byPriority(matchingRules))
303 }
304 prevailing := matchingRules[0]
305 if prevailing[0][0] == '!' {
306 prevailing = prevailing[1:]
307 }
308 if prevailing[0][0] == '*' {
309 replaced := domainParts[len(domainParts)-len(prevailing)]
310 prevailing = append([]string{replaced}, prevailing[1:]...)
311 }
312 return strings.Join(prevailing, ".")
313}
314
// byPriority implements sort.Interface over a set of rules, each split into
// its dot-separated labels, so that the prevailing rule sorts first:
// exception rules (whose first label starts with '!') precede all other
// rules, and otherwise rules with more labels precede rules with fewer.
type byPriority [][]string

// Len returns the number of rules.
func (b byPriority) Len() int { return len(b) }

// Swap exchanges the rules at indexes i and j.
func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

// Less reports whether the rule at index i has strictly higher priority than
// the rule at index j.
//
// An exception rule outranks a non-exception rule. When both rules are
// exceptions, or neither is, the one with more labels ranks higher. Treating
// the "both exceptions" case this way keeps Less irreflexive and asymmetric,
// as sort.Interface requires; unconditionally returning true for an
// exception at i would make Less(i, i) true.
func (b byPriority) Less(i, j int) bool {
	iException := b[i][0][0] == '!'
	jException := b[j][0][0] == '!'
	if iException != jException {
		return iException
	}
	return len(b[i]) > len(b[j])
}
328
// eTLDPlusOneTestCases pairs each input domain with its expected eTLD+1. An
// empty want string marks an input for which no eTLD+1 is expected. The
// cases are taken from
// http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt
var eTLDPlusOneTestCases = []struct {
	domain string
	want   string
}{
	// The empty input.
	{"", ""},
	// A TLD that is not listed.
	{"example", ""},
	{"example.example", "example.example"},
	{"b.example.example", "example.example"},
	{"a.b.example.example", "example.example"},
	// A TLD that has exactly one rule.
	{"biz", ""},
	{"domain.biz", "domain.biz"},
	{"b.domain.biz", "domain.biz"},
	{"a.b.domain.biz", "domain.biz"},
	// A TLD that has some two-level rules.
	{"com", ""},
	{"example.com", "example.com"},
	{"b.example.com", "example.com"},
	{"a.b.example.com", "example.com"},
	{"uk.com", ""},
	{"example.uk.com", "example.uk.com"},
	{"b.example.uk.com", "example.uk.com"},
	{"a.b.example.uk.com", "example.uk.com"},
	{"test.ac", "test.ac"},
	// A TLD whose only rule is a wildcard.
	{"cy", ""},
	{"c.cy", ""},
	{"b.c.cy", "b.c.cy"},
	{"a.b.c.cy", "b.c.cy"},
	// A more complex TLD.
	{"jp", ""},
	{"test.jp", "test.jp"},
	{"www.test.jp", "test.jp"},
	{"ac.jp", ""},
	{"test.ac.jp", "test.ac.jp"},
	{"www.test.ac.jp", "test.ac.jp"},
	{"kyoto.jp", ""},
	{"test.kyoto.jp", "test.kyoto.jp"},
	{"ide.kyoto.jp", ""},
	{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"c.kobe.jp", ""},
	{"b.c.kobe.jp", "b.c.kobe.jp"},
	{"a.b.c.kobe.jp", "b.c.kobe.jp"},
	{"city.kobe.jp", "city.kobe.jp"},
	{"www.city.kobe.jp", "city.kobe.jp"},
	// A TLD that has a wildcard rule plus exceptions.
	{"om", ""},
	{"test.om", ""},
	{"b.test.om", "b.test.om"},
	{"a.b.test.om", "b.test.om"},
	{"songfest.om", "songfest.om"},
	{"www.songfest.om", "songfest.om"},
	// US K12.
	{"us", ""},
	{"test.us", "test.us"},
	{"www.test.us", "test.us"},
	{"ak.us", ""},
	{"test.ak.us", "test.ak.us"},
	{"www.test.ak.us", "test.ak.us"},
	{"k12.ak.us", ""},
	{"test.k12.ak.us", "test.k12.ak.us"},
	{"www.test.k12.ak.us", "test.k12.ak.us"},
}
396
397func TestEffectiveTLDPlusOne(t *testing.T) {
398 for _, tc := range eTLDPlusOneTestCases {
399 got, _ := EffectiveTLDPlusOne(tc.domain)
400 if got != tc.want {
401 t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
402 }
403 }
404}