1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.apache.hc.client5.http.psl;
29
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.net.URL;
33 import java.nio.charset.StandardCharsets;
34 import java.util.List;
35
36 import org.junit.jupiter.api.Assertions;
37 import org.junit.jupiter.api.BeforeEach;
38 import org.junit.jupiter.api.Test;
39
40 class TestPublicSuffixMatcher {
41
42 private static final String SOURCE_FILE = "suffixlistmatcher.txt";
43 private static final String PUBLIC_SUFFIX_LIST_FILE = "org/publicsuffix/list/effective_tld_names.dat";
44
45 private PublicSuffixMatcher matcher;
46 private PublicSuffixMatcher pslMatcher;
47
48
49
50
51
52
53
54 @BeforeEach
55 void setUp() throws Exception {
56 final ClassLoader classLoader = getClass().getClassLoader();
57
58 try (InputStream in = classLoader.getResourceAsStream(SOURCE_FILE)) {
59 Assertions.assertNotNull(in, SOURCE_FILE);
60 final List<PublicSuffixList> lists = PublicSuffixListParser.INSTANCE.parseByType(new InputStreamReader(in, StandardCharsets.UTF_8));
61 matcher = new PublicSuffixMatcher(lists);
62 }
63 final URL publicSuffixListUrl = classLoader.getResource(PUBLIC_SUFFIX_LIST_FILE);
64 Assertions.assertNotNull(publicSuffixListUrl, PUBLIC_SUFFIX_LIST_FILE);
65 pslMatcher = PublicSuffixMatcherLoader.load(publicSuffixListUrl);
66 }
67
68 @Test
69 void testGetDomainRootAnyType() {
70
71 Assertions.assertEquals(null, matcher.getDomainRoot("com"));
72 Assertions.assertEquals("blah.com", matcher.getDomainRoot("blah.com"));
73 Assertions.assertEquals("foo.com", matcher.getDomainRoot("foo.com"));
74 Assertions.assertEquals(null, matcher.getDomainRoot("blah.foo.com"));
75 Assertions.assertEquals(null, matcher.getDomainRoot("booh.foo.com"));
76 Assertions.assertEquals("blah.blah.foo.com", matcher.getDomainRoot("blah.blah.foo.com"));
77
78 Assertions.assertEquals(null, matcher.getDomainRoot("kioto.jp"));
79 Assertions.assertEquals(null, matcher.getDomainRoot("tokyo.jp"));
80 Assertions.assertEquals(null, matcher.getDomainRoot("blah.tokyo.jp"));
81 Assertions.assertEquals(null, matcher.getDomainRoot("booh.tokyo.jp"));
82 Assertions.assertEquals("blah.blah.tokyo.jp", matcher.getDomainRoot("blah.blah.tokyo.jp"));
83 Assertions.assertEquals("metro.tokyo.jp", matcher.getDomainRoot("metro.tokyo.jp"));
84 Assertions.assertEquals("blah.ac.jp", matcher.getDomainRoot("blah.ac.jp"));
85 Assertions.assertEquals("blah.ac.jp", matcher.getDomainRoot("blah.blah.ac.jp"));
86 Assertions.assertEquals("metro.tokyo.jp", matcher.getDomainRoot("metro.tokyo.jp"));
87
88
89 Assertions.assertEquals("example.xx", matcher.getDomainRoot("example.XX"));
90 Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.example.XX"));
91 Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.blah.blah.example.XX"));
92 Assertions.assertEquals(null, matcher.getDomainRoot("appspot.com"));
93 Assertions.assertEquals("example.appspot.com", matcher.getDomainRoot("example.appspot.com"));
94
95 Assertions.assertNull(matcher.getDomainRoot("jp"));
96 Assertions.assertNull(matcher.getDomainRoot("ac.jp"));
97 Assertions.assertNull(matcher.getDomainRoot("any.tokyo.jp"));
98
99 Assertions.assertEquals(null, matcher.getDomainRoot("garbage"));
100 Assertions.assertEquals("garbage.garbage", matcher.getDomainRoot("garbage.garbage"));
101 Assertions.assertEquals("garbage.garbage", matcher.getDomainRoot("*.garbage.garbage"));
102 Assertions.assertEquals("garbage.garbage", matcher.getDomainRoot("*.garbage.garbage.garbage"));
103
104 Assertions.assertEquals(null, matcher.getDomainRoot("*.compute-1.amazonaws.com"));
105 Assertions.assertEquals(null, matcher.getDomainRoot("blah.compute-1.amazonaws.com"));
106 Assertions.assertEquals("blah.blah.compute-1.amazonaws.com", matcher.getDomainRoot("blah.blah.compute-1.amazonaws.com"));
107 }
108
109 @Test
110 void testGetDomainRootOnlyPRIVATE() {
111
112 Assertions.assertEquals("example.xx", matcher.getDomainRoot("example.XX", DomainType.PRIVATE));
113 Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.example.XX", DomainType.PRIVATE));
114 Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.blah.blah.example.XX", DomainType.PRIVATE));
115 Assertions.assertEquals("example.appspot.com", matcher.getDomainRoot("example.appspot.com"));
116
117 Assertions.assertNull(matcher.getDomainRoot("jp", DomainType.PRIVATE));
118 Assertions.assertNull(matcher.getDomainRoot("ac.jp", DomainType.PRIVATE));
119 Assertions.assertNull(matcher.getDomainRoot("any.tokyo.jp", DomainType.PRIVATE));
120
121 Assertions.assertNull(matcher.getDomainRoot("metro.tokyo.jp", DomainType.PRIVATE));
122 Assertions.assertNull(matcher.getDomainRoot("blah.blah.tokyo.jp", DomainType.PRIVATE));
123 Assertions.assertNull(matcher.getDomainRoot("blah.blah.ac.jp", DomainType.PRIVATE));
124
125 Assertions.assertNull(matcher.getDomainRoot("garbage", DomainType.PRIVATE));
126 Assertions.assertNull(matcher.getDomainRoot("garbage.garbage", DomainType.PRIVATE));
127 Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage", DomainType.PRIVATE));
128 Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage.garbage", DomainType.PRIVATE));
129 }
130
131 @Test
132 void testGetDomainRootOnlyICANN() {
133
134 Assertions.assertNull(matcher.getDomainRoot("example.XX", DomainType.ICANN));
135 Assertions.assertNull(matcher.getDomainRoot("www.example.XX", DomainType.ICANN));
136 Assertions.assertNull(matcher.getDomainRoot("www.blah.blah.example.XX", DomainType.ICANN));
137
138 Assertions.assertNull(matcher.getDomainRoot("xx", DomainType.ICANN));
139 Assertions.assertNull(matcher.getDomainRoot("jp", DomainType.ICANN));
140 Assertions.assertNull(matcher.getDomainRoot("ac.jp", DomainType.ICANN));
141 Assertions.assertNull(matcher.getDomainRoot("any.tokyo.jp", DomainType.ICANN));
142
143 Assertions.assertEquals("metro.tokyo.jp", matcher.getDomainRoot("metro.tokyo.jp", DomainType.ICANN));
144 Assertions.assertEquals("blah.blah.tokyo.jp", matcher.getDomainRoot("blah.blah.tokyo.jp", DomainType.ICANN));
145 Assertions.assertEquals("blah.ac.jp", matcher.getDomainRoot("blah.blah.ac.jp", DomainType.ICANN));
146
147 Assertions.assertNull(matcher.getDomainRoot("garbage", DomainType.ICANN));
148 Assertions.assertNull(matcher.getDomainRoot("garbage.garbage", DomainType.ICANN));
149 Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage", DomainType.ICANN));
150 Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage.garbage", DomainType.ICANN));
151 }
152
153 @Test
154 void testMatch() {
155 Assertions.assertTrue(matcher.matches(".jp"));
156 Assertions.assertTrue(matcher.matches(".ac.jp"));
157 Assertions.assertTrue(matcher.matches(".any.tokyo.jp"));
158 Assertions.assertTrue(matcher.matches(".xx"));
159 Assertions.assertTrue(matcher.matches(".appspot.com"));
160
161 Assertions.assertFalse(matcher.matches(".metro.tokyo.jp"));
162 }
163
164 @Test
165 void testMatchUnicode() {
166 Assertions.assertTrue(matcher.matches(".h\u00E5.no"));
167 Assertions.assertTrue(matcher.matches(".xn--h-2fa.no"));
168 Assertions.assertTrue(matcher.matches(".h\u00E5.no"));
169 Assertions.assertTrue(matcher.matches(".xn--h-2fa.no"));
170 }
171
172 private void checkPublicSuffix(final String input, final String expected) {
173 Assertions.assertEquals(expected, pslMatcher.getDomainRoot(input));
174 }
175
176
177 @Test
178 void testGetDomainRootPublicSuffixList() {
179
180 checkPublicSuffix(null, null);
181
182 checkPublicSuffix("COM", null);
183 checkPublicSuffix("example.COM", "example.com");
184 checkPublicSuffix("WwW.example.COM", "example.com");
185
186 checkPublicSuffix(".com", null);
187 checkPublicSuffix(".example", null);
188 checkPublicSuffix(".example.com", null);
189 checkPublicSuffix(".example.example", null);
190
191 checkPublicSuffix("example", null);
192 checkPublicSuffix("example.example", "example.example");
193 checkPublicSuffix("b.example.example", "example.example");
194 checkPublicSuffix("a.b.example.example", "example.example");
195
196
197
198
199
200
201 checkPublicSuffix("biz", null);
202 checkPublicSuffix("domain.biz", "domain.biz");
203 checkPublicSuffix("b.domain.biz", "domain.biz");
204 checkPublicSuffix("a.b.domain.biz", "domain.biz");
205
206 checkPublicSuffix("com", null);
207 checkPublicSuffix("example.com", "example.com");
208 checkPublicSuffix("b.example.com", "example.com");
209 checkPublicSuffix("a.b.example.com", "example.com");
210 checkPublicSuffix("uk.com", null);
211 checkPublicSuffix("example.uk.com", "example.uk.com");
212 checkPublicSuffix("b.example.uk.com", "example.uk.com");
213 checkPublicSuffix("a.b.example.uk.com", "example.uk.com");
214 checkPublicSuffix("test.ac", "test.ac");
215
216 checkPublicSuffix("mm", null);
217 checkPublicSuffix("c.mm", null);
218 checkPublicSuffix("b.c.mm", "b.c.mm");
219 checkPublicSuffix("a.b.c.mm", "b.c.mm");
220
221 checkPublicSuffix("jp", null);
222 checkPublicSuffix("test.jp", "test.jp");
223 checkPublicSuffix("www.test.jp", "test.jp");
224 checkPublicSuffix("ac.jp", null);
225 checkPublicSuffix("test.ac.jp", "test.ac.jp");
226 checkPublicSuffix("www.test.ac.jp", "test.ac.jp");
227 checkPublicSuffix("kyoto.jp", null);
228 checkPublicSuffix("test.kyoto.jp", "test.kyoto.jp");
229 checkPublicSuffix("ide.kyoto.jp", null);
230 checkPublicSuffix("b.ide.kyoto.jp", "b.ide.kyoto.jp");
231 checkPublicSuffix("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
232 checkPublicSuffix("c.kobe.jp", null);
233 checkPublicSuffix("b.c.kobe.jp", "b.c.kobe.jp");
234 checkPublicSuffix("a.b.c.kobe.jp", "b.c.kobe.jp");
235 checkPublicSuffix("city.kobe.jp", "city.kobe.jp");
236 checkPublicSuffix("www.city.kobe.jp", "city.kobe.jp");
237
238 checkPublicSuffix("ck", null);
239 checkPublicSuffix("test.ck", null);
240 checkPublicSuffix("b.test.ck", "b.test.ck");
241 checkPublicSuffix("a.b.test.ck", "b.test.ck");
242 checkPublicSuffix("www.ck", "www.ck");
243 checkPublicSuffix("www.www.ck", "www.ck");
244
245 checkPublicSuffix("us", null);
246 checkPublicSuffix("test.us", "test.us");
247 checkPublicSuffix("www.test.us", "test.us");
248 checkPublicSuffix("ak.us", null);
249 checkPublicSuffix("test.ak.us", "test.ak.us");
250 checkPublicSuffix("www.test.ak.us", "test.ak.us");
251 checkPublicSuffix("k12.ak.us", null);
252 checkPublicSuffix("test.k12.ak.us", "test.k12.ak.us");
253 checkPublicSuffix("www.test.k12.ak.us", "test.k12.ak.us");
254
255 checkPublicSuffix("食狮.com.cn", "食狮.com.cn");
256 checkPublicSuffix("食狮.公司.cn", "食狮.公司.cn");
257 checkPublicSuffix("www.食狮.公司.cn", "食狮.公司.cn");
258 checkPublicSuffix("shishi.公司.cn", "shishi.公司.cn");
259 checkPublicSuffix("公司.cn", null);
260 checkPublicSuffix("食狮.中国", "食狮.中国");
261 checkPublicSuffix("www.食狮.中国", "食狮.中国");
262 checkPublicSuffix("shishi.中国", "shishi.中国");
263 checkPublicSuffix("中国", null);
264
265 checkPublicSuffix("xn--85x722f.com.cn", "xn--85x722f.com.cn");
266 checkPublicSuffix("xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
267 checkPublicSuffix("www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
268 checkPublicSuffix("shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn");
269 checkPublicSuffix("xn--55qx5d.cn", null);
270 checkPublicSuffix("xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
271 checkPublicSuffix("www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
272 checkPublicSuffix("shishi.xn--fiqs8s", "shishi.xn--fiqs8s");
273 checkPublicSuffix("xn--fiqs8s", null);
274 }
275
276 }