View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.client5.http.psl;
29  
30  import java.io.InputStream;
31  import java.io.InputStreamReader;
32  import java.nio.charset.StandardCharsets;
33  import java.util.List;
34  
35  import org.junit.jupiter.api.Assertions;
36  import org.junit.jupiter.api.BeforeEach;
37  import org.junit.jupiter.api.Test;
38  
39  class TestPublicSuffixMatcher {
40  
41      private static final String SOURCE_FILE = "suffixlistmatcher.txt";
42  
43      private PublicSuffixMatcher matcher;
44      private PublicSuffixMatcher pslMatcher;
45  
46      @BeforeEach
47      void setUp() throws Exception {
48          final ClassLoader classLoader = getClass().getClassLoader();
49          // Create a matcher using a custom crafted public suffix list file
50          try (InputStream in = classLoader.getResourceAsStream(SOURCE_FILE)) {
51              Assertions.assertNotNull(in, SOURCE_FILE);
52              final List<PublicSuffixList> lists = PublicSuffixListParser.INSTANCE.parseByType(new InputStreamReader(in, StandardCharsets.UTF_8));
53              matcher = new PublicSuffixMatcher(lists);
54          }
55          // Create a matcher using the public suffix list file provided by publicsuffix.org (Mozilla).
56          pslMatcher = PublicSuffixMatcherLoader.getDefault();
57      }
58  
59      @Test
60      void testGetDomainRootAnyType() {
61          // ICANN
62          Assertions.assertEquals(null, matcher.getDomainRoot("com"));
63          Assertions.assertEquals("blah.com", matcher.getDomainRoot("blah.com"));
64          Assertions.assertEquals("foo.com", matcher.getDomainRoot("foo.com"));
65          Assertions.assertEquals(null, matcher.getDomainRoot("blah.foo.com"));
66          Assertions.assertEquals(null, matcher.getDomainRoot("booh.foo.com"));
67          Assertions.assertEquals("blah.blah.foo.com", matcher.getDomainRoot("blah.blah.foo.com"));
68  
69          Assertions.assertEquals(null, matcher.getDomainRoot("kioto.jp"));
70          Assertions.assertEquals(null, matcher.getDomainRoot("tokyo.jp"));
71          Assertions.assertEquals(null, matcher.getDomainRoot("blah.tokyo.jp"));
72          Assertions.assertEquals(null, matcher.getDomainRoot("booh.tokyo.jp"));
73          Assertions.assertEquals("blah.blah.tokyo.jp", matcher.getDomainRoot("blah.blah.tokyo.jp"));
74          Assertions.assertEquals("metro.tokyo.jp", matcher.getDomainRoot("metro.tokyo.jp"));
75          Assertions.assertEquals("blah.ac.jp", matcher.getDomainRoot("blah.ac.jp"));
76          Assertions.assertEquals("blah.ac.jp", matcher.getDomainRoot("blah.blah.ac.jp"));
77          Assertions.assertEquals("metro.tokyo.jp", matcher.getDomainRoot("metro.tokyo.jp"));
78  
79          // Private
80          Assertions.assertEquals("example.xx", matcher.getDomainRoot("example.XX"));
81          Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.example.XX"));
82          Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.blah.blah.example.XX"));
83          Assertions.assertEquals(null, matcher.getDomainRoot("appspot.com"));
84          Assertions.assertEquals("example.appspot.com", matcher.getDomainRoot("example.appspot.com"));
85          Assertions.assertEquals(null, matcher.getDomainRoot("s3.amazonaws.com"));
86          Assertions.assertEquals(null, matcher.getDomainRoot("blah.s3.amazonaws.com"));
87          // Too short
88          Assertions.assertNull(matcher.getDomainRoot("jp"));
89          Assertions.assertNull(matcher.getDomainRoot("ac.jp"));
90          Assertions.assertNull(matcher.getDomainRoot("any.tokyo.jp"));
91          // Unknown
92          Assertions.assertEquals(null, matcher.getDomainRoot("garbage"));
93          Assertions.assertEquals("garbage.garbage", matcher.getDomainRoot("garbage.garbage"));
94          Assertions.assertEquals("garbage.garbage", matcher.getDomainRoot("*.garbage.garbage"));
95          Assertions.assertEquals("garbage.garbage", matcher.getDomainRoot("*.garbage.garbage.garbage"));
96  
97          Assertions.assertEquals(null, matcher.getDomainRoot("*.compute-1.amazonaws.com"));
98          Assertions.assertEquals(null, matcher.getDomainRoot("blah.compute-1.amazonaws.com"));
99          Assertions.assertEquals("blah.blah.compute-1.amazonaws.com", matcher.getDomainRoot("blah.blah.compute-1.amazonaws.com"));
100     }
101 
102     @Test
103     void testGetDomainRootOnlyPRIVATE() {
104         // Private
105         Assertions.assertEquals("example.xx", matcher.getDomainRoot("example.XX", DomainType.PRIVATE));
106         Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.example.XX", DomainType.PRIVATE));
107         Assertions.assertEquals("example.xx", matcher.getDomainRoot("www.blah.blah.example.XX", DomainType.PRIVATE));
108         Assertions.assertEquals("example.appspot.com", matcher.getDomainRoot("example.appspot.com"));
109         // Too short
110         Assertions.assertNull(matcher.getDomainRoot("jp", DomainType.PRIVATE));
111         Assertions.assertNull(matcher.getDomainRoot("ac.jp", DomainType.PRIVATE));
112         Assertions.assertNull(matcher.getDomainRoot("any.tokyo.jp", DomainType.PRIVATE));
113         // ICANN
114         Assertions.assertNull(matcher.getDomainRoot("metro.tokyo.jp", DomainType.PRIVATE));
115         Assertions.assertNull(matcher.getDomainRoot("blah.blah.tokyo.jp", DomainType.PRIVATE));
116         Assertions.assertNull(matcher.getDomainRoot("blah.blah.ac.jp", DomainType.PRIVATE));
117         // Unknown
118         Assertions.assertNull(matcher.getDomainRoot("garbage", DomainType.PRIVATE));
119         Assertions.assertNull(matcher.getDomainRoot("garbage.garbage", DomainType.PRIVATE));
120         Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage", DomainType.PRIVATE));
121         Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage.garbage", DomainType.PRIVATE));
122         Assertions.assertNull(matcher.getDomainRoot("s3.amazonaws.com"));
123         Assertions.assertNull(matcher.getDomainRoot("blah.s3.amazonaws.com"));
124     }
125 
126     @Test
127     void testGetDomainRootOnlyICANN() {
128         // Private
129         Assertions.assertNull(matcher.getDomainRoot("example.XX", DomainType.ICANN));
130         Assertions.assertNull(matcher.getDomainRoot("www.example.XX", DomainType.ICANN));
131         Assertions.assertNull(matcher.getDomainRoot("www.blah.blah.example.XX", DomainType.ICANN));
132         // Too short
133         Assertions.assertNull(matcher.getDomainRoot("xx", DomainType.ICANN));
134         Assertions.assertNull(matcher.getDomainRoot("jp", DomainType.ICANN));
135         Assertions.assertNull(matcher.getDomainRoot("ac.jp", DomainType.ICANN));
136         Assertions.assertNull(matcher.getDomainRoot("any.tokyo.jp", DomainType.ICANN));
137         // ICANN
138         Assertions.assertEquals("metro.tokyo.jp", matcher.getDomainRoot("metro.tokyo.jp", DomainType.ICANN));
139         Assertions.assertEquals("blah.blah.tokyo.jp", matcher.getDomainRoot("blah.blah.tokyo.jp", DomainType.ICANN));
140         Assertions.assertEquals("blah.ac.jp", matcher.getDomainRoot("blah.blah.ac.jp", DomainType.ICANN));
141         // Unknown
142         Assertions.assertNull(matcher.getDomainRoot("garbage", DomainType.ICANN));
143         Assertions.assertNull(matcher.getDomainRoot("garbage.garbage", DomainType.ICANN));
144         Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage", DomainType.ICANN));
145         Assertions.assertNull(matcher.getDomainRoot("*.garbage.garbage.garbage", DomainType.ICANN));
146     }
147 
148     @Test
149     void testMaySetCookies() {
150         Assertions.assertTrue(matcher.verify("foo.com"));
151 
152         Assertions.assertFalse(matcher.verify("bar.foo.com"));
153         Assertions.assertTrue(matcher.verify("example.bar.foo.com"));
154 
155         Assertions.assertTrue(matcher.verify("foo.bar.jp"));
156         Assertions.assertFalse(matcher.verify("bar.jp"));
157 
158         Assertions.assertTrue(matcher.verify("foo.bar.hokkaido.jp"));
159         Assertions.assertFalse(matcher.verify("bar.hokkaido.jp"));
160 
161         Assertions.assertTrue(matcher.verify("foo.bar.tokyo.jp"));
162         Assertions.assertFalse(matcher.verify("bar.tokyo.jp"));
163 
164         Assertions.assertTrue(matcher.verify("pref.hokkaido.jp")); // exception from a wildcard rule
165         Assertions.assertTrue(matcher.verify("metro.tokyo.jp")); // exception from a wildcard rule
166     }
167 
168     @Test
169     void testVerifyPrivate() {
170         Assertions.assertTrue(matcher.verify("s3.amazonaws.com"));
171         Assertions.assertTrue(matcher.verify("blah.s3.amazonaws.com"));
172         Assertions.assertTrue(matcher.verify("blah.xxx.uk"));
173     }
174 
175     @Test
176     void testMatch() {
177         Assertions.assertTrue(matcher.matches(".jp"));
178         Assertions.assertTrue(matcher.matches(".ac.jp"));
179         Assertions.assertTrue(matcher.matches(".any.tokyo.jp"));
180         Assertions.assertTrue(matcher.matches(".xx"));
181         Assertions.assertTrue(matcher.matches(".appspot.com"));
182         // exception
183         Assertions.assertFalse(matcher.matches(".metro.tokyo.jp"));
184     }
185 
186     @Test
187     void testMatchUnicode() {
188         Assertions.assertTrue(matcher.matches(".h\u00E5.no")); // \u00E5 is <aring>
189         Assertions.assertTrue(matcher.matches(".xn--h-2fa.no"));
190         Assertions.assertTrue(matcher.matches(".h\u00E5.no"));
191         Assertions.assertTrue(matcher.matches(".xn--h-2fa.no"));
192     }
193 
194     private void checkPublicSuffix(final String input, final String expected) {
195         Assertions.assertEquals(expected, pslMatcher.getDomainRoot(input));
196     }
197 
198     //see https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
199     @Test
200     void testGetDomainRootPublicSuffixList() {
201          // null input.
202         checkPublicSuffix(null, null);
203         // Mixed case.
204         checkPublicSuffix("COM", null);
205         checkPublicSuffix("example.COM", "example.com");
206         checkPublicSuffix("WwW.example.COM", "example.com");
207         // Leading dot.
208         checkPublicSuffix(".com", null);
209         checkPublicSuffix(".example", null);
210         checkPublicSuffix(".example.com", null);
211         checkPublicSuffix(".example.example", null);
212         // Unlisted TLD.
213         checkPublicSuffix("example", null);
214         checkPublicSuffix("example.example", "example.example");
215         checkPublicSuffix("b.example.example", "example.example");
216         checkPublicSuffix("a.b.example.example", "example.example");
217         // Listed, but non-Internet, TLD.
218         //checkPublicSuffix("local", null);
219         //checkPublicSuffix("example.local", null);
220         //checkPublicSuffix("b.example.local", null);
221         //checkPublicSuffix("a.b.example.local", null);
222         // TLD with only 1 rule.
223         checkPublicSuffix("biz", null);
224         checkPublicSuffix("domain.biz", "domain.biz");
225         checkPublicSuffix("b.domain.biz", "domain.biz");
226         checkPublicSuffix("a.b.domain.biz", "domain.biz");
227         // TLD with some 2-level rules.
228         checkPublicSuffix("com", null);
229         checkPublicSuffix("example.com", "example.com");
230         checkPublicSuffix("b.example.com", "example.com");
231         checkPublicSuffix("a.b.example.com", "example.com");
232         checkPublicSuffix("uk.com", null);
233         checkPublicSuffix("example.uk.com", "example.uk.com");
234         checkPublicSuffix("b.example.uk.com", "example.uk.com");
235         checkPublicSuffix("a.b.example.uk.com", "example.uk.com");
236         checkPublicSuffix("test.ac", "test.ac");
237         // TLD with only 1 (wildcard) rule.
238         checkPublicSuffix("mm", null);
239         checkPublicSuffix("c.mm", null);
240         checkPublicSuffix("b.c.mm", "b.c.mm");
241         checkPublicSuffix("a.b.c.mm", "b.c.mm");
242         // More complex TLD.
243         checkPublicSuffix("jp", null);
244         checkPublicSuffix("test.jp", "test.jp");
245         checkPublicSuffix("www.test.jp", "test.jp");
246         checkPublicSuffix("ac.jp", null);
247         checkPublicSuffix("test.ac.jp", "test.ac.jp");
248         checkPublicSuffix("www.test.ac.jp", "test.ac.jp");
249         checkPublicSuffix("kyoto.jp", null);
250         checkPublicSuffix("test.kyoto.jp", "test.kyoto.jp");
251         checkPublicSuffix("ide.kyoto.jp", null);
252         checkPublicSuffix("b.ide.kyoto.jp", "b.ide.kyoto.jp");
253         checkPublicSuffix("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
254         checkPublicSuffix("c.kobe.jp", null);
255         checkPublicSuffix("b.c.kobe.jp", "b.c.kobe.jp");
256         checkPublicSuffix("a.b.c.kobe.jp", "b.c.kobe.jp");
257         checkPublicSuffix("city.kobe.jp", "city.kobe.jp");
258         checkPublicSuffix("www.city.kobe.jp", "city.kobe.jp");
259         // TLD with a wildcard rule and exceptions.
260         checkPublicSuffix("ck", null);
261         checkPublicSuffix("test.ck", null);
262         checkPublicSuffix("b.test.ck", "b.test.ck");
263         checkPublicSuffix("a.b.test.ck", "b.test.ck");
264         checkPublicSuffix("www.ck", "www.ck");
265         checkPublicSuffix("www.www.ck", "www.ck");
266         // US K12.
267         checkPublicSuffix("us", null);
268         checkPublicSuffix("test.us", "test.us");
269         checkPublicSuffix("www.test.us", "test.us");
270         checkPublicSuffix("ak.us", null);
271         checkPublicSuffix("test.ak.us", "test.ak.us");
272         checkPublicSuffix("www.test.ak.us", "test.ak.us");
273         checkPublicSuffix("k12.ak.us", null);
274         checkPublicSuffix("test.k12.ak.us", "test.k12.ak.us");
275         checkPublicSuffix("www.test.k12.ak.us", "test.k12.ak.us");
276         // IDN labels.
277         checkPublicSuffix("食狮.com.cn", "食狮.com.cn");
278         checkPublicSuffix("食狮.公司.cn", "食狮.公司.cn");
279         checkPublicSuffix("www.食狮.公司.cn", "食狮.公司.cn");
280         checkPublicSuffix("shishi.公司.cn", "shishi.公司.cn");
281         checkPublicSuffix("公司.cn", null);
282         checkPublicSuffix("食狮.中国", "食狮.中国");
283         checkPublicSuffix("www.食狮.中国", "食狮.中国");
284         checkPublicSuffix("shishi.中国", "shishi.中国");
285         checkPublicSuffix("中国", null);
286         // Same as above, but punycoded.
287         checkPublicSuffix("xn--85x722f.com.cn", "xn--85x722f.com.cn");
288         checkPublicSuffix("xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
289         checkPublicSuffix("www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn");
290         checkPublicSuffix("shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn");
291         checkPublicSuffix("xn--55qx5d.cn", null);
292         checkPublicSuffix("xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
293         checkPublicSuffix("www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s");
294         checkPublicSuffix("shishi.xn--fiqs8s", "shishi.xn--fiqs8s");
295         checkPublicSuffix("xn--fiqs8s", null);
296     }
297 
298 }