/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */
27  package org.apache.http.conn.util;
28  
29  import java.io.BufferedReader;
30  import java.io.IOException;
31  import java.io.Reader;
32  import java.util.ArrayList;
33  import java.util.List;
34  
35  import org.apache.http.annotation.Contract;
36  import org.apache.http.annotation.ThreadingBehavior;
37  
38  /**
39   * Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
40   * and configures a PublicSuffixFilter.
41   *
42   * @since 4.4
43   */
44  @Contract(threading = ThreadingBehavior.IMMUTABLE)
45  public final class PublicSuffixListParser {
46  
47      public PublicSuffixListParser() {
48      }
49  
50      /**
51       * Parses the public suffix list format.
52       * <p>
53       * When creating the reader from the file, make sure to use the correct encoding
54       * (the original list is in UTF-8).
55       *
56       * @param reader the data reader. The caller is responsible for closing the reader.
57       * @throws java.io.IOException on error while reading from list
58       */
59      public PublicSuffixList parse(final Reader reader) throws IOException {
60          final List<String> rules = new ArrayList<String>();
61          final List<String> exceptions = new ArrayList<String>();
62          final BufferedReader r = new BufferedReader(reader);
63  
64          String line;
65          while ((line = r.readLine()) != null) {
66              if (line.isEmpty()) {
67                  continue;
68              }
69              if (line.startsWith("//")) {
70                  continue; //entire lines can also be commented using //
71              }
72              if (line.startsWith(".")) {
73                  line = line.substring(1); // A leading dot is optional
74              }
75              // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
76              final boolean isException = line.startsWith("!");
77              if (isException) {
78                  line = line.substring(1);
79              }
80  
81              if (isException) {
82                  exceptions.add(line);
83              } else {
84                  rules.add(line);
85              }
86          }
87          return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions);
88      }
89  
90      /**
91       * Parses the public suffix list format by domain type (currently supported ICANN and PRIVATE).
92       * <p>
93       * When creating the reader from the file, make sure to use the correct encoding
94       * (the original list is in UTF-8).
95       *
96       * @param reader the data reader. The caller is responsible for closing the reader.
97       * @throws java.io.IOException on error while reading from list
98       *
99       * @since 4.5
100      */
101     public List<PublicSuffixList> parseByType(final Reader reader) throws IOException {
102         final List<PublicSuffixList> result = new ArrayList<PublicSuffixList>(2);
103 
104         final BufferedReader r = new BufferedReader(reader);
105         final StringBuilder sb = new StringBuilder(256);
106 
107         DomainType domainType = null;
108         List<String> rules = null;
109         List<String> exceptions = null;
110         String line;
111         while ((line = r.readLine()) != null) {
112             if (line.isEmpty()) {
113                 continue;
114             }
115             if (line.startsWith("//")) {
116 
117                 if (domainType == null) {
118                     if (line.contains("===BEGIN ICANN DOMAINS===")) {
119                         domainType = DomainType.ICANN;
120                     } else if (line.contains("===BEGIN PRIVATE DOMAINS===")) {
121                         domainType = DomainType.PRIVATE;
122                     }
123                 } else {
124                     if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
125                         if (rules != null) {
126                             result.add(new PublicSuffixList(domainType, rules, exceptions));
127                         }
128                         domainType = null;
129                         rules = null;
130                         exceptions = null;
131                     }
132                 }
133 
134                 continue; //entire lines can also be commented using //
135             }
136             if (domainType == null) {
137                 continue;
138             }
139 
140             if (line.startsWith(".")) {
141                 line = line.substring(1); // A leading dot is optional
142             }
143             // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
144             final boolean isException = line.startsWith("!");
145             if (isException) {
146                 line = line.substring(1);
147             }
148 
149             if (isException) {
150                 if (exceptions == null) {
151                     exceptions = new ArrayList<String>();
152                 }
153                 exceptions.add(line);
154             } else {
155                 if (rules == null) {
156                     rules = new ArrayList<String>();
157                 }
158                 rules.add(line);
159             }
160         }
161         return result;
162     }
163 
164 }