View Javadoc

1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.impl.cookie;
28  
29  import java.io.BufferedReader;
30  import java.io.IOException;
31  import java.io.Reader;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  
35  import org.apache.http.annotation.Immutable;
36  
37  /**
38   * Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
39   * and configures a PublicSuffixFilter.
40   *
41   * @since 4.0
42   */
43  @Immutable
44  public class PublicSuffixListParser {
45      private static final int MAX_LINE_LEN = 256;
46      private final PublicSuffixFilter filter;
47  
48      PublicSuffixListParser(final PublicSuffixFilter filter) {
49          this.filter = filter;
50      }
51  
52      /**
53       * Parses the public suffix list format.
54       * When creating the reader from the file, make sure to
55       * use the correct encoding (the original list is in UTF-8).
56       *
57       * @param list the suffix list. The caller is responsible for closing the reader.
58       * @throws IOException on error while reading from list
59       */
60      public void parse(final Reader list) throws IOException {
61          final Collection<String> rules = new ArrayList<String>();
62          final Collection<String> exceptions = new ArrayList<String>();
63          final BufferedReader r = new BufferedReader(list);
64          final StringBuilder sb = new StringBuilder(256);
65          boolean more = true;
66          while (more) {
67              more = readLine(r, sb);
68              String line = sb.toString();
69              if (line.length() == 0) {
70                  continue;
71              }
72              if (line.startsWith("//"))
73               {
74                  continue; //entire lines can also be commented using //
75              }
76              if (line.startsWith("."))
77               {
78                  line = line.substring(1); // A leading dot is optional
79              }
80              // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
81              final boolean isException = line.startsWith("!");
82              if (isException) {
83                  line = line.substring(1);
84              }
85  
86              if (isException) {
87                  exceptions.add(line);
88              } else {
89                  rules.add(line);
90              }
91          }
92  
93          filter.setPublicSuffixes(rules);
94          filter.setExceptions(exceptions);
95      }
96  
97      /**
98       *
99       * @param r
100      * @param sb
101      * @return false when the end of the stream is reached
102      * @throws IOException
103      */
104     private boolean readLine(final Reader r, final StringBuilder sb) throws IOException {
105         sb.setLength(0);
106         int b;
107         boolean hitWhitespace = false;
108         while ((b = r.read()) != -1) {
109             final char c = (char) b;
110             if (c == '\n') {
111                 break;
112             }
113             // Each line is only read up to the first whitespace
114             if (Character.isWhitespace(c)) {
115                 hitWhitespace = true;
116             }
117             if (!hitWhitespace) {
118                 sb.append(c);
119             }
120             if (sb.length() > MAX_LINE_LEN)
121              {
122                 throw new IOException("Line too long"); // prevent excess memory usage
123             }
124         }
125         return (b != -1);
126     }
127 }