1 /*
2 * $HeadURL$
3 * $Revision: 1470921 $
4 * $Date: 2013-04-23 12:42:29 +0000 (Tue, 23 Apr 2013) $
5 *
6 * ====================================================================
7 * Licensed to the Apache Software Foundation (ASF) under one
8 * or more contributor license agreements. See the NOTICE file
9 * distributed with this work for additional information
10 * regarding copyright ownership. The ASF licenses this file
11 * to you under the Apache License, Version 2.0 (the
12 * "License"); you may not use this file except in compliance
13 * with the License. You may obtain a copy of the License at
14 *
15 * http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing,
18 * software distributed under the License is distributed on an
19 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 * KIND, either express or implied. See the License for the
21 * specific language governing permissions and limitations
22 * under the License.
23 * ====================================================================
24 *
25 * This software consists of voluntary contributions made by many
26 * individuals on behalf of the Apache Software Foundation. For more
27 * information on the Apache Software Foundation, please see
28 * <http://www.apache.org/>.
29 *
30 */
31
32 package org.apache.http.impl.cookie;
33
34 import java.io.BufferedReader;
35 import java.io.IOException;
36 import java.io.Reader;
37 import java.util.ArrayList;
38 import java.util.Collection;
39
40 import org.apache.http.annotation.Immutable;
41
42 /**
43 * Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
44 * and configures a PublicSuffixFilter.
45 *
46 * @since 4.0
47 */
48 @Immutable
49 public class PublicSuffixListParser {
50 private static final int MAX_LINE_LEN = 256;
51 private final PublicSuffixFilter filter;
52
53 PublicSuffixListParser(PublicSuffixFilter filter) {
54 this.filter = filter;
55 }
56
57 /**
58 * Parses the public suffix list format.
59 * When creating the reader from the file, make sure to
60 * use the correct encoding (the original list is in UTF-8).
61 *
62 * @param list the suffix list. The caller is responsible for closing the reader.
63 * @throws IOException on error while reading from list
64 */
65 public void parse(Reader list) throws IOException {
66 Collection<String> rules = new ArrayList<String>();
67 Collection<String> exceptions = new ArrayList<String>();
68 BufferedReader r = new BufferedReader(list);
69 StringBuilder sb = new StringBuilder(256);
70 boolean more = true;
71 while (more) {
72 more = readLine(r, sb);
73 String line = sb.toString();
74 if (line.length() == 0) continue;
75 if (line.startsWith("//")) continue; //entire lines can also be commented using //
76 if (line.startsWith(".")) line = line.substring(1); // A leading dot is optional
77 // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
78 boolean isException = line.startsWith("!");
79 if (isException) line = line.substring(1);
80
81 if (isException) {
82 exceptions.add(line);
83 } else {
84 rules.add(line);
85 }
86 }
87
88 filter.setPublicSuffixes(rules);
89 filter.setExceptions(exceptions);
90 }
91
92 /**
93 *
94 * @param r
95 * @param sb
96 * @return false when the end of the stream is reached
97 * @throws IOException
98 */
99 private boolean readLine(Reader r, StringBuilder sb) throws IOException {
100 sb.setLength(0);
101 int b;
102 boolean hitWhitespace = false;
103 while ((b = r.read()) != -1) {
104 char c = (char) b;
105 if (c == '\n') break;
106 // Each line is only read up to the first whitespace
107 if (Character.isWhitespace(c)) hitWhitespace = true;
108 if (!hitWhitespace) sb.append(c);
109 if (sb.length() > MAX_LINE_LEN) throw new IOException("Line too long"); // prevent excess memory usage
110 }
111 return (b != -1);
112 }
113 }