View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.core5.http.message;
29  
30  import java.util.BitSet;
31  
32  import org.apache.hc.core5.annotation.Contract;
33  import org.apache.hc.core5.annotation.ThreadingBehavior;
34  import org.apache.hc.core5.util.Args;
35  
36  /**
37   * Low level parser for header field elements. The parsing routines of this class are designed
38   * to produce near zero intermediate garbage and make no intermediate copies of input data.
39   * <p>
40   * This class is immutable and thread safe.
41   *
42   * @since 4.4
43   */
44  @Contract(threading = ThreadingBehavior.IMMUTABLE)
45  public class TokenParser {
46  
47      public static BitSet INIT_BITSET(final int ... b) {
48          final BitSet bitset = new BitSet();
49          for (final int aB : b) {
50              bitset.set(aB);
51          }
52          return bitset;
53      }
54  
55      /** US-ASCII CR, carriage return (13) */
56      public static final char CR = '\r';
57  
58      /** US-ASCII LF, line feed (10) */
59      public static final char LF = '\n';
60  
61      /** US-ASCII SP, space (32) */
62      public static final char SP = ' ';
63  
64      /** US-ASCII HT, horizontal-tab (9) */
65      public static final char HT = '\t';
66  
67      /** Double quote */
68      public static final char DQUOTE = '\"';
69  
70      /** Backward slash / escape character */
71      public static final char ESCAPE = '\\';
72  
73      public static boolean isWhitespace(final char ch) {
74          return ch == SP || ch == HT || ch == CR || ch == LF;
75      }
76  
77      public static final TokenParser INSTANCE = new TokenParser();
78  
79      /**
80       * Extracts from the sequence of chars a token terminated with any of the given delimiters
81       * discarding semantically insignificant whitespace characters.
82       *
83       * @param buf buffer with the sequence of chars to be parsed
84       * @param cursor defines the bounds and current position of the buffer
85       * @param delimiters set of delimiting characters. Can be {@code null} if the token
86       *  is not delimited by any character.
87       */
88      public String parseToken(final CharSequence buf, final ParserCursor cursor, final BitSet delimiters) {
89          Args.notNull(buf, "Char sequence");
90          Args.notNull(cursor, "Parser cursor");
91          final StringBuilder dst = new StringBuilder();
92          boolean whitespace = false;
93          while (!cursor.atEnd()) {
94              final char current = buf.charAt(cursor.getPos());
95              if (delimiters != null && delimiters.get(current)) {
96                  break;
97              } else if (isWhitespace(current)) {
98                  skipWhiteSpace(buf, cursor);
99                  whitespace = true;
100             } else {
101                 if (whitespace && dst.length() > 0) {
102                     dst.append(' ');
103                 }
104                 copyContent(buf, cursor, delimiters, dst);
105                 whitespace = false;
106             }
107         }
108         return dst.toString();
109     }
110 
111     /**
112      * Extracts from the sequence of chars a value which can be enclosed in quote marks and
113      * terminated with any of the given delimiters discarding semantically insignificant
114      * whitespace characters.
115      *
116      * @param buf buffer with the sequence of chars to be parsed
117      * @param cursor defines the bounds and current position of the buffer
118      * @param delimiters set of delimiting characters. Can be {@code null} if the value
119      *  is not delimited by any character.
120      */
121     public String parseValue(final CharSequence buf, final ParserCursor cursor, final BitSet delimiters) {
122         Args.notNull(buf, "Char sequence");
123         Args.notNull(cursor, "Parser cursor");
124         final StringBuilder dst = new StringBuilder();
125         boolean whitespace = false;
126         while (!cursor.atEnd()) {
127             final char current = buf.charAt(cursor.getPos());
128             if (delimiters != null && delimiters.get(current)) {
129                 break;
130             } else if (isWhitespace(current)) {
131                 skipWhiteSpace(buf, cursor);
132                 whitespace = true;
133             } else if (current == DQUOTE) {
134                 if (whitespace && dst.length() > 0) {
135                     dst.append(' ');
136                 }
137                 copyQuotedContent(buf, cursor, dst);
138                 whitespace = false;
139             } else {
140                 if (whitespace && dst.length() > 0) {
141                     dst.append(' ');
142                 }
143                 copyUnquotedContent(buf, cursor, delimiters, dst);
144                 whitespace = false;
145             }
146         }
147         return dst.toString();
148     }
149 
150     /**
151      * Skips semantically insignificant whitespace characters and moves the cursor to the closest
152      * non-whitespace character.
153      *
154      * @param buf buffer with the sequence of chars to be parsed
155      * @param cursor defines the bounds and current position of the buffer
156      */
157     public void skipWhiteSpace(final CharSequence buf, final ParserCursor cursor) {
158         Args.notNull(buf, "Char sequence");
159         Args.notNull(cursor, "Parser cursor");
160         int pos = cursor.getPos();
161         final int indexFrom = cursor.getPos();
162         final int indexTo = cursor.getUpperBound();
163         for (int i = indexFrom; i < indexTo; i++) {
164             final char current = buf.charAt(i);
165             if (!isWhitespace(current)) {
166                 break;
167             }
168             pos++;
169         }
170         cursor.updatePos(pos);
171     }
172 
173     /**
174      * Transfers content into the destination buffer until a whitespace character or any of
175      * the given delimiters is encountered.
176      *
177      * @param buf buffer with the sequence of chars to be parsed
178      * @param cursor defines the bounds and current position of the buffer
179      * @param delimiters set of delimiting characters. Can be {@code null} if the value
180      *  is delimited by a whitespace only.
181      * @param dst destination buffer
182      */
183     public void copyContent(final CharSequence buf, final ParserCursor cursor, final BitSet delimiters,
184             final StringBuilder dst) {
185         Args.notNull(buf, "Char sequence");
186         Args.notNull(cursor, "Parser cursor");
187         Args.notNull(dst, "String builder");
188         int pos = cursor.getPos();
189         final int indexFrom = cursor.getPos();
190         final int indexTo = cursor.getUpperBound();
191         for (int i = indexFrom; i < indexTo; i++) {
192             final char current = buf.charAt(i);
193             if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
194                 break;
195             }
196             pos++;
197             dst.append(current);
198         }
199         cursor.updatePos(pos);
200     }
201 
202     /**
203      * Transfers content into the destination buffer until a whitespace character,  a quote,
204      * or any of the given delimiters is encountered.
205      *
206      * @param buf buffer with the sequence of chars to be parsed
207      * @param cursor defines the bounds and current position of the buffer
208      * @param delimiters set of delimiting characters. Can be {@code null} if the value
209      *  is delimited by a whitespace or a quote only.
210      * @param dst destination buffer
211      */
212     public void copyUnquotedContent(final CharSequence buf, final ParserCursor cursor,
213             final BitSet delimiters, final StringBuilder dst) {
214         Args.notNull(buf, "Char sequence");
215         Args.notNull(cursor, "Parser cursor");
216         Args.notNull(dst, "String builder");
217         int pos = cursor.getPos();
218         final int indexFrom = cursor.getPos();
219         final int indexTo = cursor.getUpperBound();
220         for (int i = indexFrom; i < indexTo; i++) {
221             final char current = buf.charAt(i);
222             if ((delimiters != null && delimiters.get(current))
223                     || isWhitespace(current) || current == DQUOTE) {
224                 break;
225             }
226             pos++;
227             dst.append(current);
228         }
229         cursor.updatePos(pos);
230     }
231 
232     /**
233      * Transfers content enclosed with quote marks into the destination buffer.
234      *
235      * @param buf buffer with the sequence of chars to be parsed
236      * @param cursor defines the bounds and current position of the buffer
237      * @param dst destination buffer
238      */
239     public void copyQuotedContent(final CharSequence buf, final ParserCursor cursor,
240             final StringBuilder dst) {
241         Args.notNull(buf, "Char sequence");
242         Args.notNull(cursor, "Parser cursor");
243         Args.notNull(dst, "String builder");
244         if (cursor.atEnd()) {
245             return;
246         }
247         int pos = cursor.getPos();
248         int indexFrom = cursor.getPos();
249         final int indexTo = cursor.getUpperBound();
250         char current = buf.charAt(pos);
251         if (current != DQUOTE) {
252             return;
253         }
254         pos++;
255         indexFrom++;
256         boolean escaped = false;
257         for (int i = indexFrom; i < indexTo; i++, pos++) {
258             current = buf.charAt(i);
259             if (escaped) {
260                 if (current != DQUOTE && current != ESCAPE) {
261                     dst.append(ESCAPE);
262                 }
263                 dst.append(current);
264                 escaped = false;
265             } else {
266                 if (current == DQUOTE) {
267                     pos++;
268                     break;
269                 }
270                 if (current == ESCAPE) {
271                     escaped = true;
272                 } else if (current != CR && current != LF) {
273                     dst.append(current);
274                 }
275             }
276         }
277         cursor.updatePos(pos);
278     }
279 
280 }