View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.http.message;
29  
30  import java.util.BitSet;
31  
32  import org.apache.http.annotation.ThreadingBehavior;
33  import org.apache.http.annotation.Contract;
34  import org.apache.http.util.CharArrayBuffer;
35  
36  /**
37   * Low level parser for header field elements. The parsing routines of this class are designed
38   * to produce near zero intermediate garbage and make no intermediate copies of input data.
39   * <p>
40   * This class is immutable and thread safe.
41   *
42   * @since 4.4
43   */
44  @Contract(threading = ThreadingBehavior.IMMUTABLE)
45  public class TokenParser {
46  
47      public static BitSet INIT_BITSET(final int ... b) {
48          final BitSet bitset = new BitSet();
49          for (final int aB : b) {
50              bitset.set(aB);
51          }
52          return bitset;
53      }
54  
55      /** US-ASCII CR, carriage return (13) */
56      public static final char CR = '\r';
57  
58      /** US-ASCII LF, line feed (10) */
59      public static final char LF = '\n';
60  
61      /** US-ASCII SP, space (32) */
62      public static final char SP = ' ';
63  
64      /** US-ASCII HT, horizontal-tab (9) */
65      public static final char HT = '\t';
66  
67      /** Double quote */
68      public static final char DQUOTE = '\"';
69  
70      /** Backward slash / escape character */
71      public static final char ESCAPE = '\\';
72  
73      public static boolean isWhitespace(final char ch) {
74          return ch == SP || ch == HT || ch == CR || ch == LF;
75      }
76  
77      public static final TokenParser INSTANCE = new TokenParser();
78  
79      /**
80       * Extracts from the sequence of chars a token terminated with any of the given delimiters
81       * discarding semantically insignificant whitespace characters.
82       *
83       * @param buf buffer with the sequence of chars to be parsed
84       * @param cursor defines the bounds and current position of the buffer
85       * @param delimiters set of delimiting characters. Can be {@code null} if the token
86       *  is not delimited by any character.
87       */
88      public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
89          final StringBuilder dst = new StringBuilder();
90          boolean whitespace = false;
91          while (!cursor.atEnd()) {
92              final char current = buf.charAt(cursor.getPos());
93              if (delimiters != null && delimiters.get(current)) {
94                  break;
95              } else if (isWhitespace(current)) {
96                  skipWhiteSpace(buf, cursor);
97                  whitespace = true;
98              } else {
99                  if (whitespace && dst.length() > 0) {
100                     dst.append(' ');
101                 }
102                 copyContent(buf, cursor, delimiters, dst);
103                 whitespace = false;
104             }
105         }
106         return dst.toString();
107     }
108 
109     /**
110      * Extracts from the sequence of chars a value which can be enclosed in quote marks and
111      * terminated with any of the given delimiters discarding semantically insignificant
112      * whitespace characters.
113      *
114      * @param buf buffer with the sequence of chars to be parsed
115      * @param cursor defines the bounds and current position of the buffer
116      * @param delimiters set of delimiting characters. Can be {@code null} if the value
117      *  is not delimited by any character.
118      */
119     public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
120         final StringBuilder dst = new StringBuilder();
121         boolean whitespace = false;
122         while (!cursor.atEnd()) {
123             final char current = buf.charAt(cursor.getPos());
124             if (delimiters != null && delimiters.get(current)) {
125                 break;
126             } else if (isWhitespace(current)) {
127                 skipWhiteSpace(buf, cursor);
128                 whitespace = true;
129             } else if (current == DQUOTE) {
130                 if (whitespace && dst.length() > 0) {
131                     dst.append(' ');
132                 }
133                 copyQuotedContent(buf, cursor, dst);
134                 whitespace = false;
135             } else {
136                 if (whitespace && dst.length() > 0) {
137                     dst.append(' ');
138                 }
139                 copyUnquotedContent(buf, cursor, delimiters, dst);
140                 whitespace = false;
141             }
142         }
143         return dst.toString();
144     }
145 
146     /**
147      * Skips semantically insignificant whitespace characters and moves the cursor to the closest
148      * non-whitespace character.
149      *
150      * @param buf buffer with the sequence of chars to be parsed
151      * @param cursor defines the bounds and current position of the buffer
152      */
153     public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
154         int pos = cursor.getPos();
155         final int indexFrom = cursor.getPos();
156         final int indexTo = cursor.getUpperBound();
157         for (int i = indexFrom; i < indexTo; i++) {
158             final char current = buf.charAt(i);
159             if (!isWhitespace(current)) {
160                 break;
161             } else {
162                 pos++;
163             }
164         }
165         cursor.updatePos(pos);
166     }
167 
168     /**
169      * Transfers content into the destination buffer until a whitespace character or any of
170      * the given delimiters is encountered.
171      *
172      * @param buf buffer with the sequence of chars to be parsed
173      * @param cursor defines the bounds and current position of the buffer
174      * @param delimiters set of delimiting characters. Can be {@code null} if the value
175      *  is delimited by a whitespace only.
176      * @param dst destination buffer
177      */
178     public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
179             final StringBuilder dst) {
180         int pos = cursor.getPos();
181         final int indexFrom = cursor.getPos();
182         final int indexTo = cursor.getUpperBound();
183         for (int i = indexFrom; i < indexTo; i++) {
184             final char current = buf.charAt(i);
185             if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
186                 break;
187             } else {
188                 pos++;
189                 dst.append(current);
190             }
191         }
192         cursor.updatePos(pos);
193     }
194 
195     /**
196      * Transfers content into the destination buffer until a whitespace character,  a quote,
197      * or any of the given delimiters is encountered.
198      *
199      * @param buf buffer with the sequence of chars to be parsed
200      * @param cursor defines the bounds and current position of the buffer
201      * @param delimiters set of delimiting characters. Can be {@code null} if the value
202      *  is delimited by a whitespace or a quote only.
203      * @param dst destination buffer
204      */
205     public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
206             final BitSet delimiters, final StringBuilder dst) {
207         int pos = cursor.getPos();
208         final int indexFrom = cursor.getPos();
209         final int indexTo = cursor.getUpperBound();
210         for (int i = indexFrom; i < indexTo; i++) {
211             final char current = buf.charAt(i);
212             if ((delimiters != null && delimiters.get(current))
213                     || isWhitespace(current) || current == DQUOTE) {
214                 break;
215             } else {
216                 pos++;
217                 dst.append(current);
218             }
219         }
220         cursor.updatePos(pos);
221     }
222 
223     /**
224      * Transfers content enclosed with quote marks into the destination buffer.
225      *
226      * @param buf buffer with the sequence of chars to be parsed
227      * @param cursor defines the bounds and current position of the buffer
228      * @param dst destination buffer
229      */
230     public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
231             final StringBuilder dst) {
232         if (cursor.atEnd()) {
233             return;
234         }
235         int pos = cursor.getPos();
236         int indexFrom = cursor.getPos();
237         final int indexTo = cursor.getUpperBound();
238         char current = buf.charAt(pos);
239         if (current != DQUOTE) {
240             return;
241         }
242         pos++;
243         indexFrom++;
244         boolean escaped = false;
245         for (int i = indexFrom; i < indexTo; i++, pos++) {
246             current = buf.charAt(i);
247             if (escaped) {
248                 if (current != DQUOTE && current != ESCAPE) {
249                     dst.append(ESCAPE);
250                 }
251                 dst.append(current);
252                 escaped = false;
253             } else {
254                 if (current == DQUOTE) {
255                     pos++;
256                     break;
257                 }
258                 if (current == ESCAPE) {
259                     escaped = true;
260                 } else if (current != CR && current != LF) {
261                     dst.append(current);
262                 }
263             }
264         }
265         cursor.updatePos(pos);
266     }
267 
268 }