View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.core5.http.impl.io;
29  
30  import java.io.IOException;
31  import java.io.InputStream;
32  
33  import org.apache.hc.core5.http.ConnectionClosedException;
34  import org.apache.hc.core5.http.Header;
35  import org.apache.hc.core5.http.HttpException;
36  import org.apache.hc.core5.http.MalformedChunkCodingException;
37  import org.apache.hc.core5.http.StreamClosedException;
38  import org.apache.hc.core5.http.TruncatedChunkException;
39  import org.apache.hc.core5.http.config.Http1Config;
40  import org.apache.hc.core5.http.io.SessionInputBuffer;
41  import org.apache.hc.core5.util.Args;
42  import org.apache.hc.core5.util.CharArrayBuffer;
43  
44  /**
45   * Implements chunked transfer coding. The content is received in small chunks.
46   * Entities transferred using this input stream can be of unlimited length.
47   * After the stream is read to the end, it provides access to the trailers,
48   * if any.
49   * <p>
50   * Note that this class NEVER closes the underlying stream, even when
51   * {@link #close()} gets called.  Instead, it will read until the "end" of its
52   * chunking on close, which allows for the seamless execution of subsequent
53   * HTTP 1.1 requests, while not requiring the client to remember to read the
54   * entire contents of the response.
55   *
56   *
57   * @since 4.0
58   *
59   */
60  public class ChunkedInputStream extends InputStream {
61  
62      private enum State {
63          CHUNK_LEN, CHUNK_DATA, CHUNK_CRLF, CHUNK_INVALID
64      }
65  
66      private static final int BUFFER_SIZE = 2048;
67      private static final Header[] EMPTY_FOOTERS = {};
68  
69      /** The session input buffer */
70      private final SessionInputBuffer buffer;
71      private final InputStream inputStream;
72      private final CharArrayBuffer lineBuffer;
73      private final Http1Config http1Config;
74  
75      private State state;
76  
77      /** The chunk size */
78      private long chunkSize;
79  
80      /** The current position within the current chunk */
81      private long pos;
82  
83      /** True if we've reached the end of stream */
84      private boolean eof;
85  
86      /** True if this stream is closed */
87      private boolean closed;
88  
89      private Header[] footers = EMPTY_FOOTERS;
90  
91      /**
92       * Default constructor.
93       *
94       * @param buffer Session input buffer
95       * @param inputStream Input stream
96       * @param http1Config Message http1Config. If {@code null} {@link Http1Config#DEFAULT} will be used.
97       *
98       * @since 4.4
99       */
100     public ChunkedInputStream(final SessionInputBuffer buffer, final InputStream inputStream, final Http1Config http1Config) {
101         super();
102         this.buffer = Args.notNull(buffer, "Session input buffer");
103         this.inputStream = Args.notNull(inputStream, "Input stream");
104         this.pos = 0L;
105         this.lineBuffer = new CharArrayBuffer(16);
106         this.http1Config = http1Config != null ? http1Config : Http1Config.DEFAULT;
107         this.state = State.CHUNK_LEN;
108     }
109 
110     /**
111      * Wraps session input stream and reads chunk coded input.
112      *
113      * @param buffer Session input buffer
114      * @param inputStream Input stream
115      */
116     public ChunkedInputStream(final SessionInputBuffer buffer, final InputStream inputStream) {
117         this(buffer, inputStream, null);
118     }
119 
120     @Override
121     public int available() throws IOException {
122         final int len = this.buffer.length();
123         return (int) Math.min(len, this.chunkSize - this.pos);
124     }
125 
126     /**
127      * <p> Returns all the data in a chunked stream in coalesced form. A chunk
128      * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
129      * is detected.</p>
130      *
131      * <p> Trailer headers are read automatically at the end of the stream and
132      * can be obtained with the getResponseFooters() method.</p>
133      *
134      * @return -1 of the end of the stream has been reached or the next data
135      * byte
136      * @throws IOException in case of an I/O error
137      */
138     @Override
139     public int read() throws IOException {
140         if (this.closed) {
141             throw new StreamClosedException();
142         }
143         if (this.eof) {
144             return -1;
145         }
146         if (state != State.CHUNK_DATA) {
147             nextChunk();
148             if (this.eof) {
149                 return -1;
150             }
151         }
152         final int b = buffer.read(inputStream);
153         if (b != -1) {
154             pos++;
155             if (pos >= chunkSize) {
156                 state = State.CHUNK_CRLF;
157             }
158         }
159         return b;
160     }
161 
162     /**
163      * Read some bytes from the stream.
164      * @param b The byte array that will hold the contents from the stream.
165      * @param off The offset into the byte array at which bytes will start to be
166      * placed.
167      * @param len the maximum number of bytes that can be returned.
168      * @return The number of bytes returned or -1 if the end of stream has been
169      * reached.
170      * @throws IOException in case of an I/O error
171      */
172     @Override
173     public int read(final byte[] b, final int off, final int len) throws IOException {
174         if (closed) {
175             throw new StreamClosedException();
176         }
177         if (len == 0) {
178             return 0;
179         }
180         if (eof) {
181             return -1;
182         }
183         if (state != State.CHUNK_DATA) {
184             nextChunk();
185             if (eof) {
186                 return -1;
187             }
188         }
189         final int bytesRead = buffer.read(b, off, (int) Math.min(len, chunkSize - pos), inputStream);
190         if (bytesRead != -1) {
191             pos += bytesRead;
192             if (pos >= chunkSize) {
193                 state = State.CHUNK_CRLF;
194             }
195             return bytesRead;
196         }
197         eof = true;
198         throw new TruncatedChunkException("Truncated chunk (expected size: %d; actual size: %d)",
199                         chunkSize, pos);
200     }
201 
202     /**
203      * Read some bytes from the stream.
204      * @param b The byte array that will hold the contents from the stream.
205      * @return The number of bytes returned or -1 if the end of stream has been
206      * reached.
207      * @throws IOException in case of an I/O error
208      */
209     @Override
210     public int read(final byte[] b) throws IOException {
211         return read(b, 0, b.length);
212     }
213 
214     /**
215      * Read the next chunk.
216      * @throws IOException in case of an I/O error
217      */
218     private void nextChunk() throws IOException {
219         if (state == State.CHUNK_INVALID) {
220             throw new MalformedChunkCodingException("Corrupt data stream");
221         }
222         try {
223             chunkSize = getChunkSize();
224             if (chunkSize < 0L) {
225                 throw new MalformedChunkCodingException("Negative chunk size");
226             }
227             state = State.CHUNK_DATA;
228             pos = 0L;
229             if (chunkSize == 0L) {
230                 eof = true;
231                 parseTrailerHeaders();
232             }
233         } catch (final MalformedChunkCodingException ex) {
234             state = State.CHUNK_INVALID;
235             throw ex;
236         }
237     }
238 
239     /**
240      * Expects the stream to start with a chunksize in hex with optional
241      * comments after a semicolon. The line must end with a CRLF: "a3; some
242      * comment\r\n" Positions the stream at the start of the next line.
243      */
244     private long getChunkSize() throws IOException {
245         final State st = this.state;
246         switch (st) {
247         case CHUNK_CRLF:
248             lineBuffer.clear();
249             final int bytesRead1 = this.buffer.readLine(lineBuffer, inputStream);
250             if (bytesRead1 == -1) {
251                 throw new MalformedChunkCodingException(
252                     "CRLF expected at end of chunk");
253             }
254             if (!lineBuffer.isEmpty()) {
255                 throw new MalformedChunkCodingException(
256                     "Unexpected content at the end of chunk");
257             }
258             state = State.CHUNK_LEN;
259             //$FALL-THROUGH$
260         case CHUNK_LEN:
261             lineBuffer.clear();
262             final int bytesRead2 = this.buffer.readLine(lineBuffer, inputStream);
263             if (bytesRead2 == -1) {
264                 throw new ConnectionClosedException(
265                                 "Premature end of chunk coded message body: closing chunk expected");
266             }
267             int separator = lineBuffer.indexOf(';');
268             if (separator < 0) {
269                 separator = lineBuffer.length();
270             }
271             final String s = this.lineBuffer.substringTrimmed(0, separator);
272             try {
273                 return Long.parseLong(s, 16);
274             } catch (final NumberFormatException e) {
275                 throw new MalformedChunkCodingException("Bad chunk header: " + s);
276             }
277         default:
278             throw new IllegalStateException("Inconsistent codec state");
279         }
280     }
281 
282     /**
283      * Reads and stores the Trailer headers.
284      * @throws IOException in case of an I/O error
285      */
286     private void parseTrailerHeaders() throws IOException {
287         try {
288             this.footers = AbstractMessageParser.parseHeaders(buffer, inputStream,
289                     http1Config.getMaxHeaderCount(),
290                     http1Config.getMaxLineLength(),
291                     null);
292         } catch (final HttpException ex) {
293             final IOException ioe = new MalformedChunkCodingException("Invalid trailing header: "
294                     + ex.getMessage());
295             ioe.initCause(ex);
296             throw ioe;
297         }
298     }
299 
300     /**
301      * Reads the remainder of the chunked message, leaving the underlying
302      * stream at a position to start reading the next response without
303      * scanning. But does NOT close the underlying stream.
304      * @throws IOException in case of an I/O error
305      */
306     @Override
307     public void close() throws IOException {
308         if (!closed) {
309             try {
310                 if (!eof && state != State.CHUNK_INVALID) {
311                     // Optimistically check if the content has been fully read
312                     // when there's no data remaining in the current chunk.
313                     // This is common when self-terminating content (e.g. JSON)
314                     // is parsed from response streams.
315                     if (chunkSize == pos && chunkSize > 0 && read() == -1) {
316                         return;
317                     }
318                     // read and discard the remainder of the message
319                     final byte[] buff = new byte[BUFFER_SIZE];
320                     while (read(buff) >= 0) {
321                     }
322                 }
323             } finally {
324                 eof = true;
325                 closed = true;
326             }
327         }
328     }
329 
330     public Header[] getFooters() {
331         return footers.length > 0 ? footers.clone() : EMPTY_FOOTERS;
332     }
333 
334 }