View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.core5.http.io.entity;
29  
30  import java.io.IOException;
31  import java.io.InputStream;
32  import java.io.InputStreamReader;
33  import java.io.Reader;
34  import java.io.UnsupportedEncodingException;
35  import java.nio.charset.Charset;
36  import java.nio.charset.StandardCharsets;
37  import java.nio.charset.UnsupportedCharsetException;
38  import java.util.Collections;
39  import java.util.List;
40  
41  import org.apache.hc.core5.http.ClassicHttpResponse;
42  import org.apache.hc.core5.http.ContentType;
43  import org.apache.hc.core5.http.HttpEntity;
44  import org.apache.hc.core5.http.NameValuePair;
45  import org.apache.hc.core5.http.ParseException;
46  import org.apache.hc.core5.net.URLEncodedUtils;
47  import org.apache.hc.core5.util.Args;
48  import org.apache.hc.core5.util.ByteArrayBuffer;
49  import org.apache.hc.core5.util.CharArrayBuffer;
50  
51  /**
52   * Static helpers for dealing with {@link HttpEntity}s.
53   *
54   * @since 4.0
55   */
56  public final class EntityUtils {
57  
58      private EntityUtils() {
59      }
60  
61      /**
62       * Ensures that the entity content is fully consumed and the content stream, if exists,
63       * is closed. The process is done, <i>quietly</i> , without throwing any IOException.
64       *
65       * @param entity the entity to consume.
66       *
67       *
68       * @since 4.2
69       */
70      public static void consumeQuietly(final HttpEntity entity) {
71          try {
72            consume(entity);
73          } catch (final IOException ignore) {
74          }
75      }
76  
77      /**
78       * Ensures that the entity content is fully consumed and the content stream, if exists,
79       * is closed.
80       *
81       * @param entity the entity to consume.
82       * @throws IOException if an error occurs reading the input stream
83       *
84       * @since 4.1
85       */
86      public static void consume(final HttpEntity entity) throws IOException {
87          if (entity == null) {
88              return;
89          }
90          if (entity.isStreaming()) {
91              final InputStream instream = entity.getContent();
92              if (instream != null) {
93                  instream.close();
94              }
95          }
96      }
97  
98      /**
99       * Updates an entity in a response by first consuming an existing entity, then setting the new one.
100      *
101      * @param response the response with an entity to update; must not be null.
102      * @param entity the entity to set in the response.
103      * @throws IOException if an error occurs while reading the input stream on the existing
104      * entity.
105      * @throws IllegalArgumentException if response is null.
106      *
107      * @since 4.3
108      */
109     public static void updateEntity(
110             final ClassicHttpResponse response, final HttpEntity entity) throws IOException {
111         Args.notNull(response, "Response");
112         consume(response.getEntity());
113         response.setEntity(entity);
114     }
115 
116     /**
117      * Extracts {@code Content-Type} value from {@link HttpEntity} exactly as
118      * specified by the {@code Content-Type} header of the entity. Returns {@code null}
119      * if not specified.
120      *
121      * @param entity HTTP entity
122      * @return content type
123      * {@code Content-Type} value.
124      * @throws UnsupportedCharsetException Thrown when the named charset is not available in
125      * this instance of the Java virtual machine
126      */
127     public static ContentType getContentType(final HttpEntity entity) throws UnsupportedCharsetException {
128         if (entity == null) {
129             return null;
130         }
131         final String contentType = entity.getContentType();
132         if (contentType != null) {
133             return ContentType.parse(contentType);
134         }
135         return null;
136     }
137 
138     /**
139      * Extracts {@code Content-Type} value from {@link HttpEntity}. Returns {@code null}
140      * if not specified or incorrect (could not be parsed)..
141      *
142      * @param entity HTTP entity
143      * @return content type
144      *
145      * @since 4.4
146      *
147      */
148     public static ContentType getContentTypeLenient(final HttpEntity entity) {
149         if (entity == null) {
150             return null;
151         }
152         final String contentType = entity.getContentType();
153         if (contentType != null) {
154             return ContentType.parseLenient(contentType);
155         }
156         return null;
157     }
158 
159     /**
160      * Extracts {@code Content-Type} value from {@link HttpEntity} or returns the default value
161      * {@link ContentType#DEFAULT_TEXT} if not explicitly specified.
162      *
163      * @param entity HTTP entity
164      * @return content type
165      * {@code Content-Type} value.
166      * @throws UnsupportedCharsetException Thrown when the named charset is not available in
167      * this instance of the Java virtual machine
168      */
169     public static ContentType getContentTypeOrDefault(final HttpEntity entity) throws UnsupportedCharsetException {
170         final ContentType contentType = getContentType(entity);
171         return contentType != null ? contentType : ContentType.DEFAULT_TEXT;
172     }
173 
174     /**
175      * Extracts {@code Content-Type} value from {@link HttpEntity} or returns the default value
176      * {@link ContentType#DEFAULT_TEXT} if not explicitly specified or incorrect (could not be parsed).
177      *
178      * @param entity HTTP entity
179      * @return content type
180      *
181      * @since 4.4
182      */
183     public static ContentType getContentTypeLenientOrDefault(final HttpEntity entity) throws UnsupportedCharsetException {
184         final ContentType contentType = getContentType(entity);
185         return contentType != null ? contentType : ContentType.DEFAULT_TEXT;
186     }
187 
188     /**
189      * Read the contents of an entity and return it as a byte array.
190      *
191      * @param entity the entity to read from=
192      * @return byte array containing the entity content. May be null if
193      *   {@link HttpEntity#getContent()} is null.
194      * @throws IOException if an error occurs reading the input stream
195      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
196      */
197     public static byte[] toByteArray(final HttpEntity entity) throws IOException {
198         Args.notNull(entity, "Entity");
199         final InputStream instream = entity.getContent();
200         if (instream == null) {
201             return null;
202         }
203         try {
204             int i = (int) Args.checkContentLength(entity);
205             if (i < 0) {
206                 i = 4096;
207             }
208             final ByteArrayBuffer buffer = new ByteArrayBuffer(i);
209             final byte[] tmp = new byte[4096];
210             int l;
211             while((l = instream.read(tmp)) != -1) {
212                 buffer.append(tmp, 0, l);
213             }
214             return buffer.toByteArray();
215         } finally {
216             instream.close();
217         }
218     }
219 
220     private static String toString(
221             final HttpEntity entity,
222             final ContentType contentType) throws IOException {
223         final InputStream instream = entity.getContent();
224         if (instream == null) {
225             return null;
226         }
227         try {
228             int i = (int) Args.checkContentLength(entity);
229             if (i < 0) {
230                 i = 4096;
231             }
232             Charset charset = null;
233             if (contentType != null) {
234                 charset = contentType.getCharset();
235                 if (charset == null) {
236                     final ContentType defaultContentType = ContentType.getByMimeType(contentType.getMimeType());
237                     charset = defaultContentType != null ? defaultContentType.getCharset() : null;
238                 }
239             }
240             if (charset == null) {
241                 charset = StandardCharsets.ISO_8859_1;
242             }
243             final Reader reader = new InputStreamReader(instream, charset);
244             final CharArrayBuffer buffer = new CharArrayBuffer(i);
245             final char[] tmp = new char[1024];
246             int l;
247             while((l = reader.read(tmp)) != -1) {
248                 buffer.append(tmp, 0, l);
249             }
250             return buffer.toString();
251         } finally {
252             instream.close();
253         }
254     }
255 
256     /**
257      * Get the entity content as a String, using the provided default character set
258      * if none is found in the entity.
259      * If defaultCharset is null, the default "ISO-8859-1" is used.
260      *
261      * @param entity must not be null
262      * @param defaultCharset character set to be applied if none found in the entity,
263      * or if the entity provided charset is invalid or not available.
264      * @return the entity content as a String. May be null if
265      *   {@link HttpEntity#getContent()} is null.
266      * @throws ParseException if header elements cannot be parsed
267      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
268      * @throws IOException if an error occurs reading the input stream
269      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named entity's charset is not available in
270      * this instance of the Java virtual machine and no defaultCharset is provided.
271      */
272     public static String toString(
273             final HttpEntity entity, final Charset defaultCharset) throws IOException, ParseException {
274         Args.notNull(entity, "Entity");
275         ContentType contentType = null;
276         try {
277             contentType = getContentType(entity);
278         } catch (final UnsupportedCharsetException ex) {
279             if (defaultCharset == null) {
280                 throw new UnsupportedEncodingException(ex.getMessage());
281             }
282         }
283         if (contentType != null) {
284             if (contentType.getCharset() == null) {
285                 contentType = contentType.withCharset(defaultCharset);
286             }
287         } else {
288             contentType = ContentType.DEFAULT_TEXT.withCharset(defaultCharset);
289         }
290         return toString(entity, contentType);
291     }
292 
293     /**
294      * Get the entity content as a String, using the provided default character set
295      * if none is found in the entity.
296      * If defaultCharset is null, the default "ISO-8859-1" is used.
297      *
298      * @param entity must not be null
299      * @param defaultCharset character set to be applied if none found in the entity
300      * @return the entity content as a String. May be null if
301      *   {@link HttpEntity#getContent()} is null.
302      * @throws ParseException if header elements cannot be parsed
303      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
304      * @throws IOException if an error occurs reading the input stream
305      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
306      * this instance of the Java virtual machine
307      */
308     public static String toString(
309             final HttpEntity entity, final String defaultCharset) throws IOException, ParseException {
310         return toString(entity, defaultCharset != null ? Charset.forName(defaultCharset) : null);
311     }
312 
313     /**
314      * Read the contents of an entity and return it as a String.
315      * The content is converted using the character set from the entity (if any),
316      * failing that, "ISO-8859-1" is used.
317      *
318      * @param entity the entity to convert to a string; must not be null
319      * @return String containing the content.
320      * @throws ParseException if header elements cannot be parsed
321      * @throws IllegalArgumentException if entity is null or if content length &gt; Integer.MAX_VALUE
322      * @throws IOException if an error occurs reading the input stream
323      * @throws java.nio.charset.UnsupportedCharsetException Thrown when the named charset is not available in
324      * this instance of the Java virtual machine
325      */
326     public static String toString(final HttpEntity entity) throws IOException, ParseException {
327         Args.notNull(entity, "Entity");
328         return toString(entity, getContentType(entity));
329     }
330 
331     /**
332      * Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
333      * The encoding is taken from the entity's Content-Encoding header.
334      * <p>
335      * This is typically used while parsing an HTTP POST.
336      *
337      * @param entity
338      *            The entity to parse
339      * @return a list of {@link NameValuePair} as built from the URI's query portion.
340      * @throws IOException
341      *             If there was an exception getting the entity's data.
342      */
343     public static List<NameValuePair> parse(final HttpEntity entity) throws IOException {
344         Args.notNull(entity, "HTTP entity");
345         final ContentType contentType = EntityUtils.getContentType(entity);
346         if (contentType == null || !contentType.getMimeType().equalsIgnoreCase(URLEncodedUtils.CONTENT_TYPE)) {
347             return Collections.emptyList();
348         }
349         final long len = entity.getContentLength();
350         Args.checkRange(len, 0, Integer.MAX_VALUE, "HTTP entity is too large");
351         final Charset charset = contentType.getCharset() != null ? contentType.getCharset() : StandardCharsets.ISO_8859_1;
352         final InputStream instream = entity.getContent();
353         if (instream == null) {
354             return Collections.emptyList();
355         }
356         final CharArrayBuffer buf;
357         try {
358             buf = new CharArrayBuffer(len > 0 ? (int) len : 1024);
359             final Reader reader = new InputStreamReader(instream, charset);
360             final char[] tmp = new char[1024];
361             int l;
362             while((l = reader.read(tmp)) != -1) {
363                 buf.append(tmp, 0, l);
364             }
365 
366         } finally {
367             instream.close();
368         }
369         if (buf.length() == 0) {
370             return Collections.emptyList();
371         }
372         return URLEncodedUtils.parse(buf, charset, '&');
373     }
374 
375 }