View Javadoc

1   /*
2    * ====================================================================
3    *
4    *  Licensed to the Apache Software Foundation (ASF) under one or more
5    *  contributor license agreements.  See the NOTICE file distributed with
6    *  this work for additional information regarding copyright ownership.
7    *  The ASF licenses this file to You under the Apache License, Version 2.0
8    *  (the "License"); you may not use this file except in compliance with
9    *  the License.  You may obtain a copy of the License at
10   *
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   *
13   *  Unless required by applicable law or agreed to in writing, software
14   *  distributed under the License is distributed on an "AS IS" BASIS,
15   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   *  See the License for the specific language governing permissions and
17   *  limitations under the License.
18   * ====================================================================
19   *
20   * This software consists of voluntary contributions made by many
21   * individuals on behalf of the Apache Software Foundation.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   *
25  */
26  package org.apache.http.client.entity;
27  
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.io.PushbackInputStream;
31  import java.util.zip.DataFormatException;
32  import java.util.zip.Inflater;
33  import java.util.zip.InflaterInputStream;
34  
35  import org.apache.http.Header;
36  import org.apache.http.HttpEntity;
37  import org.apache.http.entity.HttpEntityWrapper;
38  
39  /**
40   * {@link HttpEntityWrapper} responsible for handling deflate Content Coded responses. In RFC2616
41   * terms, <code>deflate</code> means a <code>zlib</code> stream as defined in RFC1950. Some server
42   * implementations have misinterpreted RFC2616 to mean that a <code>deflate</code> stream as
43   * defined in RFC1951 should be used (or maybe they did that since that's how IE behaves?). It's
44   * confusing that <code>deflate</code> in HTTP 1.1 means <code>zlib</code> streams rather than
45   * <code>deflate</code> streams. We handle both types in here, since that's what is seen on the
46   * internet. Moral - prefer <code>gzip</code>!
47   *
48   * @see GzipDecompressingEntity
49   *
50   * @since 4.1
51   */
52  public class DeflateDecompressingEntity extends DecompressingEntity {
53  
54      /**
55       * Creates a new {@link DeflateDecompressingEntity} which will wrap the specified
56       * {@link HttpEntity}.
57       *
58       * @param entity
59       *            a non-null {@link HttpEntity} to be wrapped
60       */
61      public DeflateDecompressingEntity(final HttpEntity entity) {
62          super(entity);
63      }
64  
65      /**
66       * Returns the non-null InputStream that should be returned to by all requests to
67       * {@link #getContent()}.
68       *
69       * @return a non-null InputStream
70       * @throws IOException if there was a problem
71       */
72      @Override
73      InputStream getDecompressingInputStream(final InputStream wrapped) throws IOException {
74          /*
75           * A zlib stream will have a header.
76           *
77           * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 |
78           *
79           * * CMF is one byte.
80           *
81           * * FLG is one byte.
82           *
83           * * DICTID is four bytes, and only present if FLG.FDICT is set.
84           *
85           * Sniff the content. Does it look like a zlib stream, with a CMF, etc? c.f. RFC1950,
86           * section 2.2. http://tools.ietf.org/html/rfc1950#page-4
87           *
88           * We need to see if it looks like a proper zlib stream, or whether it is just a deflate
89           * stream. RFC2616 calls zlib streams deflate. Confusing, isn't it? That's why some servers
90           * implement deflate Content-Encoding using deflate streams, rather than zlib streams.
91           *
92           * We could start looking at the bytes, but to be honest, someone else has already read
93           * the RFCs and implemented that for us. So we'll just use the JDK libraries and exception
94           * handling to do this. If that proves slow, then we could potentially change this to check
95           * the first byte - does it look like a CMF? What about the second byte - does it look like
96           * a FLG, etc.
97           */
98  
99          /* We read a small buffer to sniff the content. */
100         byte[] peeked = new byte[6];
101 
102         PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length);
103 
104         int headerLength = pushback.read(peeked);
105 
106         if (headerLength == -1) {
107             throw new IOException("Unable to read the response");
108         }
109 
110         /* We try to read the first uncompressed byte. */
111         byte[] dummy = new byte[1];
112 
113         Inflater inf = new Inflater();
114 
115         try {
116             int n;
117             while ((n = inf.inflate(dummy)) == 0) {
118                 if (inf.finished()) {
119 
120                     /* Not expecting this, so fail loudly. */
121                     throw new IOException("Unable to read the response");
122                 }
123 
124                 if (inf.needsDictionary()) {
125 
126                     /* Need dictionary - then it must be zlib stream with DICTID part? */
127                     break;
128                 }
129 
130                 if (inf.needsInput()) {
131                     inf.setInput(peeked);
132                 }
133             }
134 
135             if (n == -1) {
136                 throw new IOException("Unable to read the response");
137             }
138 
139             /*
140              * We read something without a problem, so it's a valid zlib stream. Just need to reset
141              * and return an unused InputStream now.
142              */
143             pushback.unread(peeked, 0, headerLength);
144             return new InflaterInputStream(pushback);
145         } catch (DataFormatException e) {
146 
147             /* Presume that it's an RFC1951 deflate stream rather than RFC1950 zlib stream and try
148              * again. */
149             pushback.unread(peeked, 0, headerLength);
150             return new InflaterInputStream(pushback, new Inflater(true));
151         }
152     }
153 
154     /**
155      * {@inheritDoc}
156      */
157     @Override
158     public Header getContentEncoding() {
159 
160         /* This HttpEntityWrapper has dealt with the Content-Encoding. */
161         return null;
162     }
163 
164     /**
165      * {@inheritDoc}
166      */
167     @Override
168     public long getContentLength() {
169 
170         /* Length of inflated content is unknown. */
171         return -1;
172     }
173 
174 }