View Javadoc

1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.client.entity;
28  
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.PushbackInputStream;
32  import java.util.zip.DataFormatException;
33  import java.util.zip.Inflater;
34  import java.util.zip.InflaterInputStream;
35  
36  import org.apache.http.Header;
37  import org.apache.http.HttpEntity;
38  import org.apache.http.entity.HttpEntityWrapper;
39  
40  /**
41   * {@link HttpEntityWrapper} responsible for handling deflate Content Coded responses. In RFC2616
42   * terms, <code>deflate</code> means a <code>zlib</code> stream as defined in RFC1950. Some server
43   * implementations have misinterpreted RFC2616 to mean that a <code>deflate</code> stream as
44   * defined in RFC1951 should be used (or maybe they did that since that's how IE behaves?). It's
45   * confusing that <code>deflate</code> in HTTP 1.1 means <code>zlib</code> streams rather than
46   * <code>deflate</code> streams. We handle both types in here, since that's what is seen on the
47   * internet. Moral - prefer <code>gzip</code>!
48   *
49   * @see GzipDecompressingEntity
50   *
51   * @since 4.1
52   */
53  public class DeflateDecompressingEntity extends DecompressingEntity {
54  
55      /**
56       * Creates a new {@link DeflateDecompressingEntity} which will wrap the specified
57       * {@link HttpEntity}.
58       *
59       * @param entity
60       *            a non-null {@link HttpEntity} to be wrapped
61       */
62      public DeflateDecompressingEntity(final HttpEntity entity) {
63          super(entity);
64      }
65  
66      /**
67       * Returns the non-null InputStream that should be returned to by all requests to
68       * {@link #getContent()}.
69       *
70       * @return a non-null InputStream
71       * @throws IOException if there was a problem
72       */
73      @Override
74      InputStream decorate(final InputStream wrapped) throws IOException {
75          /*
76           * A zlib stream will have a header.
77           *
78           * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 |
79           *
80           * * CMF is one byte.
81           *
82           * * FLG is one byte.
83           *
84           * * DICTID is four bytes, and only present if FLG.FDICT is set.
85           *
86           * Sniff the content. Does it look like a zlib stream, with a CMF, etc? c.f. RFC1950,
87           * section 2.2. http://tools.ietf.org/html/rfc1950#page-4
88           *
89           * We need to see if it looks like a proper zlib stream, or whether it is just a deflate
90           * stream. RFC2616 calls zlib streams deflate. Confusing, isn't it? That's why some servers
91           * implement deflate Content-Encoding using deflate streams, rather than zlib streams.
92           *
93           * We could start looking at the bytes, but to be honest, someone else has already read
94           * the RFCs and implemented that for us. So we'll just use the JDK libraries and exception
95           * handling to do this. If that proves slow, then we could potentially change this to check
96           * the first byte - does it look like a CMF? What about the second byte - does it look like
97           * a FLG, etc.
98           */
99  
100         /* We read a small buffer to sniff the content. */
101         final byte[] peeked = new byte[6];
102 
103         final PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length);
104 
105         final int headerLength = pushback.read(peeked);
106 
107         if (headerLength == -1) {
108             throw new IOException("Unable to read the response");
109         }
110 
111         /* We try to read the first uncompressed byte. */
112         final byte[] dummy = new byte[1];
113 
114         final Inflater inf = new Inflater();
115 
116         try {
117             int n;
118             while ((n = inf.inflate(dummy)) == 0) {
119                 if (inf.finished()) {
120 
121                     /* Not expecting this, so fail loudly. */
122                     throw new IOException("Unable to read the response");
123                 }
124 
125                 if (inf.needsDictionary()) {
126 
127                     /* Need dictionary - then it must be zlib stream with DICTID part? */
128                     break;
129                 }
130 
131                 if (inf.needsInput()) {
132                     inf.setInput(peeked);
133                 }
134             }
135 
136             if (n == -1) {
137                 throw new IOException("Unable to read the response");
138             }
139 
140             /*
141              * We read something without a problem, so it's a valid zlib stream. Just need to reset
142              * and return an unused InputStream now.
143              */
144             pushback.unread(peeked, 0, headerLength);
145             return new DeflateStream(pushback, new Inflater());
146         } catch (final DataFormatException e) {
147 
148             /* Presume that it's an RFC1951 deflate stream rather than RFC1950 zlib stream and try
149              * again. */
150             pushback.unread(peeked, 0, headerLength);
151             return new DeflateStream(pushback, new Inflater(true));
152         } finally {
153             inf.end();
154         }
155     }
156 
157     /**
158      * {@inheritDoc}
159      */
160     @Override
161     public Header getContentEncoding() {
162 
163         /* This HttpEntityWrapper has dealt with the Content-Encoding. */
164         return null;
165     }
166 
167     /**
168      * {@inheritDoc}
169      */
170     @Override
171     public long getContentLength() {
172 
173         /* Length of inflated content is unknown. */
174         return -1;
175     }
176 
177     static class DeflateStream extends InflaterInputStream {
178 
179         private boolean closed = false;
180 
181         public DeflateStream(final InputStream in, final Inflater inflater) {
182             super(in, inflater);
183         }
184 
185         @Override
186         public void close() throws IOException {
187             if (closed) {
188                 return;
189             }
190             closed = true;
191             inf.end();
192             super.close();
193         }
194 
195     }
196 
197 }