1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 *
21 * This software consists of voluntary contributions made by many
22 * individuals on behalf of the Apache Software Foundation. For more
23 * information on the Apache Software Foundation, please see
24 * <http://www.apache.org/>.
25 *
26 */
27 package org.apache.http.client.entity;
28
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.PushbackInputStream;
32 import java.util.zip.DataFormatException;
33 import java.util.zip.Inflater;
34 import java.util.zip.InflaterInputStream;
35
36 import org.apache.http.Header;
37 import org.apache.http.HttpEntity;
38 import org.apache.http.entity.HttpEntityWrapper;
39
40 /**
41 * {@link HttpEntityWrapper} responsible for handling deflate Content Coded responses. In RFC2616
42 * terms, <code>deflate</code> means a <code>zlib</code> stream as defined in RFC1950. Some server
43 * implementations have misinterpreted RFC2616 to mean that a <code>deflate</code> stream as
44 * defined in RFC1951 should be used (or maybe they did that since that's how IE behaves?). It's
45 * confusing that <code>deflate</code> in HTTP 1.1 means <code>zlib</code> streams rather than
46 * <code>deflate</code> streams. We handle both types in here, since that's what is seen on the
47 * internet. Moral - prefer <code>gzip</code>!
48 *
49 * @see GzipDecompressingEntity
50 *
51 * @since 4.1
52 */
53 public class DeflateDecompressingEntity extends DecompressingEntity {
54
55 /**
56 * Creates a new {@link DeflateDecompressingEntity} which will wrap the specified
57 * {@link HttpEntity}.
58 *
59 * @param entity
60 * a non-null {@link HttpEntity} to be wrapped
61 */
62 public DeflateDecompressingEntity(final HttpEntity entity) {
63 super(entity);
64 }
65
66 /**
67 * Returns the non-null InputStream that should be returned to by all requests to
68 * {@link #getContent()}.
69 *
70 * @return a non-null InputStream
71 * @throws IOException if there was a problem
72 */
73 @Override
74 InputStream decorate(final InputStream wrapped) throws IOException {
75 /*
76 * A zlib stream will have a header.
77 *
78 * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 |
79 *
80 * * CMF is one byte.
81 *
82 * * FLG is one byte.
83 *
84 * * DICTID is four bytes, and only present if FLG.FDICT is set.
85 *
86 * Sniff the content. Does it look like a zlib stream, with a CMF, etc? c.f. RFC1950,
87 * section 2.2. http://tools.ietf.org/html/rfc1950#page-4
88 *
89 * We need to see if it looks like a proper zlib stream, or whether it is just a deflate
90 * stream. RFC2616 calls zlib streams deflate. Confusing, isn't it? That's why some servers
91 * implement deflate Content-Encoding using deflate streams, rather than zlib streams.
92 *
93 * We could start looking at the bytes, but to be honest, someone else has already read
94 * the RFCs and implemented that for us. So we'll just use the JDK libraries and exception
95 * handling to do this. If that proves slow, then we could potentially change this to check
96 * the first byte - does it look like a CMF? What about the second byte - does it look like
97 * a FLG, etc.
98 */
99
100 /* We read a small buffer to sniff the content. */
101 final byte[] peeked = new byte[6];
102
103 final PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length);
104
105 final int headerLength = pushback.read(peeked);
106
107 if (headerLength == -1) {
108 throw new IOException("Unable to read the response");
109 }
110
111 /* We try to read the first uncompressed byte. */
112 final byte[] dummy = new byte[1];
113
114 final Inflater inf = new Inflater();
115
116 try {
117 int n;
118 while ((n = inf.inflate(dummy)) == 0) {
119 if (inf.finished()) {
120
121 /* Not expecting this, so fail loudly. */
122 throw new IOException("Unable to read the response");
123 }
124
125 if (inf.needsDictionary()) {
126
127 /* Need dictionary - then it must be zlib stream with DICTID part? */
128 break;
129 }
130
131 if (inf.needsInput()) {
132 inf.setInput(peeked);
133 }
134 }
135
136 if (n == -1) {
137 throw new IOException("Unable to read the response");
138 }
139
140 /*
141 * We read something without a problem, so it's a valid zlib stream. Just need to reset
142 * and return an unused InputStream now.
143 */
144 pushback.unread(peeked, 0, headerLength);
145 return new DeflateStream(pushback, new Inflater());
146 } catch (final DataFormatException e) {
147
148 /* Presume that it's an RFC1951 deflate stream rather than RFC1950 zlib stream and try
149 * again. */
150 pushback.unread(peeked, 0, headerLength);
151 return new DeflateStream(pushback, new Inflater(true));
152 } finally {
153 inf.end();
154 }
155 }
156
157 /**
158 * {@inheritDoc}
159 */
160 @Override
161 public Header getContentEncoding() {
162
163 /* This HttpEntityWrapper has dealt with the Content-Encoding. */
164 return null;
165 }
166
167 /**
168 * {@inheritDoc}
169 */
170 @Override
171 public long getContentLength() {
172
173 /* Length of inflated content is unknown. */
174 return -1;
175 }
176
177 static class DeflateStream extends InflaterInputStream {
178
179 private boolean closed = false;
180
181 public DeflateStream(final InputStream in, final Inflater inflater) {
182 super(in, inflater);
183 }
184
185 @Override
186 public void close() throws IOException {
187 if (closed) {
188 return;
189 }
190 closed = true;
191 inf.end();
192 super.close();
193 }
194
195 }
196
197 }