1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 *
21 * This software consists of voluntary contributions made by many
22 * individuals on behalf of the Apache Software Foundation. For more
23 * information on the Apache Software Foundation, please see
24 * <http://www.apache.org/>.
25 *
26 */
27 package org.apache.hc.client5.http.impl.cache;
28
29 import java.time.Duration;
30 import java.time.Instant;
31 import java.util.Iterator;
32
33 import org.apache.hc.client5.http.cache.ResponseCacheControl;
34 import org.apache.hc.client5.http.utils.DateUtils;
35 import org.apache.hc.core5.http.HttpHeaders;
36 import org.apache.hc.core5.http.HttpRequest;
37 import org.apache.hc.core5.http.HttpResponse;
38 import org.apache.hc.core5.http.HttpStatus;
39 import org.apache.hc.core5.http.HttpVersion;
40 import org.apache.hc.core5.http.Method;
41 import org.apache.hc.core5.http.ProtocolVersion;
42 import org.apache.hc.core5.http.message.MessageSupport;
43 import org.slf4j.Logger;
44 import org.slf4j.LoggerFactory;
45
46 class ResponseCachingPolicy {
47
48 /**
49 * The default freshness duration for a cached object, in seconds.
50 *
51 * <p>This constant is used to set the default value for the freshness lifetime of a cached object.
52 * When a new object is added to the cache, it will be assigned this duration if no other duration
53 * is specified.</p>
54 *
55 * <p>By default, this value is set to 300 seconds (5 minutes). Applications can customize this
56 * value as needed.</p>
57 */
58 private static final Duration DEFAULT_FRESHNESS_DURATION = Duration.ofMinutes(5);
59
60 private static final Logger LOG = LoggerFactory.getLogger(ResponseCachingPolicy.class);
61
62 private final boolean sharedCache;
63 private final boolean neverCache1_0ResponsesWithQueryString;
64 private final boolean neverCache1_1ResponsesWithQueryString;
65
66 /**
67 * Constructs a new ResponseCachingPolicy with the specified cache policy settings and stale-if-error support.
68 *
69 * @param sharedCache whether to behave as a shared cache (true) or a
70 * non-shared/private cache (false)
71 * @param neverCache1_0ResponsesWithQueryString {@code true} to never cache HTTP 1.0 responses with a query string,
72 * {@code false} to cache if explicit cache headers are found.
73 * @param neverCache1_1ResponsesWithQueryString {@code true} to never cache HTTP 1.1 responses with a query string,
74 * {@code false} to cache if explicit cache headers are found.
75 * @since 5.4
76 */
77 public ResponseCachingPolicy(
78 final boolean sharedCache,
79 final boolean neverCache1_0ResponsesWithQueryString,
80 final boolean neverCache1_1ResponsesWithQueryString) {
81 this.sharedCache = sharedCache;
82 this.neverCache1_0ResponsesWithQueryString = neverCache1_0ResponsesWithQueryString;
83 this.neverCache1_1ResponsesWithQueryString = neverCache1_1ResponsesWithQueryString;
84 }
85
86 /**
87 * Determine if the {@link HttpResponse} gotten from the origin is a
88 * cacheable response.
89 *
90 * @return {@code true} if response is cacheable
91 */
92 public boolean isResponseCacheable(final ResponseCacheControl cacheControl, final HttpRequest request, final HttpResponse response) {
93 final ProtocolVersion version = request.getVersion() != null ? request.getVersion() : HttpVersion.DEFAULT;
94 if (version.compareToVersion(HttpVersion.HTTP_1_1) > 0) {
95 if (LOG.isDebugEnabled()) {
96 LOG.debug("Protocol version {} is non-cacheable", version);
97 }
98 return false;
99 }
100
101 // Presently only GET and HEAD methods are supported
102 final String httpMethod = request.getMethod();
103 if (!Method.GET.isSame(httpMethod) && !Method.HEAD.isSame(httpMethod)) {
104 if (LOG.isDebugEnabled()) {
105 LOG.debug("{} method response is not cacheable", httpMethod);
106 }
107 return false;
108 }
109
110 final int code = response.getCode();
111
112 // Should never happen but better be defensive
113 if (code <= HttpStatus.SC_INFORMATIONAL) {
114 return false;
115 }
116
117 if (isKnownNonCacheableStatusCode(code)) {
118 if (LOG.isDebugEnabled()) {
119 LOG.debug("{} response is not cacheable", code);
120 }
121 return false;
122 }
123
124 if (request.getPath().contains("?")) {
125 if (neverCache1_0ResponsesWithQueryString && from1_0Origin(response)) {
126 LOG.debug("Response is not cacheable as it had a query string");
127 return false;
128 } else if (!neverCache1_1ResponsesWithQueryString && !isExplicitlyCacheable(cacheControl, response)) {
129 LOG.debug("Response is not cacheable as it is missing explicit caching headers");
130 return false;
131 }
132 }
133
134 if (cacheControl.isMustUnderstand() && !understoodStatusCode(code)) {
135 // must-understand cache directive overrides no-store
136 LOG.debug("Response contains a status code that the cache does not understand, so it's not cacheable");
137 return false;
138 }
139
140 if (isExplicitlyNonCacheable(cacheControl)) {
141 LOG.debug("Response is explicitly non-cacheable per cache control directive");
142 return false;
143 }
144
145 if (sharedCache) {
146 if (request.containsHeader(HttpHeaders.AUTHORIZATION) &&
147 cacheControl.getSharedMaxAge() == -1 &&
148 !(cacheControl.isPublic() || cacheControl.isMustRevalidate())) {
149 LOG.debug("Request contains private credentials");
150 return false;
151 }
152 }
153
154 // See if the response is tainted
155 if (response.countHeaders(HttpHeaders.EXPIRES) > 1) {
156 LOG.debug("Multiple Expires headers");
157 return false;
158 }
159
160 if (response.countHeaders(HttpHeaders.DATE) > 1) {
161 LOG.debug("Multiple Date headers");
162 return false;
163 }
164
165 final Instant responseDate = DateUtils.parseStandardDate(response, HttpHeaders.DATE);
166 final Instant responseExpires = DateUtils.parseStandardDate(response, HttpHeaders.EXPIRES);
167
168 if (expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(cacheControl, responseDate, responseExpires)) {
169 LOG.debug("Expires header less or equal to Date header and no cache control directives");
170 return false;
171 }
172
173 // Treat responses with `Vary: *` as essentially non-cacheable.
174 final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VARY);
175 while (it.hasNext()) {
176 final String token = it.next();
177 if ("*".equals(token)) {
178 if (LOG.isDebugEnabled()) {
179 LOG.debug("Vary: * found");
180 }
181 return false;
182 }
183 }
184
185 return isExplicitlyCacheable(cacheControl, response) || isHeuristicallyCacheable(cacheControl, code, responseDate, responseExpires);
186 }
187
188 private static boolean isKnownCacheableStatusCode(final int status) {
189 return status == HttpStatus.SC_OK ||
190 status == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION ||
191 status == HttpStatus.SC_MULTIPLE_CHOICES ||
192 status == HttpStatus.SC_MOVED_PERMANENTLY ||
193 status == HttpStatus.SC_GONE;
194 }
195
196 private static boolean isKnownNonCacheableStatusCode(final int status) {
197 return status == HttpStatus.SC_PARTIAL_CONTENT;
198 }
199
200 private static boolean isUnknownStatusCode(final int status) {
201 if (status >= 100 && status <= 101) {
202 return false;
203 }
204 if (status >= 200 && status <= 206) {
205 return false;
206 }
207 if (status >= 300 && status <= 307) {
208 return false;
209 }
210 if (status >= 400 && status <= 417) {
211 return false;
212 }
213 return status < 500 || status > 505;
214 }
215
216 /**
217 * Determines whether the given CacheControl object indicates that the response is explicitly non-cacheable.
218 *
219 * @param cacheControl the CacheControl object representing the cache-control directive(s) from the HTTP response.
220 * @return true if the response is explicitly non-cacheable according to the cache-control directive(s),
221 * false otherwise.
222 * <p>
223 * When cacheControl is non-null:
224 * - Returns true if the response contains "no-store" or (if sharedCache is true) "private" cache-control directives.
225 * - If the response contains the "no-cache" directive, it is considered cacheable, but requires validation against
226 * the origin server before use. In this case, the method returns false.
227 * - Returns false for other cache-control directives, implying the response is cacheable.
228 * <p>
229 * When cacheControl is null, returns false, implying the response is cacheable.
230 */
231 protected boolean isExplicitlyNonCacheable(final ResponseCacheControl cacheControl) {
232 if (cacheControl == null) {
233 return false;
234 }
235 // The response is considered explicitly non-cacheable if it contains
236 // "no-store" or (if sharedCache is true) "private" directives.
237 // Note that "no-cache" is considered cacheable but requires validation before use.
238 return cacheControl.isNoStore() || sharedCache && cacheControl.isCachePrivate();
239 }
240
241 protected boolean isExplicitlyCacheable(final ResponseCacheControl cacheControl, final HttpResponse response) {
242 if (cacheControl.isPublic()) {
243 return true;
244 }
245 if (!sharedCache && cacheControl.isCachePrivate()) {
246 return true;
247 }
248 if (response.containsHeader(HttpHeaders.EXPIRES)) {
249 return true;
250 }
251 if (cacheControl.getMaxAge() > 0) {
252 return true;
253 }
254 if (sharedCache && cacheControl.getSharedMaxAge() > 0) {
255 return true;
256 }
257 return false;
258 }
259
260 protected boolean isHeuristicallyCacheable(final ResponseCacheControl cacheControl,
261 final int status,
262 final Instant responseDate,
263 final Instant responseExpires) {
264 if (isKnownCacheableStatusCode(status)) {
265 final Duration freshnessLifetime = calculateFreshnessLifetime(cacheControl, responseDate, responseExpires);
266 // calculate freshness lifetime
267 if (freshnessLifetime.isNegative()) {
268 if (LOG.isDebugEnabled()) {
269 LOG.debug("Freshness lifetime is invalid");
270 }
271 return false;
272 }
273 // If the 'immutable' directive is present and the response is still fresh,
274 // then the response is considered cacheable without further validation
275 if (cacheControl.isImmutable() && responseIsStillFresh(responseDate, freshnessLifetime)) {
276 if (LOG.isDebugEnabled()) {
277 LOG.debug("Response is immutable and fresh, considered cacheable without further validation");
278 }
279 return true;
280 }
281 if (freshnessLifetime.compareTo(Duration.ZERO) > 0) {
282 return true;
283 }
284 } else if (isUnknownStatusCode(status)) {
285 // a response with an unknown status code MUST NOT be
286 // cached
287 if (LOG.isDebugEnabled()) {
288 LOG.debug("{} response is unknown", status);
289 }
290 return false;
291 }
292 return false;
293 }
294
295 private boolean expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant expires) {
296 if (!cacheControl.isUndefined()) {
297 return false;
298 }
299 if (expires == null || responseDate == null) {
300 return false;
301 }
302 return expires.compareTo(responseDate) <= 0;
303 }
304
305 private boolean from1_0Origin(final HttpResponse response) {
306 final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VIA);
307 if (it.hasNext()) {
308 final String token = it.next();
309 return token.startsWith("1.0 ") || token.startsWith("HTTP/1.0 ");
310 }
311 final ProtocolVersion version = response.getVersion() != null ? response.getVersion() : HttpVersion.DEFAULT;
312 return HttpVersion.HTTP_1_0.equals(version);
313 }
314
315 /**
316 * Calculates the freshness lifetime of a response, based on the headers in the response.
317 * <p>
318 * This method follows the algorithm for calculating the freshness lifetime.
319 * The freshness lifetime represents the time interval in seconds during which the response can be served without
320 * being considered stale. The freshness lifetime calculation takes into account the s-maxage, max-age, Expires, and
321 * Date headers as follows:
322 * <ul>
323 * <li>If the s-maxage directive is present in the Cache-Control header of the response, its value is used as the
324 * freshness lifetime for shared caches, which typically serve multiple users or clients.</li>
325 * <li>If the max-age directive is present in the Cache-Control header of the response, its value is used as the
326 * freshness lifetime for private caches, which serve a single user or client.</li>
327 * <li>If the Expires header is present in the response, its value is used as the expiration time of the response.
328 * The freshness lifetime is calculated as the difference between the expiration time and the time specified in the
329 * Date header of the response.</li>
330 * <li>If none of the above headers are present or if the calculated freshness lifetime is invalid, a default value of
331 * 5 minutes is returned.</li>
332 * </ul>
333 *
334 * <p>
335 * Note that caching is a complex topic and cache control directives may interact with each other in non-trivial ways.
336 * This method provides a basic implementation of the freshness lifetime calculation algorithm and may not be suitable
337 * for all use cases. Developers should consult the HTTP caching specifications for more information and consider
338 * implementing additional caching mechanisms as needed.
339 * </p>
340 */
341 private Duration calculateFreshnessLifetime(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant responseExpires) {
342
343 if (cacheControl.isUndefined()) {
344 // If no cache-control header is present, assume no caching directives and return a default value
345 return DEFAULT_FRESHNESS_DURATION; // 5 minutes
346 }
347
348 // Check if s-maxage is present and use its value if it is
349 if (cacheControl.getSharedMaxAge() != -1) {
350 return Duration.ofSeconds(cacheControl.getSharedMaxAge());
351 } else if (cacheControl.getMaxAge() != -1) {
352 return Duration.ofSeconds(cacheControl.getMaxAge());
353 }
354
355 if (responseDate != null && responseExpires != null) {
356 return Duration.ofSeconds(responseExpires.getEpochSecond() - responseDate.getEpochSecond());
357 }
358
359 // If none of the above conditions are met, a heuristic freshness lifetime might be applicable
360 return DEFAULT_FRESHNESS_DURATION; // 5 minutes
361 }
362
363 /**
364 * Understood status codes include:
365 * - All 2xx (Successful) status codes (200-299)
366 * - All 3xx (Redirection) status codes (300-399)
367 * - All 4xx (Client Error) status codes up to 417 and 421
368 * - All 5xx (Server Error) status codes up to 505
369 *
370 * @param status The HTTP status code to be checked.
371 * @return true if the HTTP status code is understood, false otherwise.
372 */
373 private boolean understoodStatusCode(final int status) {
374 return status >= 200 && status <= 206 ||
375 status >= 300 && status <= 399 ||
376 status >= 400 && status <= 417 ||
377 status == 421 ||
378 status >= 500 && status <= 505;
379 }
380
381 /**
382 * Determines if an HttpResponse is still fresh based on its Date header and calculated freshness lifetime.
383 *
384 * <p>
385 * This method calculates the age of the response from its Date header and compares it with the provided freshness
386 * lifetime. If the age is less than the freshness lifetime, the response is considered fresh.
387 * </p>
388 *
389 * <p>
390 * Note: If the Date header is missing or invalid, this method assumes the response is not fresh.
391 * </p>
392 *
393 * @param responseDate The response date.
394 * @param freshnessLifetime The calculated freshness lifetime of the HttpResponse.
395 * @return {@code true} if the response age is less than its freshness lifetime, {@code false} otherwise.
396 */
397 private boolean responseIsStillFresh(final Instant responseDate, final Duration freshnessLifetime) {
398 if (responseDate == null) {
399 // The Date header is missing or invalid. Assuming the response is not fresh.
400 return false;
401 }
402 final Duration age = Duration.between(responseDate, Instant.now());
403 return age.compareTo(freshnessLifetime) < 0;
404 }
405
406 }