View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.hc.client5.http.impl.cache;
28  
29  import java.time.Duration;
30  import java.time.Instant;
31  import java.util.Iterator;
32  
33  import org.apache.hc.client5.http.cache.ResponseCacheControl;
34  import org.apache.hc.client5.http.utils.DateUtils;
35  import org.apache.hc.core5.http.HttpHeaders;
36  import org.apache.hc.core5.http.HttpRequest;
37  import org.apache.hc.core5.http.HttpResponse;
38  import org.apache.hc.core5.http.HttpStatus;
39  import org.apache.hc.core5.http.HttpVersion;
40  import org.apache.hc.core5.http.Method;
41  import org.apache.hc.core5.http.ProtocolVersion;
42  import org.apache.hc.core5.http.message.MessageSupport;
43  import org.slf4j.Logger;
44  import org.slf4j.LoggerFactory;
45  
46  class ResponseCachingPolicy {
47  
48      /**
49       * The default freshness duration for a cached object, in seconds.
50       *
51       * <p>This constant is used to set the default value for the freshness lifetime of a cached object.
52       * When a new object is added to the cache, it will be assigned this duration if no other duration
53       * is specified.</p>
54       *
55       * <p>By default, this value is set to 300 seconds (5 minutes). Applications can customize this
56       * value as needed.</p>
57       */
58       private static final Duration DEFAULT_FRESHNESS_DURATION = Duration.ofMinutes(5);
59  
60      private static final Logger LOG = LoggerFactory.getLogger(ResponseCachingPolicy.class);
61  
62      private final boolean sharedCache;
63      private final boolean neverCache1_0ResponsesWithQueryString;
64      private final boolean neverCache1_1ResponsesWithQueryString;
65  
66      /**
67       * Constructs a new ResponseCachingPolicy with the specified cache policy settings and stale-if-error support.
68       *
69       * @param sharedCache                           whether to behave as a shared cache (true) or a
70       *                                              non-shared/private cache (false)
71       * @param neverCache1_0ResponsesWithQueryString {@code true} to never cache HTTP 1.0 responses with a query string,
72       *                                              {@code false} to cache if explicit cache headers are found.
73       * @param neverCache1_1ResponsesWithQueryString {@code true} to never cache HTTP 1.1 responses with a query string,
74       *                                              {@code false} to cache if explicit cache headers are found.
75       * @since 5.4
76       */
77      public ResponseCachingPolicy(
78               final boolean sharedCache,
79               final boolean neverCache1_0ResponsesWithQueryString,
80               final boolean neverCache1_1ResponsesWithQueryString) {
81          this.sharedCache = sharedCache;
82          this.neverCache1_0ResponsesWithQueryString = neverCache1_0ResponsesWithQueryString;
83          this.neverCache1_1ResponsesWithQueryString = neverCache1_1ResponsesWithQueryString;
84      }
85  
86      /**
87       * Determine if the {@link HttpResponse} gotten from the origin is a
88       * cacheable response.
89       *
90       * @return {@code true} if response is cacheable
91       */
92      public boolean isResponseCacheable(final ResponseCacheControl cacheControl, final HttpRequest request, final HttpResponse response) {
93          final ProtocolVersion version = request.getVersion() != null ? request.getVersion() : HttpVersion.DEFAULT;
94          if (version.compareToVersion(HttpVersion.HTTP_1_1) > 0) {
95              if (LOG.isDebugEnabled()) {
96                  LOG.debug("Protocol version {} is non-cacheable", version);
97              }
98              return false;
99          }
100 
101         // Presently only GET and HEAD methods are supported
102         final String httpMethod = request.getMethod();
103         if (!Method.GET.isSame(httpMethod) && !Method.HEAD.isSame(httpMethod)) {
104             if (LOG.isDebugEnabled()) {
105                 LOG.debug("{} method response is not cacheable", httpMethod);
106             }
107             return false;
108         }
109 
110         final int code = response.getCode();
111 
112         // Should never happen but better be defensive
113         if (code <= HttpStatus.SC_INFORMATIONAL) {
114             return false;
115         }
116 
117         if (isKnownNonCacheableStatusCode(code)) {
118             if (LOG.isDebugEnabled()) {
119                 LOG.debug("{} response is not cacheable", code);
120             }
121             return false;
122         }
123 
124         if (request.getPath().contains("?")) {
125             if (neverCache1_0ResponsesWithQueryString && from1_0Origin(response)) {
126                 LOG.debug("Response is not cacheable as it had a query string");
127                 return false;
128             } else if (!neverCache1_1ResponsesWithQueryString && !isExplicitlyCacheable(cacheControl, response)) {
129                 LOG.debug("Response is not cacheable as it is missing explicit caching headers");
130                 return false;
131             }
132         }
133 
134         if (cacheControl.isMustUnderstand() && !understoodStatusCode(code)) {
135             // must-understand cache directive overrides no-store
136             LOG.debug("Response contains a status code that the cache does not understand, so it's not cacheable");
137             return false;
138         }
139 
140         if (isExplicitlyNonCacheable(cacheControl)) {
141             LOG.debug("Response is explicitly non-cacheable per cache control directive");
142             return false;
143         }
144 
145         if (sharedCache) {
146             if (request.containsHeader(HttpHeaders.AUTHORIZATION) &&
147                     cacheControl.getSharedMaxAge() == -1 &&
148                     !(cacheControl.isPublic() || cacheControl.isMustRevalidate())) {
149                 LOG.debug("Request contains private credentials");
150                 return false;
151             }
152         }
153 
154         // See if the response is tainted
155         if (response.countHeaders(HttpHeaders.EXPIRES) > 1) {
156             LOG.debug("Multiple Expires headers");
157             return false;
158         }
159 
160         if (response.countHeaders(HttpHeaders.DATE) > 1) {
161             LOG.debug("Multiple Date headers");
162             return false;
163         }
164 
165         final Instant responseDate = DateUtils.parseStandardDate(response, HttpHeaders.DATE);
166         final Instant responseExpires = DateUtils.parseStandardDate(response, HttpHeaders.EXPIRES);
167 
168         if (expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(cacheControl, responseDate, responseExpires)) {
169             LOG.debug("Expires header less or equal to Date header and no cache control directives");
170             return false;
171         }
172 
173         // Treat responses with `Vary: *` as essentially non-cacheable.
174         final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VARY);
175         while (it.hasNext()) {
176             final String token = it.next();
177             if ("*".equals(token)) {
178                 if (LOG.isDebugEnabled()) {
179                     LOG.debug("Vary: * found");
180                 }
181                 return false;
182             }
183         }
184 
185         return isExplicitlyCacheable(cacheControl, response) || isHeuristicallyCacheable(cacheControl, code, responseDate, responseExpires);
186     }
187 
188     private static boolean isKnownCacheableStatusCode(final int status) {
189         return status == HttpStatus.SC_OK ||
190                 status == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION ||
191                 status == HttpStatus.SC_MULTIPLE_CHOICES ||
192                 status == HttpStatus.SC_MOVED_PERMANENTLY ||
193                 status == HttpStatus.SC_GONE;
194     }
195 
196     private static boolean isKnownNonCacheableStatusCode(final int status) {
197         return status == HttpStatus.SC_PARTIAL_CONTENT;
198     }
199 
200     private static boolean isUnknownStatusCode(final int status) {
201         if (status >= 100 && status <= 101) {
202             return false;
203         }
204         if (status >= 200 && status <= 206) {
205             return false;
206         }
207         if (status >= 300 && status <= 307) {
208             return false;
209         }
210         if (status >= 400 && status <= 417) {
211             return false;
212         }
213         return status < 500 || status > 505;
214     }
215 
216     /**
217      * Determines whether the given CacheControl object indicates that the response is explicitly non-cacheable.
218      *
219      * @param cacheControl the CacheControl object representing the cache-control directive(s) from the HTTP response.
220      * @return true if the response is explicitly non-cacheable according to the cache-control directive(s),
221      * false otherwise.
222      * <p>
223      * When cacheControl is non-null:
224      * - Returns true if the response contains "no-store" or (if sharedCache is true) "private" cache-control directives.
225      * - If the response contains the "no-cache" directive, it is considered cacheable, but requires validation against
226      * the origin server before use. In this case, the method returns false.
227      * - Returns false for other cache-control directives, implying the response is cacheable.
228      * <p>
229      * When cacheControl is null, returns false, implying the response is cacheable.
230      */
231     protected boolean isExplicitlyNonCacheable(final ResponseCacheControl cacheControl) {
232         if (cacheControl == null) {
233             return false;
234         }
235         // The response is considered explicitly non-cacheable if it contains
236         // "no-store" or (if sharedCache is true) "private" directives.
237         // Note that "no-cache" is considered cacheable but requires validation before use.
238         return cacheControl.isNoStore() || (sharedCache && cacheControl.isCachePrivate());
239     }
240 
241     protected boolean isExplicitlyCacheable(final ResponseCacheControl cacheControl, final HttpResponse response) {
242         if (cacheControl.isPublic()) {
243             return true;
244         }
245         if (!sharedCache && cacheControl.isCachePrivate()) {
246             return true;
247         }
248         if (response.containsHeader(HttpHeaders.EXPIRES)) {
249             return true;
250         }
251         if (cacheControl.getMaxAge() > 0) {
252             return true;
253         }
254         if (sharedCache && cacheControl.getSharedMaxAge() > 0) {
255             return true;
256         }
257         return false;
258     }
259 
260     protected boolean isHeuristicallyCacheable(final ResponseCacheControl cacheControl,
261                                                final int status,
262                                                final Instant responseDate,
263                                                final Instant responseExpires) {
264         if (isKnownCacheableStatusCode(status)) {
265             final Duration freshnessLifetime = calculateFreshnessLifetime(cacheControl, responseDate, responseExpires);
266             // calculate freshness lifetime
267             if (freshnessLifetime.isNegative()) {
268                 if (LOG.isDebugEnabled()) {
269                     LOG.debug("Freshness lifetime is invalid");
270                 }
271                 return false;
272             }
273             // If the 'immutable' directive is present and the response is still fresh,
274             // then the response is considered cacheable without further validation
275             if (cacheControl.isImmutable() && responseIsStillFresh(responseDate, freshnessLifetime)) {
276                 if (LOG.isDebugEnabled()) {
277                     LOG.debug("Response is immutable and fresh, considered cacheable without further validation");
278                 }
279                 return true;
280             }
281             if (freshnessLifetime.compareTo(Duration.ZERO) > 0) {
282                 return true;
283             }
284         } else if (isUnknownStatusCode(status)) {
285             // a response with an unknown status code MUST NOT be
286             // cached
287             if (LOG.isDebugEnabled()) {
288                 LOG.debug("{} response is unknown", status);
289             }
290             return false;
291         }
292         return false;
293     }
294 
295     private boolean expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant expires) {
296         if (!cacheControl.isUndefined()) {
297             return false;
298         }
299         if (expires == null || responseDate == null) {
300             return false;
301         }
302         return expires.compareTo(responseDate) <= 0;
303     }
304 
305     private boolean from1_0Origin(final HttpResponse response) {
306         final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VIA);
307         if (it.hasNext()) {
308             final String token = it.next();
309             return token.startsWith("1.0 ") || token.startsWith("HTTP/1.0 ");
310         }
311         final ProtocolVersion version = response.getVersion() != null ? response.getVersion() : HttpVersion.DEFAULT;
312         return HttpVersion.HTTP_1_0.equals(version);
313     }
314 
315     /**
316      * Calculates the freshness lifetime of a response, based on the headers in the response.
317      * <p>
318      * This method follows the algorithm for calculating the freshness lifetime.
319      * The freshness lifetime represents the time interval in seconds during which the response can be served without
320      * being considered stale. The freshness lifetime calculation takes into account the s-maxage, max-age, Expires, and
321      * Date headers as follows:
322      * <ul>
323      * <li>If the s-maxage directive is present in the Cache-Control header of the response, its value is used as the
324      * freshness lifetime for shared caches, which typically serve multiple users or clients.</li>
325      * <li>If the max-age directive is present in the Cache-Control header of the response, its value is used as the
326      * freshness lifetime for private caches, which serve a single user or client.</li>
327      * <li>If the Expires header is present in the response, its value is used as the expiration time of the response.
328      * The freshness lifetime is calculated as the difference between the expiration time and the time specified in the
329      * Date header of the response.</li>
330      * <li>If none of the above headers are present or if the calculated freshness lifetime is invalid, a default value of
331      * 5 minutes is returned.</li>
332      * </ul>
333      *
334      * <p>
335      * Note that caching is a complex topic and cache control directives may interact with each other in non-trivial ways.
336      * This method provides a basic implementation of the freshness lifetime calculation algorithm and may not be suitable
337      * for all use cases. Developers should consult the HTTP caching specifications for more information and consider
338      * implementing additional caching mechanisms as needed.
339      * </p>
340      */
341     private Duration calculateFreshnessLifetime(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant responseExpires) {
342 
343         if (cacheControl.isUndefined()) {
344             // If no cache-control header is present, assume no caching directives and return a default value
345             return DEFAULT_FRESHNESS_DURATION; // 5 minutes
346         }
347 
348         // Check if s-maxage is present and use its value if it is
349         if (cacheControl.getSharedMaxAge() != -1) {
350             return Duration.ofSeconds(cacheControl.getSharedMaxAge());
351         } else if (cacheControl.getMaxAge() != -1) {
352             return Duration.ofSeconds(cacheControl.getMaxAge());
353         }
354 
355         if (responseDate != null && responseExpires != null) {
356             return Duration.ofSeconds(responseExpires.getEpochSecond() - responseDate.getEpochSecond());
357         }
358 
359         // If none of the above conditions are met, a heuristic freshness lifetime might be applicable
360         return DEFAULT_FRESHNESS_DURATION; // 5 minutes
361     }
362 
363     /**
364      * Understood status codes include:
365      * - All 2xx (Successful) status codes (200-299)
366      * - All 3xx (Redirection) status codes (300-399)
367      * - All 4xx (Client Error) status codes up to 417 and 421
368      * - All 5xx (Server Error) status codes up to 505
369      *
370      * @param status The HTTP status code to be checked.
371      * @return true if the HTTP status code is understood, false otherwise.
372      */
373     private boolean understoodStatusCode(final int status) {
374         return (status >= 200 && status <= 206)    ||
375                 (status >= 300 && status <= 399)   ||
376                 (status >= 400 && status <= 417)   ||
377                 (status == 421)                    ||
378                 (status >= 500 && status <= 505);
379     }
380 
381     /**
382      * Determines if an HttpResponse is still fresh based on its Date header and calculated freshness lifetime.
383      *
384      * <p>
385      * This method calculates the age of the response from its Date header and compares it with the provided freshness
386      * lifetime. If the age is less than the freshness lifetime, the response is considered fresh.
387      * </p>
388      *
389      * <p>
390      * Note: If the Date header is missing or invalid, this method assumes the response is not fresh.
391      * </p>
392      *
393      * @param responseDate  The response date.
394      * @param freshnessLifetime The calculated freshness lifetime of the HttpResponse.
395      * @return {@code true} if the response age is less than its freshness lifetime, {@code false} otherwise.
396      */
397     private boolean responseIsStillFresh(final Instant responseDate, final Duration freshnessLifetime) {
398         if (responseDate == null) {
399             // The Date header is missing or invalid. Assuming the response is not fresh.
400             return false;
401         }
402         final Duration age = Duration.between(responseDate, Instant.now());
403         return age.compareTo(freshnessLifetime) < 0;
404     }
405 
406 }