View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.ArrayList;
32  import java.util.EnumSet;
33  import java.util.Iterator;
34  import java.util.List;
35  import java.util.Locale;
36  import java.util.Stack;
37  
38  import org.apache.http.HttpHost;
39  import org.apache.http.conn.routing.RouteInfo;
40  import org.apache.http.util.Args;
41  import org.apache.http.util.TextUtils;
42  
43  /**
44   * A collection of utilities for {@link URI URIs}, to workaround
45   * bugs within the class or for ease-of-use features.
46   *
47   * @since 4.0
48   */
49  public class URIUtils {
50  
51      /**
52       * Flags that control how URI is being rewritten.
53       *
54       * @since 4.5.8
55       */
56      public enum UriFlag {
57          DROP_FRAGMENT,
58          NORMALIZE
59      }
60  
61      /**
62       * Empty set of uri flags.
63       *
64       * @since 4.5.8
65       */
66      public static final EnumSet<UriFlag> NO_FLAGS = EnumSet.noneOf(UriFlag.class);
67  
68      /**
69       * Set of uri flags containing {@link UriFlag#DROP_FRAGMENT}.
70       *
71       * @since 4.5.8
72       */
73      public static final EnumSet<UriFlag> DROP_FRAGMENT = EnumSet.of(UriFlag.DROP_FRAGMENT);
74  
75      /**
76       * Set of uri flags containing {@link UriFlag#NORMALIZE}.
77       *
78       * @since 4.5.8
79       */
80      public static final EnumSet<UriFlag> NORMALIZE = EnumSet.of(UriFlag.NORMALIZE);
81  
82      /**
83       * Set of uri flags containing {@link UriFlag#DROP_FRAGMENT} and {@link UriFlag#NORMALIZE}.
84       *
85       * @since 4.5.8
86       */
87      public static final EnumSet<UriFlag> DROP_FRAGMENT_AND_NORMALIZE = EnumSet.of(UriFlag.DROP_FRAGMENT, UriFlag.NORMALIZE);
88  
89       /**
90           * Constructs a {@link URI} using all the parameters. This should be
91           * used instead of
92           * {@link URI#URI(String, String, String, int, String, String, String)}
93           * or any of the other URI multi-argument URI constructors.
94           *
95           * @param scheme
96           *            Scheme name
97           * @param host
98           *            Host name
99           * @param port
100          *            Port number
101          * @param path
102          *            Path
103          * @param query
104          *            Query
105          * @param fragment
106          *            Fragment
107          *
108          * @throws URISyntaxException
109          *             If both a scheme and a path are given but the path is
110          *             relative, if the URI string constructed from the given
111          *             components violates RFC&nbsp;2396, or if the authority
112          *             component of the string is present but cannot be parsed
113          *             as a server-based authority
114          *
115          * @deprecated (4.2) use {@link URIBuilder}.
116          */
117     @Deprecated
118     public static URI createURI(
119             final String scheme,
120             final String host,
121             final int port,
122             final String path,
123             final String query,
124             final String fragment) throws URISyntaxException {
125         final StringBuilder buffer = new StringBuilder();
126         if (host != null) {
127             if (scheme != null) {
128                 buffer.append(scheme);
129                 buffer.append("://");
130             }
131             buffer.append(host);
132             if (port > 0) {
133                 buffer.append(':');
134                 buffer.append(port);
135             }
136         }
137         if (path == null || !path.startsWith("/")) {
138             buffer.append('/');
139         }
140         if (path != null) {
141             buffer.append(path);
142         }
143         if (query != null) {
144             buffer.append('?');
145             buffer.append(query);
146         }
147         if (fragment != null) {
148             buffer.append('#');
149             buffer.append(fragment);
150         }
151         return new URI(buffer.toString());
152     }
153 
154     /**
155      * A convenience method for creating a new {@link URI} whose scheme, host
156      * and port are taken from the target host, but whose path, query and
157      * fragment are taken from the existing URI. The fragment is only used if
158      * dropFragment is false. The path is set to "/" if not explicitly specified.
159      *
160      * @param uri
161      *            Contains the path, query and fragment to use.
162      * @param target
163      *            Contains the scheme, host and port to use.
164      * @param dropFragment
165      *            True if the fragment should not be copied.
166      *
167      * @throws URISyntaxException
168      *             If the resulting URI is invalid.
169      * @deprecated (4.5.8) Use {@link #rewriteURI(URI, HttpHost, EnumSet)}
170      */
171     @Deprecated
172     public static URI rewriteURI(
173             final URI uri,
174             final HttpHost target,
175             final boolean dropFragment) throws URISyntaxException
176     {
177         return rewriteURI(uri, target, dropFragment ? DROP_FRAGMENT : NO_FLAGS);
178     }
179 
180     /**
181      * A convenience method for creating a new {@link URI} whose scheme, host
182      * and port are taken from the target host, but whose path, query and
183      * fragment are taken from the existing URI. What exactly is used and how
184      * is driven by the passed in flags. The path is set to "/" if not explicitly specified.
185      *
186      * @param uri
187      *            Contains the path, query and fragment to use.
188      * @param target
189      *            Contains the scheme, host and port to use.
190      * @param flags
191      *            True if the fragment should not be copied.
192      *
193      * @throws URISyntaxException
194      *             If the resulting URI is invalid.
195      * @since 4.5.8
196      */
197     public static URI rewriteURI(
198             final URI uri,
199             final HttpHost target,
200             final EnumSet<UriFlag> flags) throws URISyntaxException {
201         Args.notNull(uri, "URI");
202         Args.notNull(flags, "URI flags");
203         if (uri.isOpaque()) {
204             return uri;
205         }
206         final URIBuilderuilder.html#URIBuilder">URIBuilder uribuilder = new URIBuilder(uri);
207         if (target != null) {
208             uribuilder.setScheme(target.getSchemeName());
209             uribuilder.setHost(target.getHostName());
210             uribuilder.setPort(target.getPort());
211         } else {
212             uribuilder.setScheme(null);
213             uribuilder.setHost(null);
214             uribuilder.setPort(-1);
215         }
216         if (flags.contains(UriFlag.DROP_FRAGMENT)) {
217             uribuilder.setFragment(null);
218         }
219         if (flags.contains(UriFlag.NORMALIZE)) {
220             final List<String> pathSegments = new ArrayList<String>(uribuilder.getPathSegments());
221             for (final Iterator<String> it = pathSegments.iterator(); it.hasNext(); ) {
222                 final String pathSegment = it.next();
223                 if (pathSegment.isEmpty() && it.hasNext()) {
224                     it.remove();
225                 }
226             }
227             uribuilder.setPathSegments(pathSegments);
228         }
229         if (uribuilder.isPathEmpty()) {
230             uribuilder.setPathSegments("");
231         }
232         return uribuilder.build();
233     }
234 
235     /**
236      * A convenience method for
237      * {@link URIUtils#rewriteURI(URI, HttpHost, EnumSet)} that always keeps the
238      * fragment.
239      */
240     public static URI rewriteURI(
241             final URI uri,
242             final HttpHost target) throws URISyntaxException {
243         return rewriteURI(uri, target, NORMALIZE);
244     }
245 
246     /**
247      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
248      * query are taken from the existing URI, dropping any fragment or user-information.
249      * The path is set to "/" if not explicitly specified. The existing URI is returned
250      * unmodified if it has no fragment or user-information and has a path.
251      *
252      * @param uri
253      *            original URI.
254      * @throws URISyntaxException
255      *             If the resulting URI is invalid.
256      */
257     public static URI rewriteURI(final URI uri) throws URISyntaxException {
258         Args.notNull(uri, "URI");
259         if (uri.isOpaque()) {
260             return uri;
261         }
262         final URIBuilderuilder.html#URIBuilder">URIBuilder uribuilder = new URIBuilder(uri);
263         if (uribuilder.getUserInfo() != null) {
264             uribuilder.setUserInfo(null);
265         }
266         if (uribuilder.getPathSegments().isEmpty()) {
267             uribuilder.setPathSegments("");
268         }
269         if (TextUtils.isEmpty(uribuilder.getPath())) {
270             uribuilder.setPath("/");
271         }
272         if (uribuilder.getHost() != null) {
273             uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
274         }
275         uribuilder.setFragment(null);
276         return uribuilder.build();
277     }
278 
279     /**
280      * A convenience method that optionally converts the original {@link java.net.URI} either
281      * to a relative or an absolute form as required by the specified route.
282      *
283      * @param uri
284      *            original URI.
285      * @throws URISyntaxException
286      *             If the resulting URI is invalid.
287      *
288      * @since 4.4
289      */
290     public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
291         return rewriteURIForRoute(uri, route, true);
292     }
293 
294     /**
295      * A convenience method that optionally converts the original {@link java.net.URI} either
296      * to a relative or an absolute form as required by the specified route.
297      *
298      * @param uri
299      *            original URI.
300      * @throws URISyntaxException
301      *             If the resulting URI is invalid.
302      *
303      * @since 4.5.8
304      */
305     public static URI rewriteURIForRoute(final URI uri, final RouteInfo route, final boolean normalizeUri) throws URISyntaxException {
306         if (uri == null) {
307             return null;
308         }
309         if (route.getProxyHost() != null && !route.isTunnelled()) {
310             // Make sure the request URI is absolute
311             return uri.isAbsolute()
312                     ? rewriteURI(uri)
313                     : rewriteURI(uri, route.getTargetHost(), normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT);
314         }
315         // Make sure the request URI is relative
316         return uri.isAbsolute() ? rewriteURI(uri, null, normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT) : rewriteURI(uri);
317     }
318 
319     /**
320      * Resolves a URI reference against a base URI. Work-around for bug in
321      * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
322      *
323      * @param baseURI the base URI
324      * @param reference the URI reference
325      * @return the resulting URI
326      */
327     public static URI resolve(final URI baseURI, final String reference) {
328         return resolve(baseURI, URI.create(reference));
329     }
330 
331     /**
332      * Resolves a URI reference against a base URI. Work-around for bugs in
333      * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
334      *
335      * @param baseURI the base URI
336      * @param reference the URI reference
337      * @return the resulting URI
338      */
339     public static URI resolve(final URI baseURI, final URI reference){
340         Args.notNull(baseURI, "Base URI");
341         Args.notNull(reference, "Reference URI");
342         final String s = reference.toASCIIString();
343         if (s.startsWith("?")) {
344             String baseUri = baseURI.toASCIIString();
345             final int i = baseUri.indexOf('?');
346             baseUri = i > -1 ? baseUri.substring(0, i) : baseUri;
347             return URI.create(baseUri + s);
348         }
349         final boolean emptyReference = s.isEmpty();
350         URI resolved;
351         if (emptyReference) {
352             resolved = baseURI.resolve(URI.create("#"));
353             final String resolvedString = resolved.toASCIIString();
354             resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
355         } else {
356             resolved = baseURI.resolve(reference);
357         }
358         try {
359             return normalizeSyntax(resolved);
360         } catch (final URISyntaxException ex) {
361             throw new IllegalArgumentException(ex);
362         }
363     }
364 
365     /**
366      * Removes dot segments according to RFC 3986, section 5.2.4 and
367      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
368      *
369      * @param uri the original URI
370      * @return the URI without dot segments
371      *
372      * @since 4.5
373      */
374     public static URI normalizeSyntax(final URI uri) throws URISyntaxException {
375         if (uri.isOpaque() || uri.getAuthority() == null) {
376             // opaque and file: URIs
377             return uri;
378         }
379         final URIBuilderRIBuilder.html#URIBuilder">URIBuilder builder = new URIBuilder(uri);
380         final List<String> inputSegments = builder.getPathSegments();
381         final Stack<String> outputSegments = new Stack<String>();
382         for (final String inputSegment : inputSegments) {
383             if (".".equals(inputSegment)) {
384                 // Do nothing
385             } else if ("..".equals(inputSegment)) {
386                 if (!outputSegments.isEmpty()) {
387                     outputSegments.pop();
388                 }
389             } else {
390                 outputSegments.push(inputSegment);
391             }
392         }
393         if (outputSegments.size() == 0) {
394             outputSegments.add("");
395         }
396         builder.setPathSegments(outputSegments);
397         if (builder.getScheme() != null) {
398             builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
399         }
400         if (builder.getHost() != null) {
401             builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
402         }
403         return builder.build();
404     }
405 
406     /**
407      * Extracts target host from the given {@link URI}.
408      *
409      * @param uri
410      * @return the target host if the URI is absolute or {@code null} if the URI is
411      * relative or does not contain a valid host name.
412      *
413      * @since 4.1
414      */
415     public static HttpHost extractHost(final URI uri) {
416         if (uri == null) {
417             return null;
418         }
419         HttpHost target = null;
420         if (uri.isAbsolute()) {
421             int port = uri.getPort(); // may be overridden later
422             String host = uri.getHost();
423             if (host == null) { // normal parse failed; let's do it ourselves
424                 // authority does not seem to care about the valid character-set for host names
425                 host = uri.getAuthority();
426                 if (host != null) {
427                     // Strip off any leading user credentials
428                     final int at = host.indexOf('@');
429                     if (at >= 0) {
430                         if (host.length() > at+1 ) {
431                             host = host.substring(at+1);
432                         } else {
433                             host = null; // @ on its own
434                         }
435                     }
436                     // Extract the port suffix, if present
437                     if (host != null) {
438                         final int colon = host.indexOf(':');
439                         if (colon >= 0) {
440                             final int pos = colon + 1;
441                             int len = 0;
442                             for (int i = pos; i < host.length(); i++) {
443                                 if (Character.isDigit(host.charAt(i))) {
444                                     len++;
445                                 } else {
446                                     break;
447                                 }
448                             }
449                             if (len > 0) {
450                                 try {
451                                     port = Integer.parseInt(host.substring(pos, pos + len));
452                                 } catch (final NumberFormatException ex) {
453                                 }
454                             }
455                             host = host.substring(0, colon);
456                         }
457                     }
458                 }
459             }
460             final String scheme = uri.getScheme();
461             if (!TextUtils.isBlank(host)) {
462                 try {
463                     target = new HttpHost(host, port, scheme);
464                 } catch (final IllegalArgumentException ignore) {
465                 }
466             }
467         }
468         return target;
469     }
470 
471     /**
472      * Derives the interpreted (absolute) URI that was used to generate the last
473      * request. This is done by extracting the request-uri and target origin for
474      * the last request and scanning all the redirect locations for the last
475      * fragment identifier, then combining the result into a {@link URI}.
476      *
477      * @param originalURI
478      *            original request before any redirects
479      * @param target
480      *            if the last URI is relative, it is resolved against this target,
481      *            or {@code null} if not available.
482      * @param redirects
483      *            collection of redirect locations since the original request
484      *            or {@code null} if not available.
485      * @return interpreted (absolute) URI
486      */
487     public static URI resolve(
488             final URI originalURI,
489             final HttpHost target,
490             final List<URI> redirects) throws URISyntaxException {
491         Args.notNull(originalURI, "Request URI");
492         final URIBuilder uribuilder;
493         if (redirects == null || redirects.isEmpty()) {
494             uribuilder = new URIBuilder(originalURI);
495         } else {
496             uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
497             String frag = uribuilder.getFragment();
498             // read interpreted fragment identifier from redirect locations
499             for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
500                 frag = redirects.get(i).getFragment();
501             }
502             uribuilder.setFragment(frag);
503         }
504         // read interpreted fragment identifier from original request
505         if (uribuilder.getFragment() == null) {
506             uribuilder.setFragment(originalURI.getFragment());
507         }
508         // last target origin
509         if (target != null && !uribuilder.isAbsolute()) {
510             uribuilder.setScheme(target.getSchemeName());
511             uribuilder.setHost(target.getHostName());
512             uribuilder.setPort(target.getPort());
513         }
514         return uribuilder.build();
515     }
516 
517     /**
518      * This class should not be instantiated.
519      */
520     private URIUtils() {
521     }
522 
523 }