View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.ArrayList;
32  import java.util.EnumSet;
33  import java.util.Iterator;
34  import java.util.List;
35  import java.util.Locale;
36  import java.util.Stack;
37  
38  import org.apache.http.HttpHost;
39  import org.apache.http.conn.routing.RouteInfo;
40  import org.apache.http.util.Args;
41  import org.apache.http.util.TextUtils;
42  
43  /**
44   * A collection of utilities for {@link URI URIs}, to workaround
45   * bugs within the class or for ease-of-use features.
46   *
47   * @since 4.0
48   */
49  public class URIUtils {
50  
    /**
     * Flags that control how a URI is rewritten by
     * {@link #rewriteURI(URI, HttpHost, EnumSet)}.
     *
     * @since 4.5.8
     */
    public enum UriFlag {
        /** When set, the fragment of the rewritten URI is cleared. */
        DROP_FRAGMENT,
        /** When set, empty path segments other than the last one are removed. */
        NORMALIZE
    }
60  
    /**
     * Empty set of uri flags.
     * <p>
     * NOTE(review): {@link EnumSet} instances are mutable, so this shared
     * constant should be treated as read-only by callers.
     *
     * @since 4.5.8
     */
    public static final EnumSet<UriFlag> NO_FLAGS = EnumSet.noneOf(UriFlag.class);

    /**
     * Set of uri flags containing only {@link UriFlag#DROP_FRAGMENT}.
     * <p>
     * NOTE(review): {@link EnumSet} instances are mutable, so this shared
     * constant should be treated as read-only by callers.
     *
     * @since 4.5.8
     */
    public static final EnumSet<UriFlag> DROP_FRAGMENT = EnumSet.of(UriFlag.DROP_FRAGMENT);

    /**
     * Set of uri flags containing only {@link UriFlag#NORMALIZE}.
     * <p>
     * NOTE(review): {@link EnumSet} instances are mutable, so this shared
     * constant should be treated as read-only by callers.
     *
     * @since 4.5.8
     */
    public static final EnumSet<UriFlag> NORMALIZE = EnumSet.of(UriFlag.NORMALIZE);

    /**
     * Set of uri flags containing both {@link UriFlag#DROP_FRAGMENT} and {@link UriFlag#NORMALIZE}.
     * <p>
     * NOTE(review): {@link EnumSet} instances are mutable, so this shared
     * constant should be treated as read-only by callers.
     *
     * @since 4.5.8
     */
    public static final EnumSet<UriFlag> DROP_FRAGMENT_AND_NORMALIZE = EnumSet.of(UriFlag.DROP_FRAGMENT, UriFlag.NORMALIZE);
88  
89       /**
90           * Constructs a {@link URI} using all the parameters. This should be
91           * used instead of
92           * {@link URI#URI(String, String, String, int, String, String, String)}
93           * or any of the other URI multi-argument URI constructors.
94           *
95           * @param scheme
96           *            Scheme name
97           * @param host
98           *            Host name
99           * @param port
100          *            Port number
101          * @param path
102          *            Path
103          * @param query
104          *            Query
105          * @param fragment
106          *            Fragment
107          *
108          * @throws URISyntaxException
109          *             If both a scheme and a path are given but the path is
110          *             relative, if the URI string constructed from the given
111          *             components violates RFC&nbsp;2396, or if the authority
112          *             component of the string is present but cannot be parsed
113          *             as a server-based authority
114          *
115          * @deprecated (4.2) use {@link URIBuilder}.
116          */
117     @Deprecated
118     public static URI createURI(
119             final String scheme,
120             final String host,
121             final int port,
122             final String path,
123             final String query,
124             final String fragment) throws URISyntaxException {
125         final StringBuilder buffer = new StringBuilder();
126         if (host != null) {
127             if (scheme != null) {
128                 buffer.append(scheme);
129                 buffer.append("://");
130             }
131             buffer.append(host);
132             if (port > 0) {
133                 buffer.append(':');
134                 buffer.append(port);
135             }
136         }
137         if (path == null || !path.startsWith("/")) {
138             buffer.append('/');
139         }
140         if (path != null) {
141             buffer.append(path);
142         }
143         if (query != null) {
144             buffer.append('?');
145             buffer.append(query);
146         }
147         if (fragment != null) {
148             buffer.append('#');
149             buffer.append(fragment);
150         }
151         return new URI(buffer.toString());
152     }
153 
154     /**
155      * A convenience method for creating a new {@link URI} whose scheme, host
156      * and port are taken from the target host, but whose path, query and
157      * fragment are taken from the existing URI. The fragment is only used if
158      * dropFragment is false. The path is set to "/" if not explicitly specified.
159      *
160      * @param uri
161      *            Contains the path, query and fragment to use.
162      * @param target
163      *            Contains the scheme, host and port to use.
164      * @param dropFragment
165      *            True if the fragment should not be copied.
166      *
167      * @throws URISyntaxException
168      *             If the resulting URI is invalid.
169      * @deprecated (4.5.8) Use {@link #rewriteURI(URI, HttpHost, EnumSet)}
170      */
171     @Deprecated
172     public static URI rewriteURI(
173             final URI uri,
174             final HttpHost target,
175             final boolean dropFragment) throws URISyntaxException
176     {
177         return rewriteURI(uri, target, dropFragment ? DROP_FRAGMENT : NO_FLAGS);
178     }
179 
180     /**
181      * A convenience method for creating a new {@link URI} whose scheme, host
182      * and port are taken from the target host, but whose path, query and
183      * fragment are taken from the existing URI. What exactly is used and how
184      * is driven by the passed in flags. The path is set to "/" if not explicitly specified.
185      *
186      * @param uri
187      *            Contains the path, query and fragment to use.
188      * @param target
189      *            Contains the scheme, host and port to use.
190      * @param flags
191      *            True if the fragment should not be copied.
192      *
193      * @throws URISyntaxException
194      *             If the resulting URI is invalid.
195      * @since 4.5.8
196      */
197     public static URI rewriteURI(
198             final URI uri,
199             final HttpHost target,
200             final EnumSet<UriFlag> flags) throws URISyntaxException {
201         Args.notNull(uri, "URI");
202         Args.notNull(flags, "URI flags");
203         if (uri.isOpaque()) {
204             return uri;
205         }
206         final URIBuilderuilder.html#URIBuilder">URIBuilder uribuilder = new URIBuilder(uri);
207         if (target != null) {
208             uribuilder.setScheme(target.getSchemeName());
209             uribuilder.setHost(target.getHostName());
210             uribuilder.setPort(target.getPort());
211         } else {
212             uribuilder.setScheme(null);
213             uribuilder.setHost(null);
214             uribuilder.setPort(-1);
215         }
216         if (flags.contains(UriFlag.DROP_FRAGMENT)) {
217             uribuilder.setFragment(null);
218         }
219         if (flags.contains(UriFlag.NORMALIZE)) {
220             final List<String> originalPathSegments = uribuilder.getPathSegments();
221             final List<String> pathSegments = new ArrayList<String>(originalPathSegments);
222             for (final Iterator<String> it = pathSegments.iterator(); it.hasNext(); ) {
223                 final String pathSegment = it.next();
224                 if (pathSegment.isEmpty() && it.hasNext()) {
225                     it.remove();
226                 }
227             }
228             if (pathSegments.size() != originalPathSegments.size()) {
229                 uribuilder.setPathSegments(pathSegments);
230             }
231         }
232         if (uribuilder.isPathEmpty()) {
233             uribuilder.setPathSegments("");
234         }
235         return uribuilder.build();
236     }
237 
238     /**
239      * A convenience method for
240      * {@link URIUtils#rewriteURI(URI, HttpHost, EnumSet)} that always keeps the
241      * fragment.
242      */
243     public static URI rewriteURI(
244             final URI uri,
245             final HttpHost target) throws URISyntaxException {
246         return rewriteURI(uri, target, NORMALIZE);
247     }
248 
249     /**
250      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
251      * query are taken from the existing URI, dropping any fragment or user-information.
252      * The path is set to "/" if not explicitly specified. The existing URI is returned
253      * unmodified if it has no fragment or user-information and has a path.
254      *
255      * @param uri
256      *            original URI.
257      * @throws URISyntaxException
258      *             If the resulting URI is invalid.
259      */
260     public static URI rewriteURI(final URI uri) throws URISyntaxException {
261         Args.notNull(uri, "URI");
262         if (uri.isOpaque()) {
263             return uri;
264         }
265         final URIBuilderuilder.html#URIBuilder">URIBuilder uribuilder = new URIBuilder(uri);
266         if (uribuilder.getUserInfo() != null) {
267             uribuilder.setUserInfo(null);
268         }
269         if (uribuilder.getPathSegments().isEmpty()) {
270             uribuilder.setPathSegments("");
271         }
272         if (TextUtils.isEmpty(uribuilder.getPath())) {
273             uribuilder.setPath("/");
274         }
275         if (uribuilder.getHost() != null) {
276             uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
277         }
278         uribuilder.setFragment(null);
279         return uribuilder.build();
280     }
281 
282     /**
283      * A convenience method that optionally converts the original {@link java.net.URI} either
284      * to a relative or an absolute form as required by the specified route.
285      *
286      * @param uri
287      *            original URI.
288      * @throws URISyntaxException
289      *             If the resulting URI is invalid.
290      *
291      * @since 4.4
292      */
293     public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
294         return rewriteURIForRoute(uri, route, true);
295     }
296 
297     /**
298      * A convenience method that optionally converts the original {@link java.net.URI} either
299      * to a relative or an absolute form as required by the specified route.
300      *
301      * @param uri
302      *            original URI.
303      * @throws URISyntaxException
304      *             If the resulting URI is invalid.
305      *
306      * @since 4.5.8
307      */
308     public static URI rewriteURIForRoute(final URI uri, final RouteInfo route, final boolean normalizeUri) throws URISyntaxException {
309         if (uri == null) {
310             return null;
311         }
312         if (route.getProxyHost() != null && !route.isTunnelled()) {
313             // Make sure the request URI is absolute
314             return uri.isAbsolute()
315                     ? rewriteURI(uri)
316                     : rewriteURI(uri, route.getTargetHost(), normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT);
317         }
318         // Make sure the request URI is relative
319         return uri.isAbsolute() ? rewriteURI(uri, null, normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT) : rewriteURI(uri);
320     }
321 
322     /**
323      * Resolves a URI reference against a base URI. Work-around for bug in
324      * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
325      *
326      * @param baseURI the base URI
327      * @param reference the URI reference
328      * @return the resulting URI
329      */
330     public static URI resolve(final URI baseURI, final String reference) {
331         return resolve(baseURI, URI.create(reference));
332     }
333 
334     /**
335      * Resolves a URI reference against a base URI. Work-around for bugs in
336      * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
337      *
338      * @param baseURI the base URI
339      * @param reference the URI reference
340      * @return the resulting URI
341      */
342     public static URI resolve(final URI baseURI, final URI reference){
343         Args.notNull(baseURI, "Base URI");
344         Args.notNull(reference, "Reference URI");
345         final String s = reference.toASCIIString();
346         if (s.startsWith("?")) {
347             String baseUri = baseURI.toASCIIString();
348             final int i = baseUri.indexOf('?');
349             baseUri = i > -1 ? baseUri.substring(0, i) : baseUri;
350             return URI.create(baseUri + s);
351         }
352         final boolean emptyReference = s.isEmpty();
353         URI resolved;
354         if (emptyReference) {
355             resolved = baseURI.resolve(URI.create("#"));
356             final String resolvedString = resolved.toASCIIString();
357             resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
358         } else {
359             resolved = baseURI.resolve(reference);
360         }
361         try {
362             return normalizeSyntax(resolved);
363         } catch (final URISyntaxException ex) {
364             throw new IllegalArgumentException(ex);
365         }
366     }
367 
368     /**
369      * Removes dot segments according to RFC 3986, section 5.2.4 and
370      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
371      *
372      * @param uri the original URI
373      * @return the URI without dot segments
374      *
375      * @since 4.5
376      */
377     public static URI normalizeSyntax(final URI uri) throws URISyntaxException {
378         if (uri.isOpaque() || uri.getAuthority() == null) {
379             // opaque and file: URIs
380             return uri;
381         }
382         final URIBuilderRIBuilder.html#URIBuilder">URIBuilder builder = new URIBuilder(uri);
383         final List<String> inputSegments = builder.getPathSegments();
384         final Stack<String> outputSegments = new Stack<String>();
385         for (final String inputSegment : inputSegments) {
386             if (".".equals(inputSegment)) {
387                 // Do nothing
388             } else if ("..".equals(inputSegment)) {
389                 if (!outputSegments.isEmpty()) {
390                     outputSegments.pop();
391                 }
392             } else {
393                 outputSegments.push(inputSegment);
394             }
395         }
396         if (outputSegments.size() == 0) {
397             outputSegments.add("");
398         }
399         builder.setPathSegments(outputSegments);
400         if (builder.getScheme() != null) {
401             builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
402         }
403         if (builder.getHost() != null) {
404             builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
405         }
406         return builder.build();
407     }
408 
409     /**
410      * Extracts target host from the given {@link URI}.
411      *
412      * @param uri
413      * @return the target host if the URI is absolute or {@code null} if the URI is
414      * relative or does not contain a valid host name.
415      *
416      * @since 4.1
417      */
418     public static HttpHost extractHost(final URI uri) {
419         if (uri == null) {
420             return null;
421         }
422         if (uri.isAbsolute()) {
423             if (uri.getHost() == null) { // normal parse failed; let's do it ourselves
424                 // authority does not seem to care about the valid character-set for host names
425                 if (uri.getAuthority() != null) {
426                     String content = uri.getAuthority();
427                     // Strip off any leading user credentials
428                     int at = content.indexOf('@');
429                     if (at != -1) {
430                         content = content.substring(at + 1);
431                     }
432                     final String scheme = uri.getScheme();
433                     final String hostname;
434                     final int port;
435                     at = content.indexOf(":");
436                     if (at != -1) {
437                         hostname = content.substring(0, at);
438                         try {
439                             final String portText = content.substring(at + 1);
440                             port = !TextUtils.isEmpty(portText) ? Integer.parseInt(portText) : -1;
441                         } catch (final NumberFormatException ex) {
442                             return null;
443                         }
444                     } else {
445                         hostname = content;
446                         port = -1;
447                     }
448                     try {
449                         return new HttpHost(hostname, port, scheme);
450                     } catch (final IllegalArgumentException ex) {
451                         // ignore
452                     }
453                 }
454             } else {
455                 return new HttpHost(uri.getHost(), uri.getPort(), uri.getScheme());
456             }
457         }
458         return null;
459     }
460 
461     /**
462      * Derives the interpreted (absolute) URI that was used to generate the last
463      * request. This is done by extracting the request-uri and target origin for
464      * the last request and scanning all the redirect locations for the last
465      * fragment identifier, then combining the result into a {@link URI}.
466      *
467      * @param originalURI
468      *            original request before any redirects
469      * @param target
470      *            if the last URI is relative, it is resolved against this target,
471      *            or {@code null} if not available.
472      * @param redirects
473      *            collection of redirect locations since the original request
474      *            or {@code null} if not available.
475      * @return interpreted (absolute) URI
476      */
477     public static URI resolve(
478             final URI originalURI,
479             final HttpHost target,
480             final List<URI> redirects) throws URISyntaxException {
481         Args.notNull(originalURI, "Request URI");
482         final URIBuilder uribuilder;
483         if (redirects == null || redirects.isEmpty()) {
484             uribuilder = new URIBuilder(originalURI);
485         } else {
486             uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
487             String frag = uribuilder.getFragment();
488             // read interpreted fragment identifier from redirect locations
489             for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
490                 frag = redirects.get(i).getFragment();
491             }
492             uribuilder.setFragment(frag);
493         }
494         // read interpreted fragment identifier from original request
495         if (uribuilder.getFragment() == null) {
496             uribuilder.setFragment(originalURI.getFragment());
497         }
498         // last target origin
499         if (target != null && !uribuilder.isAbsolute()) {
500             uribuilder.setScheme(target.getSchemeName());
501             uribuilder.setHost(target.getHostName());
502             uribuilder.setPort(target.getPort());
503         }
504         return uribuilder.build();
505     }
506 
    /**
     * Private constructor: this class only offers static members and should
     * not be instantiated.
     */
    private URIUtils() {
    }
512 
513 }