View Javadoc

1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.List;
32  import java.util.Locale;
33  import java.util.Stack;
34  
35  import org.apache.http.HttpHost;
36  import org.apache.http.annotation.Immutable;
37  import org.apache.http.conn.routing.RouteInfo;
38  import org.apache.http.util.Args;
39  import org.apache.http.util.TextUtils;
40  
41  /**
42   * A collection of utilities for {@link URI URIs}, to work around
43   * bugs within the class or for ease-of-use features.
44   *
45   * @since 4.0
46   */
47  @Immutable
48  public class URIUtils {
49  
50       /**
51           * Constructs a {@link URI} using all the parameters. This should be
52           * used instead of
53           * {@link URI#URI(String, String, String, int, String, String, String)}
54           * or any of the other URI multi-argument URI constructors.
55           *
56           * @param scheme
57           *            Scheme name
58           * @param host
59           *            Host name
60           * @param port
61           *            Port number
62           * @param path
63           *            Path
64           * @param query
65           *            Query
66           * @param fragment
67           *            Fragment
68           *
69           * @throws URISyntaxException
70           *             If both a scheme and a path are given but the path is
71           *             relative, if the URI string constructed from the given
72           *             components violates RFC&nbsp;2396, or if the authority
73           *             component of the string is present but cannot be parsed
74           *             as a server-based authority
75           *
76           * @deprecated (4.2) use {@link URIBuilder}.
77           */
78      @Deprecated
79      public static URI createURI(
80              final String scheme,
81              final String host,
82              final int port,
83              final String path,
84              final String query,
85              final String fragment) throws URISyntaxException {
86          final StringBuilder buffer = new StringBuilder();
87          if (host != null) {
88              if (scheme != null) {
89                  buffer.append(scheme);
90                  buffer.append("://");
91              }
92              buffer.append(host);
93              if (port > 0) {
94                  buffer.append(':');
95                  buffer.append(port);
96              }
97          }
98          if (path == null || !path.startsWith("/")) {
99              buffer.append('/');
100         }
101         if (path != null) {
102             buffer.append(path);
103         }
104         if (query != null) {
105             buffer.append('?');
106             buffer.append(query);
107         }
108         if (fragment != null) {
109             buffer.append('#');
110             buffer.append(fragment);
111         }
112         return new URI(buffer.toString());
113     }
114 
115     /**
116      * A convenience method for creating a new {@link URI} whose scheme, host
117      * and port are taken from the target host, but whose path, query and
118      * fragment are taken from the existing URI. The fragment is only used if
119      * dropFragment is false. The path is set to "/" if not explicitly specified.
120      *
121      * @param uri
122      *            Contains the path, query and fragment to use.
123      * @param target
124      *            Contains the scheme, host and port to use.
125      * @param dropFragment
126      *            True if the fragment should not be copied.
127      *
128      * @throws URISyntaxException
129      *             If the resulting URI is invalid.
130      */
131     public static URI rewriteURI(
132             final URI uri,
133             final HttpHost target,
134             final boolean dropFragment) throws URISyntaxException {
135         Args.notNull(uri, "URI");
136         if (uri.isOpaque()) {
137             return uri;
138         }
139         final URIBuilder uribuilder = new URIBuilder(uri);
140         if (target != null) {
141             uribuilder.setScheme(target.getSchemeName());
142             uribuilder.setHost(target.getHostName());
143             uribuilder.setPort(target.getPort());
144         } else {
145             uribuilder.setScheme(null);
146             uribuilder.setHost(null);
147             uribuilder.setPort(-1);
148         }
149         if (dropFragment) {
150             uribuilder.setFragment(null);
151         }
152         if (TextUtils.isEmpty(uribuilder.getPath())) {
153             uribuilder.setPath("/");
154         }
155         return uribuilder.build();
156     }
157 
158     /**
159      * A convenience method for
160      * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
161      * fragment.
162      */
163     public static URI rewriteURI(
164             final URI uri,
165             final HttpHost target) throws URISyntaxException {
166         return rewriteURI(uri, target, false);
167     }
168 
169     /**
170      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
171      * query are taken from the existing URI, dropping any fragment or user-information.
172      * The path is set to "/" if not explicitly specified. The existing URI is returned
173      * unmodified if it has no fragment or user-information and has a path.
174      *
175      * @param uri
176      *            original URI.
177      * @throws URISyntaxException
178      *             If the resulting URI is invalid.
179      */
180     public static URI rewriteURI(final URI uri) throws URISyntaxException {
181         Args.notNull(uri, "URI");
182         if (uri.isOpaque()) {
183             return uri;
184         }
185         final URIBuilder uribuilder = new URIBuilder(uri);
186         if (uribuilder.getUserInfo() != null) {
187             uribuilder.setUserInfo(null);
188         }
189         if (TextUtils.isEmpty(uribuilder.getPath())) {
190             uribuilder.setPath("/");
191         }
192         if (uribuilder.getHost() != null) {
193             uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
194         }
195         uribuilder.setFragment(null);
196         return uribuilder.build();
197     }
198 
199     /**
200      * A convenience method that optionally converts the original {@link java.net.URI} either
201      * to a relative or an absolute form as required by the specified route.
202      *
203      * @param uri
204      *            original URI.
205      * @throws URISyntaxException
206      *             If the resulting URI is invalid.
207      *
208      * @since 4.4
209      */
210     public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
211         if (uri == null) {
212             return null;
213         }
214         if (route.getProxyHost() != null && !route.isTunnelled()) {
215             // Make sure the request URI is absolute
216             if (!uri.isAbsolute()) {
217                 final HttpHost target = route.getTargetHost();
218                 return rewriteURI(uri, target, true);
219             } else {
220                 return rewriteURI(uri);
221             }
222         } else {
223             // Make sure the request URI is relative
224             if (uri.isAbsolute()) {
225                 return rewriteURI(uri, null, true);
226             } else {
227                 return rewriteURI(uri);
228             }
229         }
230     }
231 
232     /**
233      * Resolves a URI reference against a base URI. Work-around for bug in
234      * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
235      *
236      * @param baseURI the base URI
237      * @param reference the URI reference
238      * @return the resulting URI
239      */
240     public static URI resolve(final URI baseURI, final String reference) {
241         return URIUtils.resolve(baseURI, URI.create(reference));
242     }
243 
244     /**
245      * Resolves a URI reference against a base URI. Work-around for bugs in
246      * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
247      *
248      * @param baseURI the base URI
249      * @param reference the URI reference
250      * @return the resulting URI
251      */
252     public static URI resolve(final URI baseURI, final URI reference){
253         Args.notNull(baseURI, "Base URI");
254         Args.notNull(reference, "Reference URI");
255         URI ref = reference;
256         final String s = ref.toString();
257         if (s.startsWith("?")) {
258             return resolveReferenceStartingWithQueryString(baseURI, ref);
259         }
260         final boolean emptyReference = s.isEmpty();
261         if (emptyReference) {
262             ref = URI.create("#");
263         }
264         URI resolved = baseURI.resolve(ref);
265         if (emptyReference) {
266             final String resolvedString = resolved.toString();
267             resolved = URI.create(resolvedString.substring(0,
268                 resolvedString.indexOf('#')));
269         }
270         return normalizeSyntax(resolved);
271     }
272 
273     /**
274      * Resolves a reference starting with a query string.
275      *
276      * @param baseURI the base URI
277      * @param reference the URI reference starting with a query string
278      * @return the resulting URI
279      */
280     private static URI resolveReferenceStartingWithQueryString(
281             final URI baseURI, final URI reference) {
282         String baseUri = baseURI.toString();
283         baseUri = baseUri.indexOf('?') > -1 ?
284             baseUri.substring(0, baseUri.indexOf('?')) : baseUri;
285         return URI.create(baseUri + reference.toString());
286     }
287 
288     /**
289      * Removes dot segments according to RFC 3986, section 5.2.4 and
290      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
291      *
292      * @param uri the original URI
293      * @return the URI without dot segments
294      */
295     private static URI normalizeSyntax(final URI uri) {
296         if (uri.isOpaque() || uri.getAuthority() == null) {
297             // opaque and file: URIs
298             return uri;
299         }
300         Args.check(uri.isAbsolute(), "Base URI must be absolute");
301         final String path = uri.getPath() == null ? "" : uri.getPath();
302         final String[] inputSegments = path.split("/");
303         final Stack<String> outputSegments = new Stack<String>();
304         for (final String inputSegment : inputSegments) {
305             if ((inputSegment.isEmpty())
306                 || (".".equals(inputSegment))) {
307                 // Do nothing
308             } else if ("..".equals(inputSegment)) {
309                 if (!outputSegments.isEmpty()) {
310                     outputSegments.pop();
311                 }
312             } else {
313                 outputSegments.push(inputSegment);
314             }
315         }
316         final StringBuilder outputBuffer = new StringBuilder();
317         for (final String outputSegment : outputSegments) {
318             outputBuffer.append('/').append(outputSegment);
319         }
320         if (path.lastIndexOf('/') == path.length() - 1) {
321             // path.endsWith("/") || path.equals("")
322             outputBuffer.append('/');
323         }
324         try {
325             final String scheme = uri.getScheme().toLowerCase(Locale.ROOT);
326             final String auth = uri.getAuthority().toLowerCase(Locale.ROOT);
327             final URI ref = new URI(scheme, auth, outputBuffer.toString(),
328                     null, null);
329             if (uri.getQuery() == null && uri.getFragment() == null) {
330                 return ref;
331             }
332             final StringBuilder normalized = new StringBuilder(
333                     ref.toASCIIString());
334             if (uri.getQuery() != null) {
335                 // query string passed through unchanged
336                 normalized.append('?').append(uri.getRawQuery());
337             }
338             if (uri.getFragment() != null) {
339                 // fragment passed through unchanged
340                 normalized.append('#').append(uri.getRawFragment());
341             }
342             return URI.create(normalized.toString());
343         } catch (final URISyntaxException e) {
344             throw new IllegalArgumentException(e);
345         }
346     }
347 
348     /**
349      * Extracts target host from the given {@link URI}.
350      *
351      * @param uri
352      * @return the target host if the URI is absolute or {@code null} if the URI is
353      * relative or does not contain a valid host name.
354      *
355      * @since 4.1
356      */
357     public static HttpHost extractHost(final URI uri) {
358         if (uri == null) {
359             return null;
360         }
361         HttpHost target = null;
362         if (uri.isAbsolute()) {
363             int port = uri.getPort(); // may be overridden later
364             String host = uri.getHost();
365             if (host == null) { // normal parse failed; let's do it ourselves
366                 // authority does not seem to care about the valid character-set for host names
367                 host = uri.getAuthority();
368                 if (host != null) {
369                     // Strip off any leading user credentials
370                     final int at = host.indexOf('@');
371                     if (at >= 0) {
372                         if (host.length() > at+1 ) {
373                             host = host.substring(at+1);
374                         } else {
375                             host = null; // @ on its own
376                         }
377                     }
378                     // Extract the port suffix, if present
379                     if (host != null) {
380                         final int colon = host.indexOf(':');
381                         if (colon >= 0) {
382                             final int pos = colon + 1;
383                             int len = 0;
384                             for (int i = pos; i < host.length(); i++) {
385                                 if (Character.isDigit(host.charAt(i))) {
386                                     len++;
387                                 } else {
388                                     break;
389                                 }
390                             }
391                             if (len > 0) {
392                                 try {
393                                     port = Integer.parseInt(host.substring(pos, pos + len));
394                                 } catch (final NumberFormatException ex) {
395                                 }
396                             }
397                             host = host.substring(0, colon);
398                         }
399                     }
400                 }
401             }
402             final String scheme = uri.getScheme();
403             if (!TextUtils.isBlank(host)) {
404                 try {
405                     target = new HttpHost(host, port, scheme);
406                 } catch (IllegalArgumentException ignore) {
407                 }
408             }
409         }
410         return target;
411     }
412 
413     /**
414      * Derives the interpreted (absolute) URI that was used to generate the last
415      * request. This is done by extracting the request-uri and target origin for
416      * the last request and scanning all the redirect locations for the last
417      * fragment identifier, then combining the result into a {@link URI}.
418      *
419      * @param originalURI
420      *            original request before any redirects
421      * @param target
422      *            if the last URI is relative, it is resolved against this target,
423      *            or {@code null} if not available.
424      * @param redirects
425      *            collection of redirect locations since the original request
426      *            or {@code null} if not available.
427      * @return interpreted (absolute) URI
428      */
429     public static URI resolve(
430             final URI originalURI,
431             final HttpHost target,
432             final List<URI> redirects) throws URISyntaxException {
433         Args.notNull(originalURI, "Request URI");
434         final URIBuilder uribuilder;
435         if (redirects == null || redirects.isEmpty()) {
436             uribuilder = new URIBuilder(originalURI);
437         } else {
438             uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
439             String frag = uribuilder.getFragment();
440             // read interpreted fragment identifier from redirect locations
441             for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
442                 frag = redirects.get(i).getFragment();
443             }
444             uribuilder.setFragment(frag);
445         }
446         // read interpreted fragment identifier from original request
447         if (uribuilder.getFragment() == null) {
448             uribuilder.setFragment(originalURI.getFragment());
449         }
450         // last target origin
451         if (target != null && !uribuilder.isAbsolute()) {
452             uribuilder.setScheme(target.getSchemeName());
453             uribuilder.setHost(target.getHostName());
454             uribuilder.setPort(target.getPort());
455         }
456         return uribuilder.build();
457     }
458 
459     /**
460      * Hidden constructor: this class only offers static methods and should not be instantiated.
461      */
462     private URIUtils() {
463     }
464 
465 }