View Javadoc

1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.List;
32  import java.util.Locale;
33  import java.util.Stack;
34  
35  import org.apache.http.HttpHost;
36  import org.apache.http.annotation.Immutable;
37  import org.apache.http.conn.routing.RouteInfo;
38  import org.apache.http.util.Args;
39  import org.apache.http.util.TextUtils;
40  
41  /**
42   * A collection of utilities for {@link URI URIs}, to workaround
43   * bugs within the class or for ease-of-use features.
44   *
45   * @since 4.0
46   */
47  @Immutable
48  public class URIUtils {
49  
50       /**
51           * Constructs a {@link URI} using all the parameters. This should be
52           * used instead of
53           * {@link URI#URI(String, String, String, int, String, String, String)}
54           * or any of the other URI multi-argument URI constructors.
55           *
56           * @param scheme
57           *            Scheme name
58           * @param host
59           *            Host name
60           * @param port
61           *            Port number
62           * @param path
63           *            Path
64           * @param query
65           *            Query
66           * @param fragment
67           *            Fragment
68           *
69           * @throws URISyntaxException
70           *             If both a scheme and a path are given but the path is
71           *             relative, if the URI string constructed from the given
72           *             components violates RFC&nbsp;2396, or if the authority
73           *             component of the string is present but cannot be parsed
74           *             as a server-based authority
75           *
76           * @deprecated (4.2) use {@link URIBuilder}.
77           */
78      @Deprecated
79      public static URI createURI(
80              final String scheme,
81              final String host,
82              final int port,
83              final String path,
84              final String query,
85              final String fragment) throws URISyntaxException {
86          final StringBuilder buffer = new StringBuilder();
87          if (host != null) {
88              if (scheme != null) {
89                  buffer.append(scheme);
90                  buffer.append("://");
91              }
92              buffer.append(host);
93              if (port > 0) {
94                  buffer.append(':');
95                  buffer.append(port);
96              }
97          }
98          if (path == null || !path.startsWith("/")) {
99              buffer.append('/');
100         }
101         if (path != null) {
102             buffer.append(path);
103         }
104         if (query != null) {
105             buffer.append('?');
106             buffer.append(query);
107         }
108         if (fragment != null) {
109             buffer.append('#');
110             buffer.append(fragment);
111         }
112         return new URI(buffer.toString());
113     }
114 
115     /**
116      * A convenience method for creating a new {@link URI} whose scheme, host
117      * and port are taken from the target host, but whose path, query and
118      * fragment are taken from the existing URI. The fragment is only used if
119      * dropFragment is false. The path is set to "/" if not explicitly specified.
120      *
121      * @param uri
122      *            Contains the path, query and fragment to use.
123      * @param target
124      *            Contains the scheme, host and port to use.
125      * @param dropFragment
126      *            True if the fragment should not be copied.
127      *
128      * @throws URISyntaxException
129      *             If the resulting URI is invalid.
130      */
131     public static URI rewriteURI(
132             final URI uri,
133             final HttpHost target,
134             final boolean dropFragment) throws URISyntaxException {
135         Args.notNull(uri, "URI");
136         if (uri.isOpaque()) {
137             return uri;
138         }
139         final URIBuilder uribuilder = new URIBuilder(uri);
140         if (target != null) {
141             uribuilder.setScheme(target.getSchemeName());
142             uribuilder.setHost(target.getHostName());
143             uribuilder.setPort(target.getPort());
144         } else {
145             uribuilder.setScheme(null);
146             uribuilder.setHost(null);
147             uribuilder.setPort(-1);
148         }
149         if (dropFragment) {
150             uribuilder.setFragment(null);
151         }
152         if (TextUtils.isEmpty(uribuilder.getPath())) {
153             uribuilder.setPath("/");
154         }
155         return uribuilder.build();
156     }
157 
158     /**
159      * A convenience method for
160      * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
161      * fragment.
162      */
163     public static URI rewriteURI(
164             final URI uri,
165             final HttpHost target) throws URISyntaxException {
166         return rewriteURI(uri, target, false);
167     }
168 
169     /**
170      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
171      * query are taken from the existing URI, dropping any fragment or user-information.
172      * The path is set to "/" if not explicitly specified. The existing URI is returned
173      * unmodified if it has no fragment or user-information and has a path.
174      *
175      * @param uri
176      *            original URI.
177      * @throws URISyntaxException
178      *             If the resulting URI is invalid.
179      */
180     public static URI rewriteURI(final URI uri) throws URISyntaxException {
181         Args.notNull(uri, "URI");
182         if (uri.isOpaque()) {
183             return uri;
184         }
185         final URIBuilder uribuilder = new URIBuilder(uri);
186         if (uribuilder.getUserInfo() != null) {
187             uribuilder.setUserInfo(null);
188         }
189         if (TextUtils.isEmpty(uribuilder.getPath())) {
190             uribuilder.setPath("/");
191         }
192         if (uribuilder.getHost() != null) {
193             uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
194         }
195         uribuilder.setFragment(null);
196         return uribuilder.build();
197     }
198 
199     /**
200      * A convenience method that optionally converts the original {@link java.net.URI} either
201      * to a relative or an absolute form as required by the specified route.
202      *
203      * @param uri
204      *            original URI.
205      * @throws URISyntaxException
206      *             If the resulting URI is invalid.
207      *
208      * @since 4.4
209      */
210     public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
211         if (uri == null) {
212             return null;
213         }
214         if (route.getProxyHost() != null && !route.isTunnelled()) {
215             // Make sure the request URI is absolute
216             if (!uri.isAbsolute()) {
217                 final HttpHost target = route.getTargetHost();
218                 return rewriteURI(uri, target, true);
219             } else {
220                 return rewriteURI(uri);
221             }
222         } else {
223             // Make sure the request URI is relative
224             if (uri.isAbsolute()) {
225                 return rewriteURI(uri, null, true);
226             } else {
227                 return rewriteURI(uri);
228             }
229         }
230     }
231 
232     /**
233      * Resolves a URI reference against a base URI. Work-around for bug in
234      * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
235      *
236      * @param baseURI the base URI
237      * @param reference the URI reference
238      * @return the resulting URI
239      */
240     public static URI resolve(final URI baseURI, final String reference) {
241         return resolve(baseURI, URI.create(reference));
242     }
243 
244     /**
245      * Resolves a URI reference against a base URI. Work-around for bugs in
246      * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
247      *
248      * @param baseURI the base URI
249      * @param reference the URI reference
250      * @return the resulting URI
251      */
252     public static URI resolve(final URI baseURI, final URI reference){
253         Args.notNull(baseURI, "Base URI");
254         Args.notNull(reference, "Reference URI");
255         final String s = reference.toASCIIString();
256         if (s.startsWith("?")) {
257             String baseUri = baseURI.toASCIIString();
258             final int i = baseUri.indexOf('?');
259             baseUri = i > -1 ? baseUri.substring(0, i) : baseUri;
260             return URI.create(baseUri + s);
261         }
262         final boolean emptyReference = s.isEmpty();
263         URI resolved;
264         if (emptyReference) {
265             resolved = baseURI.resolve(URI.create("#"));
266             final String resolvedString = resolved.toASCIIString();
267             resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
268         } else {
269             resolved = baseURI.resolve(reference);
270         }
271         try {
272             return normalizeSyntax(resolved);
273         } catch (URISyntaxException ex) {
274             throw new IllegalArgumentException(ex);
275         }
276     }
277 
278     /**
279      * Removes dot segments according to RFC 3986, section 5.2.4 and
280      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
281      *
282      * @param uri the original URI
283      * @return the URI without dot segments
284      */
285     static URI normalizeSyntax(final URI uri) throws URISyntaxException {
286         if (uri.isOpaque() || uri.getAuthority() == null) {
287             // opaque and file: URIs
288             return uri;
289         }
290         Args.check(uri.isAbsolute(), "Base URI must be absolute");
291         final URIBuilder builder = new URIBuilder(uri);
292         final String path = builder.getPath();
293         if (path != null && !path.equals("/")) {
294             final String[] inputSegments = path.split("/");
295             final Stack<String> outputSegments = new Stack<String>();
296             for (final String inputSegment : inputSegments) {
297                 if ((inputSegment.isEmpty()) || (".".equals(inputSegment))) {
298                     // Do nothing
299                 } else if ("..".equals(inputSegment)) {
300                     if (!outputSegments.isEmpty()) {
301                         outputSegments.pop();
302                     }
303                 } else {
304                     outputSegments.push(inputSegment);
305                 }
306             }
307             final StringBuilder outputBuffer = new StringBuilder();
308             for (final String outputSegment : outputSegments) {
309                 outputBuffer.append('/').append(outputSegment);
310             }
311             if (path.lastIndexOf('/') == path.length() - 1) {
312                 // path.endsWith("/") || path.equals("")
313                 outputBuffer.append('/');
314             }
315             builder.setPath(outputBuffer.toString());
316         }
317         if (builder.getScheme() != null) {
318             builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
319         }
320         if (builder.getHost() != null) {
321             builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
322         }
323         return builder.build();
324     }
325 
326     /**
327      * Extracts target host from the given {@link URI}.
328      *
329      * @param uri
330      * @return the target host if the URI is absolute or {@code null} if the URI is
331      * relative or does not contain a valid host name.
332      *
333      * @since 4.1
334      */
335     public static HttpHost extractHost(final URI uri) {
336         if (uri == null) {
337             return null;
338         }
339         HttpHost target = null;
340         if (uri.isAbsolute()) {
341             int port = uri.getPort(); // may be overridden later
342             String host = uri.getHost();
343             if (host == null) { // normal parse failed; let's do it ourselves
344                 // authority does not seem to care about the valid character-set for host names
345                 host = uri.getAuthority();
346                 if (host != null) {
347                     // Strip off any leading user credentials
348                     final int at = host.indexOf('@');
349                     if (at >= 0) {
350                         if (host.length() > at+1 ) {
351                             host = host.substring(at+1);
352                         } else {
353                             host = null; // @ on its own
354                         }
355                     }
356                     // Extract the port suffix, if present
357                     if (host != null) {
358                         final int colon = host.indexOf(':');
359                         if (colon >= 0) {
360                             final int pos = colon + 1;
361                             int len = 0;
362                             for (int i = pos; i < host.length(); i++) {
363                                 if (Character.isDigit(host.charAt(i))) {
364                                     len++;
365                                 } else {
366                                     break;
367                                 }
368                             }
369                             if (len > 0) {
370                                 try {
371                                     port = Integer.parseInt(host.substring(pos, pos + len));
372                                 } catch (final NumberFormatException ex) {
373                                 }
374                             }
375                             host = host.substring(0, colon);
376                         }
377                     }
378                 }
379             }
380             final String scheme = uri.getScheme();
381             if (!TextUtils.isBlank(host)) {
382                 try {
383                     target = new HttpHost(host, port, scheme);
384                 } catch (IllegalArgumentException ignore) {
385                 }
386             }
387         }
388         return target;
389     }
390 
391     /**
392      * Derives the interpreted (absolute) URI that was used to generate the last
393      * request. This is done by extracting the request-uri and target origin for
394      * the last request and scanning all the redirect locations for the last
395      * fragment identifier, then combining the result into a {@link URI}.
396      *
397      * @param originalURI
398      *            original request before any redirects
399      * @param target
400      *            if the last URI is relative, it is resolved against this target,
401      *            or {@code null} if not available.
402      * @param redirects
403      *            collection of redirect locations since the original request
404      *            or {@code null} if not available.
405      * @return interpreted (absolute) URI
406      */
407     public static URI resolve(
408             final URI originalURI,
409             final HttpHost target,
410             final List<URI> redirects) throws URISyntaxException {
411         Args.notNull(originalURI, "Request URI");
412         final URIBuilder uribuilder;
413         if (redirects == null || redirects.isEmpty()) {
414             uribuilder = new URIBuilder(originalURI);
415         } else {
416             uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
417             String frag = uribuilder.getFragment();
418             // read interpreted fragment identifier from redirect locations
419             for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
420                 frag = redirects.get(i).getFragment();
421             }
422             uribuilder.setFragment(frag);
423         }
424         // read interpreted fragment identifier from original request
425         if (uribuilder.getFragment() == null) {
426             uribuilder.setFragment(originalURI.getFragment());
427         }
428         // last target origin
429         if (target != null && !uribuilder.isAbsolute()) {
430             uribuilder.setScheme(target.getSchemeName());
431             uribuilder.setHost(target.getHostName());
432             uribuilder.setPort(target.getPort());
433         }
434         return uribuilder.build();
435     }
436 
437     /**
438      * This class should not be instantiated.
439      */
440     private URIUtils() {
441     }
442 
443 }