View Javadoc

1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.List;
32  import java.util.Locale;
33  import java.util.Stack;
34  
35  import org.apache.http.HttpHost;
36  import org.apache.http.annotation.Immutable;
37  import org.apache.http.util.Args;
38  import org.apache.http.util.TextUtils;
39  
40  /**
41   * A collection of utilities for {@link URI URIs}, to workaround
42   * bugs within the class or for ease-of-use features.
43   *
44   * @since 4.0
45   */
46  @Immutable
47  public class URIUtils {
48  
49       /**
50           * Constructs a {@link URI} using all the parameters. This should be
51           * used instead of
52           * {@link URI#URI(String, String, String, int, String, String, String)}
53           * or any of the other URI multi-argument URI constructors.
54           *
55           * @param scheme
56           *            Scheme name
57           * @param host
58           *            Host name
59           * @param port
60           *            Port number
61           * @param path
62           *            Path
63           * @param query
64           *            Query
65           * @param fragment
66           *            Fragment
67           *
68           * @throws URISyntaxException
69           *             If both a scheme and a path are given but the path is
70           *             relative, if the URI string constructed from the given
71           *             components violates RFC&nbsp;2396, or if the authority
72           *             component of the string is present but cannot be parsed
73           *             as a server-based authority
74           *
75           * @deprecated (4.2) use {@link URIBuilder}.
76           */
77      @Deprecated
78      public static URI createURI(
79              final String scheme,
80              final String host,
81              final int port,
82              final String path,
83              final String query,
84              final String fragment) throws URISyntaxException {
85          final StringBuilder buffer = new StringBuilder();
86          if (host != null) {
87              if (scheme != null) {
88                  buffer.append(scheme);
89                  buffer.append("://");
90              }
91              buffer.append(host);
92              if (port > 0) {
93                  buffer.append(':');
94                  buffer.append(port);
95              }
96          }
97          if (path == null || !path.startsWith("/")) {
98              buffer.append('/');
99          }
100         if (path != null) {
101             buffer.append(path);
102         }
103         if (query != null) {
104             buffer.append('?');
105             buffer.append(query);
106         }
107         if (fragment != null) {
108             buffer.append('#');
109             buffer.append(fragment);
110         }
111         return new URI(buffer.toString());
112     }
113 
114     /**
115      * A convenience method for creating a new {@link URI} whose scheme, host
116      * and port are taken from the target host, but whose path, query and
117      * fragment are taken from the existing URI. The fragment is only used if
118      * dropFragment is false. The path is set to "/" if not explicitly specified.
119      *
120      * @param uri
121      *            Contains the path, query and fragment to use.
122      * @param target
123      *            Contains the scheme, host and port to use.
124      * @param dropFragment
125      *            True if the fragment should not be copied.
126      *
127      * @throws URISyntaxException
128      *             If the resulting URI is invalid.
129      */
130     public static URI rewriteURI(
131             final URI uri,
132             final HttpHost target,
133             final boolean dropFragment) throws URISyntaxException {
134         Args.notNull(uri, "URI");
135         if (uri.isOpaque()) {
136             return uri;
137         }
138         final URIBuilder uribuilder = new URIBuilder(uri);
139         if (target != null) {
140             uribuilder.setScheme(target.getSchemeName());
141             uribuilder.setHost(target.getHostName());
142             uribuilder.setPort(target.getPort());
143         } else {
144             uribuilder.setScheme(null);
145             uribuilder.setHost(null);
146             uribuilder.setPort(-1);
147         }
148         if (dropFragment) {
149             uribuilder.setFragment(null);
150         }
151         if (TextUtils.isEmpty(uribuilder.getPath())) {
152             uribuilder.setPath("/");
153         }
154         return uribuilder.build();
155     }
156 
157     /**
158      * A convenience method for
159      * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
160      * fragment.
161      */
162     public static URI rewriteURI(
163             final URI uri,
164             final HttpHost target) throws URISyntaxException {
165         return rewriteURI(uri, target, false);
166     }
167 
168     /**
169      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
170      * query are taken from the existing URI, dropping any fragment or user-information.
171      * The path is set to "/" if not explicitly specified. The existing URI is returned
172      * unmodified if it has no fragment or user-information and has a path.
173      *
174      * @param uri
175      *            original URI.
176      * @throws URISyntaxException
177      *             If the resulting URI is invalid.
178      */
179     public static URI rewriteURI(final URI uri) throws URISyntaxException {
180         Args.notNull(uri, "URI");
181         if (uri.isOpaque()) {
182             return uri;
183         }
184         final URIBuilder uribuilder = new URIBuilder(uri);
185         if (uribuilder.getUserInfo() != null) {
186             uribuilder.setUserInfo(null);
187         }
188         if (TextUtils.isEmpty(uribuilder.getPath())) {
189             uribuilder.setPath("/");
190         }
191         if (uribuilder.getHost() != null) {
192             uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ENGLISH));
193         }
194         uribuilder.setFragment(null);
195         return uribuilder.build();
196     }
197 
198     /**
199      * Resolves a URI reference against a base URI. Work-around for bug in
200      * java.net.URI (<http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535>)
201      *
202      * @param baseURI the base URI
203      * @param reference the URI reference
204      * @return the resulting URI
205      */
206     public static URI resolve(final URI baseURI, final String reference) {
207         return URIUtils.resolve(baseURI, URI.create(reference));
208     }
209 
210     /**
211      * Resolves a URI reference against a base URI. Work-around for bugs in
212      * java.net.URI (e.g. <http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535>)
213      *
214      * @param baseURI the base URI
215      * @param reference the URI reference
216      * @return the resulting URI
217      */
218     public static URI resolve(final URI baseURI, final URI reference){
219         Args.notNull(baseURI, "Base URI");
220         Args.notNull(reference, "Reference URI");
221         URI ref = reference;
222         final String s = ref.toString();
223         if (s.startsWith("?")) {
224             return resolveReferenceStartingWithQueryString(baseURI, ref);
225         }
226         final boolean emptyReference = s.length() == 0;
227         if (emptyReference) {
228             ref = URI.create("#");
229         }
230         URI resolved = baseURI.resolve(ref);
231         if (emptyReference) {
232             final String resolvedString = resolved.toString();
233             resolved = URI.create(resolvedString.substring(0,
234                 resolvedString.indexOf('#')));
235         }
236         return normalizeSyntax(resolved);
237     }
238 
239     /**
240      * Resolves a reference starting with a query string.
241      *
242      * @param baseURI the base URI
243      * @param reference the URI reference starting with a query string
244      * @return the resulting URI
245      */
246     private static URI resolveReferenceStartingWithQueryString(
247             final URI baseURI, final URI reference) {
248         String baseUri = baseURI.toString();
249         baseUri = baseUri.indexOf('?') > -1 ?
250             baseUri.substring(0, baseUri.indexOf('?')) : baseUri;
251         return URI.create(baseUri + reference.toString());
252     }
253 
254     /**
255      * Removes dot segments according to RFC 3986, section 5.2.4 and
256      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
257      *
258      * @param uri the original URI
259      * @return the URI without dot segments
260      */
261     private static URI normalizeSyntax(final URI uri) {
262         if (uri.isOpaque() || uri.getAuthority() == null) {
263             // opaque and file: URIs
264             return uri;
265         }
266         Args.check(uri.isAbsolute(), "Base URI must be absolute");
267         final String path = uri.getPath() == null ? "" : uri.getPath();
268         final String[] inputSegments = path.split("/");
269         final Stack<String> outputSegments = new Stack<String>();
270         for (final String inputSegment : inputSegments) {
271             if ((inputSegment.length() == 0)
272                 || (".".equals(inputSegment))) {
273                 // Do nothing
274             } else if ("..".equals(inputSegment)) {
275                 if (!outputSegments.isEmpty()) {
276                     outputSegments.pop();
277                 }
278             } else {
279                 outputSegments.push(inputSegment);
280             }
281         }
282         final StringBuilder outputBuffer = new StringBuilder();
283         for (final String outputSegment : outputSegments) {
284             outputBuffer.append('/').append(outputSegment);
285         }
286         if (path.lastIndexOf('/') == path.length() - 1) {
287             // path.endsWith("/") || path.equals("")
288             outputBuffer.append('/');
289         }
290         try {
291             final String scheme = uri.getScheme().toLowerCase(Locale.ENGLISH);
292             final String auth = uri.getAuthority().toLowerCase(Locale.ENGLISH);
293             final URI ref = new URI(scheme, auth, outputBuffer.toString(),
294                     null, null);
295             if (uri.getQuery() == null && uri.getFragment() == null) {
296                 return ref;
297             }
298             final StringBuilder normalized = new StringBuilder(
299                     ref.toASCIIString());
300             if (uri.getQuery() != null) {
301                 // query string passed through unchanged
302                 normalized.append('?').append(uri.getRawQuery());
303             }
304             if (uri.getFragment() != null) {
305                 // fragment passed through unchanged
306                 normalized.append('#').append(uri.getRawFragment());
307             }
308             return URI.create(normalized.toString());
309         } catch (final URISyntaxException e) {
310             throw new IllegalArgumentException(e);
311         }
312     }
313 
314     /**
315      * Extracts target host from the given {@link URI}.
316      *
317      * @param uri
318      * @return the target host if the URI is absolute or <code>null</null> if the URI is
319      * relative or does not contain a valid host name.
320      *
321      * @since 4.1
322      */
323     public static HttpHost extractHost(final URI uri) {
324         if (uri == null) {
325             return null;
326         }
327         HttpHost target = null;
328         if (uri.isAbsolute()) {
329             int port = uri.getPort(); // may be overridden later
330             String host = uri.getHost();
331             if (host == null) { // normal parse failed; let's do it ourselves
332                 // authority does not seem to care about the valid character-set for host names
333                 host = uri.getAuthority();
334                 if (host != null) {
335                     // Strip off any leading user credentials
336                     final int at = host.indexOf('@');
337                     if (at >= 0) {
338                         if (host.length() > at+1 ) {
339                             host = host.substring(at+1);
340                         } else {
341                             host = null; // @ on its own
342                         }
343                     }
344                     // Extract the port suffix, if present
345                     if (host != null) {
346                         final int colon = host.indexOf(':');
347                         if (colon >= 0) {
348                             final int pos = colon + 1;
349                             int len = 0;
350                             for (int i = pos; i < host.length(); i++) {
351                                 if (Character.isDigit(host.charAt(i))) {
352                                     len++;
353                                 } else {
354                                     break;
355                                 }
356                             }
357                             if (len > 0) {
358                                 try {
359                                     port = Integer.parseInt(host.substring(pos, pos + len));
360                                 } catch (final NumberFormatException ex) {
361                                 }
362                             }
363                             host = host.substring(0, colon);
364                         }
365                     }
366                 }
367             }
368             final String scheme = uri.getScheme();
369             if (!TextUtils.isBlank(host)) {
370                 target = new HttpHost(host, port, scheme);
371             }
372         }
373         return target;
374     }
375 
376     /**
377      * Derives the interpreted (absolute) URI that was used to generate the last
378      * request. This is done by extracting the request-uri and target origin for
379      * the last request and scanning all the redirect locations for the last
380      * fragment identifier, then combining the result into a {@link URI}.
381      *
382      * @param originalURI
383      *            original request before any redirects
384      * @param target
385      *            if the last URI is relative, it is resolved against this target,
386      *            or <code>null</code> if not available.
387      * @param redirects
388      *            collection of redirect locations since the original request
389      *            or <code>null</code> if not available.
390      * @return interpreted (absolute) URI
391      */
392     public static URI resolve(
393             final URI originalURI,
394             final HttpHost target,
395             final List<URI> redirects) throws URISyntaxException {
396         Args.notNull(originalURI, "Request URI");
397         final URIBuilder uribuilder;
398         if (redirects == null || redirects.isEmpty()) {
399             uribuilder = new URIBuilder(originalURI);
400         } else {
401             uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
402             String frag = uribuilder.getFragment();
403             // read interpreted fragment identifier from redirect locations
404             for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
405                 frag = redirects.get(i).getFragment();
406             }
407             uribuilder.setFragment(frag);
408         }
409         // read interpreted fragment identifier from original request
410         if (uribuilder.getFragment() == null) {
411             uribuilder.setFragment(originalURI.getFragment());
412         }
413         // last target origin
414         if (target != null && !uribuilder.isAbsolute()) {
415             uribuilder.setScheme(target.getSchemeName());
416             uribuilder.setHost(target.getHostName());
417             uribuilder.setPort(target.getPort());
418         }
419         return uribuilder.build();
420     }
421 
422     /**
423      * This class should not be instantiated.
424      */
425     private URIUtils() {
426     }
427 
428 }