View Javadoc

1   /*
2    * ====================================================================
3    *
4    *  Licensed to the Apache Software Foundation (ASF) under one or more
5    *  contributor license agreements.  See the NOTICE file distributed with
6    *  this work for additional information regarding copyright ownership.
7    *  The ASF licenses this file to You under the Apache License, Version 2.0
8    *  (the "License"); you may not use this file except in compliance with
9    *  the License.  You may obtain a copy of the License at
10   *
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   *
13   *  Unless required by applicable law or agreed to in writing, software
14   *  distributed under the License is distributed on an "AS IS" BASIS,
15   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   *  See the License for the specific language governing permissions and
17   *  limitations under the License.
18   * ====================================================================
19   *
20   * This software consists of voluntary contributions made by many
21   * individuals on behalf of the Apache Software Foundation.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   *
25   */
26  
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.Stack;
32  
33  import org.apache.http.annotation.Immutable;
34  
35  import org.apache.http.HttpHost;
36  
37  /**
38   * A collection of utilities for {@link URI URIs}, to workaround
39   * bugs within the class or for ease-of-use features.
40   *
41   * @since 4.0
42   */
43  @Immutable
44  public class URIUtils {
45  
46       /**
47           * Constructs a {@link URI} using all the parameters. This should be
48           * used instead of
49           * {@link URI#URI(String, String, String, int, String, String, String)}
50           * or any of the other URI multi-argument URI constructors.
51           *
52           * @param scheme
53           *            Scheme name
54           * @param host
55           *            Host name
56           * @param port
57           *            Port number
58           * @param path
59           *            Path
60           * @param query
61           *            Query
62           * @param fragment
63           *            Fragment
64           *
65           * @throws URISyntaxException
66           *             If both a scheme and a path are given but the path is
67           *             relative, if the URI string constructed from the given
68           *             components violates RFC&nbsp;2396, or if the authority
69           *             component of the string is present but cannot be parsed
70           *             as a server-based authority
71           *
72           * @deprecated (4.2) use {@link URIBuilder}.
73           */
74      @Deprecated
75      public static URI createURI(
76              final String scheme,
77              final String host,
78              int port,
79              final String path,
80              final String query,
81              final String fragment) throws URISyntaxException {
82          StringBuilder buffer = new StringBuilder();
83          if (host != null) {
84              if (scheme != null) {
85                  buffer.append(scheme);
86                  buffer.append("://");
87              }
88              buffer.append(host);
89              if (port > 0) {
90                  buffer.append(':');
91                  buffer.append(port);
92              }
93          }
94          if (path == null || !path.startsWith("/")) {
95              buffer.append('/');
96          }
97          if (path != null) {
98              buffer.append(path);
99          }
100         if (query != null) {
101             buffer.append('?');
102             buffer.append(query);
103         }
104         if (fragment != null) {
105             buffer.append('#');
106             buffer.append(fragment);
107         }
108         return new URI(buffer.toString());
109     }
110 
111     /**
112      * A convenience method for creating a new {@link URI} whose scheme, host
113      * and port are taken from the target host, but whose path, query and
114      * fragment are taken from the existing URI. The fragment is only used if
115      * dropFragment is false. The path is set to "/" if not explicitly specified.
116      *
117      * @param uri
118      *            Contains the path, query and fragment to use.
119      * @param target
120      *            Contains the scheme, host and port to use.
121      * @param dropFragment
122      *            True if the fragment should not be copied.
123      *
124      * @throws URISyntaxException
125      *             If the resulting URI is invalid.
126      */
127     public static URI rewriteURI(
128             final URI uri,
129             final HttpHost target,
130             boolean dropFragment) throws URISyntaxException {
131         if (uri == null) {
132             throw new IllegalArgumentException("URI may not be null");
133         }
134         URIBuilder uribuilder = new URIBuilder(uri);
135         if (target != null) {
136             uribuilder.setScheme(target.getSchemeName());
137             uribuilder.setHost(target.getHostName());
138             uribuilder.setPort(target.getPort());
139         } else {
140             uribuilder.setScheme(null);
141             uribuilder.setHost(null);
142             uribuilder.setPort(-1);
143         }
144         if (dropFragment) {
145             uribuilder.setFragment(null);
146         }
147         if (uribuilder.getPath() == null || uribuilder.getPath().length() == 0) {
148             uribuilder.setPath("/");
149         }
150         return uribuilder.build();
151     }
152 
153     /**
154      * A convenience method for
155      * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
156      * fragment.
157      */
158     public static URI rewriteURI(
159             final URI uri,
160             final HttpHost target) throws URISyntaxException {
161         return rewriteURI(uri, target, false);
162     }
163 
164     /**
165      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
166      * query are taken from the existing URI, dropping any fragment or user-information.
167      * The path is set to "/" if not explicitly specified. The existing URI is returned
168      * unmodified if it has no fragment or user-information and has a path.
169      *
170      * @param uri
171      *            original URI.
172      * @throws URISyntaxException
173      *             If the resulting URI is invalid.
174      */
175     public static URI rewriteURI(final URI uri) throws URISyntaxException {
176         if (uri == null) {
177             throw new IllegalArgumentException("URI may not be null");
178         }
179         if (uri.getFragment() != null || uri.getUserInfo() != null
180                 || (uri.getPath() == null || uri.getPath().length() == 0)) {
181             URIBuilder uribuilder = new URIBuilder(uri);
182             uribuilder.setFragment(null).setUserInfo(null);
183             if (uribuilder.getPath() == null || uribuilder.getPath().length() == 0) {
184                 uribuilder.setPath("/");
185             }
186             return uribuilder.build();
187         } else {
188             return uri;
189         }
190     }
191 
192     /**
193      * Resolves a URI reference against a base URI. Work-around for bug in
194      * java.net.URI (<http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535>)
195      *
196      * @param baseURI the base URI
197      * @param reference the URI reference
198      * @return the resulting URI
199      */
200     public static URI resolve(final URI baseURI, final String reference) {
201         return URIUtils.resolve(baseURI, URI.create(reference));
202     }
203 
204     /**
205      * Resolves a URI reference against a base URI. Work-around for bugs in
206      * java.net.URI (e.g. <http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535>)
207      *
208      * @param baseURI the base URI
209      * @param reference the URI reference
210      * @return the resulting URI
211      */
212     public static URI resolve(final URI baseURI, URI reference){
213         if (baseURI == null) {
214             throw new IllegalArgumentException("Base URI may nor be null");
215         }
216         if (reference == null) {
217             throw new IllegalArgumentException("Reference URI may nor be null");
218         }
219         String s = reference.toString();
220         if (s.startsWith("?")) {
221             return resolveReferenceStartingWithQueryString(baseURI, reference);
222         }
223         boolean emptyReference = s.length() == 0;
224         if (emptyReference) {
225             reference = URI.create("#");
226         }
227         URI resolved = baseURI.resolve(reference);
228         if (emptyReference) {
229             String resolvedString = resolved.toString();
230             resolved = URI.create(resolvedString.substring(0,
231                 resolvedString.indexOf('#')));
232         }
233         return removeDotSegments(resolved);
234     }
235 
236     /**
237      * Resolves a reference starting with a query string.
238      *
239      * @param baseURI the base URI
240      * @param reference the URI reference starting with a query string
241      * @return the resulting URI
242      */
243     private static URI resolveReferenceStartingWithQueryString(
244             final URI baseURI, final URI reference) {
245         String baseUri = baseURI.toString();
246         baseUri = baseUri.indexOf('?') > -1 ?
247             baseUri.substring(0, baseUri.indexOf('?')) : baseUri;
248         return URI.create(baseUri + reference.toString());
249     }
250 
251     /**
252      * Removes dot segments according to RFC 3986, section 5.2.4
253      *
254      * @param uri the original URI
255      * @return the URI without dot segments
256      */
257     private static URI removeDotSegments(URI uri) {
258         String path = uri.getPath();
259         if ((path == null) || (path.indexOf("/.") == -1)) {
260             // No dot segments to remove
261             return uri;
262         }
263         String[] inputSegments = path.split("/");
264         Stack<String> outputSegments = new Stack<String>();
265         for (int i = 0; i < inputSegments.length; i++) {
266             if ((inputSegments[i].length() == 0)
267                 || (".".equals(inputSegments[i]))) {
268                 // Do nothing
269             } else if ("..".equals(inputSegments[i])) {
270                 if (!outputSegments.isEmpty()) {
271                     outputSegments.pop();
272                 }
273             } else {
274                 outputSegments.push(inputSegments[i]);
275             }
276         }
277         StringBuilder outputBuffer = new StringBuilder();
278         for (String outputSegment : outputSegments) {
279             outputBuffer.append('/').append(outputSegment);
280         }
281         try {
282             return new URI(uri.getScheme(), uri.getAuthority(),
283                 outputBuffer.toString(), uri.getQuery(), uri.getFragment());
284         } catch (URISyntaxException e) {
285             throw new IllegalArgumentException(e);
286         }
287     }
288 
289     /**
290      * Extracts target host from the given {@link URI}.
291      *
292      * @param uri
293      * @return the target host if the URI is absolute or <code>null</null> if the URI is
294      * relative or does not contain a valid host name.
295      *
296      * @since 4.1
297      */
298     public static HttpHost extractHost(final URI uri) {
299         if (uri == null) {
300             return null;
301         }
302         HttpHost target = null;
303         if (uri.isAbsolute()) {
304             int port = uri.getPort(); // may be overridden later
305             String host = uri.getHost();
306             if (host == null) { // normal parse failed; let's do it ourselves
307                 // authority does not seem to care about the valid character-set for host names
308                 host = uri.getAuthority();
309                 if (host != null) {
310                     // Strip off any leading user credentials
311                     int at = host.indexOf('@');
312                     if (at >= 0) {
313                         if (host.length() > at+1 ) {
314                             host = host.substring(at+1);
315                         } else {
316                             host = null; // @ on its own
317                         }
318                     }
319                     // Extract the port suffix, if present
320                     if (host != null) {
321                         int colon = host.indexOf(':');
322                         if (colon >= 0) {
323                             int pos = colon + 1;
324                             int len = 0;
325                             for (int i = pos; i < host.length(); i++) {
326                                 if (Character.isDigit(host.charAt(i))) {
327                                     len++;
328                                 } else {
329                                     break;
330                                 }
331                             }
332                             if (len > 0) {
333                                 try {
334                                     port = Integer.parseInt(host.substring(pos, pos + len));
335                                 } catch (NumberFormatException ex) {
336                                 }
337                             }
338                             host = host.substring(0, colon);
339                         }
340                     }
341                 }
342             }
343             String scheme = uri.getScheme();
344             if (host != null) {
345                 target = new HttpHost(host, port, scheme);
346             }
347         }
348         return target;
349     }
350 
351     /**
352      * This class should not be instantiated.
353      */
354     private URIUtils() {
355     }
356 
357 }