View Javadoc

1   /*
2    * ====================================================================
3    *
4    *  Licensed to the Apache Software Foundation (ASF) under one or more
5    *  contributor license agreements.  See the NOTICE file distributed with
6    *  this work for additional information regarding copyright ownership.
7    *  The ASF licenses this file to You under the Apache License, Version 2.0
8    *  (the "License"); you may not use this file except in compliance with
9    *  the License.  You may obtain a copy of the License at
10   *
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   *
13   *  Unless required by applicable law or agreed to in writing, software
14   *  distributed under the License is distributed on an "AS IS" BASIS,
15   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   *  See the License for the specific language governing permissions and
17   *  limitations under the License.
18   * ====================================================================
19   *
20   * This software consists of voluntary contributions made by many
21   * individuals on behalf of the Apache Software Foundation.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   *
25   */
26  
27  package org.apache.http.client.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.Stack;
32  
33  import org.apache.http.annotation.Immutable;
34  
35  import org.apache.http.HttpHost;
36  
37  /**
38   * A collection of utilities for {@link URI URIs}, to workaround
39   * bugs within the class or for ease-of-use features.
40   *
41   * @since 4.0
42   */
43  @Immutable
44  public class URIUtils {
45  
46       /**
47           * Constructs a {@link URI} using all the parameters. This should be
48           * used instead of
49           * {@link URI#URI(String, String, String, int, String, String, String)}
50           * or any of the other URI multi-argument URI constructors.
51           *
52           * @param scheme
53           *            Scheme name
54           * @param host
55           *            Host name
56           * @param port
57           *            Port number
58           * @param path
59           *            Path
60           * @param query
61           *            Query
62           * @param fragment
63           *            Fragment
64           *
65           * @throws URISyntaxException
66           *             If both a scheme and a path are given but the path is
67           *             relative, if the URI string constructed from the given
68           *             components violates RFC&nbsp;2396, or if the authority
69           *             component of the string is present but cannot be parsed
70           *             as a server-based authority
71           *
72           * @deprecated (4.2) use {@link URIBuilder}.
73           */
74      @Deprecated
75      public static URI createURI(
76              final String scheme,
77              final String host,
78              int port,
79              final String path,
80              final String query,
81              final String fragment) throws URISyntaxException {
82          StringBuilder buffer = new StringBuilder();
83          if (host != null) {
84              if (scheme != null) {
85                  buffer.append(scheme);
86                  buffer.append("://");
87              }
88              buffer.append(host);
89              if (port > 0) {
90                  buffer.append(':');
91                  buffer.append(port);
92              }
93          }
94          if (path == null || !path.startsWith("/")) {
95              buffer.append('/');
96          }
97          if (path != null) {
98              buffer.append(path);
99          }
100         if (query != null) {
101             buffer.append('?');
102             buffer.append(query);
103         }
104         if (fragment != null) {
105             buffer.append('#');
106             buffer.append(fragment);
107         }
108         return new URI(buffer.toString());
109     }
110 
111     /**
112      * A convenience method for creating a new {@link URI} whose scheme, host
113      * and port are taken from the target host, but whose path, query and
114      * fragment are taken from the existing URI. The fragment is only used if
115      * dropFragment is false. The path is set to "/" if not explicitly specified.
116      *
117      * @param uri
118      *            Contains the path, query and fragment to use.
119      * @param target
120      *            Contains the scheme, host and port to use.
121      * @param dropFragment
122      *            True if the fragment should not be copied.
123      *
124      * @throws URISyntaxException
125      *             If the resulting URI is invalid.
126      */
127     public static URI rewriteURI(
128             final URI uri,
129             final HttpHost target,
130             boolean dropFragment) throws URISyntaxException {
131         if (uri == null) {
132             throw new IllegalArgumentException("URI may not be null");
133         }
134         URIBuilder uribuilder = new URIBuilder(uri);
135         if (target != null) {
136             uribuilder.setScheme(target.getSchemeName());
137             uribuilder.setHost(target.getHostName());
138             uribuilder.setPort(target.getPort());
139         } else {
140             uribuilder.setScheme(null);
141             uribuilder.setHost(null);
142             uribuilder.setPort(-1);
143         }
144         if (dropFragment) {
145             uribuilder.setFragment(null);
146         }
147         if (uribuilder.getPath() == null || uribuilder.getPath().length() == 0) {
148             uribuilder.setPath("/");
149         }
150         return uribuilder.build();
151     }
152 
153     /**
154      * A convenience method for
155      * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
156      * fragment.
157      */
158     public static URI rewriteURI(
159             final URI uri,
160             final HttpHost target) throws URISyntaxException {
161         return rewriteURI(uri, target, false);
162     }
163 
164     /**
165      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
166      * query are taken from the existing URI, dropping any fragment or user-information.
167      * The path is set to "/" if not explicitly specified. The existing URI is returned
168      * unmodified if it has no fragment or user-information and has a path.
169      *
170      * @param uri
171      *            original URI.
172      * @throws URISyntaxException
173      *             If the resulting URI is invalid.
174      */
175     public static URI rewriteURI(final URI uri) throws URISyntaxException {
176         if (uri == null) {
177             throw new IllegalArgumentException("URI may not be null");
178         }
179         if (uri.getFragment() != null || uri.getUserInfo() != null
180                 || (uri.getPath() == null || uri.getPath().length() == 0)) {
181             URIBuilder uribuilder = new URIBuilder(uri);
182             uribuilder.setFragment(null).setUserInfo(null);
183             if (uribuilder.getPath() == null || uribuilder.getPath().length() == 0) {
184                 uribuilder.setPath("/");
185             }
186             return uribuilder.build();
187         } else {
188             return uri;
189         }
190     }
191 
192     /**
193      * Resolves a URI reference against a base URI. Work-around for bug in
194      * java.net.URI (<http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535>)
195      *
196      * @param baseURI the base URI
197      * @param reference the URI reference
198      * @return the resulting URI
199      */
200     public static URI resolve(final URI baseURI, final String reference) {
201         return URIUtils.resolve(baseURI, URI.create(reference));
202     }
203 
204     /**
205      * Resolves a URI reference against a base URI. Work-around for bugs in
206      * java.net.URI (e.g. <http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535>)
207      *
208      * @param baseURI the base URI
209      * @param reference the URI reference
210      * @return the resulting URI
211      */
212     public static URI resolve(final URI baseURI, URI reference){
213         if (baseURI == null) {
214             throw new IllegalArgumentException("Base URI may nor be null");
215         }
216         if (reference == null) {
217             throw new IllegalArgumentException("Reference URI may nor be null");
218         }
219         String s = reference.toString();
220         if (s.startsWith("?")) {
221             return resolveReferenceStartingWithQueryString(baseURI, reference);
222         }
223         boolean emptyReference = s.length() == 0;
224         if (emptyReference) {
225             reference = URI.create("#");
226         }
227         URI resolved = baseURI.resolve(reference);
228         if (emptyReference) {
229             String resolvedString = resolved.toString();
230             resolved = URI.create(resolvedString.substring(0,
231                 resolvedString.indexOf('#')));
232         }
233         return normalizeSyntax(resolved);
234     }
235 
236     /**
237      * Resolves a reference starting with a query string.
238      *
239      * @param baseURI the base URI
240      * @param reference the URI reference starting with a query string
241      * @return the resulting URI
242      */
243     private static URI resolveReferenceStartingWithQueryString(
244             final URI baseURI, final URI reference) {
245         String baseUri = baseURI.toString();
246         baseUri = baseUri.indexOf('?') > -1 ?
247             baseUri.substring(0, baseUri.indexOf('?')) : baseUri;
248         return URI.create(baseUri + reference.toString());
249     }
250 
251     /**
252      * Removes dot segments according to RFC 3986, section 5.2.4 and
253      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
254      *
255      * @param uri the original URI
256      * @return the URI without dot segments
257      */
258     private static URI normalizeSyntax(final URI uri) {
259         if (uri.isOpaque()) {
260             return uri;
261         }
262         String path = uri.getPath() == null ? "" : uri.getPath();
263         String[] inputSegments = path.split("/");
264         Stack<String> outputSegments = new Stack<String>();
265         for (String inputSegment : inputSegments) {
266             if ((inputSegment.length() == 0)
267                     || (".".equals(inputSegment))) {
268                 // Do nothing
269             } else if ("..".equals(inputSegment)) {
270                 if (!outputSegments.isEmpty()) {
271                     outputSegments.pop();
272                 }
273             } else {
274                 outputSegments.push(inputSegment);
275             }
276         }
277         StringBuilder outputBuffer = new StringBuilder();
278         for (String outputSegment : outputSegments) {
279             outputBuffer.append('/').append(outputSegment);
280         }
281         if (path.lastIndexOf('/') == path.length() - 1) {
282             // path.endsWith("/") || path.equals("")
283             outputBuffer.append('/');
284         }
285         try {
286             String scheme = uri.getScheme().toLowerCase();
287             String auth = uri.getAuthority().toLowerCase();
288             URI ref = new URI(scheme, auth, outputBuffer.toString(),
289                     null, null);
290             if (uri.getQuery() == null && uri.getFragment() == null) {
291                 return ref;
292             }
293             StringBuilder normalized = new StringBuilder(
294                     ref.toASCIIString());
295             if (uri.getQuery() != null) {
296                 // query string passed through unchanged
297                 normalized.append('?').append(uri.getRawQuery());
298             }
299             if (uri.getFragment() != null) {
300                 // fragment passed through unchanged
301                 normalized.append('#').append(uri.getRawFragment());
302             }
303             return URI.create(normalized.toString());
304         } catch (URISyntaxException e) {
305             throw new IllegalArgumentException(e);
306         }
307     }
308 
309     /**
310      * Extracts target host from the given {@link URI}.
311      *
312      * @param uri
313      * @return the target host if the URI is absolute or <code>null</null> if the URI is
314      * relative or does not contain a valid host name.
315      *
316      * @since 4.1
317      */
318     public static HttpHost extractHost(final URI uri) {
319         if (uri == null) {
320             return null;
321         }
322         HttpHost target = null;
323         if (uri.isAbsolute()) {
324             int port = uri.getPort(); // may be overridden later
325             String host = uri.getHost();
326             if (host == null) { // normal parse failed; let's do it ourselves
327                 // authority does not seem to care about the valid character-set for host names
328                 host = uri.getAuthority();
329                 if (host != null) {
330                     // Strip off any leading user credentials
331                     int at = host.indexOf('@');
332                     if (at >= 0) {
333                         if (host.length() > at+1 ) {
334                             host = host.substring(at+1);
335                         } else {
336                             host = null; // @ on its own
337                         }
338                     }
339                     // Extract the port suffix, if present
340                     if (host != null) {
341                         int colon = host.indexOf(':');
342                         if (colon >= 0) {
343                             int pos = colon + 1;
344                             int len = 0;
345                             for (int i = pos; i < host.length(); i++) {
346                                 if (Character.isDigit(host.charAt(i))) {
347                                     len++;
348                                 } else {
349                                     break;
350                                 }
351                             }
352                             if (len > 0) {
353                                 try {
354                                     port = Integer.parseInt(host.substring(pos, pos + len));
355                                 } catch (NumberFormatException ex) {
356                                 }
357                             }
358                             host = host.substring(0, colon);
359                         }
360                     }
361                 }
362             }
363             String scheme = uri.getScheme();
364             if (host != null) {
365                 target = new HttpHost(host, port, scheme);
366             }
367         }
368         return target;
369     }
370 
371     /**
372      * This class should not be instantiated.
373      */
374     private URIUtils() {
375     }
376 
377 }