From 5f37506e7a2223d216eb3f13beb0dc77e7d027ff Mon Sep 17 00:00:00 2001
From: Oleg Kalnichevski
Date: Fri, 15 Feb 2019 16:36:15 +0100
Subject: [PATCH] HTTPCLIENT-1968: added utility methods to parse and format URI path segments (ported from HttpCore master)

---
NOTE(review): the line breaks and indentation of this patch were rebuilt from a
whitespace-collapsed copy, using the hunk line counts as a guide. Text that had
been stripped as markup (generic type arguments, javadoc <p> tags) was put back;
the sender address on the From: line is still missing. Whitespace on context and
removed lines is a best-effort guess - apply with `git apply --ignore-whitespace`
and confirm against the upstream commit.

 .../http/client/utils/URLEncodedUtils.java    | 155 ++++++++++++++----
 .../client/utils/TestURLEncodedUtils.java     |  39 +++++
 2 files changed, 162 insertions(+), 32 deletions(-)

diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java
index 11b462a6e..56888090a 100644
--- a/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java
+++ b/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java
@@ -36,7 +36,9 @@ import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.BitSet;
+import java.util.Collections;
 import java.util.List;
 import java.util.Scanner;
 
@@ -68,6 +70,12 @@ public class URLEncodedUtils {
     private static final char QP_SEP_A = '&';
     private static final char QP_SEP_S = ';';
     private static final String NAME_VALUE_SEPARATOR = "=";
+    private static final char PATH_SEPARATOR = '/';
+
+    private static final BitSet PATH_SEPARATORS = new BitSet(256);
+    static {
+        PATH_SEPARATORS.set(PATH_SEPARATOR);
+    }
 
     /**
      * @deprecated 4.5 Use {@link #parse(URI, Charset)}
@@ -78,19 +86,12 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI
-     * of {@code http://example.org/path/to/file?a=1&b=2&c=3} would return a list of three NameValuePairs, one for a=1,
-     * one for b=2, and one for c=3. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
-     * <p>
-     * This is typically useful while parsing an HTTP PUT.
+     * Returns a list of {@link NameValuePair}s URI query parameters.
+     * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
      *
-     * This API is currently only used for testing.
-     *
-     * @param uri
-     *            URI to parse
-     * @param charset
-     *            Charset to use while parsing the query
-     * @return a list of {@link NameValuePair} as built from the URI's query portion.
+     * @param uri input URI.
+     * @param charset parameter charset.
+     * @return list of query parameters.
      *
      * @since 4.5
      */
@@ -230,14 +231,12 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
-     * encoding. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
+     * Returns a list of {@link NameValuePair}s URI query parameters.
+     * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
      *
-     * @param s
-     *            text to parse.
-     * @param charset
-     *            Encoding to use when decoding the parameters.
-     * @return a list of {@link NameValuePair} as built from the URI's query portion.
+     * @param s URI query component.
+     * @param charset charset to use when decoding the parameters.
+     * @return list of query parameters.
      *
      * @since 4.2
      */
@@ -254,13 +253,10 @@ public class URLEncodedUtils {
      * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
      * encoding.
      *
-     * @param s
-     *            text to parse.
-     * @param charset
-     *            Encoding to use when decoding the parameters.
-     * @param separators
-     *            element separators.
-     * @return a list of {@link NameValuePair} as built from the URI's query portion.
+     * @param s input text.
+     * @param charset parameter charset.
+     * @param separators parameter separators.
+     * @return list of query parameters.
      *
      * @since 4.3
      */
@@ -274,8 +270,7 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using
-     * the given character encoding.
+     * Returns a list of {@link NameValuePair}s parameters.
      *
      * @param buf
      *            text to parse.
@@ -321,6 +316,98 @@ public class URLEncodedUtils {
         return list;
     }
 
+    static List<String> splitSegments(final CharSequence s, final BitSet separators) {
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        // Skip leading separator
+        if (cursor.atEnd()) {
+            return Collections.emptyList();
+        }
+        if (separators.get(s.charAt(cursor.getPos()))) {
+            cursor.updatePos(cursor.getPos() + 1);
+        }
+        final List<String> list = new ArrayList<String>();
+        final StringBuilder buf = new StringBuilder();
+        for (;;) {
+            if (cursor.atEnd()) {
+                list.add(buf.toString());
+                break;
+            }
+            final char current = s.charAt(cursor.getPos());
+            if (separators.get(current)) {
+                list.add(buf.toString());
+                buf.setLength(0);
+            } else {
+                buf.append(current);
+            }
+            cursor.updatePos(cursor.getPos() + 1);
+        }
+        return list;
+    }
+
+    static List<String> splitPathSegments(final CharSequence s) {
+        return splitSegments(s, PATH_SEPARATORS);
+    }
+
+    /**
+     * Returns a list of URI path segments.
+     *
+     * @param s URI path component.
+     * @param charset parameter charset.
+     * @return list of segments.
+     *
+     * @since 4.5
+     */
+    public static List<String> parsePathSegments(final CharSequence s, final Charset charset) {
+        Args.notNull(s, "Char sequence");
+        final List<String> list = splitPathSegments(s);
+        for (int i = 0; i < list.size(); i++) {
+            list.set(i, urlDecode(list.get(i), charset != null ? charset : Consts.UTF_8, false));
+        }
+        return list;
+    }
+
+    /**
+     * Returns a list of URI path segments.
+     *
+     * @param s URI path component.
+     * @return list of segments.
+     *
+     * @since 4.5
+     */
+    public static List<String> parsePathSegments(final CharSequence s) {
+        return parsePathSegments(s, Consts.UTF_8);
+    }
+
+    /**
+     * Returns a string consisting of joint encoded path segments.
+     *
+     * @param segments the segments.
+     * @param charset parameter charset.
+     * @return URI path component
+     *
+     * @since 4.5
+     */
+    public static String formatSegments(final Iterable<String> segments, final Charset charset) {
+        Args.notNull(segments, "Segments");
+        final StringBuilder result = new StringBuilder();
+        for (final String segment : segments) {
+            result.append(PATH_SEPARATOR).append(urlEncode(segment, charset, PATHSAFE, false));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Returns a string consisting of joint encoded path segments.
+     *
+     * @param segments the segments.
+     * @return URI path component
+     *
+     * @since 4.5
+     */
+    public static String formatSegments(final String... segments) {
+        return formatSegments(Arrays.asList(segments), Consts.UTF_8);
+    }
+
     /**
      * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
      * list of parameters in an HTTP PUT or HTTP POST.
@@ -454,6 +541,8 @@ public class URLEncodedUtils {
      */
     private static final BitSet URLENCODER = new BitSet(256);
 
+    private static final BitSet PATH_SPECIAL = new BitSet(256);
+
     static {
         // unreserved chars
         // alpha characters
@@ -491,9 +580,8 @@ public class URLEncodedUtils {
 
         // URL path safe
         PATHSAFE.or(UNRESERVED);
-        PATHSAFE.set('/'); // segment separator
         PATHSAFE.set(';'); // param separator
-        PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ ,
+        PATHSAFE.set(':'); // RFC 2396
         PATHSAFE.set('@');
         PATHSAFE.set('&');
         PATHSAFE.set('=');
@@ -501,6 +589,9 @@ public class URLEncodedUtils {
         PATHSAFE.set('$');
         PATHSAFE.set(',');
 
+        PATH_SPECIAL.or(PATHSAFE);
+        PATH_SPECIAL.set('/');
+
         RESERVED.set(';');
         RESERVED.set('/');
         RESERVED.set('?');
@@ -683,7 +774,7 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Encode a String using the {@link #PATHSAFE} set of characters.
+     * Encode a String using the {@link #PATH_SPECIAL} set of characters.
      * <p>
      * Used by URIBuilder to encode path segments.
      *
@@ -692,7 +783,7 @@ public class URLEncodedUtils {
      * @return the encoded string
      */
     static String encPath(final String content, final Charset charset) {
-        return urlEncode(content, charset, PATHSAFE, false);
+        return urlEncode(content, charset, PATH_SPECIAL, false);
     }
 
 }
diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java
index 89dfc6dbb..dc4c4970e 100644
--- a/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java
+++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java
@@ -29,6 +29,8 @@ package org.apache.http.client.utils;
 
 import java.net.URI;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.http.Consts;
@@ -37,6 +39,7 @@ import org.apache.http.entity.ContentType;
 import org.apache.http.entity.StringEntity;
 import org.apache.http.message.BasicNameValuePair;
 import org.apache.http.protocol.HTTP;
+import org.hamcrest.CoreMatchers;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -102,6 +105,42 @@ public class TestURLEncodedUtils {
         assertNameValuePair(result.get(1), "d", "e");
     }
 
+    @Test
+    public void testParseSegments() throws Exception {
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("/this/that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this/that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "that", "")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/%2fthis%20and%20that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "that", "/this and that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this///that//"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "", "that", "", "")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("/"),
+                CoreMatchers.equalTo(Collections.singletonList("")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments(""),
+                CoreMatchers.equalTo(Collections.<String>emptyList()));
+    }
+
+    @Test
+    public void testFormatSegments() throws Exception {
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "that"),
+                CoreMatchers.equalTo("/this/that"));
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that"),
+                CoreMatchers.equalTo("/this//that"));
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that", "/this and that"),
+                CoreMatchers.equalTo("/this//that/%2Fthis%20and%20that"));
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "", "that", "", ""),
+                CoreMatchers.equalTo("/this///that//"));
+        Assert.assertThat(URLEncodedUtils.formatSegments(""),
+                CoreMatchers.equalTo("/"));
+        Assert.assertThat(URLEncodedUtils.formatSegments(),
+                CoreMatchers.equalTo(""));
+    }
+
     @Test
     public void testParseURLCodedContentString() throws Exception {
         List <NameValuePair> result;