HTTPCLIENT-1968: added utility methods to parse and format URI path segments (ported from HttpCore master)

This commit is contained in:
Oleg Kalnichevski 2019-02-15 16:36:15 +01:00
parent ae9ea60195
commit 5f37506e7a
2 changed files with 162 additions and 32 deletions

View File

@ -36,7 +36,9 @@ import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet; import java.util.BitSet;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Scanner; import java.util.Scanner;
@ -68,6 +70,12 @@ public class URLEncodedUtils {
private static final char QP_SEP_A = '&'; private static final char QP_SEP_A = '&';
private static final char QP_SEP_S = ';'; private static final char QP_SEP_S = ';';
private static final String NAME_VALUE_SEPARATOR = "="; private static final String NAME_VALUE_SEPARATOR = "=";
/** Character that delimits segments within a URI path. */
private static final char PATH_SEPARATOR = '/';
/** Separator set used when splitting a path; it holds only {@code PATH_SEPARATOR}. */
private static final BitSet PATH_SEPARATORS = new BitSet(256);
static {
// Register '/' as the sole path segment separator.
PATH_SEPARATORS.set(PATH_SEPARATOR);
}
/** /**
* @deprecated 4.5 Use {@link #parse(URI, Charset)} * @deprecated 4.5 Use {@link #parse(URI, Charset)}
@ -78,19 +86,12 @@ public class URLEncodedUtils {
} }
/** /**
* Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI * Returns a list of {@link NameValuePair}s URI query parameters.
* of {@code http://example.org/path/to/file?a=1&b=2&c=3} would return a list of three NameValuePairs, one for a=1, * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
* one for b=2, and one for c=3. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
* <p>
* This is typically useful while parsing an HTTP PUT.
* *
* This API is currently only used for testing. * @param uri input URI.
* * @param charset parameter charset.
* @param uri * @return list of query parameters.
* URI to parse
* @param charset
* Charset to use while parsing the query
* @return a list of {@link NameValuePair} as built from the URI's query portion.
* *
* @since 4.5 * @since 4.5
*/ */
@ -230,14 +231,12 @@ public class URLEncodedUtils {
} }
/** /**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character * Returns a list of {@link NameValuePair}s URI query parameters.
* encoding. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
* *
* @param s * @param s URI query component.
* text to parse. * @param charset charset to use when decoding the parameters.
* @param charset * @return list of query parameters.
* Encoding to use when decoding the parameters.
* @return a list of {@link NameValuePair} as built from the URI's query portion.
* *
* @since 4.2 * @since 4.2
*/ */
@ -254,13 +253,10 @@ public class URLEncodedUtils {
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
* encoding. * encoding.
* *
* @param s * @param s input text.
* text to parse. * @param charset parameter charset.
* @param charset * @param separators parameter separators.
* Encoding to use when decoding the parameters. * @return list of query parameters.
* @param separators
* element separators.
* @return a list of {@link NameValuePair} as built from the URI's query portion.
* *
* @since 4.3 * @since 4.3
*/ */
@ -274,8 +270,7 @@ public class URLEncodedUtils {
} }
/** /**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using * Returns a list of {@link NameValuePair}s parameters.
* the given character encoding.
* *
* @param buf * @param buf
* text to parse. * text to parse.
@ -321,6 +316,98 @@ public class URLEncodedUtils {
return list; return list;
} }
/**
 * Splits a character sequence into segments at every character contained in
 * the given separator set. The segments are returned verbatim.
 * <p>
 * A single separator at the very start of the input is skipped and does not
 * open an empty first segment. Every other separator, including a trailing
 * one, closes the current segment, so consecutive or trailing separators
 * yield empty strings. An empty input yields an empty list.
 *
 * @param s the text to split.
 * @param separators the characters that act as segment separators.
 * @return list of segments.
 */
static List<String> splitSegments(final CharSequence s, final BitSet separators) {
    final int length = s.length();
    if (length == 0) {
        return Collections.emptyList();
    }
    // Skip one leading separator, if present.
    int pos = separators.get(s.charAt(0)) ? 1 : 0;
    final List<String> segments = new ArrayList<String>();
    final StringBuilder segment = new StringBuilder();
    for (; pos < length; pos++) {
        final char ch = s.charAt(pos);
        if (!separators.get(ch)) {
            segment.append(ch);
            continue;
        }
        // Separator reached: flush the segment collected so far.
        segments.add(segment.toString());
        segment.setLength(0);
    }
    // Whatever follows the last separator (possibly nothing) is the final segment.
    segments.add(segment.toString());
    return segments;
}
/**
 * Splits a URI path into segments using {@code '/'} as the only separator.
 * No decoding is performed here; the result is whatever
 * {@code splitSegments} returns for the {@code PATH_SEPARATORS} set.
 *
 * @param s URI path component.
 * @return list of segments.
 */
static List<String> splitPathSegments(final CharSequence s) {
return splitSegments(s, PATH_SEPARATORS);
}
/**
 * Splits a URI path component into its segments and decodes each segment.
 *
 * @param s URI path component; must not be {@code null}.
 * @param charset charset used to decode the segments; UTF-8 is used when {@code null}.
 * @return list of decoded segments.
 *
 * @since 4.5
 */
public static List<String> parsePathSegments(final CharSequence s, final Charset charset) {
    Args.notNull(s, "Char sequence");
    final Charset decodeCharset = charset == null ? Consts.UTF_8 : charset;
    final List<String> segments = splitPathSegments(s);
    // Decode in place: each raw segment is overwritten with its decoded form.
    int index = 0;
    while (index < segments.size()) {
        final String raw = segments.get(index);
        segments.set(index, urlDecode(raw, decodeCharset, false));
        index++;
    }
    return segments;
}
/**
 * Returns a list of URI path segments, decoded using UTF-8.
 * <p>
 * Delegates to {@link #parsePathSegments(CharSequence, Charset)}.
 *
 * @param s URI path component.
 * @return list of segments.
 *
 * @since 4.5
 */
public static List<String> parsePathSegments(final CharSequence s) {
return parsePathSegments(s, Consts.UTF_8);
}
/**
 * Returns a string consisting of joined encoded path segments.
 * <p>
 * Each segment is encoded on its own and prefixed with {@code '/'}, so an
 * empty sequence of segments yields an empty string and a single empty
 * segment yields {@code "/"}.
 *
 * @param segments the segments; must not be {@code null}.
 * @param charset parameter charset; UTF-8 is used when {@code null}, matching
 *   {@link #parsePathSegments(CharSequence, Charset)}.
 * @return URI path component
 *
 * @since 4.5
 */
public static String formatSegments(final Iterable<String> segments, final Charset charset) {
    Args.notNull(segments, "Segments");
    // Resolve the charset once so a null argument is treated the same way as in parsePathSegments.
    final Charset effectiveCharset = charset != null ? charset : Consts.UTF_8;
    final StringBuilder result = new StringBuilder();
    for (final String segment : segments) {
        result.append(PATH_SEPARATOR).append(urlEncode(segment, effectiveCharset, PATHSAFE, false));
    }
    return result.toString();
}
/**
 * Returns a string consisting of joined path segments, encoded using UTF-8.
 * <p>
 * Delegates to {@link #formatSegments(Iterable, Charset)}.
 *
 * @param segments the segments.
 * @return URI path component
 *
 * @since 4.5
 */
public static String formatSegments(final String... segments) {
return formatSegments(Arrays.asList(segments), Consts.UTF_8);
}
/** /**
* Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded} * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
* list of parameters in an HTTP PUT or HTTP POST. * list of parameters in an HTTP PUT or HTTP POST.
@ -454,6 +541,8 @@ public class URLEncodedUtils {
*/ */
private static final BitSet URLENCODER = new BitSet(256); private static final BitSet URLENCODER = new BitSet(256);
/**
 * Character set handed to {@code urlEncode} by {@code encPath}. It is populated
 * in the static initializer as a copy of {@code PATHSAFE} with {@code '/'} added.
 */
private static final BitSet PATH_SPECIAL = new BitSet(256);
static { static {
// unreserved chars // unreserved chars
// alpha characters // alpha characters
@ -491,9 +580,8 @@ public class URLEncodedUtils {
// URL path safe // URL path safe
PATHSAFE.or(UNRESERVED); PATHSAFE.or(UNRESERVED);
PATHSAFE.set('/'); // segment separator
PATHSAFE.set(';'); // param separator PATHSAFE.set(';'); // param separator
PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ , PATHSAFE.set(':'); // RFC 2396
PATHSAFE.set('@'); PATHSAFE.set('@');
PATHSAFE.set('&'); PATHSAFE.set('&');
PATHSAFE.set('='); PATHSAFE.set('=');
@ -501,6 +589,9 @@ public class URLEncodedUtils {
PATHSAFE.set('$'); PATHSAFE.set('$');
PATHSAFE.set(','); PATHSAFE.set(',');
PATH_SPECIAL.or(PATHSAFE);
PATH_SPECIAL.set('/');
RESERVED.set(';'); RESERVED.set(';');
RESERVED.set('/'); RESERVED.set('/');
RESERVED.set('?'); RESERVED.set('?');
@ -683,7 +774,7 @@ public class URLEncodedUtils {
} }
/** /**
* Encode a String using the {@link #PATHSAFE} set of characters. * Encode a String using the {@link #PATH_SPECIAL} set of characters.
* <p> * <p>
* Used by URIBuilder to encode path segments. * Used by URIBuilder to encode path segments.
* *
@ -692,7 +783,7 @@ public class URLEncodedUtils {
* @return the encoded string * @return the encoded string
*/ */
static String encPath(final String content, final Charset charset) { static String encPath(final String content, final Charset charset) {
return urlEncode(content, charset, PATHSAFE, false); return urlEncode(content, charset, PATH_SPECIAL, false);
} }
} }

View File

@ -29,6 +29,8 @@ package org.apache.http.client.utils;
import java.net.URI; import java.net.URI;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.http.Consts; import org.apache.http.Consts;
@ -37,6 +39,7 @@ import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity; import org.apache.http.entity.StringEntity;
import org.apache.http.message.BasicNameValuePair; import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP; import org.apache.http.protocol.HTTP;
import org.hamcrest.CoreMatchers;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
@ -102,6 +105,42 @@ public class TestURLEncodedUtils {
assertNameValuePair(result.get(1), "d", "e"); assertNameValuePair(result.get(1), "d", "e");
} }
// Checks how parsePathSegments splits and decodes a path, including the edge cases
// around leading, repeated and trailing separators.
@Test
public void testParseSegments() throws Exception {
// A single leading separator does not produce an empty first segment.
Assert.assertThat(URLEncodedUtils.parsePathSegments("/this/that"),
CoreMatchers.equalTo(Arrays.asList("this", "that")));
// The same path without the leading separator gives the same segments.
Assert.assertThat(URLEncodedUtils.parsePathSegments("this/that"),
CoreMatchers.equalTo(Arrays.asList("this", "that")));
// Two consecutive separators enclose an empty segment.
Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that"),
CoreMatchers.equalTo(Arrays.asList("this", "", "that")));
// A trailing separator adds a final empty segment.
Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/"),
CoreMatchers.equalTo(Arrays.asList("this", "", "that", "")));
// Percent-escapes are decoded per segment: %2f becomes a literal '/' inside the segment.
Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/%2fthis%20and%20that"),
CoreMatchers.equalTo(Arrays.asList("this", "", "that", "/this and that")));
// Runs of separators give one empty segment per extra separator.
Assert.assertThat(URLEncodedUtils.parsePathSegments("this///that//"),
CoreMatchers.equalTo(Arrays.asList("this", "", "", "that", "", "")));
// A lone separator yields exactly one empty segment.
Assert.assertThat(URLEncodedUtils.parsePathSegments("/"),
CoreMatchers.equalTo(Collections.singletonList("")));
// An empty path yields no segments at all.
Assert.assertThat(URLEncodedUtils.parsePathSegments(""),
CoreMatchers.equalTo(Collections.<String>emptyList()));
}
// Checks how formatSegments joins and encodes segments; the cases mirror testParseSegments.
@Test
public void testFormatSegments() throws Exception {
// Every segment is prefixed with '/'.
Assert.assertThat(URLEncodedUtils.formatSegments("this", "that"),
CoreMatchers.equalTo("/this/that"));
// An empty segment shows up as two consecutive separators.
Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that"),
CoreMatchers.equalTo("/this//that"));
// '/' and ' ' inside a segment are percent-encoded.
Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that", "/this and that"),
CoreMatchers.equalTo("/this//that/%2Fthis%20and%20that"));
// Several empty segments, including trailing ones, are preserved.
Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "", "that", "", ""),
CoreMatchers.equalTo("/this///that//"));
// A single empty segment formats as a lone separator.
Assert.assertThat(URLEncodedUtils.formatSegments(""),
CoreMatchers.equalTo("/"));
// No segments at all format as the empty string.
Assert.assertThat(URLEncodedUtils.formatSegments(),
CoreMatchers.equalTo(""));
}
@Test @Test
public void testParseURLCodedContentString() throws Exception { public void testParseURLCodedContentString() throws Exception {
List <NameValuePair> result; List <NameValuePair> result;