HDFS-14423. Percent (%) and plus (+) characters no longer work in WebHDFS.

Signed-off-by: Masatake Iwasaki <iwasakims@apache.org>
This commit is contained in:
Masatake Iwasaki 2019-08-12 12:07:16 +09:00
parent 6ae8bc3a4a
commit da0006fe04
6 changed files with 56 additions and 54 deletions

View File

@ -818,12 +818,27 @@ public final class HttpServer2 implements FilterContainer {
*/
public void addJerseyResourcePackage(final String packageName,
    final String pathSpec) {
  // No extra ResourceConfig settings: delegate to the three-argument
  // overload with an empty parameter map.
  final Map<String, String> noExtraParams = Collections.emptyMap();
  addJerseyResourcePackage(packageName, pathSpec, noExtraParams);
}
/**
 * Register a Jersey servlet for the given resource package, passing the
 * supplied entries to the servlet as additional init parameters.
 * @param packageName The Java package name containing the Jersey resource.
 * @param pathSpec The path spec for the servlet
 * @param params properties and features for ResourceConfig; each entry is
 *               set as an init parameter on the servlet holder
 */
public void addJerseyResourcePackage(final String packageName,
    final String pathSpec, Map<String, String> params) {
  LOG.info("addJerseyResourcePackage: packageName=" + packageName
      + ", pathSpec=" + pathSpec);

  final ServletHolder holder = new ServletHolder(ServletContainer.class);

  // Fixed settings: use the package-scanning resource config and tell it
  // which package(s) to scan.
  holder.setInitParameter(
      "com.sun.jersey.config.property.resourceConfigClass",
      "com.sun.jersey.api.core.PackagesResourceConfig");
  holder.setInitParameter(
      "com.sun.jersey.config.property.packages", packageName);

  // Caller-supplied settings are applied after the fixed ones.
  for (Map.Entry<String, String> initParam : params.entrySet()) {
    final String key = initParam.getKey();
    final String value = initParam.getValue();
    holder.setInitParameter(key, value);
  }

  webAppContext.addServlet(holder, pathSpec);
}

View File

@ -37,8 +37,6 @@ import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
@ -147,8 +145,6 @@ public class WebHdfsFileSystem extends FileSystem
public static final String EZ_HEADER = "X-Hadoop-Accept-EZ";
public static final String FEFINFO_HEADER = "X-Hadoop-feInfo";
public static final String SPECIAL_FILENAME_CHARACTERS_REGEX = ".*[;+%].*";
/**
* Default connection factory may be overridden in tests to use smaller
* timeout values
@ -611,44 +607,8 @@ public class WebHdfsFileSystem extends FileSystem
final Param<?,?>... parameters) throws IOException {
//initialize URI path and query
Path encodedFSPath = fspath;
if (fspath != null) {
URI fspathUri = fspath.toUri();
String fspathUriDecoded = fspathUri.getPath();
boolean pathAlreadyEncoded = false;
try {
fspathUriDecoded = URLDecoder.decode(fspathUri.getPath(), "UTF-8");
//below condition check added as part of fixing HDFS-14323 to make
//sure pathAlreadyEncoded is not set in the case the input url does
//not have any encoded sequence already.This will help pulling data
//from 2.x hadoop cluster to 3.x using 3.x distcp client operation
if(!fspathUri.getPath().equals(fspathUriDecoded)) {
pathAlreadyEncoded = true;
}
} catch (IllegalArgumentException ex) {
LOG.trace("Cannot decode URL encoded file", ex);
}
String[] fspathItems = fspathUriDecoded.split("/");
if (fspathItems.length > 0) {
StringBuilder fsPathEncodedItems = new StringBuilder();
for (String fsPathItem : fspathItems) {
fsPathEncodedItems.append("/");
if (fsPathItem.matches(SPECIAL_FILENAME_CHARACTERS_REGEX) ||
pathAlreadyEncoded) {
fsPathEncodedItems.append(URLEncoder.encode(fsPathItem, "UTF-8"));
} else {
fsPathEncodedItems.append(fsPathItem);
}
}
encodedFSPath = new Path(fspathUri.getScheme(),
fspathUri.getAuthority(), fsPathEncodedItems.substring(1));
}
}
final String path = PATH_PREFIX
+ (encodedFSPath == null ? "/" :
makeQualified(encodedFSPath).toUri().getRawPath());
+ (fspath == null? "/": makeQualified(fspath).toUri().getRawPath());
final String query = op.toQueryString()
+ Param.toSortedString("&", getAuthParameters(op))
+ Param.toSortedString("&", parameters);

View File

@ -58,7 +58,6 @@ import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.util.EnumSet;
@ -128,7 +127,7 @@ public class WebHdfsHandler extends SimpleChannelInboundHandler<HttpRequest> {
params = new ParameterParser(queryString, conf);
DataNodeUGIProvider ugiProvider = new DataNodeUGIProvider(params);
ugi = ugiProvider.ugi();
path = URLDecoder.decode(params.path(), "UTF-8");
path = params.path();
injectToken();
ugi.doAs(new PrivilegedExceptionAction<Void>() {

View File

@ -23,6 +23,7 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_RES
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
import java.util.HashMap;
import javax.servlet.ServletContext;
@ -47,6 +48,8 @@ import org.apache.hadoop.http.HttpServer2;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.http.RestCsrfPreventionFilter;
import com.sun.jersey.api.core.ResourceConfig;
/**
* Encapsulates the HTTP server started by the NameNode.
*/
@ -99,9 +102,11 @@ public class NameNodeHttpServer {
}
// add webhdfs packages
final Map<String, String> params = new HashMap<>();
params.put(ResourceConfig.FEATURE_MATCH_MATRIX_PARAMS, "true");
httpServer2.addJerseyResourcePackage(
jerseyResourcePackage + ";" + Param.class.getPackage().getName(),
pathSpec);
pathSpec, params);
}
/**

View File

@ -25,7 +25,6 @@ import java.io.PrintWriter;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.UnknownHostException;
import java.security.Principal;
import java.security.PrivilegedExceptionAction;
@ -1070,9 +1069,7 @@ public class NamenodeWebHdfsMethods {
return doAs(ugi, new PrivilegedExceptionAction<Response>() {
@Override
public Response run() throws IOException, URISyntaxException {
String absolutePath = path.getAbsolutePath() == null ? null :
URLDecoder.decode(path.getAbsolutePath(), "UTF-8");
return get(ugi, delegation, username, doAsUser, absolutePath,
return get(ugi, delegation, username, doAsUser, path.getAbsolutePath(),
op, offset, length, renewer, bufferSize, xattrNames, xattrEncoding,
excludeDatanodes, fsAction, snapshotName, oldSnapshotName,
tokenKind, tokenService, noredirect, startAfter);

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.WebHdfs;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
@ -77,7 +78,7 @@ public class TestWebHdfsUrl {
uri, conf);
// Construct a file path that contains percentage-encoded string
String pathName = "/hdtest010%2C60020%2C1371000602151.1371058984668";
String pathName = "/hdtest010%2C60020%2C1371000602151.1371058984668+";
Path fsPath = new Path(pathName);
URL encodedPathUrl = webhdfs.toUrl(PutOpParam.Op.CREATE, fsPath);
// We should get back the original file path after cycling back and decoding
@ -440,15 +441,11 @@ public class TestWebHdfsUrl {
}
private static final String BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME =
"specialFile ?\"\\()[]_-=&,{}#'`~!@$^*|<>.";
"specialFile ?\"\\()[]_-=&,{}#'`~!@$^*|<>.+%";
@Test
public void testWebHdfsBackwardCompatibleSpecialCharacterFile()
throws Exception {
assertFalse(BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME
.matches(WebHdfsFileSystem.SPECIAL_FILENAME_CHARACTERS_REGEX));
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser("test-user");
ugi.setAuthenticationMethod(KERBEROS);
@ -508,4 +505,33 @@ public class TestWebHdfsUrl {
WebHdfsTestUtil.LOG.info(url.getPath());
assertEquals(WebHdfsFileSystem.PATH_PREFIX + path, url.getPath());
}
@Test
public void testWebHdfsPathWithSemicolon() throws Exception {
  // Regression test for HDFS-14423: files whose names contain ';', '+'
  // or '%' are created through WebHDFS and then looked up through the
  // DistributedFileSystem client, expecting the names to be unchanged.
  try (MiniDFSCluster miniCluster =
      new MiniDFSCluster.Builder(WebHdfsTestUtil.createConf())
          .numDataNodes(1)
          .build()) {
    miniCluster.waitActive();

    final Path[] specialPaths = {
        new Path("/a;b"),
        new Path("/a+b"),
        new Path("/a%b"),
    };

    final WebHdfsFileSystem webFs = WebHdfsTestUtil.getWebHdfsFileSystem(
        miniCluster.getConfiguration(0), WebHdfs.SCHEME);
    // Create every file first, matching the original ordering, before any
    // of them is verified.
    for (Path special : specialPaths) {
      webFs.create(special).close();
    }

    final DistributedFileSystem hdfs = miniCluster.getFileSystem();
    for (Path special : specialPaths) {
      final String expectedName = special.getName();
      final String actualName =
          hdfs.getFileStatus(special).getPath().getName();
      assertEquals(expectedName, actualName);
    }
  }
}
}