From e66ad193950df02c148b6c1b93298234c4f3ce7a Mon Sep 17 00:00:00 2001
From: Masatake Iwasaki <iwasakims@apache.org>
Date: Mon, 12 Aug 2019 12:07:16 +0900
Subject: [PATCH] HDFS-14423. Percent (%) and plus (+) characters no longer
 work in WebHDFS.

Signed-off-by: Masatake Iwasaki <iwasakims@apache.org>
(cherry picked from commit da0006fe0473e353ee2d489156248a01aa982dfd)

 Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java

(cherry picked from commit d7ca016d63d89e5c8377a035f93485a7c77c3430)
---
 .../org/apache/hadoop/http/HttpServer2.java   | 15 +++++++
 .../hadoop/hdfs/web/WebHdfsFileSystem.java    | 42 +------------------
 .../datanode/web/webhdfs/WebHdfsHandler.java  |  3 +-
 .../server/namenode/NameNodeHttpServer.java   |  6 ++-
 .../web/resources/NamenodeWebHdfsMethods.java |  5 +--
 .../hadoop/hdfs/web/TestWebHdfsUrl.java       | 38 ++++++++++++++---
 6 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
index 7452b0bfce8..02bd383eb91 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
@@ -786,12 +786,27 @@ public final class HttpServer2 implements FilterContainer {
    */
   public void addJerseyResourcePackage(final String packageName,
       final String pathSpec) {
+    addJerseyResourcePackage(packageName, pathSpec,
+        Collections.<String, String>emptyMap());
+  }
+
+  /**
+   * Add a Jersey resource package.
+   * @param packageName The Java package name containing the Jersey resource.
+   * @param pathSpec The path spec for the servlet
+   * @param params properties and features for ResourceConfig
+   */
+  public void addJerseyResourcePackage(final String packageName,
+      final String pathSpec, Map<String, String> params) {
     LOG.info("addJerseyResourcePackage: packageName=" + packageName
         + ", pathSpec=" + pathSpec);
     final ServletHolder sh = new ServletHolder(ServletContainer.class);
     sh.setInitParameter("com.sun.jersey.config.property.resourceConfigClass",
         "com.sun.jersey.api.core.PackagesResourceConfig");
     sh.setInitParameter("com.sun.jersey.config.property.packages", packageName);
+    for (Map.Entry<String, String> entry : params.entrySet()) {
+      sh.setInitParameter(entry.getKey(), entry.getValue());
+    }
     webAppContext.addServlet(sh, pathSpec);
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
index c74577ad7bd..99cab37234c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
@@ -37,8 +37,6 @@ import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
-import java.net.URLDecoder;
-import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -146,8 +144,6 @@ public class WebHdfsFileSystem extends FileSystem
   public static final String EZ_HEADER = "X-Hadoop-Accept-EZ";
   public static final String FEFINFO_HEADER = "X-Hadoop-feInfo";
 
-  public static final String SPECIAL_FILENAME_CHARACTERS_REGEX = ".*[;+%].*";
-
   /**
    * Default connection factory may be overridden in tests to use smaller
    * timeout values
@@ -603,44 +599,8 @@ public class WebHdfsFileSystem extends FileSystem
       final Param<?,?>... parameters) throws IOException {
     //initialize URI path and query
 
-    Path encodedFSPath = fspath;
-    if (fspath != null) {
-      URI fspathUri = fspath.toUri();
-      String fspathUriDecoded = fspathUri.getPath();
-      boolean pathAlreadyEncoded = false;
-      try {
-        fspathUriDecoded = URLDecoder.decode(fspathUri.getPath(), "UTF-8");
-        //below condition check added as part of fixing HDFS-14323 to make
-        //sure pathAlreadyEncoded is not set in the case the input url does
-        //not have any encoded sequence already.This will help pulling data
-        //from 2.x hadoop cluster to 3.x using 3.x distcp client operation
-        if(!fspathUri.getPath().equals(fspathUriDecoded)) {
-          pathAlreadyEncoded = true;
-        }
-      } catch (IllegalArgumentException ex) {
-        LOG.trace("Cannot decode URL encoded file", ex);
-      }
-      String[] fspathItems = fspathUriDecoded.split("/");
-
-      if (fspathItems.length > 0) {
-        StringBuilder fsPathEncodedItems = new StringBuilder();
-        for (String fsPathItem : fspathItems) {
-          fsPathEncodedItems.append("/");
-          if (fsPathItem.matches(SPECIAL_FILENAME_CHARACTERS_REGEX) ||
-              pathAlreadyEncoded) {
-            fsPathEncodedItems.append(URLEncoder.encode(fsPathItem, "UTF-8"));
-          } else {
-            fsPathEncodedItems.append(fsPathItem);
-          }
-        }
-        encodedFSPath = new Path(fspathUri.getScheme(),
-            fspathUri.getAuthority(), fsPathEncodedItems.substring(1));
-      }
-    }
-
     final String path = PATH_PREFIX
-        + (encodedFSPath == null ? "/" :
-            makeQualified(encodedFSPath).toUri().getRawPath());
+        + (fspath == null? "/": makeQualified(fspath).toUri().getRawPath());
     final String query = op.toQueryString()
         + Param.toSortedString("&", getAuthParameters(op))
         + Param.toSortedString("&", parameters);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java
index 9a4b670f1b6..c5fc7ea1709 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java
@@ -58,7 +58,6 @@ import java.io.OutputStream;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
-import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
 import java.util.EnumSet;
@@ -128,7 +127,7 @@ public class WebHdfsHandler extends SimpleChannelInboundHandler<HttpRequest> {
     params = new ParameterParser(queryString, conf);
     DataNodeUGIProvider ugiProvider = new DataNodeUGIProvider(params);
     ugi = ugiProvider.ugi();
-    path = URLDecoder.decode(params.path(), "UTF-8");
+    path = params.path();
 
     injectToken();
     ugi.doAs(new PrivilegedExceptionAction<Void>() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java
index 861afae5c74..dc1556d5807 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java
@@ -51,6 +51,8 @@ import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.http.RestCsrfPreventionFilter;
 
+import com.sun.jersey.api.core.ResourceConfig;
+
 /**
  * Encapsulates the HTTP server started by the NameNode.
  */
@@ -112,9 +114,11 @@ public class NameNodeHttpServer {
     }
 
     // add webhdfs packages
+    final Map<String, String> resourceParams = new HashMap<>();
+    resourceParams.put(ResourceConfig.FEATURE_MATCH_MATRIX_PARAMS, "true");
     httpServer2.addJerseyResourcePackage(
         jerseyResourcePackage + ";" + Param.class.getPackage().getName(),
-        pathSpec);
+        pathSpec, resourceParams);
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
index a578c10d676..4c43d388733 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
@@ -25,7 +25,6 @@ import java.io.PrintWriter;
 import java.net.InetAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
-import java.net.URLDecoder;
 import java.net.UnknownHostException;
 import java.security.Principal;
 import java.security.PrivilegedExceptionAction;
@@ -1029,9 +1028,7 @@ public class NamenodeWebHdfsMethods {
     return doAs(ugi, new PrivilegedExceptionAction<Response>() {
       @Override
       public Response run() throws IOException, URISyntaxException {
-        String absolutePath = path.getAbsolutePath() == null ? null :
-            URLDecoder.decode(path.getAbsolutePath(), "UTF-8");
-        return get(ugi, delegation, username, doAsUser, absolutePath,
+        return get(ugi, delegation, username, doAsUser, path.getAbsolutePath(),
             op, offset, length, renewer, bufferSize, xattrNames, xattrEncoding,
             excludeDatanodes, fsAction, snapshotName, oldSnapshotName,
             tokenKind, tokenService, noredirect, startAfter);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java
index 6733555b15e..3e5c5065bd1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.WebHdfs;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
@@ -76,7 +77,7 @@ public class TestWebHdfsUrl {
         uri, conf);
 
     // Construct a file path that contains percentage-encoded string
-    String pathName = "/hdtest010%2C60020%2C1371000602151.1371058984668";
+    String pathName = "/hdtest010%2C60020%2C1371000602151.1371058984668+";
     Path fsPath = new Path(pathName);
     URL encodedPathUrl = webhdfs.toUrl(PutOpParam.Op.CREATE, fsPath);
     // We should get back the original file path after cycling back and decoding
@@ -415,15 +416,11 @@ public class TestWebHdfsUrl {
   }
 
   private static final String BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME =
?\"\\()[]_-=&,{}#'`~!@$^*|<>."; + "specialFile ?\"\\()[]_-=&,{}#'`~!@$^*|<>.+%"; @Test public void testWebHdfsBackwardCompatibleSpecialCharacterFile() throws Exception { - - assertFalse(BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME - .matches(WebHdfsFileSystem.SPECIAL_FILENAME_CHARACTERS_REGEX)); - UserGroupInformation ugi = UserGroupInformation.createRemoteUser("test-user"); ugi.setAuthenticationMethod(KERBEROS); @@ -483,4 +480,33 @@ public class TestWebHdfsUrl { WebHdfsTestUtil.LOG.info(url.getPath()); assertEquals(WebHdfsFileSystem.PATH_PREFIX + path, url.getPath()); } + + @Test + public void testWebHdfsPathWithSemicolon() throws Exception { + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(WebHdfsTestUtil.createConf()) + .numDataNodes(1) + .build()) { + cluster.waitActive(); + + // regression test for HDFS-14423. + final Path semicolon = new Path("/a;b"); + final Path plus = new Path("/a+b"); + final Path percent = new Path("/a%b"); + + final WebHdfsFileSystem webhdfs = WebHdfsTestUtil.getWebHdfsFileSystem( + cluster.getConfiguration(0), WebHdfs.SCHEME); + webhdfs.create(semicolon).close(); + webhdfs.create(plus).close(); + webhdfs.create(percent).close(); + + final DistributedFileSystem dfs = cluster.getFileSystem(); + assertEquals(semicolon.getName(), + dfs.getFileStatus(semicolon).getPath().getName()); + assertEquals(plus.getName(), + dfs.getFileStatus(plus).getPath().getName()); + assertEquals(percent.getName(), + dfs.getFileStatus(percent).getPath().getName()); + } + } }