HDFS-13582. Improve backward compatibility for HDFS-13176 (WebHdfs file path gets truncated when having semicolon (;) inside). Contributed by Zsolt Venczel.

This commit is contained in:
Sean Mackrory 2018-05-31 07:56:57 -06:00
parent 6bc92e304f
commit 1361030e59
2 changed files with 65 additions and 1 deletions

View File

@ -146,6 +146,8 @@ public class WebHdfsFileSystem extends FileSystem
public static final String EZ_HEADER = "X-Hadoop-Accept-EZ";
public static final String FEFINFO_HEADER = "X-Hadoop-feInfo";
public static final String SPECIAL_FILENAME_CHARACTERS_REGEX = ".*[;+%].*";
/**
* Default connection factory may be overridden in tests to use smaller
* timeout values
@ -606,8 +608,10 @@ public class WebHdfsFileSystem extends FileSystem
if (fspath != null) {
URI fspathUri = fspath.toUri();
String fspathUriDecoded = fspathUri.getPath();
boolean pathAlreadyEncoded = false;
try {
fspathUriDecoded = URLDecoder.decode(fspathUri.getPath(), "UTF-8");
pathAlreadyEncoded = true;
} catch (IllegalArgumentException ex) {
LOG.trace("Cannot decode URL encoded file", ex);
}
@ -617,7 +621,12 @@ public class WebHdfsFileSystem extends FileSystem
StringBuilder fsPathEncodedItems = new StringBuilder();
for (String fsPathItem : fspathItems) {
fsPathEncodedItems.append("/");
fsPathEncodedItems.append(URLEncoder.encode(fsPathItem, "UTF-8"));
if (fsPathItem.matches(SPECIAL_FILENAME_CHARACTERS_REGEX) ||
pathAlreadyEncoded) {
fsPathEncodedItems.append(URLEncoder.encode(fsPathItem, "UTF-8"));
} else {
fsPathEncodedItems.append(fsPathItem);
}
}
encodedFSPath = new Path(fspathUri.getScheme(),
fspathUri.getAuthority(), fsPathEncodedItems.substring(1));

View File

@ -414,4 +414,59 @@ public class TestWebHdfsUrl {
}
}
private static final String BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME =
"specialFile ?\"\\()[]_-=&,{}#'`~!@$^*|<>.";
@Test
public void testWebHdfsBackwardCompatibleSpecialCharacterFile()
throws Exception {
assertFalse(BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME
.matches(WebHdfsFileSystem.SPECIAL_FILENAME_CHARACTERS_REGEX));
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser("test-user");
ugi.setAuthenticationMethod(KERBEROS);
UserGroupInformation.setLoginUser(ugi);
final Configuration conf = WebHdfsTestUtil.createConf();
final Path dir = new Path("/testWebHdfsSpecialCharacterFile");
final short numDatanodes = 1;
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(numDatanodes)
.build();
try {
cluster.waitActive();
final FileSystem fs = WebHdfsTestUtil
.getWebHdfsFileSystem(conf, WebHdfs.SCHEME);
//create a file
final long length = 1L << 10;
final Path file1 = new Path(dir,
BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME);
DFSTestUtil.createFile(fs, file1, length, numDatanodes, 20120406L);
//get file status and check that it was written properly.
final FileStatus s1 = fs.getFileStatus(file1);
assertEquals("Write failed for file " + file1, length, s1.getLen());
boolean found = false;
RemoteIterator<LocatedFileStatus> statusRemoteIterator =
fs.listFiles(dir, false);
while (statusRemoteIterator.hasNext()) {
LocatedFileStatus locatedFileStatus = statusRemoteIterator.next();
if (locatedFileStatus.isFile() &&
BACKWARD_COMPATIBLE_SPECIAL_CHARACTER_FILENAME
.equals(locatedFileStatus.getPath().getName())) {
found = true;
}
}
assertFalse("Could not find file with special character", !found);
} finally {
cluster.shutdown();
}
}
}