From db51548f706ccd2d0200745ab89e27610c6d10bc Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Thu, 15 Jan 2015 20:54:44 +0900 Subject: [PATCH] HADOOP-11483. HardLink.java should use the jdk7 createLink method --- .../hadoop-common/CHANGES.txt | 2 + .../java/org/apache/hadoop/fs/HardLink.java | 313 +----------------- .../org/apache/hadoop/fs/TestHardLink.java | 91 +---- 3 files changed, 23 insertions(+), 383 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9ef1fa66a0b..3f916a29b68 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -385,6 +385,8 @@ Release 2.7.0 - UNRELEASED IMPROVEMENTS + HADOOP-11483. HardLink.java should use the jdk7 createLink method (aajisaka) + HADOOP-11156. DelegateToFileSystem should implement getFsStatus(final Path f). (Zhihai Xu via wang) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java index e48354dba79..209ba6997ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java @@ -23,13 +23,16 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.StringReader; -import java.util.Arrays; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import com.google.common.annotations.VisibleForTesting; + +import static java.nio.file.Files.createLink; + /** * Class for creating hardlinks. * Supports Unix/Linux, Windows via winutils , and Mac OS X. @@ -75,114 +78,30 @@ public class HardLink { /** * This abstract class bridges the OS-dependent implementations of the - * needed functionality for creating hardlinks and querying link counts. + * needed functionality for querying link counts. * The particular implementation class is chosen during * static initialization phase of the HardLink class. - * The "getter" methods construct shell command strings for various purposes. + * The "getter" methods construct shell command strings. */ private static abstract class HardLinkCommandGetter { - - /** - * Get the command string needed to hardlink a bunch of files from - * a single source directory into a target directory. The source directory - * is not specified here, but the command will be executed using the source - * directory as the "current working directory" of the shell invocation. - * - * @param fileBaseNames - array of path-less file names, relative - * to the source directory - * @param linkDir - target directory where the hardlinks will be put - * @return - an array of Strings suitable for use as a single shell command - * @throws IOException - if any of the file or path names misbehave - */ - abstract String[] linkMult(String[] fileBaseNames, File linkDir) - throws IOException; - - /** - * Get the command string needed to hardlink a single file - */ - abstract String[] linkOne(File file, File linkName) throws IOException; - /** * Get the command string to query the hardlink count of a file */ abstract String[] linkCount(File file) throws IOException; - - /** - * Calculate the total string length of the shell command - * resulting from execution of linkMult, plus the length of the - * source directory name (which will also be provided to the shell) - * - * @param fileDir - source directory, parent of fileBaseNames - * @param fileBaseNames - array of path-less file names, relative - * to the source directory - * @param linkDir - target directory where the hardlinks will be put - * @return - total data length (must not exceed maxAllowedCmdArgLength) - * @throws IOException - */ - abstract int getLinkMultArgLength( - File fileDir, String[] fileBaseNames, File linkDir) - throws IOException; - - /** - * Get the maximum allowed string length of a shell command on this OS, - * which is just the documented minimum guaranteed supported command - * length - aprx. 32KB for Unix, and 8KB for Windows. - */ - abstract int getMaxAllowedCmdArgLength(); } /** * Implementation of HardLinkCommandGetter class for Unix */ - static class HardLinkCGUnix extends HardLinkCommandGetter { - private static String[] hardLinkCommand = {"ln", null, null}; - private static String[] hardLinkMultPrefix = {"ln"}; - private static String[] hardLinkMultSuffix = {null}; + private static class HardLinkCGUnix extends HardLinkCommandGetter { private static String[] getLinkCountCommand = {"stat","-c%h", null}; - //Unix guarantees at least 32K bytes cmd length. - //Subtract another 64b to allow for Java 'exec' overhead - private static final int maxAllowedCmdArgLength = 32*1024 - 65; - private static synchronized void setLinkCountCmdTemplate(String[] template) { //May update this for specific unix variants, //after static initialization phase getLinkCountCommand = template; } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File) - */ - @Override - String[] linkOne(File file, File linkName) - throws IOException { - String[] buf = new String[hardLinkCommand.length]; - System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length); - //unix wants argument order: "ln " - buf[1] = FileUtil.makeShellPath(file, true); - buf[2] = FileUtil.makeShellPath(linkName, true); - return buf; - } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File) - */ - @Override - String[] linkMult(String[] fileBaseNames, File linkDir) - throws IOException { - String[] buf = new String[fileBaseNames.length - + hardLinkMultPrefix.length - + hardLinkMultSuffix.length]; - int mark=0; - System.arraycopy(hardLinkMultPrefix, 0, buf, mark, - hardLinkMultPrefix.length); - mark += hardLinkMultPrefix.length; - System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length); - mark += fileBaseNames.length; - buf[mark] = FileUtil.makeShellPath(linkDir, true); - return buf; - } - + /* * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File) */ @@ -195,169 +114,30 @@ public class HardLink { buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true); return buf; } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File) - */ - @Override - int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) - throws IOException{ - int sum = 0; - for (String x : fileBaseNames) { - // add 1 to account for terminal null or delimiter space - sum += 1 + ((x == null) ? 0 : x.length()); - } - sum += 2 + FileUtil.makeShellPath(fileDir, true).length() - + FileUtil.makeShellPath(linkDir, true).length(); - //add the fixed overhead of the hardLinkMult prefix and suffix - sum += 3; //length("ln") + 1 - return sum; - } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength() - */ - @Override - int getMaxAllowedCmdArgLength() { - return maxAllowedCmdArgLength; - } } - /** * Implementation of HardLinkCommandGetter class for Windows */ + @VisibleForTesting static class HardLinkCGWin extends HardLinkCommandGetter { - //The Windows command getter impl class and its member fields are - //package-private ("default") access instead of "private" to assist - //unit testing (sort of) on non-Win servers - static String CMD_EXE = "cmd.exe"; - static String[] hardLinkCommand = { - Shell.WINUTILS,"hardlink","create", null, null}; - static String[] hardLinkMultPrefix = { - CMD_EXE, "/q", "/c", "for", "%f", "in", "("}; - static String hardLinkMultDir = "\\%f"; - static String[] hardLinkMultSuffix = { - ")", "do", Shell.WINUTILS, "hardlink", "create", null, - "%f"}; static String[] getLinkCountCommand = { Shell.WINUTILS, "hardlink", "stat", null}; - //Windows guarantees only 8K - 1 bytes cmd length. - //Subtract another 64b to allow for Java 'exec' overhead - static final int maxAllowedCmdArgLength = 8*1024 - 65; - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File) - */ - @Override - String[] linkOne(File file, File linkName) - throws IOException { - String[] buf = new String[hardLinkCommand.length]; - System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length); - //windows wants argument order: "create " - buf[4] = file.getCanonicalPath(); - buf[3] = linkName.getCanonicalPath(); - return buf; - } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File) - */ - @Override - String[] linkMult(String[] fileBaseNames, File linkDir) - throws IOException { - String[] buf = new String[fileBaseNames.length - + hardLinkMultPrefix.length - + hardLinkMultSuffix.length]; - String td = linkDir.getCanonicalPath() + hardLinkMultDir; - int mark=0; - System.arraycopy(hardLinkMultPrefix, 0, buf, mark, - hardLinkMultPrefix.length); - mark += hardLinkMultPrefix.length; - System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length); - mark += fileBaseNames.length; - System.arraycopy(hardLinkMultSuffix, 0, buf, mark, - hardLinkMultSuffix.length); - mark += hardLinkMultSuffix.length; - buf[mark - 2] = td; - return buf; - } - /* * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File) */ @Override - String[] linkCount(File file) - throws IOException { + String[] linkCount(File file) throws IOException { String[] buf = new String[getLinkCountCommand.length]; System.arraycopy(getLinkCountCommand, 0, buf, 0, getLinkCountCommand.length); buf[getLinkCountCommand.length - 1] = file.getCanonicalPath(); return buf; } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File) - */ - @Override - int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) - throws IOException { - int sum = 0; - for (String x : fileBaseNames) { - // add 1 to account for terminal null or delimiter space - sum += 1 + ((x == null) ? 0 : x.length()); - } - sum += 2 + fileDir.getCanonicalPath().length() + - linkDir.getCanonicalPath().length(); - //add the fixed overhead of the hardLinkMult command - //(prefix, suffix, and Dir suffix) - sum += (CMD_EXE + " /q /c for %f in ( ) do " - + Shell.WINUTILS + " hardlink create \\%f %f").length(); - return sum; - } - - /* - * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength() - */ - @Override - int getMaxAllowedCmdArgLength() { - return maxAllowedCmdArgLength; - } } - - - /** - * Calculate the nominal length of all contributors to the total - * commandstring length, including fixed overhead of the OS-dependent - * command. It's protected rather than private, to assist unit testing, - * but real clients are not expected to need it -- see the way - * createHardLinkMult() uses it internally so the user doesn't need to worry - * about it. - * - * @param fileDir - source directory, parent of fileBaseNames - * @param fileBaseNames - array of path-less file names, relative - * to the source directory - * @param linkDir - target directory where the hardlinks will be put - * @return - total data length (must not exceed maxAllowedCmdArgLength) - * @throws IOException - */ - protected static int getLinkMultArgLength( - File fileDir, String[] fileBaseNames, File linkDir) - throws IOException { - return getHardLinkCommand.getLinkMultArgLength(fileDir, - fileBaseNames, linkDir); - } - - /** - * Return this private value for use by unit tests. - * Shell commands are not allowed to have a total string length - * exceeding this size. - */ - protected static int getMaxAllowedCmdArgLength() { - return getHardLinkCommand.getMaxAllowedCmdArgLength(); - } - + /* * **************************************************** * Complexity is above. User-visible functionality is below @@ -370,7 +150,7 @@ public class HardLink { * @param linkName - desired target link file */ public static void createHardLink(File file, File linkName) - throws IOException { + throws IOException { if (file == null) { throw new IOException( "invalid arguments to createHardLink: source file is null"); @@ -379,17 +159,7 @@ public class HardLink { throw new IOException( "invalid arguments to createHardLink: link name is null"); } - // construct and execute shell command - String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName); - ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand); - try { - shexec.execute(); - } catch (ExitCodeException e) { - throw new IOException("Failed to execute command " + - Arrays.toString(hardLinkCommand) + - "; command output: \"" + shexec.getOutput() + "\"" + - "; WrappedException: \"" + e.getMessage() + "\""); - } + createLink(linkName.toPath(), file.toPath()); } /** @@ -398,30 +168,10 @@ public class HardLink { * @param parentDir - directory containing source files * @param fileBaseNames - list of path-less file names, as returned by * parentDir.list() - * @param linkDir - where the hardlinks should be put. It must already exist. - * - * If the list of files is too long (overflows maxAllowedCmdArgLength), - * we will automatically split it into multiple invocations of the - * underlying method. + * @param linkDir - where the hardlinks should be put. It must already exist. */ - public static void createHardLinkMult(File parentDir, String[] fileBaseNames, + public static void createHardLinkMult(File parentDir, String[] fileBaseNames, File linkDir) throws IOException { - //This is the public method all non-test clients are expected to use. - //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd - createHardLinkMult(parentDir, fileBaseNames, linkDir, - getHardLinkCommand.getMaxAllowedCmdArgLength()); - } - - /* - * Implements {@link createHardLinkMult} with added variable "maxLength", - * to ease unit testing of the auto-splitting feature for long lists. - * Likewise why it returns "callCount", the number of sub-arrays that - * the file list had to be split into. - * Non-test clients are expected to call the public method instead. - */ - protected static int createHardLinkMult(File parentDir, - String[] fileBaseNames, File linkDir, int maxLength) - throws IOException { if (parentDir == null) { throw new IOException( "invalid arguments to createHardLinkMult: parent directory is null"); @@ -435,40 +185,13 @@ public class HardLink { "invalid arguments to createHardLinkMult: " + "filename list can be empty but not null"); } - if (fileBaseNames.length == 0) { - //the OS cmds can't handle empty list of filenames, - //but it's legal, so just return. - return 0; - } if (!linkDir.exists()) { throw new FileNotFoundException(linkDir + " not found."); } - - //if the list is too long, split into multiple invocations - int callCount = 0; - if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength - && fileBaseNames.length > 1) { - String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2); - callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength); - String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2, - fileBaseNames.length); - callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength); - return callCount; - } else { - callCount = 1; + for (String name : fileBaseNames) { + createLink(linkDir.toPath().resolve(name), + parentDir.toPath().resolve(name)); } - - // construct and execute shell command - String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, - linkDir); - ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand, - parentDir, null, 0L); - try { - shexec.execute(); - } catch (ExitCodeException e) { - throw new IOException(shexec.getOutput() + e.getMessage()); - } - return callCount; } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHardLink.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHardLink.java index 512ba0b365c..c68861c55cf 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHardLink.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHardLink.java @@ -295,91 +295,11 @@ public class TestHardLink { String[] emptyList = {}; //test the case of empty file list - int callCount = createHardLinkMult(src, emptyList, tgt_mult, - getMaxAllowedCmdArgLength()); - //check no exec calls were made - assertEquals(0, callCount); + createHardLinkMult(src, emptyList, tgt_mult); //check nothing changed in the directory tree validateSetup(); } - - /** - * Test createHardLinkMult(), again, this time with the "too long list" - * case where the total size of the command line arguments exceed the - * allowed maximum. In this case, the list should be automatically - * broken up into chunks, each chunk no larger than the max allowed. - * - * We use an extended version of the method call, specifying the - * size limit explicitly, to simulate the "too long" list with a - * relatively short list. - */ - @Test - public void testCreateHardLinkMultOversizeAndEmpty() throws IOException { - - // prep long filenames - each name takes 10 chars in the arg list - // (9 actual chars plus terminal null or delimeter blank) - String name1 = "x11111111"; - String name2 = "x22222222"; - String name3 = "x33333333"; - File x1_long = new File(src, name1); - File x2_long = new File(src, name2); - File x3_long = new File(src, name3); - //set up source files with long file names - x1.renameTo(x1_long); - x2.renameTo(x2_long); - x3.renameTo(x3_long); - //validate setup - assertTrue(x1_long.exists()); - assertTrue(x2_long.exists()); - assertTrue(x3_long.exists()); - assertFalse(x1.exists()); - assertFalse(x2.exists()); - assertFalse(x3.exists()); - - //prep appropriate length information to construct test case for - //oversize filename list - int callCount; - String[] emptyList = {}; - String[] fileNames = src.list(); - //get fixed size of arg list without any filenames - int overhead = getLinkMultArgLength(src, emptyList, tgt_mult); - //select a maxLength that is slightly too short to hold 3 filenames - int maxLength = overhead + (int)(2.5 * (float)(1 + name1.length())); - - //now test list of three filenames when there is room for only 2.5 - callCount = createHardLinkMult(src, fileNames, tgt_mult, maxLength); - //check the request was completed in exactly two "chunks" - assertEquals(2, callCount); - String[] tgt_multNames = tgt_mult.list(); - //sort directory listings before comparsion - Arrays.sort(fileNames); - Arrays.sort(tgt_multNames); - //and check the results were as expected in the dir tree - assertArrayEquals(fileNames, tgt_multNames); - - //Test the case where maxlength is too small even for one filename. - //It should go ahead and try the single files. - - //Clear the test dir tree - FileUtil.fullyDelete(tgt_mult); - assertFalse(tgt_mult.exists()); - tgt_mult.mkdirs(); - assertTrue(tgt_mult.exists() && tgt_mult.list().length == 0); - //set a limit size much smaller than a single filename - maxLength = overhead + (int)(0.5 * (float)(1 + name1.length())); - //attempt the method call - callCount = createHardLinkMult(src, fileNames, tgt_mult, - maxLength); - //should go ahead with each of the three single file names - assertEquals(3, callCount); - tgt_multNames = tgt_mult.list(); - //sort directory listings before comparsion - Arrays.sort(fileNames); - Arrays.sort(tgt_multNames); - //and check the results were as expected in the dir tree - assertArrayEquals(fileNames, tgt_multNames); - } - + /* * Assume that this test won't usually be run on a Windows box. * This test case allows testing of the correct syntax of the Windows @@ -392,18 +312,13 @@ public class TestHardLink { */ @Test public void testWindowsSyntax() { - class win extends HardLinkCGWin {}; + class win extends HardLinkCGWin {} //basic checks on array lengths - assertEquals(5, win.hardLinkCommand.length); - assertEquals(7, win.hardLinkMultPrefix.length); - assertEquals(7, win.hardLinkMultSuffix.length); assertEquals(4, win.getLinkCountCommand.length); - assertTrue(win.hardLinkMultPrefix[4].equals("%f")); //make sure "%f" was not munged assertEquals(2, ("%f").length()); - assertTrue(win.hardLinkMultDir.equals("\\%f")); //make sure "\\%f" was munged correctly assertEquals(3, ("\\%f").length()); assertTrue(win.getLinkCountCommand[1].equals("hardlink"));