From 41c4cd08a0feb2fa6b1125ab70504ab70fe59a09 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Fri, 12 Apr 2013 03:00:29 +0000 Subject: [PATCH] MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris Nauroth via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467158 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ hadoop-tools/hadoop-streaming/pom.xml | 14 ++++++ .../apache/hadoop/streaming/StreamJob.java | 3 +- .../hadoop-streaming/src/test/bin/cat.cmd | 18 +++++++ .../src/test/bin/xargs_cat.cmd | 18 +++++++ .../apache/hadoop/streaming/TestFileArgs.java | 6 ++- .../streaming/TestMultipleArchiveFiles.java | 7 +-- .../streaming/TestMultipleCachefiles.java | 4 +- .../streaming/TestStreamXmlRecordReader.java | 2 +- .../hadoop/streaming/TestStreaming.java | 49 +++++++++++++++---- .../streaming/TestStreamingKeyValue.java | 2 +- .../TestStreamingOutputKeyValueTypes.java | 18 +++---- .../streaming/TestStreamingTaskLog.java | 17 ++++--- .../apache/hadoop/streaming/TestSymLink.java | 6 +-- 14 files changed, 129 insertions(+), 38 deletions(-) create mode 100644 hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd create mode 100644 hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0c64a5b6ad4..93b5d63e767 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -131,6 +131,9 @@ Trunk (Unreleased) MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path separators. (Chris Nauroth via sseth) + MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris + Nauroth via bikas) + BREAKDOWN OF HADOOP-8562 SUBTASKS MAPREDUCE-4739. Some MapReduce tests fail to find winutils. 
diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index 99249693166..7265c0468d5 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -127,6 +127,20 @@ + <execution> + <id>copy-test-bin</id> + <phase>process-test-resources</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + <target> + <copy toDir="${project.build.directory}/bin"> + <fileset dir="${basedir}/src/test/bin"/> + </copy> + </target> + </configuration> + </execution> diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index 2acc5563bd7..d18a7654e09 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -294,8 +294,7 @@ public class StreamJob implements Tool { for (String file : values) { packageFiles_.add(file); try { - URI pathURI = new URI(file); - Path path = new Path(pathURI); + Path path = new Path(file); FileSystem localFs = FileSystem.getLocal(config_); String finalPath = path.makeQualified(localFs).toString(); if(fileList.length() > 0) { diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd new file mode 100644 index 00000000000..4b38e3e3b4b --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd @@ -0,0 +1,18 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. 
You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @echo %%A +@rem lines have been copied from stdin to stdout diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd new file mode 100644 index 00000000000..f398a8d65c3 --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd @@ -0,0 +1,18 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+ +@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @type %%A +@rem files named on stdin have been copied to stdout diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java index ac577e4c7ec..e864e9d8555 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java @@ -19,6 +19,7 @@ package org.apache.hadoop.streaming; import java.io.DataOutputStream; +import java.io.File; import java.io.IOException; import java.util.Map; @@ -27,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.MiniMRCluster; +import org.apache.hadoop.util.Shell; import org.junit.After; import org.junit.Before; @@ -45,7 +47,8 @@ public class TestFileArgs extends TestStreaming private static final String EXPECTED_OUTPUT = "job.jar\t\nsidefile\t\n"; - private static final String LS_PATH = "/bin/ls"; + private static final String LS_PATH = Shell.WINDOWS ? 
"cmd /c dir /B" : + "/bin/ls"; public TestFileArgs() throws IOException { @@ -58,6 +61,7 @@ public class TestFileArgs extends TestStreaming map = LS_PATH; FileSystem.setDefaultUri(conf, "hdfs://" + namenode); + setTestDir(new File("/tmp/TestFileArgs")); } @Before diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java index c5136e6dc7d..47b70ef6fbd 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java @@ -70,8 +70,8 @@ public class TestMultipleArchiveFiles extends TestStreaming namenode = fileSys.getUri().getAuthority(); mr = new MiniMRCluster(1, namenode, 1); - map = "xargs cat"; - reduce = "cat"; + map = XARGS_CAT; + reduce = CAT; } @Override @@ -84,7 +84,8 @@ public class TestMultipleArchiveFiles extends TestStreaming { fileSys.delete(new Path(INPUT_DIR), true); DataOutputStream dos = fileSys.create(new Path(INPUT_FILE)); - String inputFileString = "symlink1/cacheArchive1\nsymlink2/cacheArchive2"; + String inputFileString = "symlink1" + File.separator + + "cacheArchive1\nsymlink2" + File.separator + "cacheArchive2"; dos.write(inputFileString.getBytes("UTF-8")); dos.close(); diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java index 357bfcfd0b3..ae8f57d231c 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java @@ -49,8 +49,8 @@ public class TestMultipleCachefiles String CACHE_FILE = "/testing-streaming/cache.txt"; 
String CACHE_FILE_2 = "/testing-streaming/cache2.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink"; String mapString2 = "testlink2"; String cacheString = "This is just the cache string"; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java index da0bdae484c..53009dbbabc 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java @@ -33,7 +33,7 @@ public class TestStreamXmlRecordReader extends TestStreaming { INPUT_FILE = new File("target/input.xml"); input = "\t\nroses.are.red\t\nviolets.are.blue\t\n" + "bunnies.are.pink\t\n\t\n"; - map = "cat"; + map = CAT; reduce = "NONE"; outputExpect = input; } diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java index 98ed1a299ea..4f39120a162 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java @@ -33,7 +33,7 @@ import static org.junit.Assert.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; - +import org.apache.hadoop.util.Shell; /** * This class tests hadoopStreaming in MapReduce local mode. 
@@ -43,6 +43,22 @@ public class TestStreaming public static final String STREAMING_JAR = JarFinder.getJar(StreamJob.class); + /** + * cat command used for copying stdin to stdout as mapper or reducer function. + * On Windows, use a cmd script that approximates the functionality of cat. + */ + static final String CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/cat.cmd").getAbsolutePath() : "cat"; + + /** + * Command used for iterating through file names on stdin and copying each + * file's contents to stdout, used as mapper or reducer function. On Windows, + * use a cmd script that approximates the functionality of xargs cat. + */ + static final String XARGS_CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/xargs_cat.cmd").getAbsolutePath() : + "xargs cat"; + // "map" command: grep -E (red|green|blue) // reduce command: uniq protected File TEST_DIR; @@ -66,9 +82,22 @@ public class TestStreaming UtilTest utilTest = new UtilTest(getClass().getName()); utilTest.checkUserDir(); utilTest.redirectIfAntJunit(); - TEST_DIR = new File("target/TestStreaming").getAbsoluteFile(); - OUTPUT_DIR = new File(TEST_DIR, "out"); - INPUT_FILE = new File(TEST_DIR, "input.txt"); + setTestDir(new File("target/TestStreaming").getAbsoluteFile()); + } + + /** + * Sets root of test working directory and resets any other paths that must be + * children of the test working directory. Typical usage is for subclasses + * that use HDFS to override the test directory to the form "/tmp/<TestClassName>" + * so that on Windows, tests won't attempt to use paths containing a ':' from + * the drive specifier. The ':' character is considered invalid by HDFS. 
+ * + * @param testDir File to set + */ + protected void setTestDir(File testDir) { + TEST_DIR = testDir; + OUTPUT_DIR = new File(testDir, "out"); + INPUT_FILE = new File(testDir, "input.txt"); } @Before @@ -89,19 +118,18 @@ public class TestStreaming protected void createInput() throws IOException { - DataOutputStream out = getFileSystem().create( - new Path(INPUT_FILE.getAbsolutePath())); + DataOutputStream out = getFileSystem().create(new Path( + INPUT_FILE.getPath())); out.write(getInputData().getBytes("UTF-8")); out.close(); } protected void setInputOutput() { - inputFile = INPUT_FILE.getAbsolutePath(); - outDir = OUTPUT_DIR.getAbsolutePath(); + inputFile = INPUT_FILE.getPath(); + outDir = OUTPUT_DIR.getPath(); } protected String[] genArgs() { - setInputOutput(); args.add("-input");args.add(inputFile); args.add("-output");args.add(outDir); args.add("-mapper");args.add(map); @@ -129,7 +157,7 @@ public class TestStreaming } protected void checkOutput() throws IOException { - Path outPath = new Path(OUTPUT_DIR.getAbsolutePath(), "part-00000"); + Path outPath = new Path(OUTPUT_DIR.getPath(), "part-00000"); FileSystem fs = getFileSystem(); String output = StreamUtil.slurpHadoop(outPath, fs); fs.delete(outPath, true); @@ -155,6 +183,7 @@ public class TestStreaming * @throws IOException */ protected int runStreamJob() throws IOException { + setInputOutput(); createInput(); boolean mayExit = false; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java index 444355f4fbb..c21cb159f4f 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java @@ -76,7 +76,7 @@ public class TestStreamingKeyValue return new String[] { "-input", INPUT_FILE.getAbsolutePath(), 
"-output", OUTPUT_DIR.getAbsolutePath(), - "-mapper", "cat", + "-mapper", TestStreaming.CAT, "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true", "-jobconf", "stream.non.zero.exit.is.failure=true", "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"), diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java index f3158b26405..35eb752b23a 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java @@ -120,7 +120,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testJavaMapperAndCommandReducer() throws Exception { map = "org.apache.hadoop.mapred.lib.IdentityMapper"; - reduce = "cat"; + reduce = CAT; super.testCommandLine(); } @@ -128,7 +128,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testJavaMapperAndCommandReducerAndZeroReduces() throws Exception { map = "org.apache.hadoop.mapred.lib.IdentityMapper"; - reduce = "cat"; + reduce = CAT; args.add("-numReduceTasks"); args.add("0"); super.testCommandLine(); @@ -137,7 +137,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Java Reducer @Test public void testCommandMapperAndJavaReducer() throws Exception { - map = "cat"; + map = CAT; reduce = MyReducer.class.getName(); super.testCommandLine(); } @@ -145,7 +145,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Java Reducer and -numReduceTasks 0 @Test public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception { - map = "cat"; + map = CAT; reduce = MyReducer.class.getName(); 
args.add("-numReduceTasks"); args.add("0"); @@ -155,7 +155,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Reducer = "NONE" @Test public void testCommandMapperWithReduceNone() throws Exception { - map = "cat"; + map = CAT; reduce = "NONE"; super.testCommandLine(); } @@ -163,8 +163,8 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Command Reducer @Test public void testCommandMapperAndCommandReducer() throws Exception { - map = "cat"; - reduce = "cat"; + map = CAT; + reduce = CAT; super.testCommandLine(); } @@ -172,8 +172,8 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testCommandMapperAndCommandReducerAndZeroReduces() throws Exception { - map = "cat"; - reduce = "cat"; + map = CAT; + reduce = CAT; args.add("-numReduceTasks"); args.add("0"); super.testCommandLine(); diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java index 823433c4c04..11c3b4e9b04 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java @@ -83,7 +83,7 @@ public class TestStreamingTaskLog { * (b) hadoop.tasklog.totalLogFileSize * for the children of java tasks in streaming jobs. */ - @Test (timeout = 30000) + @Test (timeout = 120000) public void testStreamingTaskLogWithHadoopCmd() { try { final int numSlaves = 1; @@ -95,13 +95,14 @@ public class TestStreamingTaskLog { fs.delete(testDir, true); } fs.mkdirs(testDir); - File scriptFile = createScript( - testDir.toString() + "/testTaskLog.sh"); + File scriptFile = createScript(testDir.toString() + + (Shell.WINDOWS ? 
"/testTaskLog.cmd" : "/testTaskLog.sh")); conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false); mr = new MiniMRCluster(numSlaves, fs.getUri().toString(), 1, null, null, conf); writeInputFile(fs, inputPath); - map = scriptFile.getAbsolutePath(); + map = Shell.WINDOWS ? "cmd /c " + scriptFile.getAbsolutePath() : + scriptFile.getAbsolutePath(); runStreamJobAndValidateEnv(); @@ -120,8 +121,12 @@ public class TestStreamingTaskLog { File scriptFile = new File(script); UtilTest.recursiveDelete(scriptFile); FileOutputStream in = new FileOutputStream(scriptFile); - in.write(("cat > /dev/null 2>&1\n" + - "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + if (Shell.WINDOWS) { + in.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%".getBytes()); + } else { + in.write(("cat > /dev/null 2>&1\n" + + "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + } in.close(); Shell.execCommand(Shell.getSetPermissionCommand("+x", false, diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java index dba676a32db..730429d6daf 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java @@ -47,13 +47,13 @@ public class TestSymLink String OUTPUT_DIR = "/testing-streaming/out"; String CACHE_FILE = "/testing-streaming/cache.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink\n"; String cacheString = "This is just the cache string"; StreamJob job; - @Test (timeout = 60000) + @Test (timeout = 120000) public void testSymLink() throws Exception { boolean mayExit = false;