MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching multiple compressed map outputs. Contributed by Nathan Roberts
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1491611 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b088524750
commit
c0737b0bdb
|
@ -502,6 +502,9 @@ Release 2.1.0-beta - UNRELEASED
|
||||||
MAPREDUCE-5301. Updated MR code to work with YARN-635 changes of renaming
|
MAPREDUCE-5301. Updated MR code to work with YARN-635 changes of renaming
|
||||||
YarnRemoteException to YarnException. (Siddharth Seth via vinodkv)
|
YarnRemoteException to YarnException. (Siddharth Seth via vinodkv)
|
||||||
|
|
||||||
|
MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching
|
||||||
|
multiple compressed map outputs (Nathan Roberts via jlowe)
|
||||||
|
|
||||||
BREAKDOWN OF HADOOP-8562 SUBTASKS
|
BREAKDOWN OF HADOOP-8562 SUBTASKS
|
||||||
|
|
||||||
MAPREDUCE-4739. Some MapReduce tests fail to find winutils.
|
MAPREDUCE-4739. Some MapReduce tests fail to find winutils.
|
||||||
|
@ -1108,6 +1111,9 @@ Release 0.23.9 - UNRELEASED
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
|
MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching
|
||||||
|
multiple compressed map outputs (Nathan Roberts via jlowe)
|
||||||
|
|
||||||
Release 0.23.8 - 2013-06-05
|
Release 0.23.8 - 2013-06-05
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -99,6 +99,19 @@ class InMemoryMapOutput<K, V> extends MapOutput<K, V> {
|
||||||
reporter.progress();
|
reporter.progress();
|
||||||
LOG.info("Read " + memory.length + " bytes from map-output for " +
|
LOG.info("Read " + memory.length + " bytes from map-output for " +
|
||||||
getMapId());
|
getMapId());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We've gotten the amount of data we were expecting. Verify the
|
||||||
|
* decompressor has nothing more to offer. This action also forces the
|
||||||
|
* decompressor to read any trailing bytes that weren't critical
|
||||||
|
* for decompression, which is necessary to keep the stream
|
||||||
|
* in sync.
|
||||||
|
*/
|
||||||
|
if (input.read() >= 0 ) {
|
||||||
|
throw new IOException("Unexpected extra bytes from input stream for " +
|
||||||
|
getMapId());
|
||||||
|
}
|
||||||
|
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
// Close the streams
|
// Close the streams
|
||||||
IOUtils.cleanup(LOG, input);
|
IOUtils.cleanup(LOG, input);
|
||||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.mapred.Counters;
|
import org.apache.hadoop.mapred.Counters;
|
||||||
|
import org.apache.hadoop.mapred.IFileOutputStream;
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
import org.apache.hadoop.mapred.JobConf;
|
||||||
import org.apache.hadoop.mapred.Reporter;
|
import org.apache.hadoop.mapred.Reporter;
|
||||||
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
||||||
|
@ -233,6 +234,80 @@ public class TestFetcher {
|
||||||
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
|
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
|
||||||
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
|
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
|
||||||
}
|
}
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
@Test
|
||||||
|
public void testCopyFromHostExtraBytes() throws Exception {
|
||||||
|
LOG.info("testCopyFromHostWaitExtraBytes");
|
||||||
|
JobConf job = new JobConf();
|
||||||
|
TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
|
||||||
|
ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
|
||||||
|
MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
|
||||||
|
InMemoryMapOutput<Text, Text> immo = mock(InMemoryMapOutput.class);
|
||||||
|
|
||||||
|
Reporter r = mock(Reporter.class);
|
||||||
|
ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
|
||||||
|
ExceptionReporter except = mock(ExceptionReporter.class);
|
||||||
|
SecretKey key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
|
||||||
|
HttpURLConnection connection = mock(HttpURLConnection.class);
|
||||||
|
|
||||||
|
Counters.Counter allErrs = mock(Counters.Counter.class);
|
||||||
|
when(r.getCounter(anyString(), anyString()))
|
||||||
|
.thenReturn(allErrs);
|
||||||
|
|
||||||
|
Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
|
||||||
|
r, metrics, except, key, connection);
|
||||||
|
|
||||||
|
|
||||||
|
MapHost host = new MapHost("localhost", "http://localhost:8080/");
|
||||||
|
|
||||||
|
ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
|
||||||
|
TaskAttemptID map1ID = TaskAttemptID.forName("attempt_0_1_m_1_1");
|
||||||
|
maps.add(map1ID);
|
||||||
|
TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
|
||||||
|
maps.add(map2ID);
|
||||||
|
when(ss.getMapsForHost(host)).thenReturn(maps);
|
||||||
|
|
||||||
|
String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
|
||||||
|
String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
|
||||||
|
|
||||||
|
when(connection.getResponseCode()).thenReturn(200);
|
||||||
|
when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
|
||||||
|
.thenReturn(replyHash);
|
||||||
|
ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1);
|
||||||
|
|
||||||
|
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||||
|
DataOutputStream dos = new DataOutputStream(bout);
|
||||||
|
IFileOutputStream ios = new IFileOutputStream(dos);
|
||||||
|
header.write(dos);
|
||||||
|
ios.write("MAPDATA123".getBytes());
|
||||||
|
ios.finish();
|
||||||
|
|
||||||
|
ShuffleHeader header2 = new ShuffleHeader(map2ID.toString(), 14, 10, 1);
|
||||||
|
IFileOutputStream ios2 = new IFileOutputStream(dos);
|
||||||
|
header2.write(dos);
|
||||||
|
ios2.write("MAPDATA456".getBytes());
|
||||||
|
ios2.finish();
|
||||||
|
|
||||||
|
ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
|
||||||
|
when(connection.getInputStream()).thenReturn(in);
|
||||||
|
// 8 < 10 therefore there appear to be extra bytes in the IFileInputStream
|
||||||
|
InMemoryMapOutput<Text, Text> mapOut = new InMemoryMapOutput<Text, Text>(job, map1ID, mm, 8, null, true );
|
||||||
|
InMemoryMapOutput<Text, Text> mapOut2 = new InMemoryMapOutput<Text, Text>(job, map2ID, mm, 10, null, true );
|
||||||
|
|
||||||
|
when(mm.reserve(eq(map1ID), anyLong(), anyInt())).thenReturn(mapOut);
|
||||||
|
when(mm.reserve(eq(map2ID), anyLong(), anyInt())).thenReturn(mapOut2);
|
||||||
|
|
||||||
|
|
||||||
|
underTest.copyFromHost(host);
|
||||||
|
|
||||||
|
|
||||||
|
verify(allErrs).increment(1);
|
||||||
|
verify(ss).copyFailed(map1ID, host, true, false);
|
||||||
|
verify(ss, never()).copyFailed(map2ID, host, true, false);
|
||||||
|
|
||||||
|
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
|
||||||
|
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
|
||||||
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Test(timeout=10000)
|
@Test(timeout=10000)
|
||||||
|
@ -265,7 +340,6 @@ public class TestFetcher {
|
||||||
TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
|
TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
|
||||||
maps.add(map2ID);
|
maps.add(map2ID);
|
||||||
when(ss.getMapsForHost(host)).thenReturn(maps);
|
when(ss.getMapsForHost(host)).thenReturn(maps);
|
||||||
|
|
||||||
String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
|
String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
|
||||||
String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
|
String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue