HDFS-12612. DFSStripedOutputStream.close will throw if called a second time with a failed streamer. (Lei (Eddy) Xu)
This commit is contained in:
parent
75323394fb
commit
f27a4ad032
|
@ -82,6 +82,12 @@ public class DFSStripedOutputStream extends DFSOutputStream
|
||||||
implements StreamCapabilities {
|
implements StreamCapabilities {
|
||||||
private static final ByteBufferPool BUFFER_POOL = new ElasticByteBufferPool();
|
private static final ByteBufferPool BUFFER_POOL = new ElasticByteBufferPool();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OutputStream level last exception, will be used to indicate the fatal
|
||||||
|
* exception of this stream, i.e., being aborted.
|
||||||
|
*/
|
||||||
|
private final ExceptionLastSeen exceptionLastSeen = new ExceptionLastSeen();
|
||||||
|
|
||||||
static class MultipleBlockingQueue<T> {
|
static class MultipleBlockingQueue<T> {
|
||||||
private final List<BlockingQueue<T>> queues;
|
private final List<BlockingQueue<T>> queues;
|
||||||
|
|
||||||
|
@ -971,12 +977,9 @@ public class DFSStripedOutputStream extends DFSOutputStream
|
||||||
if (isClosed()) {
|
if (isClosed()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (StripedDataStreamer streamer : streamers) {
|
exceptionLastSeen.set(new IOException("Lease timeout of "
|
||||||
streamer.getLastException().set(
|
|
||||||
new IOException("Lease timeout of "
|
|
||||||
+ (dfsClient.getConf().getHdfsTimeout() / 1000)
|
+ (dfsClient.getConf().getHdfsTimeout() / 1000)
|
||||||
+ " seconds expired."));
|
+ " seconds expired."));
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
closeThreads(true);
|
closeThreads(true);
|
||||||
|
@ -1133,19 +1136,27 @@ public class DFSStripedOutputStream extends DFSOutputStream
|
||||||
@Override
|
@Override
|
||||||
protected synchronized void closeImpl() throws IOException {
|
protected synchronized void closeImpl() throws IOException {
|
||||||
if (isClosed()) {
|
if (isClosed()) {
|
||||||
|
exceptionLastSeen.check(true);
|
||||||
|
|
||||||
|
// Writing to at least {dataUnits} replicas can be considered as success,
|
||||||
|
// and the rest of data can be recovered.
|
||||||
|
final int minReplication = ecPolicy.getNumDataUnits();
|
||||||
|
int goodStreamers = 0;
|
||||||
final MultipleIOException.Builder b = new MultipleIOException.Builder();
|
final MultipleIOException.Builder b = new MultipleIOException.Builder();
|
||||||
for(int i = 0; i < streamers.size(); i++) {
|
for (final StripedDataStreamer si : streamers) {
|
||||||
final StripedDataStreamer si = getStripedDataStreamer(i);
|
|
||||||
try {
|
try {
|
||||||
si.getLastException().check(true);
|
si.getLastException().check(true);
|
||||||
|
goodStreamers++;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
b.add(e);
|
b.add(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (goodStreamers < minReplication) {
|
||||||
final IOException ioe = b.build();
|
final IOException ioe = b.build();
|
||||||
if (ioe != null) {
|
if (ioe != null) {
|
||||||
throw ioe;
|
throw ioe;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1183,9 +1194,10 @@ public class DFSStripedOutputStream extends DFSOutputStream
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
// Failures may happen when flushing data/parity data out. Exceptions
|
// Failures may happen when flushing data/parity data out. Exceptions
|
||||||
// may be thrown if more than 3 streamers fail, or updatePipeline RPC
|
// may be thrown if the number of failed streamers is more than the
|
||||||
// fails. Streamers may keep waiting for the new block/GS information.
|
// number of parity blocks, or updatePipeline RPC fails. Streamers may
|
||||||
// Thus need to force closing these threads.
|
// keep waiting for the new block/GS information. Thus need to force
|
||||||
|
// closing these threads.
|
||||||
closeThreads(true);
|
closeThreads(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -285,38 +285,21 @@ class DataStreamer extends Daemon {
|
||||||
packets.clear();
|
packets.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
class LastExceptionInStreamer {
|
class LastExceptionInStreamer extends ExceptionLastSeen {
|
||||||
private IOException thrown;
|
/**
|
||||||
|
* Check if there already is an exception.
|
||||||
synchronized void set(Throwable t) {
|
*/
|
||||||
assert t != null;
|
@Override
|
||||||
this.thrown = t instanceof IOException ?
|
|
||||||
(IOException) t : new IOException(t);
|
|
||||||
}
|
|
||||||
|
|
||||||
synchronized void clear() {
|
|
||||||
thrown = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Check if there already is an exception. */
|
|
||||||
synchronized void check(boolean resetToNull) throws IOException {
|
synchronized void check(boolean resetToNull) throws IOException {
|
||||||
|
final IOException thrown = get();
|
||||||
if (thrown != null) {
|
if (thrown != null) {
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
// wrap and print the exception to know when the check is called
|
// wrap and print the exception to know when the check is called
|
||||||
LOG.trace("Got Exception while checking, " + DataStreamer.this,
|
LOG.trace("Got Exception while checking, " + DataStreamer.this,
|
||||||
new Throwable(thrown));
|
new Throwable(thrown));
|
||||||
}
|
}
|
||||||
final IOException e = thrown;
|
super.check(resetToNull);
|
||||||
if (resetToNull) {
|
|
||||||
thrown = null;
|
|
||||||
}
|
}
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
synchronized void throwException4Close() throws IOException {
|
|
||||||
check(false);
|
|
||||||
throw new ClosedChannelException();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.channels.ClosedChannelException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The exception last seen by the {@link DataStreamer} or
|
||||||
|
* {@link DFSOutputStream}.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
class ExceptionLastSeen {
|
||||||
|
private IOException thrown;
|
||||||
|
|
||||||
|
/** Get the last seen exception. */
|
||||||
|
synchronized protected IOException get() {
|
||||||
|
return thrown;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the last seen exception.
|
||||||
|
* @param t the exception.
|
||||||
|
*/
|
||||||
|
synchronized void set(Throwable t) {
|
||||||
|
assert t != null;
|
||||||
|
this.thrown = t instanceof IOException ?
|
||||||
|
(IOException) t : new IOException(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Clear the last seen exception. */
|
||||||
|
synchronized void clear() {
|
||||||
|
thrown = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if there already is an exception. Throw the exception if exist.
|
||||||
|
*
|
||||||
|
* @param resetToNull set to true to reset exception to null after calling
|
||||||
|
* this function.
|
||||||
|
* @throws IOException on existing IOException.
|
||||||
|
*/
|
||||||
|
synchronized void check(boolean resetToNull) throws IOException {
|
||||||
|
if (thrown != null) {
|
||||||
|
final IOException e = thrown;
|
||||||
|
if (resetToNull) {
|
||||||
|
thrown = null;
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized void throwException4Close() throws IOException {
|
||||||
|
check(false);
|
||||||
|
throw new ClosedChannelException();
|
||||||
|
}
|
||||||
|
}
|
|
@ -319,6 +319,82 @@ public class TestDFSStripedOutputStreamWithFailure {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void testCloseWithExceptionsInStreamer(
|
||||||
|
int numFailures, boolean shouldFail) throws Exception {
|
||||||
|
assertTrue(numFailures <=
|
||||||
|
ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
|
||||||
|
final Path dirFile = new Path(dir, "ecfile-" + numFailures);
|
||||||
|
try (FSDataOutputStream out = dfs.create(dirFile, true)) {
|
||||||
|
out.write("idempotent close".getBytes());
|
||||||
|
|
||||||
|
// Expect to raise IOE on the first close call, but any following
|
||||||
|
// close() should be no-op.
|
||||||
|
LambdaTestUtils.intercept(IOException.class,
|
||||||
|
out::close);
|
||||||
|
|
||||||
|
assertTrue(out.getWrappedStream() instanceof DFSStripedOutputStream);
|
||||||
|
DFSStripedOutputStream stripedOut =
|
||||||
|
(DFSStripedOutputStream) out.getWrappedStream();
|
||||||
|
for (int i = 0; i < numFailures; i++) {
|
||||||
|
// Only inject 1 stream failure.
|
||||||
|
stripedOut.getStripedDataStreamer(i).getLastException().set(
|
||||||
|
new IOException("injected failure")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (shouldFail) {
|
||||||
|
LambdaTestUtils.intercept(IOException.class, out::close);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close multiple times. All the following close() should have no
|
||||||
|
// side-effect.
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// HDFS-12612
|
||||||
|
@Test
|
||||||
|
public void testIdempotentCloseWithFailedStreams() throws Exception {
|
||||||
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||||
|
try {
|
||||||
|
setup(conf);
|
||||||
|
// shutdown few datanodes to avoid getting sufficient data blocks number
|
||||||
|
// of datanodes.
|
||||||
|
while (cluster.getDataNodes().size() >= dataBlocks) {
|
||||||
|
cluster.stopDataNode(0);
|
||||||
|
}
|
||||||
|
cluster.restartNameNodes();
|
||||||
|
cluster.triggerHeartbeats();
|
||||||
|
|
||||||
|
testCloseWithExceptionsInStreamer(1, false);
|
||||||
|
testCloseWithExceptionsInStreamer(ecPolicy.getNumParityUnits(), false);
|
||||||
|
testCloseWithExceptionsInStreamer(ecPolicy.getNumParityUnits() + 1, true);
|
||||||
|
testCloseWithExceptionsInStreamer(ecPolicy.getNumDataUnits(), true);
|
||||||
|
} finally {
|
||||||
|
tearDown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCloseAfterAbort() throws Exception {
|
||||||
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||||
|
try {
|
||||||
|
setup(conf);
|
||||||
|
|
||||||
|
final Path dirFile = new Path(dir, "ecfile");
|
||||||
|
FSDataOutputStream out = dfs.create(dirFile, true);
|
||||||
|
assertTrue(out.getWrappedStream() instanceof DFSStripedOutputStream);
|
||||||
|
DFSStripedOutputStream stripedOut =
|
||||||
|
(DFSStripedOutputStream) out.getWrappedStream();
|
||||||
|
stripedOut.abort();
|
||||||
|
LambdaTestUtils.intercept(IOException.class,
|
||||||
|
"Lease timeout", stripedOut::close);
|
||||||
|
} finally {
|
||||||
|
tearDown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout = 90000)
|
@Test(timeout = 90000)
|
||||||
public void testAddBlockWhenNoSufficientParityNumOfNodes()
|
public void testAddBlockWhenNoSufficientParityNumOfNodes()
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
Loading…
Reference in New Issue