From d5f7aa2285a0b0f6561c64d2321565bbaf5d287b Mon Sep 17 00:00:00 2001
From: Wei-Chiu Chuang
Date: Tue, 14 Jan 2020 15:39:59 -0800
Subject: [PATCH] HDFS-13339. Volume reference can't be released and may lead
 to deadlock when DataXceiver does a check volume. Contributed by Jim
 Brennan, Zsolt Venczel.

(cherry picked from commit c675719c3fb0fe5f6b0be624935fdf22bb228e0f)
---
 .../datanode/checker/DatasetVolumeChecker.java       | 13 ++++++++++++-
 .../datanode/checker/TestDatasetVolumeChecker.java   |  9 +++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java
index 0f59b847dc1..05a9ae83da4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java
@@ -47,6 +47,7 @@ import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
@@ -104,6 +105,8 @@ public class DatasetVolumeChecker {
   private static final VolumeCheckContext IGNORED_CONTEXT =
       new VolumeCheckContext();
 
+  private final ExecutorService checkVolumeResultHandlerExecutorService;
+
   /**
    * @param conf Configuration object.
    * @param timer {@link Timer} object used for throttling checks.
@@ -165,6 +168,12 @@ public class DatasetVolumeChecker {
         .setNameFormat("DataNode DiskChecker thread %d")
         .setDaemon(true)
         .build()));
+
+    checkVolumeResultHandlerExecutorService = Executors.newCachedThreadPool(
+        new ThreadFactoryBuilder()
+            .setNameFormat("VolumeCheck ResultHandler thread %d")
+            .setDaemon(true)
+            .build());
   }
 
   /**
@@ -295,7 +304,9 @@ public class DatasetVolumeChecker {
       Futures.addCallback(olf.get(),
           new ResultHandler(volumeReference, new HashSet<FsVolumeSpi>(),
               new HashSet<FsVolumeSpi>(),
-              new AtomicLong(1), callback));
+              new AtomicLong(1), callback),
+          checkVolumeResultHandlerExecutorService
+      );
       return true;
     } else {
       IOUtils.cleanup(null, volumeReference);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/checker/TestDatasetVolumeChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/checker/TestDatasetVolumeChecker.java
index 2a1c824d4ea..08aa1c9a73d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/checker/TestDatasetVolumeChecker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/checker/TestDatasetVolumeChecker.java
@@ -19,11 +19,13 @@
 package org.apache.hadoop.hdfs.server.datanode.checker;
 
 import com.google.common.base.Optional;
+import com.google.common.base.Supplier;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.*;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi.VolumeCheckContext;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.FakeTimer;
 import org.junit.Rule;
@@ -121,6 +123,13 @@ public class TestDatasetVolumeChecker {
       }
     });
 
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return (numCallbackInvocations.get() > 0);
+      }
+    }, 5, 10000);
+
     // Ensure that the check was invoked at least once.
     verify(volume, times(1)).check(any(VolumeCheckContext.class));
     if (result) {