HDFS-3828. Block Scanner rescans blocks too frequently. Contributed by Andy Isaacson
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1381478 13f79535-47bb-0310-9956-ffa450edef68
parent a58e65c926
commit 5ef8a80920
CHANGES.txt
@@ -560,6 +560,9 @@ Release 2.0.1-alpha - UNRELEASED
 
     HDFS-1490. TransferFSImage should timeout (Dmytro Molkov and Vinay via todd)
 
+    HDFS-3828. Block Scanner rescans blocks too frequently.
+    (Andy Isaacson via eli)
+
   BREAKDOWN OF HDFS-3042 SUBTASKS
 
     HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
BlockPoolSliceScanner.java
@@ -51,6 +51,8 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Time;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Scans the block files under a block pool and verifies that the
  * files are not corrupt.
@@ -255,6 +257,11 @@ class BlockPoolSliceScanner {
     }
   }
 
+  @VisibleForTesting
+  long getTotalScans() {
+    return totalScans;
+  }
+
   /** @return the last scan time for the block pool. */
   long getLastScanTime() {
     return lastScanTime.get();
@@ -563,7 +570,24 @@ class BlockPoolSliceScanner {
     currentPeriodStart = Time.now();
   }
 
+  private synchronized boolean workRemainingInCurrentPeriod() {
+    if (bytesLeft <= 0 && Time.now() < currentPeriodStart + scanPeriod) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Skipping scan since bytesLeft=" + bytesLeft + ", Start=" +
+                  currentPeriodStart + ", period=" + scanPeriod + ", now=" +
+                  Time.now() + " " + blockPoolId);
+      }
+      return false;
+    } else {
+      return true;
+    }
+  }
+
   void scanBlockPoolSlice() {
+    if (!workRemainingInCurrentPeriod()) {
+      return;
+    }
+
     // Create a new processedBlocks structure
     processedBlocks = new HashMap<Long, Integer>();
     if (!assignInitialVerificationTimes()) {
@@ -608,14 +632,14 @@ class BlockPoolSliceScanner {
       LOG.warn("RuntimeException during BlockPoolScanner.scan()", e);
       throw e;
     } finally {
-      cleanUp();
+      rollVerificationLogs();
       if (LOG.isDebugEnabled()) {
         LOG.debug("Done scanning block pool: " + blockPoolId);
       }
     }
   }
 
-  private synchronized void cleanUp() {
+  private synchronized void rollVerificationLogs() {
     if (verificationLog != null) {
       try {
         verificationLog.logs.roll();
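Reviewer note (not part of the patch): the new workRemainingInCurrentPeriod() gate above is the core of the fix for over-frequent rescans. The standalone Java sketch below restates the idea with simplified stand-in fields (bytesLeft, currentPeriodStart, scanPeriodMs) and System.currentTimeMillis() in place of Hadoop's Time.now(): once the byte budget for the current scan period is spent, further scan attempts return early until the period rolls over.

// Minimal standalone sketch of the period-gating idea; names are illustrative,
// not the real BlockPoolSliceScanner internals.
public class ScanPeriodGateSketch {
  private long bytesLeft;            // bytes still to verify in this period
  private long currentPeriodStart;   // wall-clock start of the period, in ms
  private final long scanPeriodMs;   // length of one scan period, in ms

  ScanPeriodGateSketch(long bytesToScan, long scanPeriodMs) {
    this.bytesLeft = bytesToScan;
    this.currentPeriodStart = System.currentTimeMillis();
    this.scanPeriodMs = scanPeriodMs;
  }

  // Mirrors the new check: skip only when the budget is spent AND the period is not yet over.
  synchronized boolean workRemainingInCurrentPeriod() {
    return !(bytesLeft <= 0
        && System.currentTimeMillis() < currentPeriodStart + scanPeriodMs);
  }

  synchronized void scanOnce(long bytesVerified) {
    if (!workRemainingInCurrentPeriod()) {
      return;                        // nothing to do until the period rolls over
    }
    bytesLeft -= bytesVerified;      // pretend we verified this many bytes
  }

  public static void main(String[] args) {
    ScanPeriodGateSketch gate = new ScanPeriodGateSketch(100, 60000L);
    gate.scanOnce(100);              // first pass uses the whole budget
    System.out.println("work remaining? " + gate.workRemainingInCurrentPeriod()); // prints false
  }
}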
DataBlockScanner.java
@@ -34,6 +34,8 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * DataBlockScanner manages block scanning for all the block pools. For each
  * block pool a {@link BlockPoolSliceScanner} is created which runs in a separate
@@ -47,6 +49,8 @@ public class DataBlockScanner implements Runnable {
   private final FsDatasetSpi<? extends FsVolumeSpi> dataset;
   private final Configuration conf;
 
+  static final int SLEEP_PERIOD_MS = 5 * 1000;
+
   /**
    * Map to find the BlockPoolScanner for a given block pool id. This is updated
    * when a BPOfferService becomes alive or dies.
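Reviewer note (not part of the patch): hoisting the literal 5000 into SLEEP_PERIOD_MS gives the scanner loop and the new test a single source of truth for the polling cadence. A minimal sketch of that pattern, using illustrative class names rather than the real HDFS types:

// Illustrative shared-constant pattern only; not the HDFS code.
class WorkerSketch {
  static final int SLEEP_PERIOD_MS = 5 * 1000;   // one place defines the cadence

  void runLoop() throws InterruptedException {
    while (!Thread.currentThread().isInterrupted()) {
      Thread.sleep(SLEEP_PERIOD_MS);             // the worker sleeps one period per iteration
      // ... perform one round of work ...
    }
  }
}

class WorkerSketchTest {
  void waitOnePeriod() throws InterruptedException {
    Thread.sleep(WorkerSketch.SLEEP_PERIOD_MS);  // the test waits the same period, no magic 5000
  }
}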
@@ -68,10 +72,10 @@ public class DataBlockScanner implements Runnable {
     String currentBpId = "";
     boolean firstRun = true;
     while (datanode.shouldRun && !Thread.interrupted()) {
-      //Sleep everytime except in the first interation.
+      //Sleep everytime except in the first iteration.
       if (!firstRun) {
         try {
-          Thread.sleep(5000);
+          Thread.sleep(SLEEP_PERIOD_MS);
         } catch (InterruptedException ex) {
           // Interrupt itself again to set the interrupt status
           blockScannerThread.interrupt();
@@ -103,7 +107,7 @@ public class DataBlockScanner implements Runnable {
     while ((getBlockPoolSetSize() < datanode.getAllBpOs().length)
         || (getBlockPoolSetSize() < 1)) {
       try {
-        Thread.sleep(5000);
+        Thread.sleep(SLEEP_PERIOD_MS);
       } catch (InterruptedException e) {
         blockScannerThread.interrupt();
         return;
@@ -249,7 +253,7 @@ public class DataBlockScanner implements Runnable {
     LOG.info("Removed bpid="+blockPoolId+" from blockPoolScannerMap");
   }
 
-  // This method is used for testing
+  @VisibleForTesting
   long getBlocksScannedInLastRun(String bpid) throws IOException {
     BlockPoolSliceScanner bpScanner = getBPScanner(bpid);
     if (bpScanner == null) {
@@ -259,6 +263,16 @@ public class DataBlockScanner implements Runnable {
     }
   }
 
+  @VisibleForTesting
+  long getTotalScans(String bpid) throws IOException {
+    BlockPoolSliceScanner bpScanner = getBPScanner(bpid);
+    if (bpScanner == null) {
+      throw new IOException("Block Pool: "+bpid+" is not running");
+    } else {
+      return bpScanner.getTotalScans();
+    }
+  }
+
   public void start() {
     blockScannerThread = new Thread(this);
     blockScannerThread.setDaemon(true);
TestMultipleNNDataBlockScanner.java
@@ -20,8 +20,11 @@ package org.apache.hadoop.hdfs.server.datanode;
 
 import java.io.IOException;
 
+import junit.framework.Assert;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -31,7 +34,13 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner;
+import static org.apache.hadoop.hdfs.server.datanode.DataBlockScanner.SLEEP_PERIOD_MS;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
 import org.junit.Test;
+import org.junit.Ignore;
+import static org.junit.Assert.fail;
 
 
 public class TestMultipleNNDataBlockScanner {
@@ -166,4 +175,75 @@ public class TestMultipleNNDataBlockScanner {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void test2NNBlockRescanInterval() throws IOException {
+    ((Log4JLogger)BlockPoolSliceScanner.LOG).getLogger().setLevel(Level.ALL);
+    Configuration conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(3))
+        .build();
+
+    try {
+      FileSystem fs = cluster.getFileSystem(1);
+      Path file2 = new Path("/test/testBlockScanInterval");
+      DFSTestUtil.createFile(fs, file2, 30, (short) 1, 0);
+
+      fs = cluster.getFileSystem(0);
+      Path file1 = new Path("/test/testBlockScanInterval");
+      DFSTestUtil.createFile(fs, file1, 30, (short) 1, 0);
+      for (int i = 0; i < 8; i++) {
+        LOG.info("Verifying that the blockscanner scans exactly once");
+        waitAndScanBlocks(1, 1);
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * HDFS-3828: DN rescans blocks too frequently
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testBlockRescanInterval() throws IOException {
+    ((Log4JLogger)BlockPoolSliceScanner.LOG).getLogger().setLevel(Level.ALL);
+    Configuration conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf).build();
+
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      Path file1 = new Path("/test/testBlockScanInterval");
+      DFSTestUtil.createFile(fs, file1, 30, (short) 1, 0);
+      for (int i = 0; i < 4; i++) {
+        LOG.info("Verifying that the blockscanner scans exactly once");
+        waitAndScanBlocks(1, 1);
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  void waitAndScanBlocks(long scansLastRun, long scansTotal)
+      throws IOException {
+    // DataBlockScanner will run for every 5 seconds so we are checking for
+    // every 5 seconds
+    int n = 5;
+    String bpid = cluster.getNamesystem(0).getBlockPoolId();
+    DataNode dn = cluster.getDataNodes().get(0);
+    long blocksScanned, total;
+    do {
+      try {
+        Thread.sleep(SLEEP_PERIOD_MS);
+      } catch (InterruptedException e) {
+        fail("Interrupted: " + e);
+      }
+      blocksScanned = dn.blockScanner.getBlocksScannedInLastRun(bpid);
+      total = dn.blockScanner.getTotalScans(bpid);
+      LOG.info("bpid = " + bpid + " blocksScanned = " + blocksScanned + " total=" + total);
+    } while (n-- > 0 && (blocksScanned != scansLastRun || scansTotal != total));
+    Assert.assertEquals(scansTotal, total);
+    Assert.assertEquals(scansLastRun, blocksScanned);
+  }
 }