HDFS-15207. VolumeScanner skip to scan blocks accessed during recent scan peroid. Contributed by Yang Yun.

This commit is contained in:
Wei-Chiu Chuang 2020-05-18 08:40:38 -07:00
parent a3f44dacc1
commit 50caba1a92
5 changed files with 80 additions and 0 deletions

View File

@ -846,6 +846,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT = 21 * 24; // 3 weeks.
public static final String DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND = "dfs.block.scanner.volume.bytes.per.second";
public static final long DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT = 1048576L;
public static final String DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED =
"dfs.block.scanner.skip.recent.accessed";
public static final boolean DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT =
false;
public static final String DFS_DATANODE_TRANSFERTO_ALLOWED_KEY = "dfs.datanode.transferTo.allowed";
public static final boolean DFS_DATANODE_TRANSFERTO_ALLOWED_DEFAULT = true;
public static final String DFS_HEARTBEAT_INTERVAL_KEY = "dfs.heartbeat.interval";

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.hdfs.server.datanode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY;
@ -112,6 +114,7 @@ public class BlockScanner {
final long maxStalenessMs;
final long scanPeriodMs;
final long cursorSaveMs;
final boolean skipRecentAccessed;
final Class<? extends ScanResultHandler> resultHandler;
private static long getUnitTestLong(Configuration conf, String key,
@ -163,6 +166,9 @@ public class BlockScanner {
this.cursorSaveMs = Math.max(0L, getUnitTestLong(conf,
INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS,
INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS_DEFAULT));
this.skipRecentAccessed = conf.getBoolean(
DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED,
DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT);
if (allowUnitTestSettings) {
this.resultHandler = (Class<? extends ScanResultHandler>)
conf.getClass(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,

View File

@ -19,8 +19,11 @@
package org.apache.hadoop.hdfs.server.datanode;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
@ -32,6 +35,7 @@ import com.google.common.base.Preconditions;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
@ -540,6 +544,24 @@ public class VolumeScanner extends Thread {
this, curBlockIter.getBlockPoolId());
saveBlockIterator(curBlockIter);
return 0;
} else if (conf.skipRecentAccessed) {
// Check the access time of the block file to avoid scanning blocks that
// were accessed within the last scan period, reducing disk IO.
try {
BlockLocalPathInfo blockLocalPathInfo =
volume.getDataset().getBlockLocalPathInfo(block);
BasicFileAttributes attr = Files.readAttributes(
new File(blockLocalPathInfo.getBlockPath()).toPath(),
BasicFileAttributes.class);
if (System.currentTimeMillis() - attr.lastAccessTime().
to(TimeUnit.MILLISECONDS) < conf.scanPeriodMs) {
return 0;
}
} catch (IOException ioe) {
LOG.debug("Failed to get access time of block {}",
block, ioe);
}
}
}
if (curBlockIter != null) {

View File

@ -1586,6 +1586,16 @@
</description>
</property>
<property>
<name>dfs.block.scanner.skip.recent.accessed</name>
<value>false</value>
<description>
If this is true, the scanner will check the access time of each block file
to avoid scanning blocks accessed during the most recent scan period,
reducing disk IO. This feature will not work if the DataNode volume is
mounted with the noatime option.
</description>
</property>
<property>
<name>dfs.datanode.readahead.bytes</name>
<value>4194304</value>

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.datanode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND;
import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_DATANODE_SCAN_PERIOD_MS;
@ -25,6 +26,7 @@ import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;
import java.io.Closeable;
import java.io.File;
@ -974,4 +976,40 @@ public class TestBlockScanner {
info.blocksScanned = 0;
}
}
/**
 * Verifies that no block is reported as scanned when
 * {@code DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED} is enabled and the block
 * files were created (and therefore accessed) moments before the scanner
 * is allowed to run.
 *
 * The test expects the wait for a scanned block to time out; a scan being
 * reported within the wait window is a failure.
 */
@Test
public void testSkipRecentAccessFile() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED, true);
  conf.setLong(INTERNAL_DFS_DATANODE_SCAN_PERIOD_MS, 2000L);
  conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
      TestScanResultHandler.class.getName());
  final TestContext ctx = new TestContext(conf, 1);
  // Always release the test context, even when an assertion below fails,
  // so a failure here cannot leak resources into subsequent tests.
  try {
    final int totalBlocks = 5;
    ctx.createFiles(0, totalBlocks, 4096);
    final TestScanResultHandler.Info info =
        TestScanResultHandler.getInfo(ctx.volumes.get(0));
    // Let the result handler start processing scan results.
    synchronized (info) {
      info.shouldRun = true;
      info.notify();
    }
    try {
      GenericTestUtils.waitFor(() -> {
        synchronized (info) {
          return info.blocksScanned > 0;
        }
      }, 10, 500);
      // Reaching this point means a block was scanned within the window.
      fail("Scan nothing for all files are accessed in last period.");
    } catch (TimeoutException e) {
      // Expected path: nothing was scanned before the wait expired.
      LOG.debug("Timeout for all files are accessed in last period.");
    }
    // Stop the handler and take the final count under the same monitor
    // used for every other access to the shared Info object.
    final long scanned;
    synchronized (info) {
      info.shouldRun = false;
      info.notify();
      scanned = info.blocksScanned;
    }
    assertEquals("Should not scan block accessed in last period",
        0, scanned);
  } finally {
    ctx.close();
  }
}
}