HDFS-13050: [SPS]: Create start/stop script to start external SPS process. Contributed by Surendra Singh Lilhore.

Authored by Rakesh Radhakrishnan on 2018-01-29 03:10:48 +05:30; committed by Uma Maheswara Rao Gangumalla
parent 99594b48b8
commit 5845c36c16
15 changed files with 259 additions and 128 deletions
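In practical terms, this patch wires a new `sps` sub-command into the `hdfs` launcher so the external StoragePolicySatisfier can run as a standalone daemon. A minimal usage sketch (the start form appears in the documentation change below; the status/stop forms are assumptions based on the generic `--daemon` handling that `HADOOP_SUBCMD_SUPPORTDAEMONIZATION` enables, not something spelled out in this patch):

    # start the external SPS process as a daemon
    hdfs --daemon start sps

    # assumed counterparts provided by the generic daemon handling
    hdfs --daemon status sps
    hdfs --daemon stop sps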

View File: bin/hdfs (shell command script)

@@ -63,6 +63,7 @@ function hadoop_usage
   hadoop_add_subcommand "secondarynamenode" daemon "run the DFS secondary namenode"
   hadoop_add_subcommand "snapshotDiff" client "diff two snapshots of a directory or diff the current directory contents with a snapshot"
   hadoop_add_subcommand "storagepolicies" admin "list/get/set/satisfyStoragePolicy block storage policies"
+  hadoop_add_subcommand "sps" daemon "run external storagepolicysatisfier"
   hadoop_add_subcommand "version" client "print the version"
   hadoop_add_subcommand "zkfc" daemon "run the ZK Failover Controller daemon"
   hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" false
@@ -201,6 +202,10 @@ function hdfscmd_case
     storagepolicies)
       HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.StoragePolicyAdmin
     ;;
+    sps)
+      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
+      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.sps.ExternalStoragePolicySatisfier
+    ;;
     version)
       HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo
     ;;

View File: BlockManager.java

@@ -94,6 +94,9 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.Namesystem;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
+import org.apache.hadoop.hdfs.server.namenode.sps.IntraSPSNameNodeBlockMoveTaskHandler;
+import org.apache.hadoop.hdfs.server.namenode.sps.IntraSPSNameNodeContext;
+import org.apache.hadoop.hdfs.server.namenode.sps.IntraSPSNameNodeFileIdCollector;
 import org.apache.hadoop.hdfs.server.namenode.sps.SPSPathIds;
 import org.apache.hadoop.hdfs.server.namenode.sps.SPSService;
 import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier;
@@ -5106,9 +5109,15 @@ public class BlockManager implements BlockStatsMXBean {
       return;
     }
     updateSPSMode(StoragePolicySatisfierMode.INTERNAL);
+    sps.init(new IntraSPSNameNodeContext(this.namesystem, this, sps),
+        new IntraSPSNameNodeFileIdCollector(this.namesystem.getFSDirectory(),
+            sps),
+        new IntraSPSNameNodeBlockMoveTaskHandler(this, this.namesystem), null);
     sps.start(true, spsMode);
   }

   /**
    * Enable storage policy satisfier by starting its service.
    */

View File: Mover.java

@@ -672,7 +672,7 @@ public class Mover {
       }
       if (spsRunning) {
         System.err.println("Mover failed due to StoragePolicySatisfier"
-            + " is running. Exiting with status "
+            + " service running inside namenode. Exiting with status "
             + ExitStatus.SKIPPED_DUE_TO_SPS + "... ");
         return ExitStatus.SKIPPED_DUE_TO_SPS.getExitCode();
       }

View File: Context.java

@@ -175,9 +175,4 @@ public interface Context {
    */
   String getFilePath(Long inodeId);
-
-  /**
-   * Close the resources.
-   */
-  void close() throws IOException;
 }

View File: IntraSPSNameNodeContext.java

@@ -196,8 +196,4 @@ public class IntraSPSNameNodeContext implements Context {
     return namesystem.getFilePath(inodeId);
   }
-
-  @Override
-  public void close() throws IOException {
-    // Nothing to clean.
-  }
 }

View File: IntraSPSNameNodeFileIdCollector.java

@@ -158,11 +158,15 @@ public class IntraSPSNameNodeFileIdCollector extends FSTreeTraverser
    */
   public synchronized int remainingCapacity() {
     int size = service.processingQueueSize();
-    if (size >= maxQueueLimitToScan) {
-      return 0;
-    } else {
-      return (maxQueueLimitToScan - size);
+    int remainingSize = 0;
+    if (size < maxQueueLimitToScan) {
+      remainingSize = maxQueueLimitToScan - size;
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("SPS processing Q -> maximum capacity:{}, current size:{},"
+          + " remaining size:{}", maxQueueLimitToScan, size, remainingSize);
+    }
+    return remainingSize;
   }

   class SPSTraverseInfo extends TraverseInfo {

View File: SPSPathIds.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 public class SPSPathIds {

   // List of pending dir to satisfy the policy
+  // TODO: Make this bounded queue.
   private final Queue<Long> spsDirsToBeTraveresed = new LinkedList<Long>();

   /**

View File: StoragePolicySatisfier.java

@@ -55,6 +55,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
 import org.apache.hadoop.hdfs.util.StripedBlockUtil;
 import org.apache.hadoop.util.Daemon;
+import org.apache.hadoop.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -174,10 +175,11 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
       return;
     }
     if (reconfigStart) {
-      LOG.info("Starting StoragePolicySatisfier, as admin requested to "
-          + "start it.");
+      LOG.info("Starting {} StoragePolicySatisfier, as admin requested to "
+          + "start it.", StringUtils.toLowerCase(spsMode.toString()));
     } else {
-      LOG.info("Starting StoragePolicySatisfier.");
+      LOG.info("Starting {} StoragePolicySatisfier.",
+          StringUtils.toLowerCase(spsMode.toString()));
     }

     // Ensure that all the previously submitted block movements(if any) have to
@@ -243,7 +245,14 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
   @Override
   public void run() {
-    while (ctxt.isRunning()) {
+    while (isRunning) {
+      // Check if dependent service is running
+      if (!ctxt.isRunning()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Upstream service is down, skipping the sps work.");
+        }
+        continue;
+      }
       try {
         if (!ctxt.isInSafeMode()) {
           ItemInfo itemInfo = storageMovementNeeded.get();
@@ -284,33 +293,39 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
           // Just add to monitor, so it will be tracked for report and
           // be removed on storage movement attempt finished report.
           case BLOCKS_TARGETS_PAIRED:
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Block analysis status:{} for the file path:{}."
+                  + " Adding to attempt monitor queue for the storage "
+                  + "movement attempt finished report",
+                  status.status, fileStatus.getPath());
+            }
             this.storageMovementsMonitor.add(new AttemptedItemInfo(itemInfo
                 .getStartId(), itemInfo.getFileId(), monotonicNow(),
                 status.assignedBlocks, itemInfo.getRetryCount()));
             break;
           case NO_BLOCKS_TARGETS_PAIRED:
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Adding trackID " + trackId
-                  + " back to retry queue as none of the blocks"
-                  + " found its eligible targets.");
+              LOG.debug("Adding trackID:{} for the file path:{} back to"
+                  + " retry queue as none of the blocks found its eligible"
+                  + " targets.", trackId, fileStatus.getPath());
             }
             itemInfo.increRetryCount();
             this.storageMovementNeeded.add(itemInfo);
             break;
           case FEW_LOW_REDUNDANCY_BLOCKS:
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Adding trackID " + trackId
-                  + " back to retry queue as some of the blocks"
-                  + " are low redundant.");
+              LOG.debug("Adding trackID:{} for the file path:{} back to "
+                  + "retry queue as some of the blocks are low redundant.",
+                  trackId, fileStatus.getPath());
             }
             itemInfo.increRetryCount();
             this.storageMovementNeeded.add(itemInfo);
             break;
           case BLOCKS_FAILED_TO_MOVE:
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Adding trackID " + trackId
-                  + " back to retry queue as some of the blocks"
-                  + " movement failed.");
+              LOG.debug("Adding trackID:{} for the file path:{} back to "
+                  + "retry queue as some of the blocks movement failed.",
+                  trackId, fileStatus.getPath());
             }
             this.storageMovementNeeded.add(itemInfo);
             break;
@@ -318,8 +333,9 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
           case BLOCKS_TARGET_PAIRING_SKIPPED:
           case BLOCKS_ALREADY_SATISFIED:
           default:
-            LOG.info("Block analysis skipped or blocks already satisfied"
-                + " with storages. So, Cleaning up the Xattrs.");
+            LOG.info("Block analysis status:{} for the file path:{}."
+                + " So, Cleaning up the Xattrs.", status.status,
+                fileStatus.getPath());
             storageMovementNeeded.removeItemTrackInfo(itemInfo, true);
             break;
           }
@@ -346,20 +362,20 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
       if (isRunning) {
         synchronized (this) {
           if (isRunning) {
-            isRunning = false;
-            // Stopping monitor thread and clearing queues as well
-            this.clearQueues();
-            this.storageMovementsMonitor.stopGracefully();
-            if (!(t instanceof InterruptedException)) {
-              LOG.info("StoragePolicySatisfier received an exception"
-                  + " while shutting down.", t);
+            if (t instanceof InterruptedException) {
+              isRunning = false;
+              LOG.info("Stopping StoragePolicySatisfier.");
+              // Stopping monitor thread and clearing queues as well
+              this.clearQueues();
+              this.storageMovementsMonitor.stopGracefully();
+            } else {
+              LOG.error(
+                  "StoragePolicySatisfier thread received runtime exception, "
+                      + "ignoring", t);
             }
-            LOG.info("Stopping StoragePolicySatisfier.");
           }
         }
       }
-      LOG.error("StoragePolicySatisfier thread received runtime exception. "
-          + "Stopping Storage policy satisfier work", t);
       return;
     }
@@ -374,9 +390,8 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
     final boolean lastBlkComplete = locatedBlocks.isLastBlockComplete();
     if (!lastBlkComplete) {
       // Postpone, currently file is under construction
-      // So, should we add back? or leave it to user
-      LOG.info("BlockCollectionID: {} file is under construction. So, postpone"
-          + " this to the next retry iteration", fileInfo.getFileId());
+      LOG.info("File: {} is under construction. So, postpone"
+          + " this to the next retry iteration", fileInfo.getPath());
       return new BlocksMovingAnalysis(
           BlocksMovingAnalysis.Status.ANALYSIS_SKIPPED_FOR_RETRY,
           new ArrayList<>());
@@ -384,8 +399,8 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
     List<LocatedBlock> blocks = locatedBlocks.getLocatedBlocks();
     if (blocks.size() == 0) {
-      LOG.info("BlockCollectionID: {} file is not having any blocks."
-          + " So, skipping the analysis.", fileInfo.getFileId());
+      LOG.info("File: {} is not having any blocks."
+          + " So, skipping the analysis.", fileInfo.getPath());
       return new BlocksMovingAnalysis(
           BlocksMovingAnalysis.Status.BLOCKS_TARGET_PAIRING_SKIPPED,
           new ArrayList<>());
@@ -970,4 +985,12 @@ public class StoragePolicySatisfier implements SPSService, Runnable {
   public void markScanCompletedForPath(Long inodeId) {
     getStorageMovementQueue().markScanCompletedForDir(inodeId);
   }
+
+  /**
+   * Join main SPS thread.
+   */
+  public void join() throws InterruptedException {
+    //TODO Add join here on SPS rpc server also
+    storagePolicySatisfierThread.join();
+  }
 }

View File: ExternalSPSBlockMoveTaskHandler.java

@@ -110,7 +110,7 @@ public class ExternalSPSBlockMoveTaskHandler implements BlockMoveTaskHandler {
   /**
    * Initializes block movement tracker daemon and starts the thread.
    */
-  void init() {
+  public void init() {
     movementTrackerThread = new Daemon(this.blkMovementTracker);
     movementTrackerThread.setName("BlockStorageMovementTracker");
     movementTrackerThread.start();

View File: ExternalSPSContext.java

@@ -19,19 +19,13 @@
 package org.apache.hadoop.hdfs.server.sps;

 import java.io.IOException;
-import java.net.URI;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;

 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.fs.UnresolvedLinkException;
-import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
@@ -57,13 +51,12 @@ public class ExternalSPSContext implements Context {
       LoggerFactory.getLogger(ExternalSPSContext.class);
   private SPSService service;
   private NameNodeConnector nnc = null;
-  private Object nnConnectionLock = new Object();
   private BlockStoragePolicySuite createDefaultSuite =
       BlockStoragePolicySuite.createDefaultSuite();

-  public ExternalSPSContext(SPSService service) {
+  public ExternalSPSContext(SPSService service, NameNodeConnector nnc) {
     this.service = service;
-    initializeNamenodeConnector();
+    this.nnc = nnc;
   }

   @Override
@@ -73,7 +66,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public boolean isInSafeMode() {
-    initializeNamenodeConnector();
     try {
       return nnc != null ? nnc.getDistributedFileSystem().isInSafeMode()
           : false;
@@ -85,7 +77,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public boolean isMoverRunning() {
-    initializeNamenodeConnector();
     try {
       FSDataOutputStream out = nnc.getDistributedFileSystem()
           .append(HdfsServerConstants.MOVER_ID_PATH);
@@ -101,7 +92,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public long getFileID(String path) throws UnresolvedLinkException,
       AccessControlException, ParentNotDirectoryException {
-    initializeNamenodeConnector();
     HdfsFileStatus fs = null;
     try {
       fs = (HdfsFileStatus) nnc.getDistributedFileSystem().getFileStatus(
@@ -121,7 +111,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public boolean isFileExist(long inodeId) {
-    initializeNamenodeConnector();
     String filePath = null;
     try {
       filePath = getFilePath(inodeId);
@@ -145,14 +134,12 @@ public class ExternalSPSContext implements Context {
   @Override
   public void removeSPSHint(long inodeId) throws IOException {
-    initializeNamenodeConnector();
     nnc.getDistributedFileSystem().removeXAttr(new Path(getFilePath(inodeId)),
         HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY);
   }

   @Override
   public int getNumLiveDataNodes() {
-    initializeNamenodeConnector();
     try {
       return nnc.getDistributedFileSystem()
           .getDataNodeStats(DatanodeReportType.LIVE).length;
@@ -164,7 +151,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public HdfsFileStatus getFileInfo(long inodeID) throws IOException {
-    initializeNamenodeConnector();
     return nnc.getDistributedFileSystem().getClient()
         .getLocatedFileInfo(getFilePath(inodeID), false);
   }
@@ -172,13 +158,11 @@ public class ExternalSPSContext implements Context {
   @Override
   public DatanodeStorageReport[] getLiveDatanodeStorageReport()
       throws IOException {
-    initializeNamenodeConnector();
     return nnc.getLiveDatanodeStorageReport();
   }

   @Override
   public boolean hasLowRedundancyBlocks(long inodeID) {
-    initializeNamenodeConnector();
     try {
       return nnc.getNNProtocolConnection().hasLowRedundancyBlocks(inodeID);
     } catch (IOException e) {
@@ -191,7 +175,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public boolean checkDNSpaceForScheduling(DatanodeInfo dn, StorageType type,
       long estimatedSize) {
-    initializeNamenodeConnector();
     try {
       return nnc.getNNProtocolConnection().checkDNSpaceForScheduling(dn, type,
           estimatedSize);
@@ -204,7 +187,6 @@ public class ExternalSPSContext implements Context {
   @Override
   public Long getNextSPSPathId() {
-    initializeNamenodeConnector();
     try {
       return nnc.getNNProtocolConnection().getNextSPSPathId();
     } catch (IOException e) {
@@ -233,39 +215,4 @@ public class ExternalSPSContext implements Context {
       return null;
     }
   }
-
-  @Override
-  public void close() throws IOException {
-    synchronized (nnConnectionLock) {
-      if (nnc != null) {
-        nnc.close();
-      }
-    }
-  }
-
-  private void initializeNamenodeConnector() {
-    synchronized (nnConnectionLock) {
-      if (nnc == null) {
-        try {
-          nnc = getNameNodeConnector(service.getConf());
-        } catch (IOException e) {
-          LOG.warn("Exception while creating Namenode Connector.."
-              + "Namenode might not have started.", e);
-        }
-      }
-    }
-  }
-
-  public static NameNodeConnector getNameNodeConnector(Configuration conf)
-      throws IOException {
-    final Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
-    List<NameNodeConnector> nncs = Collections.emptyList();
-    NameNodeConnector.checkOtherInstanceRunning(false);
-    nncs = NameNodeConnector.newNameNodeConnectors(namenodes,
-        ExternalSPSContext.class.getSimpleName(),
-        HdfsServerConstants.MOVER_ID_PATH, conf,
-        NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
-    return nncs.get(0);
-  }
 }

View File: ExternalSPSFileIDCollector.java

@@ -139,11 +139,15 @@ public class ExternalSPSFileIDCollector implements FileIdCollector {
    */
   public int remainingCapacity() {
     int size = service.processingQueueSize();
-    if (size >= maxQueueLimitToScan) {
-      return 0;
-    } else {
-      return (maxQueueLimitToScan - size);
+    int remainingSize = 0;
+    if (size < maxQueueLimitToScan) {
+      remainingSize = maxQueueLimitToScan - size;
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("SPS processing Q -> maximum capacity:{}, current size:{},"
+          + " remaining size:{}", maxQueueLimitToScan, size, remainingSize);
+    }
+    return remainingSize;
   }

   @Override

View File: ExternalStoragePolicySatisfier.java (new file)

@@ -0,0 +1,130 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.sps;

import static org.apache.hadoop.util.ExitUtil.terminate;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode;
import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.namenode.sps.BlockMovementListener;
import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class starts and runs external SPS service.
 */
@InterfaceAudience.Private
public class ExternalStoragePolicySatisfier {
  public static final Logger LOG = LoggerFactory
      .getLogger(ExternalStoragePolicySatisfier.class);

  /**
   * Main method to start SPS service.
   */
  public static void main(String args[]) throws Exception {
    NameNodeConnector nnc = null;
    try {
      StringUtils.startupShutdownMessage(StoragePolicySatisfier.class, args,
          LOG);
      HdfsConfiguration spsConf = new HdfsConfiguration();
      //TODO : login with SPS keytab
      StoragePolicySatisfier sps = new StoragePolicySatisfier(spsConf);
      nnc = getNameNodeConnector(spsConf);

      boolean spsRunning;
      spsRunning = nnc.getDistributedFileSystem().getClient()
          .isStoragePolicySatisfierRunning();
      if (spsRunning) {
        throw new RuntimeException(
            "Startup failed due to StoragePolicySatisfier"
                + " running inside Namenode.");
      }

      ExternalSPSContext context = new ExternalSPSContext(sps, nnc);
      ExternalBlockMovementListener blkMoveListener =
          new ExternalBlockMovementListener();
      ExternalSPSBlockMoveTaskHandler externalHandler =
          new ExternalSPSBlockMoveTaskHandler(spsConf, nnc, sps);
      externalHandler.init();
      sps.init(context, new ExternalSPSFileIDCollector(context, sps),
          externalHandler, blkMoveListener);
      sps.start(true, StoragePolicySatisfierMode.EXTERNAL);
      if (sps != null) {
        sps.join();
      }
    } catch (Throwable e) {
      LOG.error("Failed to start storage policy satisfier.", e);
      terminate(1, e);
    } finally {
      if (nnc != null) {
        nnc.close();
      }
    }
  }

  private static NameNodeConnector getNameNodeConnector(Configuration conf)
      throws IOException, InterruptedException {
    final Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
    final Path externalSPSPathId = HdfsServerConstants.MOVER_ID_PATH;
    while (true) {
      try {
        final List<NameNodeConnector> nncs = NameNodeConnector
            .newNameNodeConnectors(namenodes,
                StoragePolicySatisfier.class.getSimpleName(),
                externalSPSPathId, conf,
                NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
        return nncs.get(0);
      } catch (IOException e) {
        LOG.warn("Failed to connect with namenode", e);
        Thread.sleep(3000); // retry the connection after few secs
      }
    }
  }

  /**
   * It is implementation of BlockMovementListener.
   */
  private static class ExternalBlockMovementListener
      implements BlockMovementListener {

    private List<Block> actualBlockMovements = new ArrayList<>();

    @Override
    public void notifyMovementTriedBlocks(Block[] moveAttemptFinishedBlks) {
      for (Block block : moveAttemptFinishedBlks) {
        actualBlockMovements.add(block);
      }
      LOG.info("Movement attempted blocks", actualBlockMovements);
    }
  }
}

View File: ArchivalStorage.md

@@ -238,5 +238,13 @@ Check the running status of Storage Policy Satisfier service in namenode. If it
 ### Enable(internal service inside NN or external service outside NN) or Disable SPS without restarting Namenode
 If administrator wants to switch modes of SPS feature while Namenode is running, first he/she needs to update the desired value(internal or external or none) for the configuration item `dfs.storage.policy.satisfier.mode` in configuration file (`hdfs-site.xml`) and then run the following Namenode reconfig command

-+       hdfs dfsadmin -reconfig namenode <host:ipc_port> start
+* Command:
+
+        hdfs dfsadmin -reconfig namenode <host:ipc_port> start
+
+### Start External SPS Service.
+If administrator wants to start external sps, first he/she needs to configure property `dfs.storage.policy.satisfier.mode` with `external` value in configuration file (`hdfs-site.xml`) and then run Namenode reconfig command. After this start external sps service using following command
+
+* Command:
+
+        hdfs --daemon start sps
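For context, a possible end-to-end admin sequence implied by the documentation change above, assuming `dfs.storage.policy.satisfier.mode` has already been set to `external` in `hdfs-site.xml`; the host and port are placeholders, and the status check is an assumption about the standard reconfig sub-commands rather than something added by this patch:

    # push the updated dfs.storage.policy.satisfier.mode to the running Namenode
    hdfs dfsadmin -reconfig namenode nn-host:8020 start
    hdfs dfsadmin -reconfig namenode nn-host:8020 status

    # then launch the external SPS daemon
    hdfs --daemon start sps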

View File: TestStoragePolicySatisfier.java

@@ -603,7 +603,7 @@ public class TestStoragePolicySatisfier {
       if (out != null) {
         out.close();
       }
-      hdfsCluster.shutdown();
+      shutdownCluster();
     }
   }
@@ -626,9 +626,7 @@ public class TestStoragePolicySatisfier {
       Assert.assertTrue("SPS should be running as "
           + "no Mover really running", running);
     } finally {
-      if (hdfsCluster != null) {
-        hdfsCluster.shutdown();
-      }
+      shutdownCluster();
     }
   }
@@ -672,9 +670,7 @@ public class TestStoragePolicySatisfier {
       DFSTestUtil.waitExpectedStorageType(
           file1, StorageType.DISK, 2, 30000, dfs);
     } finally {
-      if (hdfsCluster != null) {
-        hdfsCluster.shutdown();
-      }
+      shutdownCluster();
     }
   }
@@ -1381,7 +1377,11 @@ public class TestStoragePolicySatisfier {
     // Remove 10 element and make queue free, So other traversing will start.
     for (int i = 0; i < 10; i++) {
       String path = expectedTraverseOrder.remove(0);
-      long trackId = sps.getStorageMovementQueue().get().getFileId();
+      ItemInfo itemInfo = sps.getStorageMovementQueue().get();
+      if (itemInfo == null) {
+        continue;
+      }
+      long trackId = itemInfo.getFileId();
       INode inode = fsDir.getInode(trackId);
       assertTrue("Failed to traverse tree, expected " + path + " but got "
           + inode.getFullPathName(), path.equals(inode.getFullPathName()));
@@ -1392,7 +1392,11 @@ public class TestStoragePolicySatisfier {
     // Check other element traversed in order and E, M, U, R, S should not be
     // added in queue which we already removed from expected list
     for (String path : expectedTraverseOrder) {
-      long trackId = sps.getStorageMovementQueue().get().getFileId();
+      ItemInfo itemInfo = sps.getStorageMovementQueue().get();
+      if (itemInfo == null) {
+        continue;
+      }
+      long trackId = itemInfo.getFileId();
       INode inode = fsDir.getInode(trackId);
       assertTrue("Failed to traverse tree, expected " + path + " but got "
           + inode.getFullPathName(), path.equals(inode.getFullPathName()));

View File: TestExternalStoragePolicySatisfier.java

@@ -22,7 +22,6 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
-import java.util.Map;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -43,8 +42,6 @@ import org.apache.hadoop.hdfs.server.namenode.sps.TestStoragePolicySatisfier;
 import org.junit.Assert;
 import org.junit.Ignore;

-import com.google.common.collect.Maps;
-
 /**
  * Tests the external sps service plugins.
  */
@@ -95,7 +92,8 @@ public class TestExternalStoragePolicySatisfier
     SPSService spsService = blkMgr.getSPSService();
     spsService.stopGracefully();

-    ExternalSPSContext context = new ExternalSPSContext(spsService);
+    ExternalSPSContext context = new ExternalSPSContext(spsService,
+        getNameNodeConnector(conf));

     ExternalBlockMovementListener blkMoveListener =
         new ExternalBlockMovementListener();
@@ -124,7 +122,8 @@ public class TestExternalStoragePolicySatisfier
     spsService = blkMgr.getSPSService();
     spsService.stopGracefully();

-    ExternalSPSContext context = new ExternalSPSContext(spsService);
+    ExternalSPSContext context = new ExternalSPSContext(spsService,
+        getNameNodeConnector(getConf()));
     ExternalBlockMovementListener blkMoveListener =
         new ExternalBlockMovementListener();
     ExternalSPSBlockMoveTaskHandler externalHandler =
@@ -161,16 +160,22 @@ public class TestExternalStoragePolicySatisfier
       throws IOException {
     final Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
     Assert.assertEquals(1, namenodes.size());
-    Map<URI, List<Path>> nnMap = Maps.newHashMap();
-    for (URI nn : namenodes) {
-      nnMap.put(nn, null);
-    }
     final Path externalSPSPathId = new Path("/system/tmp.id");
-    final List<NameNodeConnector> nncs = NameNodeConnector
-        .newNameNodeConnectors(nnMap,
-            StoragePolicySatisfier.class.getSimpleName(), externalSPSPathId,
-            conf, NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
-    return nncs.get(0);
+    NameNodeConnector.checkOtherInstanceRunning(false);
+    while (true) {
+      try {
+        final List<NameNodeConnector> nncs = NameNodeConnector
+            .newNameNodeConnectors(namenodes,
+                StoragePolicySatisfier.class.getSimpleName(),
+                externalSPSPathId, conf,
+                NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
+        return nncs.get(0);
+      } catch (IOException e) {
+        LOG.warn("Failed to connect with namenode", e);
+        // Ignore
+      }
+    }
   }

   /**