HDFS-16057. Make sure the order for location in ENTERING_MAINTENANCE state (#3084)

Reviewed-by: Wei-Chiu Chuang <weichiu@apache.org>
Authored by litao on 2021-06-11 16:30:55 +08:00; committed via GitHub.
parent 6e5692e7e2
commit 51ebf7b2a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 146 additions and 5 deletions

View File

@ -506,9 +506,8 @@ public class DatanodeManager {
}
private boolean isInactive(DatanodeInfo datanode) {
return datanode.isDecommissioned() ||
return datanode.isDecommissioned() || datanode.isEnteringMaintenance() ||
(avoidStaleDataNodesForRead && datanode.isStale(staleInterval));
}
/**
@ -572,8 +571,8 @@ public class DatanodeManager {
}
/**
* Move decommissioned/stale datanodes to the bottom. Also, sort nodes by
* network distance.
* Move decommissioned/entering_maintenance/stale datanodes to the bottom.
* Also, sort nodes by network distance.
*
* @param lb located block
* @param targetHost target host
@ -603,7 +602,7 @@ public class DatanodeManager {
}
DatanodeInfoWithStorage[] di = lb.getLocations();
// Move decommissioned/stale datanodes to the bottom
// Move decommissioned/entering_maintenance/stale datanodes to the bottom
Arrays.sort(di, comparator);
// Sort nodes by network distance only for located blocks

View File

@ -0,0 +1,142 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.blockmanagement;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.util.Time;
import org.junit.BeforeClass;
import org.junit.Test;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* This class tests the sorting of located blocks based on
* multiple states.
*/
public class TestSortLocatedBlock {
  static final Logger LOG = LoggerFactory
      .getLogger(TestSortLocatedBlock.class);

  // DatanodeManager under test; built once for all tests in setup().
  private static DatanodeManager dm;
  // Configured stale interval: 30 minutes, in milliseconds.
  private static final long STALE_INTERVAL = 30 * 1000 * 60;

  @BeforeClass
  public static void setup() throws IOException {
    dm = mockDatanodeManager();
  }

  /**
   * Test to verify sorting with multiple state
   * datanodes exists in storage lists.
   *
   * We have the following list of datanodes, and create LocatedBlock.
   * d0 - decommissioned
   * d1 - entering_maintenance
   * d2 - decommissioned
   * d3 - stale
   * d4 - live(in-service)
   *
   * After sorting the expected datanodes list will be:
   * live -> stale -> entering_maintenance -> decommissioned,
   * (d4 -> d3 -> d1 -> d0 -> d2)
   * or
   * (d4 -> d3 -> d1 -> d2 -> d0).
   */
  @Test(timeout = 30000)
  public void testWithMultipleStateDatanodes() {
    LOG.info("Starting test testWithMultipleStateDatanodes");
    long blockID = Long.MIN_VALUE;
    int totalDns = 5;
    DatanodeInfo[] locs = new DatanodeInfo[totalDns];

    // create datanodes, all initially live (recent heartbeat)
    for (int i = 0; i < totalDns; i++) {
      String ip = i + "." + i + "." + i + "." + i;
      locs[i] = DFSTestUtil.getDatanodeInfo(ip);
      locs[i].setLastUpdateMonotonic(Time.monotonicNow());
    }

    // set decommissioned state
    locs[0].setDecommissioned();
    locs[2].setDecommissioned();
    ArrayList<DatanodeInfo> decommissionedNodes = new ArrayList<>();
    decommissionedNodes.add(locs[0]);
    decommissionedNodes.add(locs[2]);

    // set entering_maintenance state
    locs[1].startMaintenance();

    // set stale state: push last update just past the default stale interval
    locs[3].setLastUpdateMonotonic(Time.monotonicNow() -
        DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_DEFAULT * 1000 - 1);

    ArrayList<LocatedBlock> locatedBlocks = new ArrayList<>();
    locatedBlocks.add(new LocatedBlock(
        new ExtendedBlock("pool", blockID,
            1024L, System.currentTimeMillis()), locs));

    // sort located blocks (null target host: only state-based ordering applies)
    dm.sortLocatedBlocks(null, locatedBlocks);

    // get locations after sorting
    LocatedBlock locatedBlock = locatedBlocks.get(0);
    DatanodeInfoWithStorage[] locations = locatedBlock.getLocations();

    // assert location order:
    // live -> stale -> entering_maintenance -> decommissioned
    // live
    assertEquals(locs[4].getIpAddr(), locations[0].getIpAddr());
    // stale
    assertEquals(locs[3].getIpAddr(), locations[1].getIpAddr());
    // entering_maintenance
    assertEquals(locs[1].getIpAddr(), locations[2].getIpAddr());
    // decommissioned nodes come last; their relative order is unspecified
    assertTrue(decommissionedNodes.contains(locations[3])
        && decommissionedNodes.contains(locations[4]));
  }

  /**
   * Build a DatanodeManager with stale-datanode avoidance enabled for reads
   * and the stale interval set to {@link #STALE_INTERVAL}, backed by mocked
   * FSNamesystem/BlockManager.
   */
  private static DatanodeManager mockDatanodeManager() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean(
        DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY,
        true);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
        STALE_INTERVAL);
    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
    BlockManager bm = Mockito.mock(BlockManager.class);
    BlockReportLeaseManager blm = new BlockReportLeaseManager(conf);
    Mockito.when(bm.getBlockReportLeaseManager()).thenReturn(blm);
    return new DatanodeManager(bm, fsn, conf);
  }
}