From 946df98dce18975e37a6a14744ca7a5429f019ce Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Mon, 24 Nov 2014 10:46:33 -0800 Subject: [PATCH] HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in checkLeases (Ravi Prakash via Colin P. McCabe) (cherry picked from commit daacbc18d739d030822df0b75205eeb067f89850) --- .../hdfs/server/namenode/FSNamesystem.java | 20 +---- .../hdfs/server/namenode/LeaseManager.java | 82 +++++++++++++++---- .../server/namenode/TestLeaseManager.java | 25 ++++++ 3 files changed, 91 insertions(+), 36 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 41e07acaee7..77304ad882d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6226,26 +6226,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, // Calculate number of blocks under construction long numUCBlocks = 0; readLock(); + numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); try { - for (Lease lease : leaseManager.getSortedLeases()) { - for (String path : lease.getPaths()) { - final INodeFile cons; - try { - cons = dir.getINode(path).asFile(); - Preconditions.checkState(cons.isUnderConstruction()); - } catch (UnresolvedLinkException e) { - throw new AssertionError("Lease files should reside on this FS"); - } - BlockInfo[] blocks = cons.getBlocks(); - if(blocks == null) - continue; - for(BlockInfo b : blocks) { - if(!b.isComplete()) - numUCBlocks++; - } - } - } - LOG.info("Number of blocks under construction: " + numUCBlocks); return getBlocksTotal() - numUCBlocks; } finally { readUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index fea5b147db9..e13a5c67e9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -25,8 +25,9 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.NavigableSet; +import java.util.NoSuchElementException; import java.util.SortedMap; -import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; @@ -36,6 +37,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.util.Daemon; @@ -79,7 +81,7 @@ public class LeaseManager { // private final SortedMap leases = new TreeMap(); // Set of: Lease - private final SortedSet sortedLeases = new TreeSet(); + private final NavigableSet sortedLeases = new TreeSet(); // // Map path names to leases. It is protected by the sortedLeases lock. @@ -95,8 +97,41 @@ public class LeaseManager { Lease getLease(String holder) { return leases.get(holder); } - - SortedSet getSortedLeases() {return sortedLeases;} + + @VisibleForTesting + int getNumSortedLeases() {return sortedLeases.size();} + + /** + * This method iterates through all the leases and counts the number of blocks + * which are not COMPLETE. The FSNamesystem read lock MUST be held before + * calling this method. + * @return + */ + synchronized long getNumUnderConstructionBlocks() { + assert this.fsnamesystem.hasReadLock() : "The FSNamesystem read lock wasn't" + + "acquired before counting under construction blocks"; + long numUCBlocks = 0; + for (Lease lease : sortedLeases) { + for (String path : lease.getPaths()) { + final INodeFile cons; + try { + cons = this.fsnamesystem.getFSDirectory().getINode(path).asFile(); + Preconditions.checkState(cons.isUnderConstruction()); + } catch (UnresolvedLinkException e) { + throw new AssertionError("Lease files should reside on this FS"); + } + BlockInfo[] blocks = cons.getBlocks(); + if(blocks == null) + continue; + for(BlockInfo b : blocks) { + if(!b.isComplete()) + numUCBlocks++; + } + } + } + LOG.info("Number of blocks under construction: " + numUCBlocks); + return numUCBlocks; + } /** @return the lease containing src */ public Lease getLeaseByPath(String src) {return sortedLeasesByPath.get(src);} @@ -421,33 +456,38 @@ public class LeaseManager { /** Check the leases beginning from the oldest. * @return true is sync is needed. */ - private synchronized boolean checkLeases() { + @VisibleForTesting + synchronized boolean checkLeases() { boolean needSync = false; assert fsnamesystem.hasWriteLock(); - for(; sortedLeases.size() > 0; ) { - final Lease oldest = sortedLeases.first(); - if (!oldest.expiredHardLimit()) { - return needSync; + Lease leaseToCheck = null; + try { + leaseToCheck = sortedLeases.first(); + } catch(NoSuchElementException e) {} + + while(leaseToCheck != null) { + if (!leaseToCheck.expiredHardLimit()) { + break; } - LOG.info(oldest + " has expired hard limit"); + LOG.info(leaseToCheck + " has expired hard limit"); final List removing = new ArrayList(); - // need to create a copy of the oldest lease paths, becuase + // need to create a copy of the oldest lease paths, because // internalReleaseLease() removes paths corresponding to empty files, // i.e. it needs to modify the collection being iterated over // causing ConcurrentModificationException - String[] leasePaths = new String[oldest.getPaths().size()]; - oldest.getPaths().toArray(leasePaths); + String[] leasePaths = new String[leaseToCheck.getPaths().size()]; + leaseToCheck.getPaths().toArray(leasePaths); for(String p : leasePaths) { try { - boolean completed = fsnamesystem.internalReleaseLease(oldest, p, + boolean completed = fsnamesystem.internalReleaseLease(leaseToCheck, p, HdfsServerConstants.NAMENODE_LEASE_HOLDER); if (LOG.isDebugEnabled()) { if (completed) { LOG.debug("Lease recovery for " + p + " is complete. File closed."); } else { - LOG.debug("Started block recovery " + p + " lease " + oldest); + LOG.debug("Started block recovery " + p + " lease " + leaseToCheck); } } // If a lease recovery happened, we need to sync later. @@ -456,15 +496,23 @@ public class LeaseManager { } } catch (IOException e) { LOG.error("Cannot release the path " + p + " in the lease " - + oldest, e); + + leaseToCheck, e); removing.add(p); } } for(String p : removing) { - removeLease(oldest, p); + removeLease(leaseToCheck, p); } + leaseToCheck = sortedLeases.higher(leaseToCheck); } + + try { + if(leaseToCheck != sortedLeases.first()) { + LOG.warn("Unable to release hard-limit expired lease: " + + sortedLeases.first()); + } + } catch(NoSuchElementException e) {} return needSync; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java index 5c906b4ff43..9b454ea0662 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -54,4 +55,28 @@ public class TestLeaseManager { assertNull(lm.getLeaseByPath("/a/b")); assertNull(lm.getLeaseByPath("/a/c")); } + + /** Check that even if LeaseManager.checkLease is not able to relinquish + * leases, the Namenode does't enter an infinite loop while holding the FSN + * write lock and thus become unresponsive + */ + @Test (timeout=1000) + public void testCheckLeaseNotInfiniteLoop() { + FSNamesystem fsn = Mockito.mock(FSNamesystem.class); + Mockito.when(fsn.isRunning()).thenReturn(true); + Mockito.when(fsn.hasWriteLock()).thenReturn(true); + LeaseManager lm = new LeaseManager(fsn); + + //Make sure the leases we are going to add exceed the hard limit + lm.setLeasePeriod(0,0); + + //Add some leases to the LeaseManager + lm.addLease("holder1", "src1"); + lm.addLease("holder2", "src2"); + lm.addLease("holder3", "src3"); + assertEquals(lm.getNumSortedLeases(), 3); + + //Initiate a call to checkLease. This should exit within the test timeout + lm.checkLeases(); + } }