HADOOP-14412. HostsFileReader#getHostDetails is very expensive on large clusters. Contributed by Jason Lowe.

This commit is contained in:
Rohith Sharma K S 2017-05-17 08:26:07 +05:30
parent 0e95ff78c3
commit 379e5bd26f
3 changed files with 161 additions and 151 deletions

View File

@ -20,13 +20,12 @@ package org.apache.hadoop.util;
import java.io.*; import java.io.*;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Set; import java.util.Set;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory;
@ -48,39 +47,26 @@ import org.xml.sax.SAXException;
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Unstable @InterfaceStability.Unstable
public class HostsFileReader { public class HostsFileReader {
private Set<String> includes;
// exclude host list with optional timeout.
// If the value is null, it indicates default timeout.
private Map<String, Integer> excludes;
private String includesFile;
private String excludesFile;
private WriteLock writeLock;
private ReadLock readLock;
private static final Log LOG = LogFactory.getLog(HostsFileReader.class); private static final Log LOG = LogFactory.getLog(HostsFileReader.class);
private final AtomicReference<HostDetails> current;
public HostsFileReader(String inFile, public HostsFileReader(String inFile,
String exFile) throws IOException { String exFile) throws IOException {
includes = new HashSet<String>(); HostDetails hostDetails = new HostDetails(
excludes = new HashMap<String, Integer>(); inFile, Collections.<String>emptySet(),
includesFile = inFile; exFile, Collections.<String, Integer>emptyMap());
excludesFile = exFile; current = new AtomicReference<>(hostDetails);
ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); refresh(inFile, exFile);
this.writeLock = rwLock.writeLock();
this.readLock = rwLock.readLock();
refresh();
} }
@Private @Private
public HostsFileReader(String includesFile, InputStream inFileInputStream, public HostsFileReader(String includesFile, InputStream inFileInputStream,
String excludesFile, InputStream exFileInputStream) throws IOException { String excludesFile, InputStream exFileInputStream) throws IOException {
includes = new HashSet<String>(); HostDetails hostDetails = new HostDetails(
excludes = new HashMap<String, Integer>(); includesFile, Collections.<String>emptySet(),
this.includesFile = includesFile; excludesFile, Collections.<String, Integer>emptyMap());
this.excludesFile = excludesFile; current = new AtomicReference<>(hostDetails);
ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
this.writeLock = rwLock.writeLock();
this.readLock = rwLock.readLock();
refresh(inFileInputStream, exFileInputStream); refresh(inFileInputStream, exFileInputStream);
} }
@ -126,12 +112,8 @@ public class HostsFileReader {
} }
public void refresh() throws IOException { public void refresh() throws IOException {
this.writeLock.lock(); HostDetails hostDetails = current.get();
try { refresh(hostDetails.includesFile, hostDetails.excludesFile);
refresh(includesFile, excludesFile);
} finally {
this.writeLock.unlock();
}
} }
public static void readFileToMap(String type, public static void readFileToMap(String type,
@ -201,128 +183,163 @@ public class HostsFileReader {
return (nodes.getLength() == 0)? null : nodes.item(0).getTextContent(); return (nodes.getLength() == 0)? null : nodes.item(0).getTextContent();
} }
public void refresh(String includeFiles, String excludeFiles) public void refresh(String includesFile, String excludesFile)
throws IOException { throws IOException {
LOG.info("Refreshing hosts (include/exclude) list"); LOG.info("Refreshing hosts (include/exclude) list");
this.writeLock.lock(); HostDetails oldDetails = current.get();
try { Set<String> newIncludes = oldDetails.includes;
// update instance variables Map<String, Integer> newExcludes = oldDetails.excludes;
updateFileNames(includeFiles, excludeFiles); if (includesFile != null && !includesFile.isEmpty()) {
Set<String> newIncludes = new HashSet<String>(); newIncludes = new HashSet<>();
Map<String, Integer> newExcludes = new HashMap<String, Integer>(); readFileToSet("included", includesFile, newIncludes);
boolean switchIncludes = false; newIncludes = Collections.unmodifiableSet(newIncludes);
boolean switchExcludes = false;
if (includeFiles != null && !includeFiles.isEmpty()) {
readFileToSet("included", includeFiles, newIncludes);
switchIncludes = true;
} }
if (excludeFiles != null && !excludeFiles.isEmpty()) { if (excludesFile != null && !excludesFile.isEmpty()) {
readFileToMap("excluded", excludeFiles, newExcludes); newExcludes = new HashMap<>();
switchExcludes = true; readFileToMap("excluded", excludesFile, newExcludes);
} newExcludes = Collections.unmodifiableMap(newExcludes);
if (switchIncludes) {
// switch the new hosts that are to be included
includes = newIncludes;
}
if (switchExcludes) {
// switch the excluded hosts
excludes = newExcludes;
}
} finally {
this.writeLock.unlock();
} }
HostDetails newDetails = new HostDetails(includesFile, newIncludes,
excludesFile, newExcludes);
current.set(newDetails);
} }
@Private @Private
public void refresh(InputStream inFileInputStream, public void refresh(InputStream inFileInputStream,
InputStream exFileInputStream) throws IOException { InputStream exFileInputStream) throws IOException {
LOG.info("Refreshing hosts (include/exclude) list"); LOG.info("Refreshing hosts (include/exclude) list");
this.writeLock.lock(); HostDetails oldDetails = current.get();
try { Set<String> newIncludes = oldDetails.includes;
Set<String> newIncludes = new HashSet<String>(); Map<String, Integer> newExcludes = oldDetails.excludes;
Map<String, Integer> newExcludes = new HashMap<String, Integer>();
boolean switchIncludes = false;
boolean switchExcludes = false;
if (inFileInputStream != null) { if (inFileInputStream != null) {
readFileToSetWithFileInputStream("included", includesFile, newIncludes = new HashSet<>();
readFileToSetWithFileInputStream("included", oldDetails.includesFile,
inFileInputStream, newIncludes); inFileInputStream, newIncludes);
switchIncludes = true; newIncludes = Collections.unmodifiableSet(newIncludes);
} }
if (exFileInputStream != null) { if (exFileInputStream != null) {
readFileToMapWithFileInputStream("excluded", excludesFile, newExcludes = new HashMap<>();
readFileToMapWithFileInputStream("excluded", oldDetails.excludesFile,
exFileInputStream, newExcludes); exFileInputStream, newExcludes);
switchExcludes = true; newExcludes = Collections.unmodifiableMap(newExcludes);
}
if (switchIncludes) {
// switch the new hosts that are to be included
includes = newIncludes;
}
if (switchExcludes) {
// switch the excluded hosts
excludes = newExcludes;
}
} finally {
this.writeLock.unlock();
} }
HostDetails newDetails = new HostDetails(
oldDetails.includesFile, newIncludes,
oldDetails.excludesFile, newExcludes);
current.set(newDetails);
} }
public Set<String> getHosts() { public Set<String> getHosts() {
this.readLock.lock(); HostDetails hostDetails = current.get();
try { return hostDetails.getIncludedHosts();
return includes;
} finally {
this.readLock.unlock();
}
} }
public Set<String> getExcludedHosts() { public Set<String> getExcludedHosts() {
this.readLock.lock(); HostDetails hostDetails = current.get();
try { return hostDetails.getExcludedHosts();
return excludes.keySet();
} finally {
this.readLock.unlock();
}
} }
/**
* Retrieve an atomic view of the included and excluded hosts.
*
* @param includes set to populate with included hosts
* @param excludes set to populate with excluded hosts
* @deprecated use {@link #getHostDetails() instead}
*/
@Deprecated
public void getHostDetails(Set<String> includes, Set<String> excludes) { public void getHostDetails(Set<String> includes, Set<String> excludes) {
this.readLock.lock(); HostDetails hostDetails = current.get();
try { includes.addAll(hostDetails.getIncludedHosts());
includes.addAll(this.includes); excludes.addAll(hostDetails.getExcludedHosts());
excludes.addAll(this.excludes.keySet());
} finally {
this.readLock.unlock();
}
} }
/**
* Retrieve an atomic view of the included and excluded hosts.
*
* @param includeHosts set to populate with included hosts
* @param excludeHosts map to populate with excluded hosts
* @deprecated use {@link #getHostDetails() instead}
*/
@Deprecated
public void getHostDetails(Set<String> includeHosts, public void getHostDetails(Set<String> includeHosts,
Map<String, Integer> excludeHosts) { Map<String, Integer> excludeHosts) {
this.readLock.lock(); HostDetails hostDetails = current.get();
try { includeHosts.addAll(hostDetails.getIncludedHosts());
includeHosts.addAll(this.includes); excludeHosts.putAll(hostDetails.getExcludedMap());
excludeHosts.putAll(this.excludes);
} finally {
this.readLock.unlock();
} }
/**
* Retrieve an atomic view of the included and excluded hosts.
*
* @return the included and excluded hosts
*/
public HostDetails getHostDetails() {
return current.get();
} }
public void setIncludesFile(String includesFile) { public void setIncludesFile(String includesFile) {
LOG.info("Setting the includes file to " + includesFile); LOG.info("Setting the includes file to " + includesFile);
this.includesFile = includesFile; HostDetails oldDetails = current.get();
HostDetails newDetails = new HostDetails(includesFile, oldDetails.includes,
oldDetails.excludesFile, oldDetails.excludes);
current.set(newDetails);
} }
public void setExcludesFile(String excludesFile) { public void setExcludesFile(String excludesFile) {
LOG.info("Setting the excludes file to " + excludesFile); LOG.info("Setting the excludes file to " + excludesFile);
this.excludesFile = excludesFile; HostDetails oldDetails = current.get();
HostDetails newDetails = new HostDetails(
oldDetails.includesFile, oldDetails.includes,
excludesFile, oldDetails.excludes);
current.set(newDetails);
} }
public void updateFileNames(String includeFiles, String excludeFiles) { public void updateFileNames(String includesFile, String excludesFile) {
this.writeLock.lock(); LOG.info("Setting the includes file to " + includesFile);
try { LOG.info("Setting the excludes file to " + excludesFile);
setIncludesFile(includeFiles); HostDetails oldDetails = current.get();
setExcludesFile(excludeFiles); HostDetails newDetails = new HostDetails(includesFile, oldDetails.includes,
} finally { excludesFile, oldDetails.excludes);
this.writeLock.unlock(); current.set(newDetails);
}
/**
* An atomic view of the included and excluded hosts
*/
public static class HostDetails {
private final String includesFile;
private final Set<String> includes;
private final String excludesFile;
// exclude host list with optional timeout.
// If the value is null, it indicates default timeout.
private final Map<String, Integer> excludes;
HostDetails(String includesFile, Set<String> includes,
String excludesFile, Map<String, Integer> excludes) {
this.includesFile = includesFile;
this.includes = includes;
this.excludesFile = excludesFile;
this.excludes = excludes;
}
public String getIncludesFile() {
return includesFile;
}
public Set<String> getIncludedHosts() {
return includes;
}
public String getExcludesFile() {
return excludesFile;
}
public Set<String> getExcludedHosts() {
return excludes.keySet();
}
public Map<String, Integer> getExcludedMap() {
return excludes;
} }
} }
} }

View File

@ -20,12 +20,10 @@ package org.apache.hadoop.util;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.FileWriter; import java.io.FileWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Map; import java.util.Map;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.HostsFileReader.HostDetails;
import org.junit.*; import org.junit.*;
import static org.junit.Assert.*; import static org.junit.Assert.*;
@ -121,11 +119,11 @@ public class TestHostsFileReader {
assertTrue(hfp.getExcludedHosts().contains("node1")); assertTrue(hfp.getExcludedHosts().contains("node1"));
assertTrue(hfp.getHosts().contains("node2")); assertTrue(hfp.getHosts().contains("node2"));
Set<String> hostsList = new HashSet<String>(); HostDetails hostDetails = hfp.getHostDetails();
Set<String> excludeList = new HashSet<String>(); assertTrue(hostDetails.getExcludedHosts().contains("node1"));
hfp.getHostDetails(hostsList, excludeList); assertTrue(hostDetails.getIncludedHosts().contains("node2"));
assertTrue(excludeList.contains("node1")); assertEquals(newIncludesFile, hostDetails.getIncludesFile());
assertTrue(hostsList.contains("node2")); assertEquals(newExcludesFile, hostDetails.getExcludesFile());
} }
/* /*
@ -328,9 +326,8 @@ public class TestHostsFileReader {
assertEquals(4, includesLen); assertEquals(4, includesLen);
assertEquals(9, excludesLen); assertEquals(9, excludesLen);
Set<String> includes = new HashSet<String>(); HostDetails hostDetails = hfp.getHostDetails();
Map<String, Integer> excludes = new HashMap<String, Integer>(); Map<String, Integer> excludes = hostDetails.getExcludedMap();
hfp.getHostDetails(includes, excludes);
assertTrue(excludes.containsKey("host1")); assertTrue(excludes.containsKey("host1"));
assertTrue(excludes.containsKey("host2")); assertTrue(excludes.containsKey("host2"));
assertTrue(excludes.containsKey("host3")); assertTrue(excludes.containsKey("host3"));

View File

@ -20,7 +20,6 @@ package org.apache.hadoop.yarn.server.resourcemanager;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -40,6 +39,7 @@ import org.apache.hadoop.net.Node;
import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.HostsFileReader; import org.apache.hadoop.util.HostsFileReader;
import org.apache.hadoop.util.HostsFileReader.HostDetails;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
@ -192,14 +192,11 @@ public class NodesListManager extends CompositeService implements
conf.get(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, conf.get(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH,
YarnConfiguration.DEFAULT_RM_NODES_EXCLUDE_FILE_PATH)); YarnConfiguration.DEFAULT_RM_NODES_EXCLUDE_FILE_PATH));
Set<String> hostsList = new HashSet<String>(); HostDetails hostDetails = hostsReader.getHostDetails();
Set<String> excludeList = new HashSet<String>(); for (String include : hostDetails.getIncludedHosts()) {
hostsReader.getHostDetails(hostsList, excludeList);
for (String include : hostsList) {
LOG.debug("include: " + include); LOG.debug("include: " + include);
} }
for (String exclude : excludeList) { for (String exclude : hostDetails.getExcludedHosts()) {
LOG.debug("exclude: " + exclude); LOG.debug("exclude: " + exclude);
} }
} }
@ -262,9 +259,9 @@ public class NodesListManager extends CompositeService implements
// Nodes need to be decommissioned (graceful or forceful); // Nodes need to be decommissioned (graceful or forceful);
List<RMNode> nodesToDecom = new ArrayList<RMNode>(); List<RMNode> nodesToDecom = new ArrayList<RMNode>();
Set<String> includes = new HashSet<String>(); HostDetails hostDetails = hostsReader.getHostDetails();
Map<String, Integer> excludes = new HashMap<String, Integer>(); Set<String> includes = hostDetails.getIncludedHosts();
hostsReader.getHostDetails(includes, excludes); Map<String, Integer> excludes = hostDetails.getExcludedMap();
for (RMNode n : this.rmContext.getRMNodes().values()) { for (RMNode n : this.rmContext.getRMNodes().values()) {
NodeState s = n.getState(); NodeState s = n.getState();
@ -453,10 +450,9 @@ public class NodesListManager extends CompositeService implements
} }
public boolean isValidNode(String hostName) { public boolean isValidNode(String hostName) {
Set<String> hostsList = new HashSet<String>(); HostDetails hostDetails = hostsReader.getHostDetails();
Set<String> excludeList = new HashSet<String>(); return isValidNode(hostName, hostDetails.getIncludedHosts(),
hostsReader.getHostDetails(hostsList, excludeList); hostDetails.getExcludedHosts());
return isValidNode(hostName, hostsList, excludeList);
} }
private boolean isValidNode( private boolean isValidNode(
@ -563,9 +559,9 @@ public class NodesListManager extends CompositeService implements
public boolean isUntrackedNode(String hostName) { public boolean isUntrackedNode(String hostName) {
String ip = resolver.resolve(hostName); String ip = resolver.resolve(hostName);
Set<String> hostsList = new HashSet<String>(); HostDetails hostDetails = hostsReader.getHostDetails();
Set<String> excludeList = new HashSet<String>(); Set<String> hostsList = hostDetails.getIncludedHosts();
hostsReader.getHostDetails(hostsList, excludeList); Set<String> excludeList = hostDetails.getExcludedHosts();
return !hostsList.isEmpty() && !hostsList.contains(hostName) return !hostsList.isEmpty() && !hostsList.contains(hostName)
&& !hostsList.contains(ip) && !excludeList.contains(hostName) && !hostsList.contains(ip) && !excludeList.contains(hostName)