HADOOP-12107. long running apps may have a huge number of StatisticsData instances under FileSystem (Sangjin Lee via Ming Ma)
(cherry picked from commit 8e1bdc17d9
)
Conflicts:
hadoop-common-project/hadoop-common/CHANGES.txt
This commit is contained in:
parent
c1447e654d
commit
5e6bbe6031
|
@ -539,6 +539,9 @@ Release 2.7.1 - 2015-07-06
|
||||||
that method doesn't modify the FsPermission (Bibin A Chundatt via Colin P.
|
that method doesn't modify the FsPermission (Bibin A Chundatt via Colin P.
|
||||||
McCabe)
|
McCabe)
|
||||||
|
|
||||||
|
HADOOP-12107. long running apps may have a huge number of StatisticsData
|
||||||
|
instances under FileSystem (Sangjin Lee via Ming Ma)
|
||||||
|
|
||||||
Release 2.7.0 - 2015-04-20
|
Release 2.7.0 - 2015-04-20
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -20,7 +20,8 @@ package org.apache.hadoop.fs;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.ref.WeakReference;
|
import java.lang.ref.PhantomReference;
|
||||||
|
import java.lang.ref.ReferenceQueue;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.security.PrivilegedExceptionAction;
|
||||||
|
@ -32,7 +33,6 @@ import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.IdentityHashMap;
|
import java.util.IdentityHashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
@ -2918,16 +2918,6 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
volatile int readOps;
|
volatile int readOps;
|
||||||
volatile int largeReadOps;
|
volatile int largeReadOps;
|
||||||
volatile int writeOps;
|
volatile int writeOps;
|
||||||
/**
|
|
||||||
* Stores a weak reference to the thread owning this StatisticsData.
|
|
||||||
* This allows us to remove StatisticsData objects that pertain to
|
|
||||||
* threads that no longer exist.
|
|
||||||
*/
|
|
||||||
final WeakReference<Thread> owner;
|
|
||||||
|
|
||||||
StatisticsData(WeakReference<Thread> owner) {
|
|
||||||
this.owner = owner;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add another StatisticsData object to this one.
|
* Add another StatisticsData object to this one.
|
||||||
|
@ -2998,17 +2988,37 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
* Thread-local data.
|
* Thread-local data.
|
||||||
*/
|
*/
|
||||||
private final ThreadLocal<StatisticsData> threadData;
|
private final ThreadLocal<StatisticsData> threadData;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of all thread-local data areas. Protected by the Statistics lock.
|
* Set of all thread-local data areas. Protected by the Statistics lock.
|
||||||
|
* The references to the statistics data are kept using phantom references
|
||||||
|
* to the associated threads. Proper clean-up is performed by the cleaner
|
||||||
|
* thread when the threads are garbage collected.
|
||||||
*/
|
*/
|
||||||
private LinkedList<StatisticsData> allData;
|
private final Set<StatisticsDataReference> allData;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Global reference queue and a cleaner thread that manage statistics data
|
||||||
|
* references from all filesystem instances.
|
||||||
|
*/
|
||||||
|
private static final ReferenceQueue<Thread> STATS_DATA_REF_QUEUE;
|
||||||
|
private static final Thread STATS_DATA_CLEANER;
|
||||||
|
|
||||||
|
static {
|
||||||
|
STATS_DATA_REF_QUEUE = new ReferenceQueue<Thread>();
|
||||||
|
// start a single daemon cleaner thread
|
||||||
|
STATS_DATA_CLEANER = new Thread(new StatisticsDataReferenceCleaner());
|
||||||
|
STATS_DATA_CLEANER.
|
||||||
|
setName(StatisticsDataReferenceCleaner.class.getName());
|
||||||
|
STATS_DATA_CLEANER.setDaemon(true);
|
||||||
|
STATS_DATA_CLEANER.start();
|
||||||
|
}
|
||||||
|
|
||||||
public Statistics(String scheme) {
|
public Statistics(String scheme) {
|
||||||
this.scheme = scheme;
|
this.scheme = scheme;
|
||||||
this.rootData = new StatisticsData(null);
|
this.rootData = new StatisticsData();
|
||||||
this.threadData = new ThreadLocal<StatisticsData>();
|
this.threadData = new ThreadLocal<StatisticsData>();
|
||||||
this.allData = null;
|
this.allData = new HashSet<StatisticsDataReference>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3018,7 +3028,7 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
*/
|
*/
|
||||||
public Statistics(Statistics other) {
|
public Statistics(Statistics other) {
|
||||||
this.scheme = other.scheme;
|
this.scheme = other.scheme;
|
||||||
this.rootData = new StatisticsData(null);
|
this.rootData = new StatisticsData();
|
||||||
other.visitAll(new StatisticsAggregator<Void>() {
|
other.visitAll(new StatisticsAggregator<Void>() {
|
||||||
@Override
|
@Override
|
||||||
public void accept(StatisticsData data) {
|
public void accept(StatisticsData data) {
|
||||||
|
@ -3030,6 +3040,63 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
this.threadData = new ThreadLocal<StatisticsData>();
|
this.threadData = new ThreadLocal<StatisticsData>();
|
||||||
|
this.allData = new HashSet<StatisticsDataReference>();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A phantom reference to a thread that also includes the data associated
|
||||||
|
* with that thread. On the thread being garbage collected, it is enqueued
|
||||||
|
* to the reference queue for clean-up.
|
||||||
|
*/
|
||||||
|
private class StatisticsDataReference extends PhantomReference<Thread> {
|
||||||
|
private final StatisticsData data;
|
||||||
|
|
||||||
|
public StatisticsDataReference(StatisticsData data, Thread thread) {
|
||||||
|
super(thread, STATS_DATA_REF_QUEUE);
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public StatisticsData getData() {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs clean-up action when the associated thread is garbage
|
||||||
|
* collected.
|
||||||
|
*/
|
||||||
|
public void cleanUp() {
|
||||||
|
// use the statistics lock for safety
|
||||||
|
synchronized (Statistics.this) {
|
||||||
|
/*
|
||||||
|
* If the thread that created this thread-local data no longer exists,
|
||||||
|
* remove the StatisticsData from our list and fold the values into
|
||||||
|
* rootData.
|
||||||
|
*/
|
||||||
|
rootData.add(data);
|
||||||
|
allData.remove(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Background action to act on references being removed.
|
||||||
|
*/
|
||||||
|
private static class StatisticsDataReferenceCleaner implements Runnable {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
while (true) {
|
||||||
|
try {
|
||||||
|
StatisticsDataReference ref =
|
||||||
|
(StatisticsDataReference)STATS_DATA_REF_QUEUE.remove();
|
||||||
|
ref.cleanUp();
|
||||||
|
} catch (Throwable th) {
|
||||||
|
// the cleaner thread should continue to run even if there are
|
||||||
|
// exceptions, including InterruptedException
|
||||||
|
LOG.warn("exception in the cleaner thread but it will continue to "
|
||||||
|
+ "run", th);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3038,14 +3105,12 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
public StatisticsData getThreadStatistics() {
|
public StatisticsData getThreadStatistics() {
|
||||||
StatisticsData data = threadData.get();
|
StatisticsData data = threadData.get();
|
||||||
if (data == null) {
|
if (data == null) {
|
||||||
data = new StatisticsData(
|
data = new StatisticsData();
|
||||||
new WeakReference<Thread>(Thread.currentThread()));
|
|
||||||
threadData.set(data);
|
threadData.set(data);
|
||||||
|
StatisticsDataReference ref =
|
||||||
|
new StatisticsDataReference(data, Thread.currentThread());
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
if (allData == null) {
|
allData.add(ref);
|
||||||
allData = new LinkedList<StatisticsData>();
|
|
||||||
}
|
|
||||||
allData.add(data);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return data;
|
return data;
|
||||||
|
@ -3103,21 +3168,9 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
*/
|
*/
|
||||||
private synchronized <T> T visitAll(StatisticsAggregator<T> visitor) {
|
private synchronized <T> T visitAll(StatisticsAggregator<T> visitor) {
|
||||||
visitor.accept(rootData);
|
visitor.accept(rootData);
|
||||||
if (allData != null) {
|
for (StatisticsDataReference ref: allData) {
|
||||||
for (Iterator<StatisticsData> iter = allData.iterator();
|
StatisticsData data = ref.getData();
|
||||||
iter.hasNext(); ) {
|
visitor.accept(data);
|
||||||
StatisticsData data = iter.next();
|
|
||||||
visitor.accept(data);
|
|
||||||
if (data.owner.get() == null) {
|
|
||||||
/*
|
|
||||||
* If the thread that created this thread-local data no
|
|
||||||
* longer exists, remove the StatisticsData from our list
|
|
||||||
* and fold the values into rootData.
|
|
||||||
*/
|
|
||||||
rootData.add(data);
|
|
||||||
iter.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return visitor.aggregate();
|
return visitor.aggregate();
|
||||||
}
|
}
|
||||||
|
@ -3224,7 +3277,7 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return visitAll(new StatisticsAggregator<String>() {
|
return visitAll(new StatisticsAggregator<String>() {
|
||||||
private StatisticsData total = new StatisticsData(null);
|
private StatisticsData total = new StatisticsData();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void accept(StatisticsData data) {
|
public void accept(StatisticsData data) {
|
||||||
|
@ -3257,7 +3310,7 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
*/
|
*/
|
||||||
public void reset() {
|
public void reset() {
|
||||||
visitAll(new StatisticsAggregator<Void>() {
|
visitAll(new StatisticsAggregator<Void>() {
|
||||||
private StatisticsData total = new StatisticsData(null);
|
private StatisticsData total = new StatisticsData();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void accept(StatisticsData data) {
|
public void accept(StatisticsData data) {
|
||||||
|
@ -3279,6 +3332,11 @@ public abstract class FileSystem extends Configured implements Closeable {
|
||||||
public String getScheme() {
|
public String getScheme() {
|
||||||
return scheme;
|
return scheme;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
synchronized int getAllThreadLocalDataSize() {
|
||||||
|
return allData.size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -18,26 +18,34 @@
|
||||||
|
|
||||||
package org.apache.hadoop.fs;
|
package org.apache.hadoop.fs;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.FileContextTestHelper.createFile;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileSystem.Statistics;
|
import org.apache.hadoop.fs.FileSystem.Statistics;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.base.Supplier;
|
||||||
import com.google.common.util.concurrent.Uninterruptibles;
|
import com.google.common.util.concurrent.Uninterruptibles;
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.FileContextTestHelper.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
* Base class to test {@link FileContext} Statistics.
|
* Base class to test {@link FileContext} Statistics.
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public abstract class FCStatisticsBaseTest {
|
public abstract class FCStatisticsBaseTest {
|
||||||
|
|
||||||
static protected int blockSize = 512;
|
static protected int blockSize = 512;
|
||||||
static protected int numBlocks = 1;
|
static protected int numBlocks = 1;
|
||||||
|
|
||||||
|
@ -102,6 +110,48 @@ public abstract class FCStatisticsBaseTest {
|
||||||
fc.delete(filePath, true);
|
fc.delete(filePath, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testStatisticsThreadLocalDataCleanUp() throws Exception {
|
||||||
|
final Statistics stats = new Statistics("test");
|
||||||
|
// create a small thread pool to test the statistics
|
||||||
|
final int size = 2;
|
||||||
|
ExecutorService es = Executors.newFixedThreadPool(size);
|
||||||
|
List<Callable<Boolean>> tasks = new ArrayList<Callable<Boolean>>(size);
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
tasks.add(new Callable<Boolean>() {
|
||||||
|
public Boolean call() {
|
||||||
|
// this populates the data set in statistics
|
||||||
|
stats.incrementReadOps(1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// run the threads
|
||||||
|
es.invokeAll(tasks);
|
||||||
|
// assert that the data size is exactly the number of threads
|
||||||
|
final AtomicInteger allDataSize = new AtomicInteger(0);
|
||||||
|
allDataSize.set(stats.getAllThreadLocalDataSize());
|
||||||
|
Assert.assertEquals(size, allDataSize.get());
|
||||||
|
Assert.assertEquals(size, stats.getReadOps());
|
||||||
|
// force the GC to collect the threads by shutting down the thread pool
|
||||||
|
es.shutdownNow();
|
||||||
|
es.awaitTermination(1, TimeUnit.MINUTES);
|
||||||
|
es = null;
|
||||||
|
System.gc();
|
||||||
|
|
||||||
|
// wait for up to 10 seconds
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
int size = stats.getAllThreadLocalDataSize();
|
||||||
|
allDataSize.set(size);
|
||||||
|
return size == 0;
|
||||||
|
}
|
||||||
|
}, 1000, 10*1000);
|
||||||
|
Assert.assertEquals(0, allDataSize.get());
|
||||||
|
Assert.assertEquals(size, stats.getReadOps());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bytes read may be different for different file systems. This method should
|
* Bytes read may be different for different file systems. This method should
|
||||||
* throw assertion error if bytes read are incorrect.
|
* throw assertion error if bytes read are incorrect.
|
||||||
|
|
Loading…
Reference in New Issue