mirror of https://github.com/apache/lucene.git
LUCENE-4158: Simplify DocumentsWriterStallControl to prevent further deadlocks
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1352535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0ae03a37cf
commit
c4e4b36037
|
@ -26,7 +26,6 @@ import java.util.Queue;
|
|||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
|
||||
import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
/**
|
||||
|
@ -41,7 +40,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
|
|||
* {@link IndexWriterConfig#getRAMPerThreadHardLimitMB()} to prevent address
|
||||
* space exhaustion.
|
||||
*/
|
||||
final class DocumentsWriterFlushControl implements MemoryController {
|
||||
final class DocumentsWriterFlushControl {
|
||||
|
||||
private final long hardMaxBytesPerDWPT;
|
||||
private long activeBytes = 0;
|
||||
|
@ -88,7 +87,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
return flushBytes + activeBytes;
|
||||
}
|
||||
|
||||
public long stallLimitBytes() {
|
||||
private long stallLimitBytes() {
|
||||
final double maxRamMB = config.getRAMBufferSizeMB();
|
||||
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
|
||||
}
|
||||
|
@ -178,7 +177,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
}
|
||||
return flushingDWPT;
|
||||
} finally {
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
assert assertMemory();
|
||||
}
|
||||
}
|
||||
|
@ -192,13 +191,30 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
assert assertMemory();
|
||||
} finally {
|
||||
try {
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
} finally {
|
||||
notifyAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Recomputes the stall decision and hands it to {@code stallControl}.
 * Callers must hold this object's monitor (checked by the assert below).
 * Stalling is requested only while the control is not closed, the combined
 * active and flush bytes exceed {@link #stallLimitBytes()}, and the active
 * bytes on their own are still below that limit.
 */
private final void updateStallState() {
|
||||
|
||||
assert Thread.holdsLock(this);
|
||||
final long limit = stallLimitBytes();
|
||||
/*
|
||||
* we block indexing threads if net bytes grow due to slow flushes;
|
||||
* yet, for small ram buffers and large documents we can easily
|
||||
* reach the limit without any ongoing flushes. we need to ensure
|
||||
* that we don't stall/block if an ongoing or pending flush can
|
||||
* not free up enough memory to release the stall lock.
|
||||
*/
|
||||
final boolean stall = ((activeBytes + flushBytes) > limit) &&
|
||||
(activeBytes < limit) &&
|
||||
!closed;
|
||||
stallControl.updateStalled(stall);
|
||||
}
|
||||
|
||||
public synchronized void waitForFlush() {
|
||||
while (flushingWriters.size() != 0) {
|
||||
try {
|
||||
|
@ -238,7 +254,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
// Take it out of the loop; this DWPT is stale
|
||||
perThreadPool.replaceForFlush(state, closed);
|
||||
} finally {
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -288,7 +304,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
}
|
||||
return null;
|
||||
} finally {
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -304,7 +320,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
synchronized (this) {
|
||||
final DocumentsWriterPerThread poll;
|
||||
if ((poll = flushQueue.poll()) != null) {
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
return poll;
|
||||
}
|
||||
fullFlush = this.fullFlush;
|
||||
|
@ -458,7 +474,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
assert assertBlockedFlushes(documentsWriter.deleteQueue);
|
||||
flushQueue.addAll(fullFlushBuffer);
|
||||
fullFlushBuffer.clear();
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
}
|
||||
assert assertActiveDeleteQueue(documentsWriter.deleteQueue);
|
||||
}
|
||||
|
@ -537,7 +553,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
}
|
||||
} finally {
|
||||
fullFlush = false;
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -572,7 +588,7 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
|||
fullFlush = false;
|
||||
flushQueue.clear();
|
||||
blockedFlushes.clear();
|
||||
stallControl.updateStalled(this);
|
||||
updateStallState();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,8 @@ package org.apache.lucene.index;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.util.concurrent.locks.AbstractQueuedSynchronizer;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
@ -37,107 +38,81 @@ import org.apache.lucene.util.ThreadInterruptedException;
|
|||
* continue indexing.
|
||||
*/
|
||||
final class DocumentsWriterStallControl {
|
||||
/**
 * {@link AbstractQueuedSynchronizer} whose integer state encodes the stall
 * status: 0 means healthy, any other value means stalled. Shared acquisition
 * only succeeds while the state is 0.
 */
@SuppressWarnings("serial")
|
||||
private static final class Sync extends AbstractQueuedSynchronizer {
|
||||
|
||||
// Starts out healthy (state 0).
Sync() {
|
||||
setState(0);
|
||||
}
|
||||
|
||||
// True while the state is 0, i.e. not stalled.
boolean isHealthy() {
|
||||
return getState() == 0;
|
||||
}
|
||||
|
||||
// Makes a single CAS attempt to increment the state by one; returns
// false if the state changed between the read and the CAS.
boolean trySetStalled() {
|
||||
int state = getState();
|
||||
return compareAndSetState(state, state + 1);
|
||||
}
|
||||
|
||||
// Returns true if the state is already 0. Otherwise tries to CAS the
// state back to 0 and, on success, returns the result of
// releaseShared(0); returns false if the CAS failed.
boolean tryReset() {
|
||||
final int oldState = getState();
|
||||
if (oldState == 0) {
|
||||
return true;
|
||||
}
|
||||
if (compareAndSetState(oldState, 0)) {
|
||||
return releaseShared(0);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Succeeds (returns 1) only while the state is 0; returns -1 otherwise.
// The acquires argument is not used.
@Override
|
||||
public int tryAcquireShared(int acquires) {
|
||||
return getState() == 0 ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Reports whether the state is 0; the newState argument is not used.
@Override
|
||||
public boolean tryReleaseShared(int newState) {
|
||||
return (getState() == 0);
|
||||
}
|
||||
}
|
||||
|
||||
private final Sync sync = new Sync();
|
||||
volatile boolean wasStalled = false; // only with asserts
|
||||
|
||||
// Returns true when the underlying Sync is not in its healthy state.
boolean anyStalledThreads() {
|
||||
return !sync.isHealthy();
|
||||
}
|
||||
|
||||
|
||||
private volatile boolean stalled;
|
||||
private int numWaiting; // only with assert
|
||||
private boolean wasStalled; // only with assert
|
||||
private final Map<Thread, Boolean> waiting = new IdentityHashMap<Thread, Boolean>(); // only with assert
|
||||
|
||||
/**
|
||||
* Update the stalled flag status. Indexing is stalled (the flag is
|
||||
* <code>true</code>) when the net bytes, i.e. active plus flushing bytes,
|
||||
* exceed the stall limit while the active bytes alone are still below
|
||||
* that limit. Otherwise it will reset the
|
||||
* {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
|
||||
* {@link #waitIfStalled()}
|
||||
* {@link DocumentsWriterStallControl} to healthy and release all threads
|
||||
* waiting on {@link #waitIfStalled()}
|
||||
*/
|
||||
void updateStalled(MemoryController controller) {
|
||||
do {
|
||||
final long netBytes = controller.netBytes();
|
||||
final long flushBytes = controller.flushBytes();
|
||||
final long limit = controller.stallLimitBytes();
|
||||
assert netBytes >= flushBytes;
|
||||
assert limit > 0;
|
||||
/*
|
||||
* we block indexing threads if net bytes grow due to slow flushes;
|
||||
* yet, for small ram buffers and large documents we can easily
|
||||
* reach the limit without any ongoing flushes. we need to ensure
|
||||
* that we don't stall/block if an ongoing or pending flush can
|
||||
* not free up enough memory to release the stall lock.
|
||||
*/
|
||||
while (netBytes > limit && (netBytes - flushBytes) < limit) {
|
||||
if (sync.trySetStalled()) {
|
||||
assert wasStalled = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
} while (!sync.tryReset());
|
||||
// Stores the given stall state under this object's monitor and wakes every
// thread waiting on it so each can re-check the flag. wasStalled is only
// ever set to true here, so it records that a stall was requested at least
// once.
synchronized void updateStalled(boolean stalled) {
|
||||
this.stalled = stalled;
|
||||
if (stalled) {
|
||||
wasStalled = true;
|
||||
}
|
||||
notifyAll();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Blocks if documents writing is currently in a stalled state.
|
||||
*
|
||||
*/
|
||||
void waitIfStalled() {
|
||||
try {
|
||||
sync.acquireSharedInterruptibly(0);
|
||||
} catch (InterruptedException e) {
|
||||
throw new ThreadInterruptedException(e);
|
||||
if (stalled) {
|
||||
synchronized (this) {
|
||||
boolean hasWaited = false;
|
||||
while (stalled) {
|
||||
try {
|
||||
assert hasWaited || incWaiters();
|
||||
assert (hasWaited = true);
|
||||
wait();
|
||||
} catch (InterruptedException e) {
|
||||
throw new ThreadInterruptedException(e);
|
||||
}
|
||||
}
|
||||
assert !hasWaited || decrWaiters();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boolean hasBlocked() { // for tests
|
||||
return sync.hasQueuedThreads();
|
||||
boolean anyStalledThreads() {
|
||||
return stalled;
|
||||
}
|
||||
|
||||
static interface MemoryController {
|
||||
long netBytes();
|
||||
long flushBytes();
|
||||
long stallLimitBytes();
|
||||
|
||||
// Bookkeeping for tests: bumps the waiter count and records the current
// thread in the waiting map (asserting it was not already recorded).
// Returns whether the count is positive afterwards; returning a boolean
// lets the call be placed inside an assert statement.
private boolean incWaiters() {
|
||||
numWaiting++;
|
||||
assert waiting.put(Thread.currentThread(), Boolean.TRUE) == null;
|
||||
|
||||
return numWaiting > 0;
|
||||
}
|
||||
|
||||
// Bookkeeping for tests: lowers the waiter count and removes the current
// thread from the waiting map (asserting it had been recorded). Returns
// whether the count is still non-negative; returning a boolean lets the
// call be placed inside an assert statement.
private boolean decrWaiters() {
|
||||
numWaiting--;
|
||||
assert waiting.remove(Thread.currentThread()) != null;
|
||||
return numWaiting >= 0;
|
||||
}
|
||||
|
||||
// Returns true while at least one thread is counted as waiting.
// NOTE(review): in the code shown, numWaiting is only changed from within
// assert expressions, so with assertions disabled this would stay false.
synchronized boolean hasBlocked() { // for tests
|
||||
return numWaiting > 0;
|
||||
}
|
||||
|
||||
// Returns true when the stalled flag is not set. Not synchronized: it
// relies on the flag being volatile.
boolean isHealthy() { // for tests
|
||||
return !stalled; // volatile read!
|
||||
}
|
||||
|
||||
// Returns true if the given thread is currently recorded in the waiting map.
// NOTE(review): in the code shown, the map is only filled from within assert
// expressions, so this is meaningful only when assertions are enabled.
synchronized boolean isThreadQueued(Thread t) { // for tests
|
||||
return waiting.containsKey(t);
|
||||
}
|
||||
|
||||
// Delegates to the underlying Sync: true when it reports a healthy state.
public boolean isHealthy() {
|
||||
return sync.isHealthy();
|
||||
}
|
||||
|
||||
public boolean isThreadQueued(Thread t) {
|
||||
return sync.isQueued(t);
|
||||
synchronized boolean wasStalled() { // for tests
|
||||
return wasStalled;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.concurrent.CountDownLatch;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
|
@ -38,11 +37,8 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
|
||||
public void testSimpleStall() throws InterruptedException {
|
||||
DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1000;
|
||||
memCtrl.flushBytes = 20;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
|
||||
ctrl.updateStalled(false);
|
||||
Thread[] waitThreads = waitThreads(atLeast(1), ctrl);
|
||||
start(waitThreads);
|
||||
assertFalse(ctrl.hasBlocked());
|
||||
|
@ -50,43 +46,31 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
join(waitThreads, 10);
|
||||
|
||||
// now stall threads and wake them up again
|
||||
memCtrl.netBytes = 1001;
|
||||
memCtrl.flushBytes = 100;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(true);
|
||||
waitThreads = waitThreads(atLeast(1), ctrl);
|
||||
start(waitThreads);
|
||||
awaitState(100, Thread.State.WAITING, waitThreads);
|
||||
assertTrue(ctrl.hasBlocked());
|
||||
assertTrue(ctrl.anyStalledThreads());
|
||||
memCtrl.netBytes = 50;
|
||||
memCtrl.flushBytes = 0;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(false);
|
||||
assertFalse(ctrl.anyStalledThreads());
|
||||
join(waitThreads, 500);
|
||||
}
|
||||
|
||||
public void testRandom() throws InterruptedException {
|
||||
final DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(false);
|
||||
|
||||
Thread[] stallThreads = new Thread[atLeast(3)];
|
||||
for (int i = 0; i < stallThreads.length; i++) {
|
||||
final int threadId = i;
|
||||
final int stallProbability = 1 +random().nextInt(10);
|
||||
stallThreads[i] = new Thread() {
|
||||
public void run() {
|
||||
int baseBytes = threadId % 2 == 0 ? 500 : 700;
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
memCtrl.flushBytes = 0;
|
||||
|
||||
int iters = atLeast(1000);
|
||||
for (int j = 0; j < iters; j++) {
|
||||
memCtrl.netBytes = baseBytes + random().nextInt(1000);
|
||||
memCtrl.flushBytes = random().nextInt((int)memCtrl.netBytes);
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(random().nextInt(stallProbability) == 0);
|
||||
if (random().nextInt(5) == 0) { // thread 0 only updates
|
||||
ctrl.waitIfStalled();
|
||||
}
|
||||
|
@ -102,7 +86,7 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
*/
|
||||
while ((System.currentTimeMillis() - time) < 100 * 1000
|
||||
&& !terminated(stallThreads)) {
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(false);
|
||||
if (random().nextBoolean()) {
|
||||
Thread.yield();
|
||||
} else {
|
||||
|
@ -116,11 +100,7 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
|
||||
public void testAccquireReleaseRace() throws InterruptedException {
|
||||
final DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
memCtrl.flushBytes = 0;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(false);
|
||||
final AtomicBoolean stop = new AtomicBoolean(false);
|
||||
final AtomicBoolean checkPoint = new AtomicBoolean(true);
|
||||
|
||||
|
@ -191,10 +171,7 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
|
||||
|
||||
for (int i = 0; i < threads.length; i++) {
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
memCtrl.flushBytes = 0;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(false);
|
||||
threads[i].join(2000);
|
||||
if (threads[i].isAlive() && threads[i] instanceof Waiter) {
|
||||
if (threads[i].getState() == Thread.State.WAITING) {
|
||||
|
@ -290,14 +267,11 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
|
||||
public void run() {
|
||||
try {
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = release ? 1 : 2000;
|
||||
memCtrl.flushBytes = random().nextInt((int)memCtrl.netBytes);
|
||||
|
||||
while (!stop.get()) {
|
||||
int internalIters = release && random().nextBoolean() ? atLeast(5) : 1;
|
||||
for (int i = 0; i < internalIters; i++) {
|
||||
ctrl.updateStalled(memCtrl);
|
||||
ctrl.updateStalled(random().nextBoolean());
|
||||
}
|
||||
if (checkPoint.get()) {
|
||||
sync.updateJoin.countDown();
|
||||
|
@ -379,28 +353,6 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
|||
+ " ms");
|
||||
}
|
||||
|
||||
/**
 * Test stub for MemoryController: the three values it reports are plain
 * mutable fields that the tests assign directly before calling
 * updateStalled.
 */
private static class SimpleMemCtrl implements MemoryController {
|
||||
long netBytes;
|
||||
long limit;
|
||||
long flushBytes;
|
||||
|
||||
// Returns the netBytes field as-is.
@Override
|
||||
public long netBytes() {
|
||||
return netBytes;
|
||||
}
|
||||
|
||||
// Returns the limit field as-is.
@Override
|
||||
public long stallLimitBytes() {
|
||||
return limit;
|
||||
}
|
||||
|
||||
// Returns the flushBytes field as-is.
@Override
|
||||
public long flushBytes() {
|
||||
return flushBytes;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static final class Synchronizer {
|
||||
volatile CountDownLatch waiter;
|
||||
volatile CountDownLatch updateJoin;
|
||||
|
|
|
@ -109,7 +109,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
|
||||
}
|
||||
if (ensureNotStalled) {
|
||||
assertFalse(docsWriter.flushControl.stallControl.wasStalled);
|
||||
assertFalse(docsWriter.flushControl.stallControl.wasStalled());
|
||||
}
|
||||
writer.close();
|
||||
assertEquals(0, flushControl.activeBytes());
|
||||
|
@ -222,7 +222,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
assertEquals(numDocumentsToIndex, r.numDocs());
|
||||
assertEquals(numDocumentsToIndex, r.maxDoc());
|
||||
if (!flushPolicy.flushOnRAM()) {
|
||||
assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled);
|
||||
assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled());
|
||||
assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.hasBlocked());
|
||||
}
|
||||
r.close();
|
||||
|
@ -275,7 +275,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
docsWriter.flushControl.stallControl.hasBlocked());
|
||||
}
|
||||
if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
|
||||
assertTrue(docsWriter.flushControl.stallControl.wasStalled);
|
||||
assertTrue(docsWriter.flushControl.stallControl.wasStalled());
|
||||
}
|
||||
assertActiveBytesAfter(flushControl);
|
||||
writer.close(true);
|
||||
|
|
Loading…
Reference in New Issue