mirror of https://github.com/apache/lucene.git
SOLR-11443: Remove the usage of workqueue for Overseer
This commit is contained in:
parent
ec81838203
commit
9543e85460
|
@ -21,10 +21,12 @@ import java.io.IOException;
|
|||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
|
@ -46,6 +48,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
|
|||
import org.apache.solr.common.params.CollectionParams;
|
||||
import org.apache.solr.common.util.IOUtils;
|
||||
import org.apache.solr.common.util.ObjectReleaseTracker;
|
||||
import org.apache.solr.common.util.Pair;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.core.CloudConfig;
|
||||
import org.apache.solr.handler.admin.CollectionsHandler;
|
||||
|
@ -84,6 +87,7 @@ public class Overseer implements Closeable {
|
|||
private final String myId;
|
||||
//queue where everybody can throw tasks
|
||||
private final ZkDistributedQueue stateUpdateQueue;
|
||||
//TODO remove in 9.0, we do not push message into this queue anymore
|
||||
//Internal queue where overseer stores events that have not yet been published into cloudstate
|
||||
//If Overseer dies while extracting the main queue a new overseer will start from this queue
|
||||
private final ZkDistributedQueue workQueue;
|
||||
|
@ -132,6 +136,10 @@ public class Overseer implements Closeable {
|
|||
ZkStateWriter zkStateWriter = null;
|
||||
ClusterState clusterState = null;
|
||||
boolean refreshClusterState = true; // let's refresh in the first iteration
|
||||
// we write updates in batch, but if an exception is thrown when writing new clusterstate,
|
||||
// we do not sure which message is bad message, therefore we will re-process node one by one
|
||||
int fallbackQueueSize = Integer.MAX_VALUE;
|
||||
ZkDistributedQueue fallbackQueue = workQueue;
|
||||
while (!this.isClosed) {
|
||||
isLeader = amILeader();
|
||||
if (LeaderStatus.NO == isLeader) {
|
||||
|
@ -153,11 +161,10 @@ public class Overseer implements Closeable {
|
|||
// if there were any errors while processing
|
||||
// the state queue, items would have been left in the
|
||||
// work queue so let's process those first
|
||||
byte[] data = workQueue.peek();
|
||||
boolean hadWorkItems = data != null;
|
||||
while (data != null) {
|
||||
byte[] data = fallbackQueue.peek();
|
||||
while (fallbackQueueSize > 0 && data != null) {
|
||||
final ZkNodeProps message = ZkNodeProps.load(data);
|
||||
log.debug("processMessage: workQueueSize: {}, message = {}", workQueue.getStats().getQueueLength(), message);
|
||||
log.debug("processMessage: fallbackQueueSize: {}, message = {}", fallbackQueue.getStats().getQueueLength(), message);
|
||||
// force flush to ZK after each message because there is no fallback if workQueue items
|
||||
// are removed from workQueue but fail to be written to ZK
|
||||
try {
|
||||
|
@ -165,17 +172,19 @@ public class Overseer implements Closeable {
|
|||
} catch (Exception e) {
|
||||
if (isBadMessage(e)) {
|
||||
log.warn("Exception when process message = {}, consider as bad message and poll out from the queue", message);
|
||||
workQueue.poll();
|
||||
fallbackQueue.poll();
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
workQueue.poll(); // poll-ing removes the element we got by peek-ing
|
||||
data = workQueue.peek();
|
||||
fallbackQueue.poll(); // poll-ing removes the element we got by peek-ing
|
||||
data = fallbackQueue.peek();
|
||||
fallbackQueueSize--;
|
||||
}
|
||||
// force flush at the end of the loop
|
||||
if (hadWorkItems) {
|
||||
// force flush at the end of the loop, if there are no pending updates, this is a no op call
|
||||
clusterState = zkStateWriter.writePendingUpdates();
|
||||
}
|
||||
// the workQueue is empty now, use stateUpdateQueue as fallback queue
|
||||
fallbackQueue = stateUpdateQueue;
|
||||
fallbackQueueSize = 0;
|
||||
} catch (KeeperException.SessionExpiredException e) {
|
||||
log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
|
||||
return;
|
||||
|
@ -189,9 +198,10 @@ public class Overseer implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
byte[] head = null;
|
||||
LinkedList<Pair<String, byte[]>> queue = null;
|
||||
try {
|
||||
head = stateUpdateQueue.peek(true);
|
||||
// We do not need to filter any nodes here cause all processed nodes are removed once we flush clusterstate
|
||||
queue = new LinkedList<>(stateUpdateQueue.peekElements(1000, Long.MAX_VALUE, (x) -> true));
|
||||
} catch (KeeperException.SessionExpiredException e) {
|
||||
log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
|
||||
return;
|
||||
|
@ -202,45 +212,32 @@ public class Overseer implements Closeable {
|
|||
log.error("Exception in Overseer main queue loop", e);
|
||||
}
|
||||
try {
|
||||
boolean[] itemWasMoved = new boolean[1];
|
||||
while (head != null) {
|
||||
itemWasMoved[0] = false;
|
||||
byte[] data = head;
|
||||
Set<String> processedNodes = new HashSet<>();
|
||||
while (queue != null && !queue.isEmpty()) {
|
||||
for (Pair<String, byte[]> head : queue) {
|
||||
byte[] data = head.second();
|
||||
final ZkNodeProps message = ZkNodeProps.load(data);
|
||||
log.debug("processMessage: queueSize: {}, message = {} current state version: {}", stateUpdateQueue.getStats().getQueueLength(), message, clusterState.getZkClusterStateVersion());
|
||||
// we can batch here because workQueue is our fallback in case a ZK write failed
|
||||
clusterState = processQueueItem(message, clusterState, zkStateWriter, true, new ZkStateWriter.ZkWriteCallback() {
|
||||
@Override
|
||||
public void onEnqueue() throws Exception {
|
||||
if (!itemWasMoved[0]) {
|
||||
workQueue.offer(data);
|
||||
stateUpdateQueue.poll();
|
||||
itemWasMoved[0] = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onWrite() throws Exception {
|
||||
// remove everything from workQueue
|
||||
while (workQueue.poll() != null);
|
||||
}
|
||||
processedNodes.add(head.first());
|
||||
fallbackQueueSize = processedNodes.size();
|
||||
// The callback always be called on this thread
|
||||
clusterState = processQueueItem(message, clusterState, zkStateWriter, true, () -> {
|
||||
stateUpdateQueue.remove(processedNodes);
|
||||
processedNodes.clear();
|
||||
});
|
||||
|
||||
// If the ZkWriteCallback never fired, just dump the item, it might be an invalid message.
|
||||
if (!itemWasMoved[0]) {
|
||||
stateUpdateQueue.poll();
|
||||
}
|
||||
|
||||
if (isClosed) break;
|
||||
// if an event comes in the next 100ms batch it together
|
||||
head = stateUpdateQueue.peek(100);
|
||||
queue = new LinkedList<>(stateUpdateQueue.peekElements(1000, 100, node -> !processedNodes.contains(node)));
|
||||
}
|
||||
fallbackQueueSize = processedNodes.size();
|
||||
// we should force write all pending updates because the next iteration might sleep until there
|
||||
// are more items in the main queue
|
||||
clusterState = zkStateWriter.writePendingUpdates();
|
||||
// clean work queue
|
||||
while (workQueue.poll() != null);
|
||||
|
||||
stateUpdateQueue.remove(processedNodes);
|
||||
processedNodes.clear();
|
||||
} catch (KeeperException.SessionExpiredException e) {
|
||||
log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
|
||||
return;
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.solr.common.cloud.ZkCmdExecutor;
|
|||
import org.apache.solr.common.util.Pair;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.Op;
|
||||
import org.apache.zookeeper.WatchedEvent;
|
||||
import org.apache.zookeeper.Watcher;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
|
@ -224,6 +225,42 @@ public class ZkDistributedQueue implements DistributedQueue {
|
|||
}
|
||||
}
|
||||
|
||||
public void remove(Collection<String> paths) throws KeeperException, InterruptedException {
|
||||
if (paths.isEmpty()) return;
|
||||
List<Op> ops = new ArrayList<>();
|
||||
for (String path : paths) {
|
||||
ops.add(Op.delete(dir + "/" + path, -1));
|
||||
}
|
||||
for (int from = 0; from < ops.size(); from += 1000) {
|
||||
int to = Math.min(from + 1000, ops.size());
|
||||
if (from < to) {
|
||||
try {
|
||||
zookeeper.multi(ops.subList(from, to), true);
|
||||
} catch (KeeperException.NoNodeException e) {
|
||||
// don't know which nodes are not exist, so try to delete one by one node
|
||||
for (int j = from; j < to; j++) {
|
||||
try {
|
||||
zookeeper.delete(ops.get(j).getPath(), -1, true);
|
||||
} catch (KeeperException.NoNodeException e2) {
|
||||
LOG.debug("Can not remove node which is not exist : " + ops.get(j).getPath());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int cacheSizeBefore = knownChildren.size();
|
||||
knownChildren.removeAll(paths);
|
||||
if (cacheSizeBefore - paths.size() == knownChildren.size() && knownChildren.size() != 0) {
|
||||
stats.setQueueLength(knownChildren.size());
|
||||
} else {
|
||||
// There are elements get deleted but not present in the cache,
|
||||
// the cache seems not valid anymore
|
||||
knownChildren.clear();
|
||||
isDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the head of the queue and returns it, blocks until it succeeds.
|
||||
*
|
||||
|
|
|
@ -112,10 +112,6 @@ public class ZkStateWriter {
|
|||
if (cmds.isEmpty()) return prevState;
|
||||
if (isNoOps(cmds)) return prevState;
|
||||
|
||||
if (callback != null) {
|
||||
callback.onEnqueue();
|
||||
}
|
||||
|
||||
for (ZkWriteCommand cmd : cmds) {
|
||||
if (cmd == NO_OP) continue;
|
||||
if (!isClusterStateModified && clusterStateGetModifiedWith(cmd, prevState)) {
|
||||
|
@ -253,15 +249,10 @@ public class ZkStateWriter {
|
|||
}
|
||||
|
||||
public interface ZkWriteCallback {
|
||||
/**
|
||||
* Called by ZkStateWriter if a ZkWriteCommand is queued
|
||||
*/
|
||||
public void onEnqueue() throws Exception;
|
||||
|
||||
/**
|
||||
* Called by ZkStateWriter if state is flushed to ZK
|
||||
*/
|
||||
public void onWrite() throws Exception;
|
||||
void onWrite() throws Exception;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
doAnswer(invocation -> {
|
||||
queue.remove(invocation.getArgument(0));
|
||||
return null;
|
||||
}).when(workQueueMock).remove(any());
|
||||
}).when(workQueueMock).remove(any(QueueEvent.class));
|
||||
|
||||
when(workQueueMock.poll()).thenAnswer(invocation -> {
|
||||
queue.poll();
|
||||
|
|
|
@ -45,17 +45,7 @@ import org.slf4j.LoggerFactory;
|
|||
public class ZkStateWriterTest extends SolrTestCaseJ4 {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
private static final ZkStateWriter.ZkWriteCallback FAIL_ON_WRITE = new ZkStateWriter.ZkWriteCallback() {
|
||||
@Override
|
||||
public void onEnqueue() throws Exception {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onWrite() throws Exception {
|
||||
fail("Got unexpected flush");
|
||||
}
|
||||
};
|
||||
private static final ZkStateWriter.ZkWriteCallback FAIL_ON_WRITE = () -> fail("Got unexpected flush");
|
||||
|
||||
@BeforeClass
|
||||
public static void setup() {
|
||||
|
@ -106,31 +96,11 @@ public class ZkStateWriterTest extends SolrTestCaseJ4 {
|
|||
|
||||
Thread.sleep(Overseer.STATE_UPDATE_DELAY + 100);
|
||||
AtomicBoolean didWrite = new AtomicBoolean(false);
|
||||
clusterState = writer.enqueueUpdate(clusterState, Collections.singletonList(c3), new ZkStateWriter.ZkWriteCallback() {
|
||||
@Override
|
||||
public void onEnqueue() throws Exception {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onWrite() throws Exception {
|
||||
didWrite.set(true);
|
||||
}
|
||||
});
|
||||
clusterState = writer.enqueueUpdate(clusterState, Collections.singletonList(c3), () -> didWrite.set(true));
|
||||
assertTrue("Exceed the update delay, should be flushed", didWrite.get());
|
||||
|
||||
for (int i = 0; i <= Overseer.STATE_UPDATE_BATCH_SIZE; i++) {
|
||||
clusterState = writer.enqueueUpdate(clusterState, Collections.singletonList(c3), new ZkStateWriter.ZkWriteCallback() {
|
||||
@Override
|
||||
public void onEnqueue() throws Exception {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onWrite() throws Exception {
|
||||
didWrite.set(true);
|
||||
}
|
||||
});
|
||||
clusterState = writer.enqueueUpdate(clusterState, Collections.singletonList(c3), () -> didWrite.set(true));
|
||||
}
|
||||
assertTrue("Exceed the update batch size, should be flushed", didWrite.get());
|
||||
}
|
||||
|
|
|
@ -38,4 +38,5 @@ public interface DistributedQueue {
|
|||
|
||||
Collection<Pair<String, byte[]>> peekElements(int max, long waitMillis, Predicate<String> acceptFilter) throws Exception;
|
||||
|
||||
void remove(Collection<String> paths) throws Exception;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue