mirror of https://github.com/apache/lucene.git
SOLR-8694: DistributedMap/Queue can create too many Watchers and some code simplification.
This commit is contained in:
parent
189e985b5c
commit
32fbca6ea7
|
@ -182,6 +182,9 @@ Bug Fixes
|
||||||
* SOLR-8701: CloudSolrClient decides that there are no healthy nodes to handle a request too early.
|
* SOLR-8701: CloudSolrClient decides that there are no healthy nodes to handle a request too early.
|
||||||
(Mark Miller)
|
(Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-8694: DistributedMap/Queue can create too many Watchers and some code simplification.
|
||||||
|
(Scott Blum via Mark Miller)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
|
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
|
||||||
|
|
|
@ -22,15 +22,9 @@ import org.apache.solr.common.cloud.SolrZkClient;
|
||||||
import org.apache.solr.common.cloud.ZkCmdExecutor;
|
import org.apache.solr.common.cloud.ZkCmdExecutor;
|
||||||
import org.apache.zookeeper.CreateMode;
|
import org.apache.zookeeper.CreateMode;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
import org.apache.zookeeper.WatchedEvent;
|
|
||||||
import org.apache.zookeeper.Watcher;
|
|
||||||
import org.apache.zookeeper.ZooDefs;
|
|
||||||
import org.apache.zookeeper.data.ACL;
|
import org.apache.zookeeper.data.ACL;
|
||||||
import org.apache.zookeeper.data.Stat;
|
import org.apache.zookeeper.data.Stat;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.lang.invoke.MethodHandles;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -39,19 +33,13 @@ import java.util.List;
|
||||||
* don't have to be ordered i.e. DistributedQueue.
|
* don't have to be ordered i.e. DistributedQueue.
|
||||||
*/
|
*/
|
||||||
public class DistributedMap {
|
public class DistributedMap {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
|
||||||
|
|
||||||
protected static long DEFAULT_TIMEOUT = 5*60*1000;
|
|
||||||
|
|
||||||
protected final String dir;
|
protected final String dir;
|
||||||
|
|
||||||
protected SolrZkClient zookeeper;
|
protected SolrZkClient zookeeper;
|
||||||
|
|
||||||
protected final String prefix = "mn-";
|
protected final String prefix = "mn-";
|
||||||
|
|
||||||
protected final String response_prefix = "mnr-" ;
|
public DistributedMap(SolrZkClient zookeeper, String dir) {
|
||||||
|
|
||||||
public DistributedMap(SolrZkClient zookeeper, String dir, List<ACL> acl) {
|
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
|
|
||||||
ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zookeeper.getZkClientTimeout());
|
ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zookeeper.getZkClientTimeout());
|
||||||
|
@ -67,89 +55,13 @@ public class DistributedMap {
|
||||||
this.zookeeper = zookeeper;
|
this.zookeeper = zookeeper;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected class LatchChildWatcher implements Watcher {
|
|
||||||
|
|
||||||
Object lock = new Object();
|
public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
|
||||||
private WatchedEvent event = null;
|
zookeeper.makePath(dir + "/" + prefix + trackingId, data, CreateMode.PERSISTENT, null, false, true);
|
||||||
|
|
||||||
public LatchChildWatcher() {}
|
|
||||||
|
|
||||||
public LatchChildWatcher(Object lock) {
|
|
||||||
this.lock = lock;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void process(WatchedEvent event) {
|
|
||||||
LOG.info("LatchChildWatcher fired on path: " + event.getPath() + " state: "
|
|
||||||
+ event.getState() + " type " + event.getType());
|
|
||||||
synchronized (lock) {
|
|
||||||
this.event = event;
|
|
||||||
lock.notifyAll();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void await(long timeout) throws InterruptedException {
|
|
||||||
synchronized (lock) {
|
|
||||||
lock.wait(timeout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public WatchedEvent getWatchedEvent() {
|
|
||||||
return event;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public byte[] get(String trackingId) throws KeeperException, InterruptedException {
|
||||||
* Inserts data into zookeeper.
|
return zookeeper.getData(dir + "/" + prefix + trackingId, null, null, true);
|
||||||
*
|
|
||||||
* @return true if data was successfully added
|
|
||||||
*/
|
|
||||||
protected String createData(String path, byte[] data, CreateMode mode)
|
|
||||||
throws KeeperException, InterruptedException {
|
|
||||||
for (;;) {
|
|
||||||
try {
|
|
||||||
return zookeeper.create(path, data, mode, true);
|
|
||||||
} catch (KeeperException.NoNodeException e) {
|
|
||||||
try {
|
|
||||||
zookeeper.create(dir, new byte[0], CreateMode.PERSISTENT, true);
|
|
||||||
} catch (KeeperException.NodeExistsException ne) {
|
|
||||||
// someone created it
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
|
|
||||||
return createData(dir + "/" + prefix + trackingId, data,
|
|
||||||
CreateMode.PERSISTENT) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Offer the data and wait for the response
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public MapEvent put(String trackingId, byte[] data, long timeout) throws KeeperException,
|
|
||||||
InterruptedException {
|
|
||||||
String path = createData(dir + "/" + prefix + trackingId, data,
|
|
||||||
CreateMode.PERSISTENT);
|
|
||||||
String watchID = createData(
|
|
||||||
dir + "/" + response_prefix + path.substring(path.lastIndexOf("-") + 1),
|
|
||||||
null, CreateMode.EPHEMERAL);
|
|
||||||
Object lock = new Object();
|
|
||||||
LatchChildWatcher watcher = new LatchChildWatcher(lock);
|
|
||||||
synchronized (lock) {
|
|
||||||
if (zookeeper.exists(watchID, watcher, true) != null) {
|
|
||||||
watcher.await(timeout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
byte[] bytes = zookeeper.getData(watchID, null, null, true);
|
|
||||||
zookeeper.delete(watchID, -1, true);
|
|
||||||
return new MapEvent(watchID, bytes, watcher.getWatchedEvent());
|
|
||||||
}
|
|
||||||
|
|
||||||
public MapEvent get(String trackingId) throws KeeperException, InterruptedException {
|
|
||||||
return new MapEvent(trackingId, zookeeper.getData(dir + "/" + prefix + trackingId, null, null, true), null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains(String trackingId) throws KeeperException, InterruptedException {
|
public boolean contains(String trackingId) throws KeeperException, InterruptedException {
|
||||||
|
@ -187,58 +99,4 @@ public class DistributedMap {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MapEvent {
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
final int prime = 31;
|
|
||||||
int result = 1;
|
|
||||||
result = prime * result + ((id == null) ? 0 : id.hashCode());
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
if (this == obj) return true;
|
|
||||||
if (obj == null) return false;
|
|
||||||
if (getClass() != obj.getClass()) return false;
|
|
||||||
MapEvent other = (MapEvent) obj;
|
|
||||||
if (id == null) {
|
|
||||||
if (other.id != null) return false;
|
|
||||||
} else if (!id.equals(other.id)) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private WatchedEvent event = null;
|
|
||||||
private String id;
|
|
||||||
private byte[] bytes;
|
|
||||||
|
|
||||||
MapEvent(String id, byte[] bytes, WatchedEvent event) {
|
|
||||||
this.id = id;
|
|
||||||
this.bytes = bytes;
|
|
||||||
this.event = event;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setBytes(byte[] bytes) {
|
|
||||||
this.bytes = bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte[] getBytes() {
|
|
||||||
return bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
public WatchedEvent getWatchedEvent() {
|
|
||||||
return event;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -318,7 +318,7 @@ public class DistributedQueue {
|
||||||
}
|
}
|
||||||
return orderedChildren;
|
return orderedChildren;
|
||||||
} catch (KeeperException.NoNodeException e) {
|
} catch (KeeperException.NoNodeException e) {
|
||||||
zookeeper.create(dir, new byte[0], CreateMode.PERSISTENT, true);
|
zookeeper.makePath(dir, false, true);
|
||||||
// go back to the loop and try again
|
// go back to the loop and try again
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -408,6 +408,10 @@ public class DistributedQueue {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void process(WatchedEvent event) {
|
public void process(WatchedEvent event) {
|
||||||
|
// session events are not change events, and do not remove the watcher; except for Expired
|
||||||
|
if (Event.EventType.None.equals(event.getType()) && !Event.KeeperState.Expired.equals(event.getState())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
updateLock.lock();
|
updateLock.lock();
|
||||||
try {
|
try {
|
||||||
// this watcher is automatically cleared when fired
|
// this watcher is automatically cleared when fired
|
||||||
|
|
|
@ -912,19 +912,19 @@ public class Overseer implements Closeable {
|
||||||
/* Internal map for failed tasks, not to be used outside of the Overseer */
|
/* Internal map for failed tasks, not to be used outside of the Overseer */
|
||||||
static DistributedMap getRunningMap(final SolrZkClient zkClient) {
|
static DistributedMap getRunningMap(final SolrZkClient zkClient) {
|
||||||
createOverseerNode(zkClient);
|
createOverseerNode(zkClient);
|
||||||
return new DistributedMap(zkClient, "/overseer/collection-map-running", null);
|
return new DistributedMap(zkClient, "/overseer/collection-map-running");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Size-limited map for successfully completed tasks*/
|
/* Size-limited map for successfully completed tasks*/
|
||||||
static DistributedMap getCompletedMap(final SolrZkClient zkClient) {
|
static DistributedMap getCompletedMap(final SolrZkClient zkClient) {
|
||||||
createOverseerNode(zkClient);
|
createOverseerNode(zkClient);
|
||||||
return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-completed", null, NUM_RESPONSES_TO_STORE);
|
return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-completed", NUM_RESPONSES_TO_STORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Map for failed tasks, not to be used outside of the Overseer */
|
/* Map for failed tasks, not to be used outside of the Overseer */
|
||||||
static DistributedMap getFailureMap(final SolrZkClient zkClient) {
|
static DistributedMap getFailureMap(final SolrZkClient zkClient) {
|
||||||
createOverseerNode(zkClient);
|
createOverseerNode(zkClient);
|
||||||
return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-failure", null, NUM_RESPONSES_TO_STORE);
|
return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-failure", NUM_RESPONSES_TO_STORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Collection creation queue */
|
/* Collection creation queue */
|
||||||
|
|
|
@ -33,14 +33,14 @@ public class SizeLimitedDistributedMap extends DistributedMap {
|
||||||
|
|
||||||
private final int maxSize;
|
private final int maxSize;
|
||||||
|
|
||||||
public SizeLimitedDistributedMap(SolrZkClient zookeeper, String dir, List<ACL> acl, int maxSize) {
|
public SizeLimitedDistributedMap(SolrZkClient zookeeper, String dir, int maxSize) {
|
||||||
super(zookeeper, dir, acl);
|
super(zookeeper, dir);
|
||||||
this.maxSize = maxSize;
|
this.maxSize = maxSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
|
public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
|
||||||
if(this.size() >= maxSize) {
|
if (this.size() >= maxSize) {
|
||||||
// Bring down the size
|
// Bring down the size
|
||||||
List<String> children = zookeeper.getChildren(dir, null, true);
|
List<String> children = zookeeper.getChildren(dir, null, true);
|
||||||
|
|
||||||
|
@ -53,21 +53,20 @@ public class SizeLimitedDistributedMap extends DistributedMap {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
for(String child: children) {
|
for (String child : children) {
|
||||||
Stat stat = zookeeper.exists(dir + "/" + child, null, true);
|
Stat stat = zookeeper.exists(dir + "/" + child, null, true);
|
||||||
priorityQueue.insertWithOverflow(stat.getMzxid());
|
priorityQueue.insertWithOverflow(stat.getMzxid());
|
||||||
}
|
}
|
||||||
|
|
||||||
long topElementMzxId = (Long) priorityQueue.top();
|
long topElementMzxId = (Long) priorityQueue.top();
|
||||||
|
|
||||||
for(String child:children) {
|
for (String child : children) {
|
||||||
Stat stat = zookeeper.exists(dir + "/" + child, null, true);
|
Stat stat = zookeeper.exists(dir + "/" + child, null, true);
|
||||||
if(stat.getMzxid() <= topElementMzxId)
|
if (stat.getMzxid() <= topElementMzxId)
|
||||||
zookeeper.delete(dir + "/" + child, -1, true);
|
zookeeper.delete(dir + "/" + child, -1, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return createData(dir + "/" + prefix + trackingId, data,
|
super.put(trackingId, data);
|
||||||
CreateMode.PERSISTENT) != null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -564,12 +564,12 @@ public class CollectionsHandler extends RequestHandlerBase {
|
||||||
|
|
||||||
final NamedList<Object> results = new NamedList<>();
|
final NamedList<Object> results = new NamedList<>();
|
||||||
if (zkController.getOverseerCompletedMap().contains(requestId)) {
|
if (zkController.getOverseerCompletedMap().contains(requestId)) {
|
||||||
final DistributedMap.MapEvent mapEvent = zkController.getOverseerCompletedMap().get(requestId);
|
final byte[] mapEntry = zkController.getOverseerCompletedMap().get(requestId);
|
||||||
rsp.getValues().addAll(SolrResponse.deserialize(mapEvent.getBytes()).getResponse());
|
rsp.getValues().addAll(SolrResponse.deserialize(mapEntry).getResponse());
|
||||||
addStatusToResponse(results, COMPLETED, "found [" + requestId + "] in completed tasks");
|
addStatusToResponse(results, COMPLETED, "found [" + requestId + "] in completed tasks");
|
||||||
} else if (zkController.getOverseerFailureMap().contains(requestId)) {
|
} else if (zkController.getOverseerFailureMap().contains(requestId)) {
|
||||||
final DistributedMap.MapEvent mapEvent = zkController.getOverseerFailureMap().get(requestId);
|
final byte[] mapEntry = zkController.getOverseerFailureMap().get(requestId);
|
||||||
rsp.getValues().addAll(SolrResponse.deserialize(mapEvent.getBytes()).getResponse());
|
rsp.getValues().addAll(SolrResponse.deserialize(mapEntry).getResponse());
|
||||||
addStatusToResponse(results, FAILED, "found [" + requestId + "] in failed tasks");
|
addStatusToResponse(results, FAILED, "found [" + requestId + "] in failed tasks");
|
||||||
} else if (zkController.getOverseerRunningMap().contains(requestId)) {
|
} else if (zkController.getOverseerRunningMap().contains(requestId)) {
|
||||||
addStatusToResponse(results, RUNNING, "found [" + requestId + "] in running tasks");
|
addStatusToResponse(results, RUNNING, "found [" + requestId + "] in running tasks");
|
||||||
|
|
Loading…
Reference in New Issue