mirror of https://github.com/apache/lucene.git
Require Thread Names in Solr (#2264)
When we are creating a new thread we should give it a descriptive name and enforce this via ForbiddenAPIs. This doesn't apply to Runnable or Callable objects that we pass to an executor, since those should be getting named by the executor itself. We don't require this in tests because the tests should be more self contained and there is less benefit in descriptive names. If somebody is already profiling a test, then they likely have the context to understand what the unnamed threads are doing, whereas a thread dump from a running Solr instance should have good thread names for everything. This is especially helpful when doing profiling, otherwise we end up with a bunch of Thread-# that are hard to tell apart and search on.
This commit is contained in:
parent
9be71b3939
commit
b335034615
|
@ -117,6 +117,7 @@ allprojects { prj ->
|
|||
if (prj.path.startsWith(":solr")) {
|
||||
forbiddenApisMain {
|
||||
doFirst dynamicSignatures.curry(configurations.compileClasspath, "solr")
|
||||
signaturesFiles += files(file("${resources}/java.solr.txt"))
|
||||
}
|
||||
|
||||
forbiddenApisTest {
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
@defaultMessage Creates threads without a thread name
|
||||
java.lang.Thread#<init>()
|
||||
java.lang.Thread#<init>(java.lang.Runnable)
|
||||
java.lang.Thread#<init>(java.lang.ThreadGroup,java.lang.Runnable)
|
||||
|
|
@ -563,15 +563,10 @@ public class Overseer implements SolrCloseable {
|
|||
public static class OverseerThread extends Thread implements Closeable {
|
||||
|
||||
protected volatile boolean isClosed;
|
||||
private Closeable thread;
|
||||
private final Closeable thread;
|
||||
|
||||
public OverseerThread(ThreadGroup tg, Closeable thread) {
|
||||
super(tg, (Runnable) thread);
|
||||
this.thread = thread;
|
||||
}
|
||||
|
||||
public OverseerThread(ThreadGroup ccTg, Closeable thread, String name) {
|
||||
super(ccTg, (Runnable) thread, name);
|
||||
public <T extends Runnable & Closeable> OverseerThread(ThreadGroup ccTg, T thread, String name) {
|
||||
super(ccTg, thread, name);
|
||||
this.thread = thread;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.solr.common.cloud.ZkNodeProps;
|
|||
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.CoreDescriptor;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
@ -297,37 +298,32 @@ public class SyncStrategy {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SuppressForbidden(reason = "Passed to an executor with a naming thread factory")
|
||||
private void requestRecovery(final ZkNodeProps leaderProps, final String baseUrl, final String coreName) throws SolrServerException, IOException {
|
||||
Thread thread = new Thread() {
|
||||
{
|
||||
setDaemon(true);
|
||||
Thread thread = new Thread(() -> {
|
||||
if (isClosed) {
|
||||
log.info("We have been closed, won't request recovery");
|
||||
return;
|
||||
}
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
if (isClosed) {
|
||||
log.info("We have been closed, won't request recovery");
|
||||
return;
|
||||
}
|
||||
RequestRecovery recoverRequestCmd = new RequestRecovery();
|
||||
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
|
||||
recoverRequestCmd.setCoreName(coreName);
|
||||
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(baseUrl)
|
||||
.withHttpClient(SyncStrategy.this.client)
|
||||
.withConnectionTimeout(30000)
|
||||
.withSocketTimeout(120000)
|
||||
.build()) {
|
||||
client.request(recoverRequestCmd);
|
||||
} catch (Throwable t) {
|
||||
SolrException.log(log, ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Could not tell a replica to recover", t);
|
||||
if (t instanceof Error) {
|
||||
throw (Error) t;
|
||||
}
|
||||
RequestRecovery recoverRequestCmd = new RequestRecovery();
|
||||
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
|
||||
recoverRequestCmd.setCoreName(coreName);
|
||||
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(baseUrl)
|
||||
.withHttpClient(SyncStrategy.this.client)
|
||||
.withConnectionTimeout(30000)
|
||||
.withSocketTimeout(120000)
|
||||
.build()) {
|
||||
client.request(recoverRequestCmd);
|
||||
} catch (Throwable t) {
|
||||
SolrException.log(log, ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Could not tell a replica to recover", t);
|
||||
if (t instanceof Error) {
|
||||
throw (Error) t;
|
||||
}
|
||||
}
|
||||
};
|
||||
});
|
||||
thread.setDaemon(true);
|
||||
updateExecutor.execute(thread);
|
||||
}
|
||||
|
||||
|
|
|
@ -2542,7 +2542,7 @@ public class ZkController implements Closeable {
|
|||
log.warn("listener throws error", e);
|
||||
}
|
||||
}
|
||||
}).start();
|
||||
}, "ZKEventListenerThread").start();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,8 +94,7 @@ public class OverseerRoleCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
} catch (Exception e) {
|
||||
log.error("Error in prioritizing Overseer", e);
|
||||
}
|
||||
|
||||
}).start();
|
||||
}, "OverseerPrioritizationThread").start();
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -2239,6 +2239,7 @@ class CloserThread extends Thread {
|
|||
|
||||
|
||||
CloserThread(CoreContainer container, SolrCores solrCores, NodeConfig cfg) {
|
||||
super("CloserThread");
|
||||
this.container = container;
|
||||
this.solrCores = solrCores;
|
||||
this.cfg = cfg;
|
||||
|
|
|
@ -915,7 +915,7 @@ public class IndexFetcher {
|
|||
} finally {
|
||||
latch.countDown();
|
||||
}
|
||||
}).start();
|
||||
}, "CoreReload").start();
|
||||
try {
|
||||
latch.await();
|
||||
} catch (InterruptedException e) {
|
||||
|
|
|
@ -134,7 +134,7 @@ public class SnapShooter {
|
|||
}
|
||||
|
||||
protected void deleteSnapAsync(final ReplicationHandler replicationHandler) {
|
||||
new Thread(() -> deleteNamedSnapshot(replicationHandler)).start();
|
||||
new Thread(() -> deleteNamedSnapshot(replicationHandler), "DeleteNamedSnapshot").start();
|
||||
}
|
||||
|
||||
public void validateCreateSnapshot() throws IOException {
|
||||
|
@ -232,7 +232,7 @@ public class SnapShooter {
|
|||
}
|
||||
}
|
||||
if (null != snapShootDetails) result.accept(snapShootDetails);
|
||||
}).start();
|
||||
}, "CreateSnapshot").start();
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -293,7 +293,7 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V>, Accountable {
|
|||
int currentSize = stats.size.intValue();
|
||||
if ((currentSize > upperWaterMark || ramBytes.sum() > ramUpperWatermark || oldestEntryNs.get() < idleCutoff) && !isCleaning) {
|
||||
if (newThreadForCleanup) {
|
||||
new Thread(this::markAndSweep).start();
|
||||
new Thread(this::markAndSweep, "CacheCleanupThread").start();
|
||||
} else if (cleanupThread != null){
|
||||
cleanupThread.wakeThread();
|
||||
} else {
|
||||
|
@ -861,20 +861,19 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V>, Accountable {
|
|||
}
|
||||
|
||||
private static class CleanupThread extends Thread {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
private WeakReference<ConcurrentLRUCache> cache;
|
||||
private final WeakReference<ConcurrentLRUCache<?,?>> cache;
|
||||
|
||||
private boolean stop = false;
|
||||
|
||||
public CleanupThread(@SuppressWarnings({"rawtypes"})ConcurrentLRUCache c) {
|
||||
public CleanupThread(ConcurrentLRUCache<?,?> c) {
|
||||
super("CacheCleanupThread");
|
||||
cache = new WeakReference<>(c);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
while (true) {
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
ConcurrentLRUCache c = cache.get();
|
||||
ConcurrentLRUCache<?,?> c = cache.get();
|
||||
if(c == null) break;
|
||||
synchronized (this) {
|
||||
if (stop) break;
|
||||
|
|
|
@ -54,7 +54,7 @@ public class SolrZooKeeper extends ZooKeeper {
|
|||
}
|
||||
|
||||
public void closeCnxn() {
|
||||
final Thread t = new Thread() {
|
||||
final Thread t = new Thread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
|
@ -64,7 +64,7 @@ public class SolrZooKeeper extends ZooKeeper {
|
|||
}
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "Hack for Zookeper needs access to private methods.")
|
||||
@SuppressForbidden(reason = "Hack for Zookeeper needs access to private methods.")
|
||||
private Void closeZookeeperChannel() {
|
||||
final ClientCnxn cnxn = getConnection();
|
||||
synchronized (cnxn) {
|
||||
|
@ -87,7 +87,7 @@ public class SolrZooKeeper extends ZooKeeper {
|
|||
}
|
||||
return null; // Void
|
||||
}
|
||||
};
|
||||
}, "closeCnxn");
|
||||
spawnedThreads.add(t);
|
||||
t.start();
|
||||
}
|
||||
|
|
|
@ -682,23 +682,20 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
|
|||
log.info("starting stress...");
|
||||
Thread[] threads = new Thread[nThreads];
|
||||
for (int i = 0; i < threads.length; i++) {
|
||||
threads[i] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
for (int j = 0; j < stress; j++) {
|
||||
int which = r.nextInt(clients.size());
|
||||
SolrClient client = clients.get(which);
|
||||
try {
|
||||
QueryResponse rsp = client.query(new ModifiableSolrParams(params));
|
||||
if (verifyStress) {
|
||||
compareResponses(rsp, controlRsp);
|
||||
}
|
||||
} catch (SolrServerException | IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
threads[i] = new Thread(() -> {
|
||||
for (int j = 0; j < stress; j++) {
|
||||
int which = r.nextInt(clients.size());
|
||||
SolrClient client = clients.get(which);
|
||||
try {
|
||||
QueryResponse rsp1 = client.query(new ModifiableSolrParams(params));
|
||||
if (verifyStress) {
|
||||
compareResponses(rsp1, controlRsp);
|
||||
}
|
||||
} catch (SolrServerException | IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}, "StressRunner");
|
||||
threads[i].start();
|
||||
}
|
||||
|
||||
|
|
|
@ -192,15 +192,13 @@ public class ChaosMonkey {
|
|||
List<CloudJettyRunner> jetties = shardToJetty.get(key);
|
||||
for (CloudJettyRunner jetty : jetties) {
|
||||
Thread.sleep(pauseBetweenMs);
|
||||
Thread thread = new Thread() {
|
||||
public void run() {
|
||||
try {
|
||||
stopJetty(jetty);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
Thread thread = new Thread(() -> {
|
||||
try {
|
||||
stopJetty(jetty);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
}, "ChaosMonkey");
|
||||
jettyThreads.add(thread);
|
||||
thread.start();
|
||||
|
||||
|
@ -490,30 +488,26 @@ public class ChaosMonkey {
|
|||
// TODO: when kill leaders is on, lets kill a higher percentage of leaders
|
||||
|
||||
stop = false;
|
||||
monkeyThread = new Thread() {
|
||||
monkeyThread = new Thread(() -> {
|
||||
while (!stop) {
|
||||
try {
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
while (!stop) {
|
||||
try {
|
||||
|
||||
Thread.sleep(chaosRandom.nextInt(roundPauseUpperLimit));
|
||||
Thread.sleep(chaosRandom.nextInt(roundPauseUpperLimit));
|
||||
|
||||
causeSomeChaos();
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
//
|
||||
} catch (Exception e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
causeSomeChaos();
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
//
|
||||
} catch (Exception e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
monkeyLog("finished");
|
||||
monkeyLog("I ran for " + runTimer.getTime() / 1000 + "s. I stopped " + stops + " and I started " + starts
|
||||
+ ". I also expired " + expires.get() + " and caused " + connloss
|
||||
+ " connection losses");
|
||||
}
|
||||
};
|
||||
monkeyLog("finished");
|
||||
monkeyLog("I ran for " + runTimer.getTime() / 1000 + "s. I stopped " + stops + " and I started " + starts
|
||||
+ ". I also expired " + expires.get() + " and caused " + connloss
|
||||
+ " connection losses");
|
||||
}, "ChaosMonkey");
|
||||
monkeyThread.start();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue