Require Thread Names in Solr (#2264)

When we are creating a new thread we should give it a descriptive name and enforce this via ForbiddenAPIs. This doesn't apply to Runnable or Callable objects that we pass to an executor, since those should be getting named by the executor itself.

We don't require this in tests because the tests should be more self contained and there is less benefit in descriptive names. If somebody is already profiling a test, then they likely have the context to understand what the unnamed threads are doing, whereas a thread dump from a running Solr instance should have good thread names for everything. This is especially helpful when doing profiling, otherwise we end up with a bunch of Thread-# that are hard to tell apart and search on.
This commit is contained in:
Mike Drob 2021-01-28 15:04:15 -06:00 committed by GitHub
parent 9be71b3939
commit b335034615
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 94 additions and 92 deletions

View File

@ -117,6 +117,7 @@ allprojects { prj ->
if (prj.path.startsWith(":solr")) {
forbiddenApisMain {
doFirst dynamicSignatures.curry(configurations.compileClasspath, "solr")
signaturesFiles += files(file("${resources}/java.solr.txt"))
}
forbiddenApisTest {

View File

@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@defaultMessage Creates threads without a thread name
java.lang.Thread#<init>()
java.lang.Thread#<init>(java.lang.Runnable)
java.lang.Thread#<init>(java.lang.ThreadGroup,java.lang.Runnable)

View File

@ -563,15 +563,10 @@ public class Overseer implements SolrCloseable {
public static class OverseerThread extends Thread implements Closeable {
protected volatile boolean isClosed;
private Closeable thread;
private final Closeable thread;
public OverseerThread(ThreadGroup tg, Closeable thread) {
super(tg, (Runnable) thread);
this.thread = thread;
}
public OverseerThread(ThreadGroup ccTg, Closeable thread, String name) {
super(ccTg, (Runnable) thread, name);
public <T extends Runnable & Closeable> OverseerThread(ThreadGroup ccTg, T thread, String name) {
super(ccTg, thread, name);
this.thread = thread;
}

View File

@ -32,6 +32,7 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
@ -297,37 +298,32 @@ public class SyncStrategy {
}
}
}
@SuppressForbidden(reason = "Passed to an executor with a naming thread factory")
private void requestRecovery(final ZkNodeProps leaderProps, final String baseUrl, final String coreName) throws SolrServerException, IOException {
Thread thread = new Thread() {
{
setDaemon(true);
Thread thread = new Thread(() -> {
if (isClosed) {
log.info("We have been closed, won't request recovery");
return;
}
@Override
public void run() {
if (isClosed) {
log.info("We have been closed, won't request recovery");
return;
}
RequestRecovery recoverRequestCmd = new RequestRecovery();
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
recoverRequestCmd.setCoreName(coreName);
try (HttpSolrClient client = new HttpSolrClient.Builder(baseUrl)
.withHttpClient(SyncStrategy.this.client)
.withConnectionTimeout(30000)
.withSocketTimeout(120000)
.build()) {
client.request(recoverRequestCmd);
} catch (Throwable t) {
SolrException.log(log, ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Could not tell a replica to recover", t);
if (t instanceof Error) {
throw (Error) t;
}
RequestRecovery recoverRequestCmd = new RequestRecovery();
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
recoverRequestCmd.setCoreName(coreName);
try (HttpSolrClient client = new HttpSolrClient.Builder(baseUrl)
.withHttpClient(SyncStrategy.this.client)
.withConnectionTimeout(30000)
.withSocketTimeout(120000)
.build()) {
client.request(recoverRequestCmd);
} catch (Throwable t) {
SolrException.log(log, ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Could not tell a replica to recover", t);
if (t instanceof Error) {
throw (Error) t;
}
}
};
});
thread.setDaemon(true);
updateExecutor.execute(thread);
}

View File

@ -2542,7 +2542,7 @@ public class ZkController implements Closeable {
log.warn("listener throws error", e);
}
}
}).start();
}, "ZKEventListenerThread").start();
}
}

View File

@ -94,8 +94,7 @@ public class OverseerRoleCmd implements OverseerCollectionMessageHandler.Cmd {
} catch (Exception e) {
log.error("Error in prioritizing Overseer", e);
}
}).start();
}, "OverseerPrioritizationThread").start();
}

View File

@ -2239,6 +2239,7 @@ class CloserThread extends Thread {
CloserThread(CoreContainer container, SolrCores solrCores, NodeConfig cfg) {
super("CloserThread");
this.container = container;
this.solrCores = solrCores;
this.cfg = cfg;

View File

@ -915,7 +915,7 @@ public class IndexFetcher {
} finally {
latch.countDown();
}
}).start();
}, "CoreReload").start();
try {
latch.await();
} catch (InterruptedException e) {

View File

@ -134,7 +134,7 @@ public class SnapShooter {
}
protected void deleteSnapAsync(final ReplicationHandler replicationHandler) {
new Thread(() -> deleteNamedSnapshot(replicationHandler)).start();
new Thread(() -> deleteNamedSnapshot(replicationHandler), "DeleteNamedSnapshot").start();
}
public void validateCreateSnapshot() throws IOException {
@ -232,7 +232,7 @@ public class SnapShooter {
}
}
if (null != snapShootDetails) result.accept(snapShootDetails);
}).start();
}, "CreateSnapshot").start();
}

View File

@ -293,7 +293,7 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V>, Accountable {
int currentSize = stats.size.intValue();
if ((currentSize > upperWaterMark || ramBytes.sum() > ramUpperWatermark || oldestEntryNs.get() < idleCutoff) && !isCleaning) {
if (newThreadForCleanup) {
new Thread(this::markAndSweep).start();
new Thread(this::markAndSweep, "CacheCleanupThread").start();
} else if (cleanupThread != null){
cleanupThread.wakeThread();
} else {
@ -861,20 +861,19 @@ public class ConcurrentLRUCache<K,V> implements Cache<K,V>, Accountable {
}
private static class CleanupThread extends Thread {
@SuppressWarnings({"rawtypes"})
private WeakReference<ConcurrentLRUCache> cache;
private final WeakReference<ConcurrentLRUCache<?,?>> cache;
private boolean stop = false;
public CleanupThread(@SuppressWarnings({"rawtypes"})ConcurrentLRUCache c) {
public CleanupThread(ConcurrentLRUCache<?,?> c) {
super("CacheCleanupThread");
cache = new WeakReference<>(c);
}
@Override
public void run() {
while (true) {
@SuppressWarnings({"rawtypes"})
ConcurrentLRUCache c = cache.get();
ConcurrentLRUCache<?,?> c = cache.get();
if(c == null) break;
synchronized (this) {
if (stop) break;

View File

@ -54,7 +54,7 @@ public class SolrZooKeeper extends ZooKeeper {
}
public void closeCnxn() {
final Thread t = new Thread() {
final Thread t = new Thread(new Runnable() {
@Override
public void run() {
try {
@ -64,7 +64,7 @@ public class SolrZooKeeper extends ZooKeeper {
}
}
@SuppressForbidden(reason = "Hack for Zookeper needs access to private methods.")
@SuppressForbidden(reason = "Hack for Zookeeper needs access to private methods.")
private Void closeZookeeperChannel() {
final ClientCnxn cnxn = getConnection();
synchronized (cnxn) {
@ -87,7 +87,7 @@ public class SolrZooKeeper extends ZooKeeper {
}
return null; // Void
}
};
}, "closeCnxn");
spawnedThreads.add(t);
t.start();
}

View File

@ -682,23 +682,20 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
log.info("starting stress...");
Thread[] threads = new Thread[nThreads];
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
for (int j = 0; j < stress; j++) {
int which = r.nextInt(clients.size());
SolrClient client = clients.get(which);
try {
QueryResponse rsp = client.query(new ModifiableSolrParams(params));
if (verifyStress) {
compareResponses(rsp, controlRsp);
}
} catch (SolrServerException | IOException e) {
throw new RuntimeException(e);
threads[i] = new Thread(() -> {
for (int j = 0; j < stress; j++) {
int which = r.nextInt(clients.size());
SolrClient client = clients.get(which);
try {
QueryResponse rsp1 = client.query(new ModifiableSolrParams(params));
if (verifyStress) {
compareResponses(rsp1, controlRsp);
}
} catch (SolrServerException | IOException e) {
throw new RuntimeException(e);
}
}
};
}, "StressRunner");
threads[i].start();
}

View File

@ -192,15 +192,13 @@ public class ChaosMonkey {
List<CloudJettyRunner> jetties = shardToJetty.get(key);
for (CloudJettyRunner jetty : jetties) {
Thread.sleep(pauseBetweenMs);
Thread thread = new Thread() {
public void run() {
try {
stopJetty(jetty);
} catch (Exception e) {
throw new RuntimeException(e);
}
Thread thread = new Thread(() -> {
try {
stopJetty(jetty);
} catch (Exception e) {
throw new RuntimeException(e);
}
};
}, "ChaosMonkey");
jettyThreads.add(thread);
thread.start();
@ -490,30 +488,26 @@ public class ChaosMonkey {
// TODO: when kill leaders is on, lets kill a higher percentage of leaders
stop = false;
monkeyThread = new Thread() {
monkeyThread = new Thread(() -> {
while (!stop) {
try {
@Override
public void run() {
while (!stop) {
try {
Thread.sleep(chaosRandom.nextInt(roundPauseUpperLimit));
Thread.sleep(chaosRandom.nextInt(roundPauseUpperLimit));
causeSomeChaos();
} catch (InterruptedException e) {
//
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
causeSomeChaos();
} catch (InterruptedException e) {
//
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
monkeyLog("finished");
monkeyLog("I ran for " + runTimer.getTime() / 1000 + "s. I stopped " + stops + " and I started " + starts
+ ". I also expired " + expires.get() + " and caused " + connloss
+ " connection losses");
}
};
monkeyLog("finished");
monkeyLog("I ran for " + runTimer.getTime() / 1000 + "s. I stopped " + stops + " and I started " + starts
+ ". I also expired " + expires.get() + " and caused " + connloss
+ " connection losses");
}, "ChaosMonkey");
monkeyThread.start();
}