From 1aeefc25d1ae5e1a56cce333f57c6f044da372de Mon Sep 17 00:00:00 2001 From: David Smiley Date: Fri, 20 Mar 2020 16:47:42 -0400 Subject: [PATCH] SOLR-14351: Fix/improve MDCLoggingContext usage * Some set/clear were not balanced. * Harden clear() in case of imbalance. * Sometimes coreContainger.getCore was called unnecessarily; just need a descriptor * SolrCore.open/close now calls MDCLoggerContext.setCore/clear * no need to clear MDC in HttpSolrCall --- solr/CHANGES.txt | 2 + .../java/org/apache/solr/api/V2HttpCall.java | 20 ++--- .../apache/solr/cloud/RecoveryStrategy.java | 4 - .../org/apache/solr/cloud/SyncStrategy.java | 34 ++++---- .../org/apache/solr/cloud/ZkController.java | 55 ++++++------- .../java/org/apache/solr/core/SolrCore.java | 3 +- .../org/apache/solr/core/ZkContainer.java | 16 ++-- .../solr/logging/MDCLoggingContext.java | 78 ++++++++++--------- .../solr/metrics/SolrMetricManager.java | 5 +- .../org/apache/solr/pkg/PackageListeners.java | 5 +- .../org/apache/solr/servlet/HttpSolrCall.java | 3 - .../java/org/apache/solr/update/PeerSync.java | 3 - .../solr/update/PeerSyncWithLeader.java | 3 - 13 files changed, 101 insertions(+), 130 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 1ae12570269..58c18fd68e8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -130,6 +130,8 @@ Other Changes * SOLR-14367: Upgrade Tika to 1.24 (Mibo via Erick Erickson) +* SOLR-14351: Hardened MDC logging; sometimes info was absent (David Smiley) + ================== 8.5.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/core/src/java/org/apache/solr/api/V2HttpCall.java b/solr/core/src/java/org/apache/solr/api/V2HttpCall.java index 64ef8398a63..5dd89a3baeb 100644 --- a/solr/core/src/java/org/apache/solr/api/V2HttpCall.java +++ b/solr/core/src/java/org/apache/solr/api/V2HttpCall.java @@ -17,12 +17,8 @@ package org.apache.solr.api; -import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; -import static org.apache.solr.common.util.PathTrie.getPathSegments; -import static org.apache.solr.servlet.SolrDispatchFilter.Action.ADMIN; -import static org.apache.solr.servlet.SolrDispatchFilter.Action.PROCESS; -import static org.apache.solr.servlet.SolrDispatchFilter.Action.REMOTEQUERY; - +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; import java.lang.invoke.MethodHandles; import java.util.HashMap; import java.util.HashSet; @@ -33,9 +29,7 @@ import java.util.Map; import java.util.Set; import java.util.function.Supplier; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - +import com.google.common.collect.ImmutableSet; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.common.SolrException; import org.apache.solr.common.annotation.SolrThreadSafe; @@ -49,7 +43,6 @@ import org.apache.solr.core.CoreContainer; import org.apache.solr.core.PluginBag; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerUtils; -import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestHandler; import org.apache.solr.response.SolrQueryResponse; @@ -60,7 +53,11 @@ import org.apache.solr.servlet.SolrRequestParsers; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableSet; +import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; +import static org.apache.solr.common.util.PathTrie.getPathSegments; +import static org.apache.solr.servlet.SolrDispatchFilter.Action.ADMIN; +import static org.apache.solr.servlet.SolrDispatchFilter.Action.PROCESS; +import static org.apache.solr.servlet.SolrDispatchFilter.Action.REMOTEQUERY; // class that handle the '/v2' path @SolrThreadSafe @@ -151,7 +148,6 @@ public class V2HttpCall extends HttpSolrCall { } else { api = apiInfo == null ? api : apiInfo; } - MDCLoggingContext.setCore(core); parseRequest(); addCollectionParamIfNeeded(getCollectionsList()); diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java index 038494a0c54..8ce8886accf 100644 --- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java +++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java @@ -54,7 +54,6 @@ import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.ReplicationHandler; -import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestHandler; @@ -310,7 +309,6 @@ public class RecoveryStrategy implements Runnable, Closeable { SolrException.log(log, "SolrCore not found - cannot recover:" + coreName); return; } - MDCLoggingContext.setCore(core); log.info("Starting recovery process. recoveringAfterStartup=" + recoveringAfterStartup); @@ -324,8 +322,6 @@ public class RecoveryStrategy implements Runnable, Closeable { log.error("", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } - } finally { - MDCLoggingContext.clear(); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java index 2391414c359..cf840d91675 100644 --- a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java +++ b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java @@ -39,7 +39,6 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; -import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.update.PeerSync; import org.apache.solr.update.UpdateShardHandler; import org.slf4j.Logger; @@ -89,27 +88,22 @@ public class SyncStrategy { if (SKIP_AUTO_RECOVERY) { return PeerSync.PeerSyncResult.success(); } - - MDCLoggingContext.setCore(core); - try { - if (isClosed) { - log.warn("Closed, skipping sync up."); - return PeerSync.PeerSyncResult.failure(); - } - - recoveryRequests.clear(); - - log.info("Sync replicas to " + ZkCoreNodeProps.getCoreUrl(leaderProps)); - - if (core.getUpdateHandler().getUpdateLog() == null) { - log.error("No UpdateLog found - cannot sync"); - return PeerSync.PeerSyncResult.failure(); - } - return syncReplicas(zkController, core, leaderProps, peerSyncOnlyWithActive); - } finally { - MDCLoggingContext.clear(); + if (isClosed) { + log.warn("Closed, skipping sync up."); + return PeerSync.PeerSyncResult.failure(); } + + recoveryRequests.clear(); + + log.info("Sync replicas to " + ZkCoreNodeProps.getCoreUrl(leaderProps)); + + if (core.getUpdateHandler().getUpdateLog() == null) { + log.error("No UpdateLog found - cannot sync"); + return PeerSync.PeerSyncResult.failure(); + } + + return syncReplicas(zkController, core, leaderProps, peerSyncOnlyWithActive); } private PeerSync.PeerSyncResult syncReplicas(ZkController zkController, SolrCore core, diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 843236a9163..dc4c156ab1c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1172,9 +1172,7 @@ public class ZkController implements Closeable { */ public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration, boolean skipRecovery) throws Exception { - try (SolrCore core = cc.getCore(desc.getName())) { - MDCLoggingContext.setCore(core); - } + MDCLoggingContext.setCoreDescriptor(cc, desc); try { // pre register has published our down state final String baseUrl = getBaseUrl(); @@ -1528,11 +1526,9 @@ public class ZkController implements Closeable { if (core == null || core.isClosed()) { return; } - MDCLoggingContext.setCore(core); } - } else { - MDCLoggingContext.setCoreDescriptor(cc, cd); } + MDCLoggingContext.setCoreDescriptor(cc, cd); try { String collection = cd.getCloudDescriptor().getCollectionName(); @@ -2188,41 +2184,40 @@ public class ZkController implements Closeable { } public void rejoinShardLeaderElection(SolrParams params) { + + String collectionName = params.get(COLLECTION_PROP); + String shardId = params.get(SHARD_ID_PROP); + String coreNodeName = params.get(CORE_NODE_NAME_PROP); + String coreName = params.get(CORE_NAME_PROP); + String electionNode = params.get(ELECTION_NODE_PROP); + String baseUrl = params.get(BASE_URL_PROP); + try { + MDCLoggingContext.setCoreDescriptor(cc, cc.getCoreDescriptor(coreName)); - String collectionName = params.get(COLLECTION_PROP); - String shardId = params.get(SHARD_ID_PROP); - String coreNodeName = params.get(CORE_NODE_NAME_PROP); - String coreName = params.get(CORE_NAME_PROP); - String electionNode = params.get(ELECTION_NODE_PROP); - String baseUrl = params.get(BASE_URL_PROP); + log.info("Rejoin the shard leader election."); - try (SolrCore core = cc.getCore(coreName)) { - MDCLoggingContext.setCore(core); + ContextKey contextKey = new ContextKey(collectionName, coreNodeName); - log.info("Rejoin the shard leader election."); + ElectionContext prevContext = electionContexts.get(contextKey); + if (prevContext != null) prevContext.cancelElection(); - ContextKey contextKey = new ContextKey(collectionName, coreNodeName); + ZkNodeProps zkProps = new ZkNodeProps(BASE_URL_PROP, baseUrl, CORE_NAME_PROP, coreName, NODE_NAME_PROP, getNodeName(), CORE_NODE_NAME_PROP, coreNodeName); - ElectionContext prevContext = electionContexts.get(contextKey); - if (prevContext != null) prevContext.cancelElection(); + LeaderElector elect = ((ShardLeaderElectionContextBase) prevContext).getLeaderElector(); + ShardLeaderElectionContext context = new ShardLeaderElectionContext(elect, shardId, collectionName, + coreNodeName, zkProps, this, getCoreContainer()); - ZkNodeProps zkProps = new ZkNodeProps(BASE_URL_PROP, baseUrl, CORE_NAME_PROP, coreName, NODE_NAME_PROP, getNodeName(), CORE_NODE_NAME_PROP, coreNodeName); + context.leaderSeqPath = context.electionPath + LeaderElector.ELECTION_NODE + "/" + electionNode; + elect.setup(context); + electionContexts.put(contextKey, context); - LeaderElector elect = ((ShardLeaderElectionContextBase) prevContext).getLeaderElector(); - ShardLeaderElectionContext context = new ShardLeaderElectionContext(elect, shardId, collectionName, - coreNodeName, zkProps, this, getCoreContainer()); - - context.leaderSeqPath = context.electionPath + LeaderElector.ELECTION_NODE + "/" + electionNode; - elect.setup(context); - electionContexts.put(contextKey, context); - - elect.retryElection(context, params.getBool(REJOIN_AT_HEAD_PROP, false)); - } + elect.retryElection(context, params.getBool(REJOIN_AT_HEAD_PROP, false)); } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to rejoin election", e); + } finally { + MDCLoggingContext.clear(); } - } public void checkOverseerDesignate() { diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 3578013f23b..330bdb6e655 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -930,7 +930,6 @@ public final class SolrCore implements SolrInfoBean, Closeable { coreContainer.solrCores.addCoreDescriptor(cd); setName(name); - MDCLoggingContext.setCore(this); this.solrConfig = configSet.getSolrConfig(); this.resourceLoader = configSet.getSolrConfig().getResourceLoader(); @@ -1499,6 +1498,7 @@ public final class SolrCore implements SolrInfoBean, Closeable { */ public void open() { refCount.incrementAndGet(); + MDCLoggingContext.setCore(this); } /** @@ -1528,6 +1528,7 @@ public final class SolrCore implements SolrInfoBean, Closeable { */ @Override public void close() { + MDCLoggingContext.clear(); // balance out open with close int count = refCount.decrementAndGet(); if (count > 0) return; // close is called often, and only actually closes if nothing is using it. if (count < 0) { diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java b/solr/core/src/java/org/apache/solr/core/ZkContainer.java index e866869e1f8..d35baa42155 100644 --- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java +++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java @@ -207,18 +207,12 @@ public class ZkContainer { } }; - if (zkController != null) { - if (background) { - coreZkRegister.execute(r); - } else { - MDCLoggingContext.setCore(core); - try { - r.run(); - } finally { - MDCLoggingContext.clear(); - } - } + if (background) { + coreZkRegister.execute(r); + } else { + r.run(); } + } public ZkController getZkController() { diff --git a/solr/core/src/java/org/apache/solr/logging/MDCLoggingContext.java b/solr/core/src/java/org/apache/solr/logging/MDCLoggingContext.java index 53f751d0e90..492735be6e7 100644 --- a/solr/core/src/java/org/apache/solr/logging/MDCLoggingContext.java +++ b/solr/core/src/java/org/apache/solr/logging/MDCLoggingContext.java @@ -16,14 +16,6 @@ */ package org.apache.solr.logging; -import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; -import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; -import static org.apache.solr.common.cloud.ZkStateReader.NODE_NAME_PROP; -import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP; -import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; - -import java.util.function.Supplier; - import org.apache.solr.cloud.CloudDescriptor; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.StringUtils; @@ -32,6 +24,12 @@ import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.SolrCore; import org.slf4j.MDC; +import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.NODE_NAME_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; + /** * Set's per thread context info for logging. Nested calls will use the top level parent for all context. The first * caller always owns the context until it calls {@link #clear()}. Always call {@link #setCore(SolrCore)} or @@ -40,13 +38,8 @@ import org.slf4j.MDC; public class MDCLoggingContext { public static final String TRACE_ID = "trace_id"; // When a thread sets context and finds that the context is already set, we should noop and ignore the finally clear - private static ThreadLocal CALL_DEPTH = ThreadLocal.withInitial(new Supplier() { - @Override - public Integer get() { - return 0; - } - }); - + private static ThreadLocal CALL_DEPTH = ThreadLocal.withInitial(() -> 0); + public static void setCollection(String collection) { if (collection != null) { MDC.put(COLLECTION_PROP, "c:" + collection); @@ -112,28 +105,34 @@ public class MDCLoggingContext { MDC.remove(NODE_NAME_PROP); } } - + + /** + * Sets multiple information from the params. + * REMEMBER TO CALL {@link #clear()} in a finally! + */ public static void setCore(SolrCore core) { - if (core != null) { - setCoreDescriptor(core.getCoreContainer(), core.getCoreDescriptor()); - } + CoreContainer coreContainer = core == null ? null : core.getCoreContainer(); + CoreDescriptor coreDescriptor = core == null ? null : core.getCoreDescriptor(); + setCoreDescriptor(coreContainer, coreDescriptor); } - + + /** + * Sets multiple information from the params. + * REMEMBER TO CALL {@link #clear()} in a finally! + */ public static void setCoreDescriptor(CoreContainer coreContainer, CoreDescriptor cd) { + setNode(coreContainer); + + int callDepth = CALL_DEPTH.get(); + CALL_DEPTH.set(callDepth + 1); + if (callDepth > 0) { + return; + } + if (cd != null) { - int callDepth = CALL_DEPTH.get(); - CALL_DEPTH.set(callDepth + 1); - if (callDepth > 0) { - return; - } - + + assert cd.getName() != null; setCoreName(cd.getName()); - if (coreContainer != null) { - ZkController zkController = coreContainer.getZkController(); - if (zkController != null) { - setNodeName(zkController.getNodeName()); - } - } CloudDescriptor ccd = cd.getCloudDescriptor(); if (ccd != null) { @@ -143,15 +142,21 @@ public class MDCLoggingContext { } } } - + + /** + * Call this after {@link #setCore(SolrCore)} or {@link #setCoreDescriptor(CoreContainer, CoreDescriptor)} in a + * finally. + */ public static void clear() { int used = CALL_DEPTH.get(); - CALL_DEPTH.set(used - 1); - if (used == 0) { + if (used <= 1) { + CALL_DEPTH.set(0); MDC.remove(COLLECTION_PROP); MDC.remove(CORE_NAME_PROP); MDC.remove(REPLICA_PROP); MDC.remove(SHARD_ID_PROP); + } else { + CALL_DEPTH.set(used - 1); } } @@ -163,7 +168,8 @@ public class MDCLoggingContext { MDC.remove(NODE_NAME_PROP); MDC.remove(TRACE_ID); } - + + /** Resets to a cleared state. Used in-between requests into Solr. */ public static void reset() { CALL_DEPTH.set(0); removeAll(); diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java index 1679a09d173..6da53f6ca26 100644 --- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java +++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java @@ -967,10 +967,7 @@ public class SolrMetricManager { new Object[]{this, registry} ); // prepare MDC for plugins that want to use its properties - MDCLoggingContext.setNode(coreContainer); - if (solrCore != null) { - MDCLoggingContext.setCore(solrCore); - } + MDCLoggingContext.setCoreDescriptor(coreContainer, solrCore == null ? null : solrCore.getCoreDescriptor()); if (tag != null) { // add instance tag to MDC MDC.put("tag", "t:" + tag); diff --git a/solr/core/src/java/org/apache/solr/pkg/PackageListeners.java b/solr/core/src/java/org/apache/solr/pkg/PackageListeners.java index 0287f5e434d..a4835146a1e 100644 --- a/solr/core/src/java/org/apache/solr/pkg/PackageListeners.java +++ b/solr/core/src/java/org/apache/solr/pkg/PackageListeners.java @@ -63,14 +63,13 @@ public class PackageListeners { } synchronized void packagesUpdated(List pkgs) { - if(core != null) MDCLoggingContext.setCore(core); + MDCLoggingContext.setCore(core); try { for (PackageLoader.Package pkgInfo : pkgs) { invokeListeners(pkgInfo); } } finally { - if(core != null) MDCLoggingContext.clear(); - + MDCLoggingContext.clear(); } } diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java index 44418e45cca..245baf24816 100644 --- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java +++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java @@ -307,7 +307,6 @@ public class HttpSolrCall { // With a valid core... if (core != null) { - MDCLoggingContext.setCore(core); config = core.getSolrConfig(); // get or create/cache the parser for the core SolrRequestParsers parser = config.getRequestParsers(); @@ -614,8 +613,6 @@ public class HttpSolrCall { t = t.getCause(); } return RETURN; - } finally { - MDCLoggingContext.clear(); } } diff --git a/solr/core/src/java/org/apache/solr/update/PeerSync.java b/solr/core/src/java/org/apache/solr/update/PeerSync.java index 58b338169d6..3a406c0fbd9 100644 --- a/solr/core/src/java/org/apache/solr/update/PeerSync.java +++ b/solr/core/src/java/org/apache/solr/update/PeerSync.java @@ -48,7 +48,6 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; -import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.metrics.SolrMetricProducer; import org.apache.solr.metrics.SolrMetricsContext; import org.apache.solr.request.LocalSolrQueryRequest; @@ -173,7 +172,6 @@ public class PeerSync implements SolrMetricProducer { syncErrors.inc(); return PeerSyncResult.failure(); } - MDCLoggingContext.setCore(core); Timer.Context timerContext = null; try { log.info(msg() + "START replicas=" + replicas + " nUpdates=" + nUpdates); @@ -257,7 +255,6 @@ public class PeerSync implements SolrMetricProducer { if (timerContext != null) { timerContext.close(); } - MDCLoggingContext.clear(); } } diff --git a/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java b/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java index fca105e3ba3..d1c9577b151 100644 --- a/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java +++ b/solr/core/src/java/org/apache/solr/update/PeerSyncWithLeader.java @@ -37,7 +37,6 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrInfoBean; -import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.metrics.SolrMetricProducer; import org.apache.solr.metrics.SolrMetricsContext; import org.slf4j.Logger; @@ -131,7 +130,6 @@ public class PeerSyncWithLeader implements SolrMetricProducer { return PeerSync.PeerSyncResult.failure(); } - MDCLoggingContext.setCore(core); Timer.Context timerContext = null; try { log.info(msg() + "START leader=" + leaderUrl + " nUpdates=" + nUpdates); @@ -194,7 +192,6 @@ public class PeerSyncWithLeader implements SolrMetricProducer { } catch (IOException e) { log.warn("{} unable to close client to leader", msg(), e); } - MDCLoggingContext.clear(); } }