From 07cc2d98ef97b78a5161e451ef18ed899f5d7dd3 Mon Sep 17 00:00:00 2001 From: erick Date: Fri, 22 Feb 2019 12:27:02 -0800 Subject: [PATCH] SOLR-12055: Enable async logging by default SOLR-12753: Async logging ring buffer and OOM error (cherry picked from commit 0de3905) --- solr/CHANGES.txt | 50 ++++++++----- .../apache/solr/util/StartupLoggingUtils.java | 17 +++++ solr/core/src/test-files/log4j2.xml | 45 +++++++++-- .../solr/cloud/rule/ImplicitSnitchTest.java | 3 +- .../solr/handler/RequestLoggingTest.java | 29 ++++++-- .../apache/solr/logging/TestLogWatcher.java | 74 +++++++++---------- .../org/apache/solr/schema/UUIDFieldTest.java | 4 +- .../SignificantTermsQParserPluginTest.java | 4 +- .../solr/util/TestObjectReleaseTracker.java | 3 +- .../solr/util/TestSystemIdResolver.java | 3 +- solr/server/resources/log4j2-console.xml | 38 ++++++++-- solr/server/resources/log4j2.xml | 68 ++++++++++++++++- .../src/solr-upgrade-notes.adoc | 10 +++ .../java/org/apache/solr/SolrTestCaseJ4.java | 2 + 14 files changed, 263 insertions(+), 87 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9528f2b8e3e..d86079683ae 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -16,6 +16,28 @@ In this release, there is an example Solr server including a bundled servlet container in the directory named "example". See the Solr tutorial at https://lucene.apache.org/solr/guide/solr-tutorial.html +================== 9.0.0 ================== + +Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. + +Versions of Major Components +--------------------- +Apache Tika 1.19.1 +Carrot2 3.16.0 +Velocity 1.7 and Velocity Tools 2.0 +Apache ZooKeeper 3.4.13 +Jetty 9.4.14.v20181114 + +Upgrade Notes +---------------------- + +* SOLR-12055 introduces async logging by default. There's a small window where log messages may be lost + in the event of some hard crash. Switch back to synchronous logging if this is unacceptable, see + see commeints in the log4j2 configuration files (log4j2.xml by default). + +New Features +---------------------- + ================== 8.1.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. @@ -56,7 +78,6 @@ Bug Fixes Improvements ---------------------- - * SOLR-12999: Index replication could delete segments before downloading segments from master if there is not enough disk space (noble) @@ -69,6 +90,15 @@ Other Changes * SOLR-13222: Improve logging in StreamingSolrClients (Peter Cseh via Kevin Risden) +* SOLR-12055: Enable async logging by default. This change improves throughput for logging. This opens + up a small window where log messages could possibly be lost. If this is unacceptable, switching back to synchronous + logging can be done by changing the log4j2.xml file, no internal Solr code changed to make async logging the default. + (Erick Erickson) + +* SOLR-12753: Async logging ring buffer and OOM error. When very long messages are written (1M messages or so), + it can produce an OOM error. Log messages are truncated at 10K via configuration in the log4j2.xml files. + + ================== 8.0.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. @@ -243,7 +273,7 @@ Bug Fixes this issue reverts the default replica placement policy to the 'legacy' assignment policy that was the default until Solr 7.4. (Gus Heck, Andrzej Bialecki, Bram Van Dam, shalin) -* SOLR-13255 : ClassCastException when URPs try to read a String field which returns a ByteArrayUtf8CharSequence . This is a regression +* SOLR-13255 : ClasscastException when URPs try to read a String field which returns a ByteArrayUTF8CHarSequence . This is a regression in release 7.7 (noble) @@ -309,21 +339,6 @@ Other Changes * SOLR-9515: Hadoop dependencies have been upgraded to Hadoop 3.2.0 from 2.7.2. (Mark Miller, Kevin Risden) -================== 7.8.0 ================== - -Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. - -Versions of Major Components ---------------------- -Apache Tika 1.19.1 -Carrot2 3.16.0 -Velocity 1.7 and Velocity Tools 2.0 -Apache ZooKeeper 3.4.13 -Jetty 9.4.14.v20181114 - - -(No Changes) - ================== 7.7.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. @@ -719,6 +734,7 @@ Upgrade Notes an object style. Although the object style makes more sense, this was an overlooked back-compat break; sorry. + New Features ---------------------- diff --git a/solr/core/src/java/org/apache/solr/util/StartupLoggingUtils.java b/solr/core/src/java/org/apache/solr/util/StartupLoggingUtils.java index e867f1a97ef..8181ae5b8f1 100644 --- a/solr/core/src/java/org/apache/solr/util/StartupLoggingUtils.java +++ b/solr/core/src/java/org/apache/solr/util/StartupLoggingUtils.java @@ -126,6 +126,23 @@ public final class StartupLoggingUtils { + "Please consult your logging framework of choice on how to configure the appropriate logging.", msg); } + /** + * Perhaps odd to put in startup utils, but this is where the logging-init code is so it seems logical to put the + * shutdown here too. + * + * Tests are particularly sensitive to this call or the object release tracker will report "lmax.disruptor" not + * terminating when asynch logging (new default as of 8.1) is enabled. + * + * + */ + @SuppressForbidden(reason = "Legitimate log4j2 access") + public static void shutdown() { + if (!isLog4jActive()) { + logNotSupported("Not running log4j2, could not call shutdown for async logging."); + return; + } + LogManager.shutdown(true); + } /** * Return a string representing the current static ROOT logging level * @return a string TRACE, DEBUG, WARN, ERROR or INFO representing current log level. Default is INFO diff --git a/solr/core/src/test-files/log4j2.xml b/solr/core/src/test-files/log4j2.xml index 612cfe8a46c..e3c2a3b5919 100644 --- a/solr/core/src/test-files/log4j2.xml +++ b/solr/core/src/test-files/log4j2.xml @@ -15,26 +15,55 @@ See the License for the specific language governing permissions and limitations under the License. --> - + - %-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n + %maxLen{%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ + =>%ex{short}}}{10240}%n - - - - + + + + - + - + + + + diff --git a/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java b/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java index f379bbf007b..b312e969a10 100644 --- a/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java @@ -23,7 +23,6 @@ import java.util.HashMap; import java.util.Map; import com.google.common.collect.Sets; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestRuleLimitSysouts; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; @@ -41,7 +40,7 @@ import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.when; @TestRuleLimitSysouts.Limit(bytes=32000) -public class ImplicitSnitchTest extends LuceneTestCase { +public class ImplicitSnitchTest extends SolrTestCaseJ4 { private ImplicitSnitch snitch; private SnitchContext context; diff --git a/solr/core/src/test/org/apache/solr/handler/RequestLoggingTest.java b/solr/core/src/test/org/apache/solr/handler/RequestLoggingTest.java index e203e808928..040d2d2584a 100644 --- a/solr/core/src/test/org/apache/solr/handler/RequestLoggingTest.java +++ b/solr/core/src/test/org/apache/solr/handler/RequestLoggingTest.java @@ -17,6 +17,7 @@ package org.apache.solr.handler; import java.io.StringWriter; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -30,7 +31,9 @@ import org.apache.logging.log4j.core.config.LoggerConfig; import org.apache.logging.log4j.core.layout.PatternLayout; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.util.SuppressForbidden; +import org.apache.solr.common.util.TimeSource; import org.apache.solr.core.SolrCore; +import org.apache.solr.util.TimeOut; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -48,7 +51,6 @@ public class RequestLoggingTest extends SolrTestCaseJ4 { @Before public void setupAppender() { LoggerContext ctx = (LoggerContext) LogManager.getContext(false); - LoggerConfig config = ctx.getConfiguration().getLoggerConfig("RequestLoggingTest"); writer = new StringWriter(); appender = WriterAppender.createAppender( @@ -62,18 +64,18 @@ public class RequestLoggingTest extends SolrTestCaseJ4 { } @Test - public void testLogBeforeExecuteWithCoreLogger() { + public void testLogBeforeExecuteWithCoreLogger() throws InterruptedException { Logger logger = LogManager.getLogger(SolrCore.class); testLogBeforeExecute(logger); } @Test - public void testLogBeforeExecuteWithRequestLogger() { + public void testLogBeforeExecuteWithRequestLogger() throws InterruptedException { Logger logger = LogManager.getLogger("org.apache.solr.core.SolrCore.Request"); testLogBeforeExecute(logger); } - public void testLogBeforeExecute(Logger logger) { + public void testLogBeforeExecute(Logger logger) throws InterruptedException { Level level = logger.getLevel(); LoggerContext ctx = (LoggerContext) LogManager.getContext(false); @@ -85,9 +87,22 @@ public class RequestLoggingTest extends SolrTestCaseJ4 { try { assertQ(req("q", "*:*")); - String output = writer.toString(); - Matcher matcher = Pattern.compile("DEBUG.*q=\\*:\\*.*").matcher(output); - assertTrue(matcher.find()); + TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME); + boolean found = false; + Matcher matcher; + String pat = "DEBUG.*q=\\*:\\*.*"; + String output = ""; + Pattern pattern = Pattern.compile(pat); + do { + output = writer.toString(); + matcher = pattern.matcher(output); + found = matcher.find(); + if (found) { + break; + } + timeOut.sleep(10); + } while (timeOut.hasTimedOut() == false); + assertTrue("Did not find expected pattern: '" + pat + "' in output: '" + output + "'", found); final String group = matcher.group(); final String msg = "Should not have post query information"; assertFalse(msg, group.contains("hits")); diff --git a/solr/core/src/test/org/apache/solr/logging/TestLogWatcher.java b/solr/core/src/test/org/apache/solr/logging/TestLogWatcher.java index 6f7987f084f..d036737546a 100644 --- a/solr/core/src/test/org/apache/solr/logging/TestLogWatcher.java +++ b/solr/core/src/test/org/apache/solr/logging/TestLogWatcher.java @@ -16,65 +16,61 @@ */ package org.apache.solr.logging; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.util.TimeSource; +import org.apache.solr.util.TimeOut; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.lang.invoke.MethodHandles; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -public class TestLogWatcher { +public class TestLogWatcher extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private LogWatcherConfig config; @Before - public void setUp() { - config = new LogWatcherConfig(true, null, null, 50); + public void before() { + config = new LogWatcherConfig(true, null, "INFO", 1); } + // Create several log watchers and ensure that new messages go to the new watcher. @Test - public void testLog4jWatcher() { - LogWatcher watcher = LogWatcher.newRegisteredLogWatcher(config, null); + public void testLog4jWatcher() throws InterruptedException { + LogWatcher watcher = null; + int lim = random().nextInt(3) + 2; + for (int idx = 0; idx < lim; ++idx) { + String msg = "This is a test message: " + idx; + watcher = LogWatcher.newRegisteredLogWatcher(config, null); - assertEquals(watcher.getLastEvent(), -1); + // First ensure there's nothing in the new watcher. + assertEquals(-1, watcher.getLastEvent()); - log.warn("This is a test message"); + // Now log a message and ensure that the new watcher sees it. + log.warn(msg); - assertTrue(watcher.getLastEvent() > -1); - - SolrDocumentList events = watcher.getHistory(-1, new AtomicBoolean()); - assertEquals(events.size(), 1); - - SolrDocument event = events.get(0); - assertEquals(event.get("logger"), "org.apache.solr.logging.TestLogWatcher"); - assertEquals(event.get("message"), "This is a test message"); + // Loop to give the logger time to process the async message and notify the new watcher. + TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME); + boolean foundMsg = false; + // In local testing this loop usually succeeds 1-2 tries. + do { + // Returns an empty (but non-null) list even if there are no messages yet. + SolrDocumentList events = watcher.getHistory(-1, new AtomicBoolean()); + for (SolrDocument doc : events) { + if (doc.get("message").equals(msg)) { + foundMsg = true; + break; + } + } + Thread.sleep(10); + } while (timeOut.hasTimedOut() == false); + assertTrue("Should have found message " + msg + " in loop: " + idx, foundMsg); + } } - - // This seems weird to do the same thing twice, this is valid. We need to test whether listeners are replaced.... - @Test - public void testLog4jWatcherRepeat() { - LogWatcher watcher = LogWatcher.newRegisteredLogWatcher(config, null); - - assertEquals(watcher.getLastEvent(), -1); - - log.warn("This is a test message"); - - assertTrue(watcher.getLastEvent() > -1); - - SolrDocumentList events = watcher.getHistory(-1, new AtomicBoolean()); - assertEquals(events.size(), 1); - - SolrDocument event = events.get(0); - assertEquals(event.get("logger"), "org.apache.solr.logging.TestLogWatcher"); - assertEquals(event.get("message"), "This is a test message"); - - } - } diff --git a/solr/core/src/test/org/apache/solr/schema/UUIDFieldTest.java b/solr/core/src/test/org/apache/solr/schema/UUIDFieldTest.java index b7fa8749f1b..a7cbb4d0701 100644 --- a/solr/core/src/test/org/apache/solr/schema/UUIDFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/UUIDFieldTest.java @@ -16,10 +16,10 @@ */ package org.apache.solr.schema; -import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; -public class UUIDFieldTest extends LuceneTestCase { +public class UUIDFieldTest extends SolrTestCaseJ4 { public void testToInternal() { boolean ok = false; UUIDField uuidfield = new UUIDField(); diff --git a/solr/core/src/test/org/apache/solr/search/SignificantTermsQParserPluginTest.java b/solr/core/src/test/org/apache/solr/search/SignificantTermsQParserPluginTest.java index 4a68dc5c4d3..bcd66dabf9e 100644 --- a/solr/core/src/test/org/apache/solr/search/SignificantTermsQParserPluginTest.java +++ b/solr/core/src/test/org/apache/solr/search/SignificantTermsQParserPluginTest.java @@ -17,10 +17,10 @@ package org.apache.solr.search; -import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.SolrTestCaseJ4; import org.junit.Test; -public class SignificantTermsQParserPluginTest extends LuceneTestCase { +public class SignificantTermsQParserPluginTest extends SolrTestCaseJ4 { /** * Test the backwards compatibility for a typo in the SignificantTermsQParserPlugin. It will fail if the backwards diff --git a/solr/core/src/test/org/apache/solr/util/TestObjectReleaseTracker.java b/solr/core/src/test/org/apache/solr/util/TestObjectReleaseTracker.java index e7a76373c96..312c99bfb4d 100644 --- a/solr/core/src/test/org/apache/solr/util/TestObjectReleaseTracker.java +++ b/solr/core/src/test/org/apache/solr/util/TestObjectReleaseTracker.java @@ -16,7 +16,6 @@ */ package org.apache.solr.util; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestRuleLimitSysouts.Limit; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.util.ObjectReleaseTracker; @@ -24,7 +23,7 @@ import org.junit.Test; @Limit(bytes=150000) // raise limit as this writes to sys err -public class TestObjectReleaseTracker extends LuceneTestCase { +public class TestObjectReleaseTracker extends SolrTestCaseJ4 { @Test public void testObjectReleaseTracker() { diff --git a/solr/core/src/test/org/apache/solr/util/TestSystemIdResolver.java b/solr/core/src/test/org/apache/solr/util/TestSystemIdResolver.java index f87eeb47cf1..89ef1808354 100644 --- a/solr/core/src/test/org/apache/solr/util/TestSystemIdResolver.java +++ b/solr/core/src/test/org/apache/solr/util/TestSystemIdResolver.java @@ -23,12 +23,11 @@ import java.util.Arrays; import org.apache.commons.io.IOUtils; import org.apache.lucene.analysis.util.ResourceLoader; -import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.SolrResourceLoader; import org.xml.sax.InputSource; -public class TestSystemIdResolver extends LuceneTestCase { +public class TestSystemIdResolver extends SolrTestCaseJ4 { public void tearDown() throws Exception { System.clearProperty("solr.allow.unsafe.resourceloading"); diff --git a/solr/server/resources/log4j2-console.xml b/solr/server/resources/log4j2-console.xml index f32f4c142e2..e83edf942c5 100644 --- a/solr/server/resources/log4j2-console.xml +++ b/solr/server/resources/log4j2-console.xml @@ -18,22 +18,50 @@ + - %-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %c; %m%n + %maxLen{%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %c; %m%notEmpty{ =>%ex{short}}}{10240}%n - - + + - + - + + + + + + diff --git a/solr/server/resources/log4j2.xml b/solr/server/resources/log4j2.xml index 17bcf4c007a..e76973da645 100644 --- a/solr/server/resources/log4j2.xml +++ b/solr/server/resources/log4j2.xml @@ -16,7 +16,72 @@ limitations under the License. --> + + + + + + + %maxLen{%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ =>%ex{short}}}{10240}%n + + + + + + + + %maxLen{%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ =>%ex{short}}}{10240}%n + + + + + + + + + + + + + %maxLen{%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ =>%ex{short}}}{10240}%n + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/solr-ref-guide/src/solr-upgrade-notes.adoc b/solr/solr-ref-guide/src/solr-upgrade-notes.adoc index e3beadb6e64..1b46a06ed65 100644 --- a/solr/solr-ref-guide/src/solr-upgrade-notes.adoc +++ b/solr/solr-ref-guide/src/solr-upgrade-notes.adoc @@ -26,6 +26,16 @@ When planning your Solr upgrade, consider the customizations you have made to yo Detailed steps for upgrading a Solr cluster are in the section <>. +== Upgrading to 8.x Releases + +=== Solr 8.1 + +*Logging* + +Solr 8.1 changed the default log4j2 logging mode from synchronous to asynchronous. This will improve logging throughput and reduce system contention at the cost of a _slight_ chance that some logging messages may be missed in the event of abnormal Solr termination. + +If even this slight risk is unacceptable, the log4j configuration file ../server/resources/log4j2.xml has the synchronous logging configuration in a commented section and can be edited to re-enable synchronous logging. + == Upgrading to 7.x Releases === Solr 7.6 diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java index 748721d0ac8..8b05c38146c 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java @@ -348,6 +348,8 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { sslConfig = null; testSolrHome = null; + StartupLoggingUtils.shutdown(); + IpTables.unblockAllPorts(); LogLevel.Configurer.restoreLogLevels(savedClassLogLevels);