SOLR-12055: Enable async logging by default SOLR-12753: Async logging ring buffer and OOM error

This commit is contained in:
Erick 2019-02-22 12:27:02 -08:00
parent 6b4e90617d
commit 0de3905ce7
14 changed files with 244 additions and 70 deletions

View File

@ -31,6 +31,10 @@ Jetty 9.4.14.v20181114
Upgrade Notes
----------------------
* SOLR-12055 introduces async logging by default. There's a small window where log messages may be lost
in the event of some hard crash. Switch back to synchronous logging if this is unacceptable, see
see commeints in the log4j2 configuration files (log4j2.xml by default).
New Features
----------------------
@ -86,6 +90,15 @@ Other Changes
* SOLR-13222: Improve logging in StreamingSolrClients (Peter Cseh via Kevin Risden)
* SOLR-12055: Enable async logging by default. This change improves throughput for logging. This opens
up a small window where log messages could possibly be lost. If this is unacceptable, switching back to synchronous
logging can be done by changing the log4j2.xml file, no internal Solr code changed to make async logging the default.
(Erick Erickson)
* SOLR-12753: Async logging ring buffer and OOM error. When very long messages are written (1M messages or so),
it can produce an OOM error. Log messages are truncated at 10K via configuration in the log4j2.xml files.
================== 8.0.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -721,6 +734,7 @@ Upgrade Notes
an object style. Although the object style makes more sense, this was an overlooked back-compat break; sorry.
New Features
----------------------

View File

@ -126,6 +126,23 @@ public final class StartupLoggingUtils {
+ "Please consult your logging framework of choice on how to configure the appropriate logging.", msg);
}
/**
* Perhaps odd to put in startup utils, but this is where the logging-init code is so it seems logical to put the
* shutdown here too.
*
* Tests are particularly sensitive to this call or the object release tracker will report "lmax.disruptor" not
* terminating when asynch logging (new default as of 8.1) is enabled.
*
*
*/
@SuppressForbidden(reason = "Legitimate log4j2 access")
public static void shutdown() {
if (!isLog4jActive()) {
logNotSupported("Not running log4j2, could not call shutdown for async logging.");
return;
}
LogManager.shutdown(true);
}
/**
* Return a string representing the current static ROOT logging level
* @return a string TRACE, DEBUG, WARN, ERROR or INFO representing current log level. Default is INFO

View File

@ -15,26 +15,55 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Configuration for asynchronous logging -->
<Configuration>
<Appenders>
<Console name="STDERR" target="SYSTEM_ERR">
<PatternLayout>
<Pattern>
%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
%maxLen{%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{
=>%ex{short}}}{10240}%n
</Pattern>
</PatternLayout>
</Console>
</Appenders>
<Loggers>
<Logger name="org.apache.zookeeper" level="WARN"/>
<Logger name="org.apache.hadoop" level="WARN"/>
<Logger name="org.apache.directory" level="WARN"/>
<Logger name="org.apache.solr.hadoop" level="INFO"/>
<AsyncLogger name="org.apache.zookeeper" level="WARN"/>
<AsyncLogger name="org.apache.hadoop" level="WARN"/>
<AsyncLogger name="org.apache.directory" level="WARN"/>
<AsyncLogger name="org.apache.solr.hadoop" level="INFO"/>
<Logger name="org.eclipse.jetty" level="INFO"/>
<Root level="INFO">
<AsyncRoot level="INFO">
<AppenderRef ref="STDERR"/>
</Root>
</AsyncRoot>
</Loggers>
</Configuration>
<!-- Configuration for synchronous logging
there _may_ be a very small window where log messages will not be flushed
to the log file on abnormal shutdown. If even this risk is unacceptable, use
the configuration below
-->
<!--Configuration>
<Appenders>
<Console name="STDERR" target="SYSTEM_ERR">
<PatternLayout>
<Pattern>
%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
</Pattern>
</PatternLayout>
</Console>
</Appenders>
<Loggers>
<Logger name="org.apache.zookeeper" level="WARN"/>
<Logger name="org.apache.hadoop" level="WARN"/>
<Logger name="org.apache.directory" level="WARN"/>
<Logger name="org.apache.solr.hadoop" level="INFO"/>
<Logger name="org.eclipse.jetty" level="INFO"/>
<Root level="INFO">
<AppenderRef ref="STDERR"/>
</Root>
</Loggers>
</Configuration-->

View File

@ -23,7 +23,6 @@ import java.util.HashMap;
import java.util.Map;
import com.google.common.collect.Sets;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestRuleLimitSysouts;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
@ -41,7 +40,7 @@ import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.when;
@TestRuleLimitSysouts.Limit(bytes=32000)
public class ImplicitSnitchTest extends LuceneTestCase {
public class ImplicitSnitchTest extends SolrTestCaseJ4 {
private ImplicitSnitch snitch;
private SnitchContext context;

View File

@ -17,6 +17,7 @@
package org.apache.solr.handler;
import java.io.StringWriter;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -30,7 +31,9 @@ import org.apache.logging.log4j.core.config.LoggerConfig;
import org.apache.logging.log4j.core.layout.PatternLayout;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.TimeOut;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@ -48,7 +51,6 @@ public class RequestLoggingTest extends SolrTestCaseJ4 {
@Before
public void setupAppender() {
LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
LoggerConfig config = ctx.getConfiguration().getLoggerConfig("RequestLoggingTest");
writer = new StringWriter();
appender = WriterAppender.createAppender(
@ -62,18 +64,18 @@ public class RequestLoggingTest extends SolrTestCaseJ4 {
}
@Test
public void testLogBeforeExecuteWithCoreLogger() {
public void testLogBeforeExecuteWithCoreLogger() throws InterruptedException {
Logger logger = LogManager.getLogger(SolrCore.class);
testLogBeforeExecute(logger);
}
@Test
public void testLogBeforeExecuteWithRequestLogger() {
public void testLogBeforeExecuteWithRequestLogger() throws InterruptedException {
Logger logger = LogManager.getLogger("org.apache.solr.core.SolrCore.Request");
testLogBeforeExecute(logger);
}
public void testLogBeforeExecute(Logger logger) {
public void testLogBeforeExecute(Logger logger) throws InterruptedException {
Level level = logger.getLevel();
LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
@ -85,9 +87,22 @@ public class RequestLoggingTest extends SolrTestCaseJ4 {
try {
assertQ(req("q", "*:*"));
String output = writer.toString();
Matcher matcher = Pattern.compile("DEBUG.*q=\\*:\\*.*").matcher(output);
assertTrue(matcher.find());
TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
boolean found = false;
Matcher matcher;
String pat = "DEBUG.*q=\\*:\\*.*";
String output = "";
Pattern pattern = Pattern.compile(pat);
do {
output = writer.toString();
matcher = pattern.matcher(output);
found = matcher.find();
if (found) {
break;
}
timeOut.sleep(10);
} while (timeOut.hasTimedOut() == false);
assertTrue("Did not find expected pattern: '" + pat + "' in output: '" + output + "'", found);
final String group = matcher.group();
final String msg = "Should not have post query information";
assertFalse(msg, group.contains("hits"));

View File

@ -16,65 +16,61 @@
*/
package org.apache.solr.logging;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.util.TimeOut;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TestLogWatcher {
public class TestLogWatcher extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private LogWatcherConfig config;
@Before
public void setUp() {
config = new LogWatcherConfig(true, null, null, 50);
public void before() {
config = new LogWatcherConfig(true, null, "INFO", 1);
}
// Create several log watchers and ensure that new messages go to the new watcher.
@Test
public void testLog4jWatcher() {
LogWatcher watcher = LogWatcher.newRegisteredLogWatcher(config, null);
public void testLog4jWatcher() throws InterruptedException {
LogWatcher watcher = null;
int lim = random().nextInt(3) + 2;
for (int idx = 0; idx < lim; ++idx) {
String msg = "This is a test message: " + idx;
watcher = LogWatcher.newRegisteredLogWatcher(config, null);
assertEquals(watcher.getLastEvent(), -1);
// First ensure there's nothing in the new watcher.
assertEquals(-1, watcher.getLastEvent());
log.warn("This is a test message");
// Now log a message and ensure that the new watcher sees it.
log.warn(msg);
assertTrue(watcher.getLastEvent() > -1);
SolrDocumentList events = watcher.getHistory(-1, new AtomicBoolean());
assertEquals(events.size(), 1);
SolrDocument event = events.get(0);
assertEquals(event.get("logger"), "org.apache.solr.logging.TestLogWatcher");
assertEquals(event.get("message"), "This is a test message");
// Loop to give the logger time to process the async message and notify the new watcher.
TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
boolean foundMsg = false;
// In local testing this loop usually succeeds 1-2 tries.
do {
// Returns an empty (but non-null) list even if there are no messages yet.
SolrDocumentList events = watcher.getHistory(-1, new AtomicBoolean());
for (SolrDocument doc : events) {
if (doc.get("message").equals(msg)) {
foundMsg = true;
break;
}
}
Thread.sleep(10);
} while (timeOut.hasTimedOut() == false);
assertTrue("Should have found message " + msg + " in loop: " + idx, foundMsg);
}
}
// This seems weird to do the same thing twice, this is valid. We need to test whether listeners are replaced....
@Test
public void testLog4jWatcherRepeat() {
LogWatcher watcher = LogWatcher.newRegisteredLogWatcher(config, null);
assertEquals(watcher.getLastEvent(), -1);
log.warn("This is a test message");
assertTrue(watcher.getLastEvent() > -1);
SolrDocumentList events = watcher.getHistory(-1, new AtomicBoolean());
assertEquals(events.size(), 1);
SolrDocument event = events.get(0);
assertEquals(event.get("logger"), "org.apache.solr.logging.TestLogWatcher");
assertEquals(event.get("message"), "This is a test message");
}
}

View File

@ -16,10 +16,10 @@
*/
package org.apache.solr.schema;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
public class UUIDFieldTest extends LuceneTestCase {
public class UUIDFieldTest extends SolrTestCaseJ4 {
public void testToInternal() {
boolean ok = false;
UUIDField uuidfield = new UUIDField();

View File

@ -17,10 +17,10 @@
package org.apache.solr.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Test;
public class SignificantTermsQParserPluginTest extends LuceneTestCase {
public class SignificantTermsQParserPluginTest extends SolrTestCaseJ4 {
/**
* Test the backwards compatibility for a typo in the SignificantTermsQParserPlugin. It will fail if the backwards

View File

@ -16,7 +16,6 @@
*/
package org.apache.solr.util;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestRuleLimitSysouts.Limit;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.ObjectReleaseTracker;
@ -24,7 +23,7 @@ import org.junit.Test;
@Limit(bytes=150000) // raise limit as this writes to sys err
public class TestObjectReleaseTracker extends LuceneTestCase {
public class TestObjectReleaseTracker extends SolrTestCaseJ4 {
@Test
public void testObjectReleaseTracker() {

View File

@ -23,12 +23,11 @@ import java.util.Arrays;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.SolrResourceLoader;
import org.xml.sax.InputSource;
public class TestSystemIdResolver extends LuceneTestCase {
public class TestSystemIdResolver extends SolrTestCaseJ4 {
public void tearDown() throws Exception {
System.clearProperty("solr.allow.unsafe.resourceloading");

View File

@ -18,22 +18,50 @@
<!-- Use this file for logging exlusively to the console, useful for
some development tasks. Should not be used for production -->
<!-- Configuration for asynchronous logging -->
<Configuration>
<Appenders>
<Console name="STDERR" target="SYSTEM_ERR">
<PatternLayout>
<Pattern>
%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %c; %m%n
%maxLen{%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %c; %m%notEmpty{ =>%ex{short}}}{10240}%n
</Pattern>
</PatternLayout>
</Console>
</Appenders>
<Loggers>
<Logger name="org.apache.zookeeper" level="WARN"/>
<Logger name="org.apache.hadoop" level="WARN"/>
<AsyncLogger name="org.apache.zookeeper" level="WARN"/>
<AsyncLogger name="org.apache.hadoop" level="WARN"/>
<Root level="INFO">
<AsyncRoot level="INFO">
<AppenderRef ref="STDERR"/>
</Root>
</AsyncRoot>
</Loggers>
</Configuration>
<!-- Configuration for synchronous logging
there _may_ be a very small window where log messages will not be flushed
to the log file on abnormal shutdown. If even this risk is unacceptable, use
the configuration below
-->
<!--Configuration>
<Appenders>
<Console name="STDERR" target="SYSTEM_ERR">
<PatternLayout>
<Pattern>
%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %c; %m%n
</Pattern>
</PatternLayout>
</Console>
</Appenders>
<Loggers>
<Logger name="org.apache.zookeeper" level="WARN"/>
<Logger name="org.apache.hadoop" level="WARN"/>
<Root level="INFO">
<AppenderRef ref="STDERR"/>
</Root>
</Loggers>
</Configuration-->

View File

@ -16,7 +16,72 @@
limitations under the License.
-->
<!-- Configuration for asynchronous logging -->
<Configuration>
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
<PatternLayout>
<Pattern>
%maxLen{%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ =>%ex{short}}}{10240}%n
</Pattern>
</PatternLayout>
</Console>
<RollingRandomAccessFile
name="MainLogFile"
fileName="${sys:solr.log.dir}/solr.log"
filePattern="${sys:solr.log.dir}/solr.log.%i" >
<PatternLayout>
<Pattern>
%maxLen{%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ =>%ex{short}}}{10240}%n
</Pattern>
</PatternLayout>
<Policies>
<OnStartupTriggeringPolicy />
<SizeBasedTriggeringPolicy size="32 MB"/>
</Policies>
<DefaultRolloverStrategy max="10"/>
</RollingRandomAccessFile>
<RollingRandomAccessFile
name="SlowLogFile"
fileName="${sys:solr.log.dir}/solr_slow_requests.log"
filePattern="${sys:solr.log.dir}/solr_slow_requests.log.%i" >
<PatternLayout>
<Pattern>
%maxLen{%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%notEmpty{ =>%ex{short}}}{10240}%n
</Pattern>
</PatternLayout>
<Policies>
<OnStartupTriggeringPolicy />
<SizeBasedTriggeringPolicy size="32 MB"/>
</Policies>
<DefaultRolloverStrategy max="10"/>
</RollingRandomAccessFile>
</Appenders>
<Loggers>
<AsyncLogger name="org.apache.hadoop" level="warn"/>
<AsyncLogger name="org.apache.solr.update.LoggingInfoStream" level="off"/>
<AsyncLogger name="org.apache.zookeeper" level="warn"/>
<AsyncLogger name="org.apache.solr.core.SolrCore.SlowRequest" level="info" additivity="false">
<AppenderRef ref="SlowLogFile"/>
</AsyncLogger>
<AsyncRoot level="info">
<AppenderRef ref="MainLogFile"/>
<AppenderRef ref="STDOUT"/>
</AsyncRoot>
</Loggers>
</Configuration>
<!-- Configuration for synchronous logging
there _may_ be a very small window where log messages will not be flushed
to the log file on abnormal shutdown. If even this risk is unacceptable, use
the configuration below
-->
<!--Configuration>
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
@ -73,4 +138,5 @@
<AppenderRef ref="STDOUT"/>
</Root>
</Loggers>
</Configuration>
</Configuration-->

View File

@ -26,6 +26,16 @@ When planning your Solr upgrade, consider the customizations you have made to yo
Detailed steps for upgrading a Solr cluster are in the section <<upgrading-a-solr-cluster.adoc#upgrading-a-solr-cluster,Upgrading a Solr Cluster>>.
== Upgrading to 8.x Releases
=== Solr 8.1
*Logging*
Solr 8.1 changed the default log4j2 logging mode from synchronous to asynchronous. This will improve logging throughput and reduce system contention at the cost of a _slight_ chance that some logging messages may be missed in the event of abnormal Solr termination.
If even this slight risk is unacceptable, the log4j configuration file ../server/resources/log4j2.xml has the synchronous logging configuration in a commented section and can be edited to re-enable synchronous logging.
== Upgrading to 7.x Releases
=== Solr 7.6

View File

@ -348,6 +348,8 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
sslConfig = null;
testSolrHome = null;
StartupLoggingUtils.shutdown();
IpTables.unblockAllPorts();
LogLevel.Configurer.restoreLogLevels(savedClassLogLevels);