Logging: Log less source in slowlog

Instead of logging the entire `_source` in the indexing slowlog we log by
default just the first 1000 characters - this is controlled by the
`index.indexing.slowlog.source` setting and can be set to `true` to log the
whole `_source`, `false` to log none of it, and a number to log at most that
many characters.

Closes #4485
This commit is contained in:
Nik Everett 2015-08-11 13:13:43 -07:00
parent 29d1924d78
commit 79d9f5b775
7 changed files with 180 additions and 30 deletions

View File

@ -175,6 +175,7 @@ public class IndexMetaData implements Diffable<IndexMetaData>, FromXContentBuild
public static final String SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE = "index.shared_filesystem.recover_on_any_node";
public static final String INDEX_UUID_NA_VALUE = "_na_";
// hard-coded hash function as of 2.0
// older indices will read which hash function to use in their index settings
private static final HashFunction MURMUR3_HASH_FUNCTION = new Murmur3HashFunction();

View File

@ -1093,4 +1093,28 @@ public class Strings {
throw new AssertionError("Cannot happen", e);
}
}
/**
 * Truncates the given string so that it is no longer than {@code length}
 * characters. If the cut would fall between the halves of a surrogate pair
 * the high surrogate is thrown out as well, so the result never ends in a
 * dangling half of a code point.
 *
 * @param s the string to truncate, may be null
 * @param length the maximum number of characters to keep
 * @return {@code s} itself if it is null or already short enough, the empty
 *         string if {@code length} is zero or negative, otherwise the
 *         truncated string
 */
public static String cleanTruncate(String s, int length) {
    if (s == null) {
        return s;
    }
    /*
     * It's pretty silly to truncate to 0 (or fewer) characters but if
     * someone does it this shouldn't break. Note the <= guard: a negative
     * length would otherwise reach substring and throw.
     */
    if (length <= 0) {
        return "";
    }
    if (length >= s.length()) {
        return s;
    }
    // Back up one character rather than leave a dangling high surrogate
    // (the first half of a surrogate pair) at the cut point.
    if (Character.isHighSurrogate(s.charAt(length - 1))) {
        length--;
    }
    return s.substring(0, length);
}
}

View File

@ -19,6 +19,8 @@
package org.elasticsearch.index.indexing;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
@ -26,7 +28,6 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.settings.IndexSettingsService;
import java.io.IOException;
import java.util.Locale;
@ -42,6 +43,12 @@ public final class IndexingSlowLog {
private long indexInfoThreshold;
private long indexDebugThreshold;
private long indexTraceThreshold;
/**
* How much of the source to log in the slowlog - 0 means log none and
* anything greater than 0 means log at least that many <em>characters</em>
* of the source.
*/
private int maxSourceCharsToLog;
private String level;
@ -55,18 +62,27 @@ public final class IndexingSlowLog {
public static final String INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE = INDEX_INDEXING_SLOWLOG_PREFIX +".threshold.index.trace";
public static final String INDEX_INDEXING_SLOWLOG_REFORMAT = INDEX_INDEXING_SLOWLOG_PREFIX +".reformat";
public static final String INDEX_INDEXING_SLOWLOG_LEVEL = INDEX_INDEXING_SLOWLOG_PREFIX +".level";
public static final String INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG = INDEX_INDEXING_SLOWLOG_PREFIX + ".source";
/**
 * Build with the default slowlog loggers: one child logger for index
 * operations and one for deletes, both under the slowlog prefix.
 */
IndexingSlowLog(Settings indexSettings) {
this(indexSettings, Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX + ".index"),
Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX + ".delete"));
}
/**
* Build with the specified loggers. Only used for testing.
*/
IndexingSlowLog(Settings indexSettings, ESLogger indexLogger, ESLogger deleteLogger) {
this.indexLogger = indexLogger;
this.deleteLogger = deleteLogger;
this.reformat = indexSettings.getAsBoolean(INDEX_INDEXING_SLOWLOG_REFORMAT, true);
this.indexWarnThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_WARN, TimeValue.timeValueNanos(-1)).nanos();
this.indexInfoThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_INFO, TimeValue.timeValueNanos(-1)).nanos();
this.indexDebugThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_DEBUG, TimeValue.timeValueNanos(-1)).nanos();
this.indexTraceThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE, TimeValue.timeValueNanos(-1)).nanos();
this.level = indexSettings.get(INDEX_INDEXING_SLOWLOG_LEVEL, "TRACE").toUpperCase(Locale.ROOT);
this.indexLogger = Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX +".index");
this.deleteLogger = Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX +".delete");
this.maxSourceCharsToLog = readSourceToLog(indexSettings);
indexLogger.setLevel(level);
deleteLogger.setLevel(level);
@ -101,6 +117,11 @@ public final class IndexingSlowLog {
if (reformat != this.reformat) {
this.reformat = reformat;
}
int maxSourceCharsToLog = readSourceToLog(settings);
if (maxSourceCharsToLog != this.maxSourceCharsToLog) {
this.maxSourceCharsToLog = maxSourceCharsToLog;
}
}
void postIndex(Engine.Index index, long tookInNanos) {
@ -111,27 +132,44 @@ public final class IndexingSlowLog {
postIndexing(create.parsedDoc(), tookInNanos);
}
private void postIndexing(ParsedDocument doc, long tookInNanos) {
if (indexWarnThreshold >= 0 && tookInNanos > indexWarnThreshold) {
indexLogger.warn("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
} else if (indexInfoThreshold >= 0 && tookInNanos > indexInfoThreshold) {
indexLogger.info("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
} else if (indexDebugThreshold >= 0 && tookInNanos > indexDebugThreshold) {
indexLogger.debug("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
} else if (indexTraceThreshold >= 0 && tookInNanos > indexTraceThreshold) {
indexLogger.trace("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat));
/**
 * Reads how much of the source to log. The user can specify any value they
 * like: numbers are interpreted as the maximum number of characters to log
 * and everything else is interpreted the way Elasticsearch interprets
 * booleans, which is then converted to 0 for false and Integer.MAX_VALUE
 * for true.
 *
 * @param settings the (index or updated) settings to read the value from
 * @return the maximum number of source characters to log, never negative
 */
private int readSourceToLog(Settings settings) {
    String sourceToLog = settings.get(INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG, "1000");
    try {
        // Clamp to 0 so a nonsensical negative setting behaves like
        // "log nothing" instead of blowing up when the source is
        // truncated later.
        return Math.max(0, Integer.parseInt(sourceToLog, 10));
    } catch (NumberFormatException e) {
        // Not a number - fall back to boolean semantics: true logs the
        // whole source, false logs none of it.
        return Booleans.parseBoolean(sourceToLog, true) ? Integer.MAX_VALUE : 0;
    }
}
final static class SlowLogParsedDocumentPrinter {
/**
 * Log the parsed document at the most severe slowlog level whose threshold
 * the indexing time exceeded. A threshold set to a negative value is
 * disabled and never matches.
 */
private void postIndexing(ParsedDocument doc, long tookInNanos) {
    if (indexWarnThreshold >= 0 && tookInNanos > indexWarnThreshold) {
        indexLogger.warn("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
        return;
    }
    if (indexInfoThreshold >= 0 && tookInNanos > indexInfoThreshold) {
        indexLogger.info("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
        return;
    }
    if (indexDebugThreshold >= 0 && tookInNanos > indexDebugThreshold) {
        indexLogger.debug("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
        return;
    }
    if (indexTraceThreshold >= 0 && tookInNanos > indexTraceThreshold) {
        indexLogger.trace("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog));
    }
}
static final class SlowLogParsedDocumentPrinter {
private final ParsedDocument doc;
private final long tookInNanos;
private final boolean reformat;
private final int maxSourceCharsToLog;
SlowLogParsedDocumentPrinter(ParsedDocument doc, long tookInNanos, boolean reformat) {
/**
 * @param doc the document that was slow to index
 * @param tookInNanos how long indexing took, in nanoseconds
 * @param reformat whether to reformat the source when logging it
 * @param maxSourceCharsToLog maximum number of source characters to log;
 *        0 disables source logging
 */
SlowLogParsedDocumentPrinter(ParsedDocument doc, long tookInNanos, boolean reformat, int maxSourceCharsToLog) {
    this.maxSourceCharsToLog = maxSourceCharsToLog;
    this.reformat = reformat;
    this.tookInNanos = tookInNanos;
    this.doc = doc;
}
@Override
@ -141,18 +179,19 @@ public final class IndexingSlowLog {
sb.append("type[").append(doc.type()).append("], ");
sb.append("id[").append(doc.id()).append("], ");
if (doc.routing() == null) {
sb.append("routing[], ");
sb.append("routing[] ");
} else {
sb.append("routing[").append(doc.routing()).append("], ");
sb.append("routing[").append(doc.routing()).append("] ");
}
if (doc.source() != null && doc.source().length() > 0) {
try {
sb.append("source[").append(XContentHelper.convertToJson(doc.source(), reformat)).append("]");
} catch (IOException e) {
sb.append("source[_failed_to_convert_]");
}
} else {
sb.append("source[]");
if (maxSourceCharsToLog == 0 || doc.source() == null || doc.source().length() == 0) {
return sb.toString();
}
try {
String source = XContentHelper.convertToJson(doc.source(), reformat);
sb.append(", source[").append(Strings.cleanTruncate(source, maxSourceCharsToLog)).append("]");
} catch (IOException e) {
sb.append(", source[_failed_to_convert_]");
}
return sb.toString();
}

View File

@ -30,7 +30,6 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocatio
import org.elasticsearch.cluster.settings.DynamicSettings;
import org.elasticsearch.cluster.settings.Validator;
import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.gateway.GatewayAllocator;
import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.indexing.IndexingSlowLog;
import org.elasticsearch.index.search.stats.SearchSlowLog;
@ -86,6 +85,7 @@ public class IndexDynamicSettingsModule extends AbstractModule {
indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE, Validator.TIME);
indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_REFORMAT);
indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_LEVEL);
indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG);
indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_WARN, Validator.TIME);
indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_INFO, Validator.TIME);
indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_DEBUG, Validator.TIME);

View File

@ -20,11 +20,8 @@
package org.elasticsearch.common;
import org.elasticsearch.test.ESTestCase;
import org.junit.Test;
public class StringsTests extends ESTestCase {
@Test
public void testToCamelCase() {
assertEquals("foo", Strings.toCamelCase("foo"));
assertEquals("fooBar", Strings.toCamelCase("fooBar"));
@ -33,4 +30,32 @@ public class StringsTests extends ESTestCase {
assertEquals("fooBarFooBar", Strings.toCamelCase("foo_bar_foo_bar"));
assertEquals("fooBar", Strings.toCamelCase("foo_bar_"));
}
// Strings.substring should pass null through untouched and clamp the end
// index to the string's length instead of throwing.
public void testSubstring() {
assertEquals(null, Strings.substring(null, 0, 1000));
assertEquals("foo", Strings.substring("foo", 0, 1000));
assertEquals("foo", Strings.substring("foo", 0, 3));
assertEquals("oo", Strings.substring("foo", 1, 3));
assertEquals("oo", Strings.substring("foo", 1, 100));
assertEquals("f", Strings.substring("foo", 0, 1));
}
public void testCleanTruncate() {
assertEquals(null, Strings.cleanTruncate(null, 10));
assertEquals("foo", Strings.cleanTruncate("foo", 10));
assertEquals("foo", Strings.cleanTruncate("foo", 3));
// Throws out high surrogates rather than leave half a surrogate pair
assertEquals("foo", Strings.cleanTruncate("foo\uD83D\uDEAB", 4));
// But will keep the whole character when it fits
assertEquals("foo\uD83D\uDEAB", Strings.cleanTruncate("foo\uD83D\uDEAB", 5));
/*
 * Doesn't take care around combining marks. This example has its
 * meaning changed because that last codepoint is supposed to combine
 * backwards into the final "o" and be represented as the "o" with a
 * circle around it with a slash through it. As in "no 'o's allowed
 * here".
 */
assertEquals("o", Strings.cleanTruncate("o\uD83D\uDEAB", 1));
assertEquals("", Strings.cleanTruncate("foo", 0));
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.indexing;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.indexing.IndexingSlowLog.SlowLogParsedDocumentPrinter;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.not;
public class IndexingSlowLogTests extends ESTestCase {
    /**
     * Checks that the slowlog printer honors the maximum number of source
     * characters to log: 0 logs no source, Integer.MAX_VALUE logs it all,
     * and any other value truncates.
     */
    public void testSlowLogParsedDocumentPrinterSourceToLog() throws IOException {
        BytesReference source = JsonXContent.contentBuilder().startObject().field("foo", "bar").endObject().bytes();
        ParsedDocument doc = new ParsedDocument(new StringField("uid", "test:id", Store.YES), new IntField("version", 1, Store.YES), "id",
                "test", null, 0, -1, null, source, null);

        // With source logging turned off no source[] section appears at all
        SlowLogParsedDocumentPrinter printer = new SlowLogParsedDocumentPrinter(doc, 10, true, 0);
        assertThat(printer.toString(), not(containsString("source[")));

        // With no limit the entire source is logged
        printer = new SlowLogParsedDocumentPrinter(doc, 10, true, Integer.MAX_VALUE);
        assertThat(printer.toString(), containsString("source[{\"foo\":\"bar\"}]"));

        // A small limit truncates the logged source
        printer = new SlowLogParsedDocumentPrinter(doc, 10, true, 3);
        assertThat(printer.toString(), containsString("source[{\"f]"));
    }
}

View File

@ -68,10 +68,17 @@ index.indexing.slowlog.threshold.index.warn: 10s
index.indexing.slowlog.threshold.index.info: 5s
index.indexing.slowlog.threshold.index.debug: 2s
index.indexing.slowlog.threshold.index.trace: 500ms
index.indexing.slowlog.level: info
index.indexing.slowlog.source: 1000
--------------------------------------------------
All of the above settings are _dynamic_ and can be set per-index.
By default Elasticsearch will log the first 1000 characters of the _source in
the slowlog. You can change that with `index.indexing.slowlog.source`. Setting
it to `false` or `0` will skip logging the source entirely and setting it to
`true` will log the entire source regardless of size.
The index slow log file is configured by default in the `logging.yml`
file: