Deduplicate Strings in REST Bulk Request Parsing (#56506) (#56568)

We can save a little memory here since these strings might live for quite
a while on the coordinating node.
This commit is contained in:
Armin Braun 2020-05-12 09:52:44 +02:00 committed by GitHub
parent 5c0f26de1d
commit 2d08ef729c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 4 deletions

View File

@ -40,7 +40,10 @@ import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Function;
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
@ -139,6 +142,10 @@ public final class BulkRequestParser {
int from = 0;
byte marker = xContent.streamSeparator();
boolean typesDeprecationLogged = false;
// Bulk requests can contain a lot of repeated strings for the index, pipeline and routing parameters. This map is used to
// deduplicate duplicate strings parsed for these parameters. While it does not prevent instantiating the duplicate strings, it
// reduces their lifetime to the lifetime of this parse call instead of the lifetime of the full bulk request.
final Map<String, String> stringDeduplicator = new HashMap<>();
while (true) {
int nextMarker = findNextMarker(marker, from, data);
if (nextMarker == -1) {
@ -198,17 +205,17 @@ public final class BulkRequestParser {
if (!allowExplicitIndex) {
throw new IllegalArgumentException("explicit index in bulk is not allowed");
}
index = parser.text();
index = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
} else if (TYPE.match(currentFieldName, parser.getDeprecationHandler())) {
if (warnOnTypeUsage && typesDeprecationLogged == false) {
deprecationLogger.deprecatedAndMaybeLog("bulk_with_types", RestBulkAction.TYPES_DEPRECATION_MESSAGE);
typesDeprecationLogged = true;
}
type = parser.text();
type = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
} else if (ID.match(currentFieldName, parser.getDeprecationHandler())) {
id = parser.text();
} else if (ROUTING.match(currentFieldName, parser.getDeprecationHandler())) {
routing = parser.text();
routing = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
} else if (OP_TYPE.match(currentFieldName, parser.getDeprecationHandler())) {
opType = parser.text();
} else if (VERSION.match(currentFieldName, parser.getDeprecationHandler())) {
@ -222,7 +229,7 @@ public final class BulkRequestParser {
} else if (RETRY_ON_CONFLICT.match(currentFieldName, parser.getDeprecationHandler())) {
retryOnConflict = parser.intValue();
} else if (PIPELINE.match(currentFieldName, parser.getDeprecationHandler())) {
pipeline = parser.text();
pipeline = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
} else if (SOURCE.match(currentFieldName, parser.getDeprecationHandler())) {
fetchSourceContext = FetchSourceContext.fromXContent(parser);
} else {

View File

@ -19,12 +19,16 @@
package org.elasticsearch.action.bulk;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.rest.action.document.RestBulkAction;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
public class BulkRequestParserTests extends ESTestCase {
@ -111,4 +115,20 @@ public class BulkRequestParserTests extends ESTestCase {
assertWarnings(RestBulkAction.TYPES_DEPRECATION_MESSAGE);
}
public void testParseDeduplicatesParameterStrings() throws IOException {
BytesArray request = new BytesArray(
"{ \"index\":{ \"_index\": \"bar\", \"pipeline\": \"foo\", \"routing\": \"blub\"} }\n{}\n"
+ "{ \"index\":{ \"_index\": \"bar\", \"pipeline\": \"foo\", \"routing\": \"blub\" } }\n{}\n");
BulkRequestParser parser = new BulkRequestParser(randomBoolean());
final List<IndexRequest> indexRequests = new ArrayList<>();
parser.parse(request, null, null, null, null, true, XContentType.JSON,
indexRequests::add,
req -> fail(), req -> fail());
assertThat(indexRequests, Matchers.hasSize(2));
final IndexRequest first = indexRequests.get(0);
final IndexRequest second = indexRequests.get(1);
assertSame(first.index(), second.index());
assertSame(first.getPipeline(), second.getPipeline());
assertSame(first.routing(), second.routing());
}
}