From a3ef674992acb2910e0834ea9efe3041f726da64 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 30 Nov 2016 13:35:56 +0100 Subject: [PATCH] Reduce memory pressure when sending large terms queries. (#21776) When users send large `terms` query to Elasticsearch, every value is stored in an object. This change does not reduce the amount of created objects, but makes sure these objects die young by optimizing the list storage in case all values are either non-null instances of Long objects or BytesRef objects, which seems to help the JVM significantly. --- .../common/io/stream/BytesStreamOutput.java | 2 +- .../index/query/TermsQueryBuilder.java | 139 ++++++++++++++---- .../index/query/TermsQueryBuilderTests.java | 26 +++- 3 files changed, 140 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/common/io/stream/BytesStreamOutput.java b/core/src/main/java/org/elasticsearch/common/io/stream/BytesStreamOutput.java index 21de0c421b7..e65e8efb27b 100644 --- a/core/src/main/java/org/elasticsearch/common/io/stream/BytesStreamOutput.java +++ b/core/src/main/java/org/elasticsearch/common/io/stream/BytesStreamOutput.java @@ -75,7 +75,7 @@ public class BytesStreamOutput extends StreamOutput implements BytesStream { } @Override - public void writeBytes(byte[] b, int offset, int length) throws IOException { + public void writeBytes(byte[] b, int offset, int length) { // nothing to copy if (length == 0) { return; diff --git a/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java index ad5d90d838d..f39bca0779a 100644 --- a/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java @@ -26,11 +26,14 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.BytesRefs; @@ -42,11 +45,15 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.indices.TermsLookup; import java.io.IOException; +import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -78,7 +85,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { throw new IllegalArgumentException("Both values and termsLookup specified for terms query"); } this.fieldName = fieldName; - this.values = values; + this.values = values == null ? null : convert(values); this.termsLookup = termsLookup; } @@ -157,7 +164,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { throw new IllegalArgumentException("No value specified for terms query"); } this.fieldName = fieldName; - this.values = convertToBytesRefListIfStringList(values); + this.values = convert(values); this.termsLookup = null; } @@ -183,43 +190,125 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { } public List values() { - return convertToStringListIfBytesRefList(this.values); + return convertBack(this.values); } public TermsLookup termsLookup() { return this.termsLookup; } + private static final Set> INTEGER_TYPES = new HashSet<>( + Arrays.asList(Byte.class, Short.class, Integer.class, Long.class)); + private static final Set> STRING_TYPES = new HashSet<>( + Arrays.asList(BytesRef.class, String.class)); + /** - * Same as {@link #convertToBytesRefIfString} but on Iterable. - * @param objs the Iterable of input object - * @return the same input or a list of {@link BytesRef} representation if input was a list of type string + * Same as {@link #convert(List)} but on an {@link Iterable}. */ - private static List convertToBytesRefListIfStringList(Iterable objs) { - if (objs == null) { - return null; + private static List convert(Iterable values) { + List list; + if (values instanceof List) { + list = (List) values; + } else { + ArrayList arrayList = new ArrayList(); + for (Object o : values) { + arrayList.add(o); + } + list = arrayList; } - List newObjs = new ArrayList<>(); - for (Object obj : objs) { - newObjs.add(convertToBytesRefIfString(obj)); - } - return newObjs; + return convert(list); } /** - * Same as {@link #convertToStringIfBytesRef} but on Iterable. - * @param objs the Iterable of input object - * @return the same input or a list of utf8 string if input was a list of type {@link BytesRef} + * Convert the list in a way that optimizes storage in the case that all + * elements are either integers or {@link String}s/{@link BytesRef}s. This + * is useful to help garbage collections for use-cases that involve sending + * very large terms queries to Elasticsearch. If the list does not only + * contain integers or {@link String}s, then a list is returned where all + * {@link String}s have been replaced with {@link BytesRef}s. */ - private static List convertToStringListIfBytesRefList(Iterable objs) { - if (objs == null) { - return null; + static List convert(List list) { + if (list.isEmpty()) { + return Collections.emptyList(); } - List newObjs = new ArrayList<>(); - for (Object obj : objs) { - newObjs.add(convertToStringIfBytesRef(obj)); + + final boolean allNumbers = list.stream().allMatch(o -> o != null && INTEGER_TYPES.contains(o.getClass())); + if (allNumbers) { + final long[] elements = list.stream().mapToLong(o -> ((Number) o).longValue()).toArray(); + return new AbstractList() { + @Override + public Object get(int index) { + return elements[index]; + } + @Override + public int size() { + return elements.length; + } + }; } - return newObjs; + + final boolean allStrings = list.stream().allMatch(o -> o != null && STRING_TYPES.contains(o.getClass())); + if (allStrings) { + final BytesRefBuilder builder = new BytesRefBuilder(); + try (final BytesStreamOutput bytesOut = new BytesStreamOutput()) { + final int[] endOffsets = new int[list.size()]; + int i = 0; + for (Object o : list) { + BytesRef b; + if (o instanceof BytesRef) { + b = (BytesRef) o; + } else { + builder.copyChars(o.toString()); + b = builder.get(); + } + bytesOut.writeBytes(b.bytes, b.offset, b.length); + if (i == 0) { + endOffsets[0] = b.length; + } else { + endOffsets[i] = Math.addExact(endOffsets[i-1], b.length); + } + ++i; + } + final BytesReference bytes = bytesOut.bytes(); + return new AbstractList() { + public Object get(int i) { + final int startOffset = i == 0 ? 0 : endOffsets[i-1]; + final int endOffset = endOffsets[i]; + return bytes.slice(startOffset, endOffset - startOffset).toBytesRef(); + } + public int size() { + return endOffsets.length; + } + }; + } + } + + return list.stream().map(o -> o instanceof String ? new BytesRef(o.toString()) : o).collect(Collectors.toList()); + } + + /** + * Convert the internal {@link List} of values back to a user-friendly list. + * Integers are kept as-is since the terms query does not make any difference + * between {@link Integer}s and {@link Long}s, but {@link BytesRef}s are + * converted back to {@link String}s. + */ + static List convertBack(List list) { + return new AbstractList() { + @Override + public int size() { + return list.size(); + } + @Override + public Object get(int index) { + Object o = list.get(index); + if (o instanceof BytesRef) { + o = ((BytesRef) o).utf8ToString(); + } + // we do not convert longs, all integer types are equivalent + // as far as this query is concerned + return o; + } + }; } @Override @@ -230,7 +319,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { termsLookup.toXContent(builder, params); builder.endObject(); } else { - builder.field(fieldName, convertToStringListIfBytesRefList(values)); + builder.field(fieldName, convertBack(values)); } printBoostAndQueryName(builder); builder.endObject(); diff --git a/core/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java index 051c37958b0..fea2801ef96 100644 --- a/core/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.get.GetRequest; @@ -42,6 +43,7 @@ import org.junit.Before; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -213,7 +215,7 @@ public class TermsQueryBuilderTests extends AbstractQueryTestCase values = copy.values(); - assertEquals(Arrays.asList(1, 3, 4), values); + assertEquals(Arrays.asList(1L, 3L, 4L), values); } { TermsQueryBuilder builder = new TermsQueryBuilder("foo", new double[]{1, 3, 4}); @@ -283,5 +285,27 @@ public class TermsQueryBuilderTests extends AbstractQueryTestCase list = Arrays.asList(); + assertSame(Collections.emptyList(), TermsQueryBuilder.convert(list)); + assertEquals(list, TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list))); + + list = Arrays.asList("abc"); + assertEquals(Arrays.asList(new BytesRef("abc")), TermsQueryBuilder.convert(list)); + assertEquals(list, TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list))); + + list = Arrays.asList("abc", new BytesRef("def")); + assertEquals(Arrays.asList(new BytesRef("abc"), new BytesRef("def")), TermsQueryBuilder.convert(list)); + assertEquals(Arrays.asList("abc", "def"), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list))); + + list = Arrays.asList(5, 42L); + assertEquals(Arrays.asList(5L, 42L), TermsQueryBuilder.convert(list)); + assertEquals(Arrays.asList(5L, 42L), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list))); + + list = Arrays.asList(5, 42d); + assertEquals(Arrays.asList(5, 42d), TermsQueryBuilder.convert(list)); + assertEquals(Arrays.asList(5, 42d), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list))); + } }