Reduce memory pressure when sending large terms queries. (#21776)
When users send large `terms` query to Elasticsearch, every value is stored in an object. This change does not reduce the amount of created objects, but makes sure these objects die young by optimizing the list storage in case all values are either non-null instances of Long objects or BytesRef objects, which seems to help the JVM significantly.
This commit is contained in:
parent
6231009a8f
commit
a3ef674992
|
@ -75,7 +75,7 @@ public class BytesStreamOutput extends StreamOutput implements BytesStream {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void writeBytes(byte[] b, int offset, int length) throws IOException {
|
||||
public void writeBytes(byte[] b, int offset, int length) {
|
||||
// nothing to copy
|
||||
if (length == 0) {
|
||||
return;
|
||||
|
|
|
@ -26,11 +26,14 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.action.get.GetRequest;
|
||||
import org.elasticsearch.action.get.GetResponse;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.lucene.BytesRefs;
|
||||
|
@ -42,11 +45,15 @@ import org.elasticsearch.index.mapper.MappedFieldType;
|
|||
import org.elasticsearch.indices.TermsLookup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.AbstractList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
|
@ -78,7 +85,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
|
|||
throw new IllegalArgumentException("Both values and termsLookup specified for terms query");
|
||||
}
|
||||
this.fieldName = fieldName;
|
||||
this.values = values;
|
||||
this.values = values == null ? null : convert(values);
|
||||
this.termsLookup = termsLookup;
|
||||
}
|
||||
|
||||
|
@ -157,7 +164,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
|
|||
throw new IllegalArgumentException("No value specified for terms query");
|
||||
}
|
||||
this.fieldName = fieldName;
|
||||
this.values = convertToBytesRefListIfStringList(values);
|
||||
this.values = convert(values);
|
||||
this.termsLookup = null;
|
||||
}
|
||||
|
||||
|
@ -183,43 +190,125 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
|
|||
}
|
||||
|
||||
public List<Object> values() {
|
||||
return convertToStringListIfBytesRefList(this.values);
|
||||
return convertBack(this.values);
|
||||
}
|
||||
|
||||
public TermsLookup termsLookup() {
|
||||
return this.termsLookup;
|
||||
}
|
||||
|
||||
private static final Set<Class<? extends Number>> INTEGER_TYPES = new HashSet<>(
|
||||
Arrays.asList(Byte.class, Short.class, Integer.class, Long.class));
|
||||
private static final Set<Class<?>> STRING_TYPES = new HashSet<>(
|
||||
Arrays.asList(BytesRef.class, String.class));
|
||||
|
||||
/**
|
||||
* Same as {@link #convertToBytesRefIfString} but on Iterable.
|
||||
* @param objs the Iterable of input object
|
||||
* @return the same input or a list of {@link BytesRef} representation if input was a list of type string
|
||||
* Same as {@link #convert(List)} but on an {@link Iterable}.
|
||||
*/
|
||||
private static List<Object> convertToBytesRefListIfStringList(Iterable<?> objs) {
|
||||
if (objs == null) {
|
||||
return null;
|
||||
private static List<?> convert(Iterable<?> values) {
|
||||
List<?> list;
|
||||
if (values instanceof List<?>) {
|
||||
list = (List<?>) values;
|
||||
} else {
|
||||
ArrayList<Object> arrayList = new ArrayList<Object>();
|
||||
for (Object o : values) {
|
||||
arrayList.add(o);
|
||||
}
|
||||
List<Object> newObjs = new ArrayList<>();
|
||||
for (Object obj : objs) {
|
||||
newObjs.add(convertToBytesRefIfString(obj));
|
||||
list = arrayList;
|
||||
}
|
||||
return newObjs;
|
||||
return convert(list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as {@link #convertToStringIfBytesRef} but on Iterable.
|
||||
* @param objs the Iterable of input object
|
||||
* @return the same input or a list of utf8 string if input was a list of type {@link BytesRef}
|
||||
* Convert the list in a way that optimizes storage in the case that all
|
||||
* elements are either integers or {@link String}s/{@link BytesRef}s. This
|
||||
* is useful to help garbage collections for use-cases that involve sending
|
||||
* very large terms queries to Elasticsearch. If the list does not only
|
||||
* contain integers or {@link String}s, then a list is returned where all
|
||||
* {@link String}s have been replaced with {@link BytesRef}s.
|
||||
*/
|
||||
private static List<Object> convertToStringListIfBytesRefList(Iterable<?> objs) {
|
||||
if (objs == null) {
|
||||
return null;
|
||||
static List<?> convert(List<?> list) {
|
||||
if (list.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
List<Object> newObjs = new ArrayList<>();
|
||||
for (Object obj : objs) {
|
||||
newObjs.add(convertToStringIfBytesRef(obj));
|
||||
|
||||
final boolean allNumbers = list.stream().allMatch(o -> o != null && INTEGER_TYPES.contains(o.getClass()));
|
||||
if (allNumbers) {
|
||||
final long[] elements = list.stream().mapToLong(o -> ((Number) o).longValue()).toArray();
|
||||
return new AbstractList<Object>() {
|
||||
@Override
|
||||
public Object get(int index) {
|
||||
return elements[index];
|
||||
}
|
||||
return newObjs;
|
||||
@Override
|
||||
public int size() {
|
||||
return elements.length;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
final boolean allStrings = list.stream().allMatch(o -> o != null && STRING_TYPES.contains(o.getClass()));
|
||||
if (allStrings) {
|
||||
final BytesRefBuilder builder = new BytesRefBuilder();
|
||||
try (final BytesStreamOutput bytesOut = new BytesStreamOutput()) {
|
||||
final int[] endOffsets = new int[list.size()];
|
||||
int i = 0;
|
||||
for (Object o : list) {
|
||||
BytesRef b;
|
||||
if (o instanceof BytesRef) {
|
||||
b = (BytesRef) o;
|
||||
} else {
|
||||
builder.copyChars(o.toString());
|
||||
b = builder.get();
|
||||
}
|
||||
bytesOut.writeBytes(b.bytes, b.offset, b.length);
|
||||
if (i == 0) {
|
||||
endOffsets[0] = b.length;
|
||||
} else {
|
||||
endOffsets[i] = Math.addExact(endOffsets[i-1], b.length);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
final BytesReference bytes = bytesOut.bytes();
|
||||
return new AbstractList<Object>() {
|
||||
public Object get(int i) {
|
||||
final int startOffset = i == 0 ? 0 : endOffsets[i-1];
|
||||
final int endOffset = endOffsets[i];
|
||||
return bytes.slice(startOffset, endOffset - startOffset).toBytesRef();
|
||||
}
|
||||
public int size() {
|
||||
return endOffsets.length;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return list.stream().map(o -> o instanceof String ? new BytesRef(o.toString()) : o).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the internal {@link List} of values back to a user-friendly list.
|
||||
* Integers are kept as-is since the terms query does not make any difference
|
||||
* between {@link Integer}s and {@link Long}s, but {@link BytesRef}s are
|
||||
* converted back to {@link String}s.
|
||||
*/
|
||||
static List<Object> convertBack(List<?> list) {
|
||||
return new AbstractList<Object>() {
|
||||
@Override
|
||||
public int size() {
|
||||
return list.size();
|
||||
}
|
||||
@Override
|
||||
public Object get(int index) {
|
||||
Object o = list.get(index);
|
||||
if (o instanceof BytesRef) {
|
||||
o = ((BytesRef) o).utf8ToString();
|
||||
}
|
||||
// we do not convert longs, all integer types are equivalent
|
||||
// as far as this query is concerned
|
||||
return o;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -230,7 +319,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
|
|||
termsLookup.toXContent(builder, params);
|
||||
builder.endObject();
|
||||
} else {
|
||||
builder.field(fieldName, convertToStringListIfBytesRefList(values));
|
||||
builder.field(fieldName, convertBack(values));
|
||||
}
|
||||
printBoostAndQueryName(builder);
|
||||
builder.endObject();
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.get.GetRequest;
|
||||
|
@ -42,6 +43,7 @@ import org.junit.Before;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
@ -213,7 +215,7 @@ public class TermsQueryBuilderTests extends AbstractQueryTestCase<TermsQueryBuil
|
|||
TermsQueryBuilder builder = new TermsQueryBuilder("foo", new int[]{1, 3, 4});
|
||||
TermsQueryBuilder copy = (TermsQueryBuilder) assertSerialization(builder);
|
||||
List<Object> values = copy.values();
|
||||
assertEquals(Arrays.asList(1, 3, 4), values);
|
||||
assertEquals(Arrays.asList(1L, 3L, 4L), values);
|
||||
}
|
||||
{
|
||||
TermsQueryBuilder builder = new TermsQueryBuilder("foo", new double[]{1, 3, 4});
|
||||
|
@ -283,5 +285,27 @@ public class TermsQueryBuilderTests extends AbstractQueryTestCase<TermsQueryBuil
|
|||
// that's why we return true here all the time
|
||||
return super.isCachable(queryBuilder);
|
||||
}
|
||||
|
||||
public void testConversion() {
|
||||
List<Object> list = Arrays.asList();
|
||||
assertSame(Collections.emptyList(), TermsQueryBuilder.convert(list));
|
||||
assertEquals(list, TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));
|
||||
|
||||
list = Arrays.asList("abc");
|
||||
assertEquals(Arrays.asList(new BytesRef("abc")), TermsQueryBuilder.convert(list));
|
||||
assertEquals(list, TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));
|
||||
|
||||
list = Arrays.asList("abc", new BytesRef("def"));
|
||||
assertEquals(Arrays.asList(new BytesRef("abc"), new BytesRef("def")), TermsQueryBuilder.convert(list));
|
||||
assertEquals(Arrays.asList("abc", "def"), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));
|
||||
|
||||
list = Arrays.asList(5, 42L);
|
||||
assertEquals(Arrays.asList(5L, 42L), TermsQueryBuilder.convert(list));
|
||||
assertEquals(Arrays.asList(5L, 42L), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));
|
||||
|
||||
list = Arrays.asList(5, 42d);
|
||||
assertEquals(Arrays.asList(5, 42d), TermsQueryBuilder.convert(list));
|
||||
assertEquals(Arrays.asList(5, 42d), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue