PR #5706 introduced a bug in the sparse array-backed field data
When we load sparse single valued data, we automatically assign a missing value to represent a document who has none. We try to find a value that will increase the number of bits needed to represent the data. If that missing value happen to be 0, we do no properly intialize the value array. This commit solved this problem but also cleans up the code even more to make spotting such issues easier in the future.
This commit is contained in:
parent
63d1fa45ab
commit
e0fbd5df52
|
@ -141,51 +141,53 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
maxValue = values.get(values.size() - 1);
|
||||
}
|
||||
|
||||
// Encode document without a value with a special value
|
||||
long missingValue = 0;
|
||||
if (docsWithValues != null) {
|
||||
if ((maxValue - minValue + 1) == values.size()) {
|
||||
// values are dense
|
||||
if (minValue > Long.MIN_VALUE) {
|
||||
missingValue = --minValue;
|
||||
} else {
|
||||
assert maxValue != Long.MAX_VALUE;
|
||||
missingValue = ++maxValue;
|
||||
}
|
||||
} else {
|
||||
for (long i = 1; i < values.size(); ++i) {
|
||||
if (values.get(i) > values.get(i - 1) + 1) {
|
||||
missingValue = values.get(i - 1) + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final long valuesDelta = maxValue - minValue;
|
||||
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
|
||||
final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024);
|
||||
|
||||
if (formatHint == null) {
|
||||
formatHint = chooseStorageFormat(reader, values, build, ordinals, missingValue, valuesDelta, acceptableOverheadRatio, pageSize);
|
||||
formatHint = chooseStorageFormat(reader, values, build, ordinals, minValue, maxValue, acceptableOverheadRatio, pageSize);
|
||||
}
|
||||
|
||||
logger.trace("single value format for field [{}] set to [{}]", getFieldNames().fullName(), formatHint);
|
||||
|
||||
switch (formatHint) {
|
||||
case PACKED:
|
||||
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
|
||||
// Encode document without a value with a special value
|
||||
long missingValue = 0;
|
||||
if (docsWithValues != null) {
|
||||
if ((maxValue - minValue + 1) == values.size()) {
|
||||
// values are dense
|
||||
if (minValue > Long.MIN_VALUE) {
|
||||
missingValue = --minValue;
|
||||
} else {
|
||||
assert maxValue != Long.MAX_VALUE;
|
||||
missingValue = ++maxValue;
|
||||
}
|
||||
} else {
|
||||
for (long i = 1; i < values.size(); ++i) {
|
||||
if (values.get(i) > values.get(i - 1) + 1) {
|
||||
missingValue = values.get(i - 1) + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
missingValue -= minValue;
|
||||
}
|
||||
|
||||
final long valuesDelta = maxValue - minValue;
|
||||
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
|
||||
final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
|
||||
|
||||
if (missingValue != 0) {
|
||||
missingValue -= minValue;
|
||||
if (docsWithValues != null) {
|
||||
sValues.fill(0, sValues.size(), missingValue);
|
||||
}
|
||||
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
final long ord = ordinals.getOrd(i);
|
||||
if (ord != Ordinals.MISSING_ORDINAL) {
|
||||
sValues.set(i, values.get(ord - 1) - minValue);
|
||||
long value = values.get(ord - 1);
|
||||
sValues.set(i, value - minValue);
|
||||
}
|
||||
}
|
||||
if (docsWithValues == null) {
|
||||
|
@ -238,14 +240,18 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
}
|
||||
|
||||
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader reader, MonotonicAppendingLongBuffer values, Ordinals build, Docs ordinals,
|
||||
long missingValue, long valuesDelta, float acceptableOverheadRatio, int pageSize) {
|
||||
CommonSettings.MemoryStorageFormat format;// estimate format
|
||||
// valuesDelta can be negative if the difference between max and min values overflows the positive side of longs.
|
||||
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
|
||||
PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
|
||||
long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {
|
||||
|
||||
// there's sweet spot where due to low unique value count, using ordinals will consume less memory
|
||||
CommonSettings.MemoryStorageFormat format;
|
||||
|
||||
// estimate memory usage for a single packed array
|
||||
long packedDelta = maxValue - minValue + 1; // allow for a missing value
|
||||
// valuesDelta can be negative if the difference between max and min values overflows the positive side of longs.
|
||||
int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
|
||||
PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
|
||||
final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
|
||||
|
||||
// ordinal memory usage
|
||||
final long ordinalsSize = build.getMemorySizeInBytes() + values.ramBytesUsed();
|
||||
|
||||
// estimate the memory signature of paged packing
|
||||
|
@ -261,24 +267,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
}
|
||||
if (pageIndex == pageSize - 1) {
|
||||
// end of page, we now know enough to estimate memory usage
|
||||
if (pageMaxOrdinal == Long.MIN_VALUE) {
|
||||
// empty page - will use the null reader which just stores size
|
||||
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
|
||||
} else {
|
||||
long pageMinValue = values.get(pageMinOrdinal - 1);
|
||||
long pageMaxValue = values.get(pageMaxOrdinal - 1);
|
||||
long pageDelta = pageMaxValue - pageMinValue;
|
||||
if (pageDelta != 0) {
|
||||
bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
|
||||
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
|
||||
pagedSingleValuesSize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
|
||||
pagedSingleValuesSize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
|
||||
} else {
|
||||
// empty page
|
||||
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
}
|
||||
pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
|
||||
|
||||
pageMinOrdinal = Long.MAX_VALUE;
|
||||
pageMaxOrdinal = Long.MIN_VALUE;
|
||||
|
@ -288,24 +277,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
if (pageIndex > 0) {
|
||||
// last page estimation
|
||||
pageIndex++;
|
||||
if (pageMaxOrdinal == Long.MIN_VALUE) {
|
||||
// empty page - will use the null reader which just stores size
|
||||
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
|
||||
} else {
|
||||
long pageMinValue = values.get(pageMinOrdinal - 1);
|
||||
long pageMaxValue = values.get(pageMaxOrdinal - 1);
|
||||
long pageDelta = pageMaxValue - pageMinValue;
|
||||
if (pageDelta != 0) {
|
||||
bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
|
||||
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
|
||||
pagedSingleValuesSize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
|
||||
pagedSingleValuesSize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
|
||||
} else {
|
||||
// empty page
|
||||
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
}
|
||||
pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
|
||||
}
|
||||
|
||||
if (ordinalsSize < singleValuesSize) {
|
||||
|
@ -324,6 +296,31 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
|
|||
return format;
|
||||
}
|
||||
|
||||
private long getPageMemoryUsage(MonotonicAppendingLongBuffer values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
|
||||
int bitsRequired;
|
||||
long pageMemorySize = 0;
|
||||
PackedInts.FormatAndBits formatAndBits;
|
||||
if (pageMaxOrdinal == Long.MIN_VALUE) {
|
||||
// empty page - will use the null reader which just stores size
|
||||
pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
|
||||
} else {
|
||||
long pageMinValue = values.get(pageMinOrdinal - 1);
|
||||
long pageMaxValue = values.get(pageMaxOrdinal - 1);
|
||||
long pageDelta = pageMaxValue - pageMinValue;
|
||||
if (pageDelta != 0) {
|
||||
bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
|
||||
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
|
||||
pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
|
||||
pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
|
||||
} else {
|
||||
// empty page
|
||||
pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
}
|
||||
return pageMemorySize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
|
||||
return new LongValuesComparatorSource(this, missingValue, sortMode);
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
package org.elasticsearch.nested;
|
||||
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.action.admin.indices.status.IndicesStatusResponse;
|
||||
import org.elasticsearch.action.delete.DeleteResponse;
|
||||
|
@ -379,10 +378,10 @@ public class SimpleNestedTests extends ElasticsearchIntegrationTest {
|
|||
.startObject("nested1")
|
||||
.field("type", "nested").startObject("properties")
|
||||
.startObject("nested2").field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("field2_1").field("type", "string").endObject()
|
||||
.startObject("field2_2").field("type", "long").endObject()
|
||||
.endObject()
|
||||
.startObject("properties")
|
||||
.startObject("field2_1").field("type", "string").endObject()
|
||||
.startObject("field2_2").field("type", "long").endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject().endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
|
@ -603,21 +602,21 @@ public class SimpleNestedTests extends ElasticsearchIntegrationTest {
|
|||
|
||||
@Test
|
||||
public void testSimpleNestedSorting() throws Exception {
|
||||
assertAcked(prepareCreate("test")
|
||||
.setSettings(settingsBuilder()
|
||||
.put(indexSettings())
|
||||
.put("index.refresh_interval", -1))
|
||||
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("nested1")
|
||||
.field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("field1")
|
||||
.field("type", "long")
|
||||
.field("store", "yes")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
assertAcked(prepareCreate("test")
|
||||
.setSettings(settingsBuilder()
|
||||
.put(indexSettings())
|
||||
.put("index.refresh_interval", -1))
|
||||
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("nested1")
|
||||
.field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("field1")
|
||||
.field("type", "long")
|
||||
.field("store", "yes")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
|
||||
|
@ -773,15 +772,15 @@ public class SimpleNestedTests extends ElasticsearchIntegrationTest {
|
|||
|
||||
@Test
|
||||
public void testSimpleNestedSorting_withNestedFilterMissing() throws Exception {
|
||||
assertAcked(prepareCreate("test")
|
||||
.setSettings(settingsBuilder()
|
||||
.put(indexSettings())
|
||||
.put("index.referesh_interval", -1))
|
||||
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("nested1")
|
||||
.field("type", "nested")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
assertAcked(prepareCreate("test")
|
||||
.setSettings(settingsBuilder()
|
||||
.put(indexSettings())
|
||||
.put("index.referesh_interval", -1))
|
||||
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("nested1")
|
||||
.field("type", "nested")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
|
||||
|
@ -845,7 +844,7 @@ public class SimpleNestedTests extends ElasticsearchIntegrationTest {
|
|||
assertThat(searchResponse.getHits().hits()[2].id(), equalTo("3"));
|
||||
assertThat(searchResponse.getHits().hits()[2].sortValues()[0].toString(), equalTo("10"));
|
||||
|
||||
searchRequestBuilder = client().prepareSearch("test").setTypes("type1") .setQuery(QueryBuilders.matchAllQuery())
|
||||
searchRequestBuilder = client().prepareSearch("test").setTypes("type1").setQuery(QueryBuilders.matchAllQuery())
|
||||
.addSort(SortBuilders.fieldSort("nested1.field1").setNestedFilter(termFilter("nested1.field2", true)).missing(10).order(SortOrder.DESC));
|
||||
|
||||
if (randomBoolean()) {
|
||||
|
@ -864,27 +863,26 @@ public class SimpleNestedTests extends ElasticsearchIntegrationTest {
|
|||
client().prepareClearScroll().addScrollId("_all").get();
|
||||
}
|
||||
|
||||
@LuceneTestCase.AwaitsFix(bugUrl = "boaz is looking into failures here")
|
||||
@Test
|
||||
public void testSortNestedWithNestedFilter() throws Exception {
|
||||
assertAcked(prepareCreate("test")
|
||||
.addMapping("type1", XContentFactory.jsonBuilder().startObject()
|
||||
.startObject("type1")
|
||||
.startObject("properties")
|
||||
.startObject("grand_parent_values").field("type", "long").endObject()
|
||||
.startObject("parent").field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("parent_values").field("type", "long").endObject()
|
||||
.startObject("child").field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("child_values").field("type", "long").endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()));
|
||||
assertAcked(prepareCreate("test")
|
||||
.addMapping("type1", XContentFactory.jsonBuilder().startObject()
|
||||
.startObject("type1")
|
||||
.startObject("properties")
|
||||
.startObject("grand_parent_values").field("type", "long").endObject()
|
||||
.startObject("parent").field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("parent_values").field("type", "long").endObject()
|
||||
.startObject("child").field("type", "nested")
|
||||
.startObject("properties")
|
||||
.startObject("child_values").field("type", "long").endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()));
|
||||
ensureGreen();
|
||||
|
||||
// sum: 11
|
||||
|
|
Loading…
Reference in New Issue