Ignore live docs when loading field data, the ID cache and filter caches.
Taking deleted documents into account when loading field data is dangerous because a field data instance might be built for a given generation of a segment and then retrieved from the cache by an older generation of the same segment, which has fewer deleted documents. This could, for example, lead to under-estimated facet counts. The same issue applies to the ID cache and the filter caches. Closes #3224
commit 40cd549c37
parent 477489ac82
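
For orientation before the diff: a minimal sketch of the caching pattern this commit moves to. The SegmentCacheSketch class and its two method names are made up for illustration; DocIdSets.toCacheable, Filter, and the other Lucene types are the ones that appear in the patch. Cache entries for a segment are built from every document in the segment core, ignoring live docs, and deletions are applied by each consumer against its own reader when the cached entry is read.

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.DocIdSets;

// Hypothetical helper, for illustration only.
class SegmentCacheSketch {

    // Build side: the cached set is keyed by the segment core, so it must not bake in
    // liveDocs, which differ between generations (commit points) of the same segment.
    DocIdSet buildCacheEntry(AtomicReaderContext context, Filter filter) throws IOException {
        return DocIdSets.toCacheable(context.reader(), filter.getDocIdSet(context, null));
    }

    // Read side: deletions are applied against the consuming reader's own liveDocs,
    // so an entry built for one generation can safely be reused by another.
    DocIdSet readCacheEntry(AtomicReaderContext context, DocIdSet cached) {
        Bits liveDocs = context.reader().getLiveDocs();
        return BitsFilteredDocIdSet.wrap(cached, liveDocs); // returns cached unchanged when liveDocs is null
    }
}
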
@@ -167,8 +167,9 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
                 }
             }
             // we can't pass down acceptedDocs provided, because we are caching the result, and acceptedDocs
-            // might be specific to a query AST, we do pass down the live docs to make sure we optimize the execution
-            cacheValue = DocIdSets.toCacheable(context.reader(), filter.getDocIdSet(context, context.reader().getLiveDocs()));
+            // might be specific to a query. We don't pass the live docs either because a cache built for a specific
+            // generation of a segment might be reused by an older generation which has fewer deleted documents
+            cacheValue = DocIdSets.toCacheable(context.reader(), filter.getDocIdSet(context, null));
             // we might put the same one concurrently, that's fine, it will be replaced and the removal
             // will be called
             ShardId shardId = ShardUtils.extractShardId(context.reader());
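
The new comment above talks about segment generations: the same segment core can back several open readers that differ only in their deletions. A small standalone sketch of that situation (assumed Lucene 4.x, matching this code base; the SharedCoreDemo class is made up and is not part of the patch): two generations of one segment share a core cache key while their deletion counts differ, which is why a per-core cache entry must not bake in live docs.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

// Hypothetical demo, not part of the patch.
public class SharedCoreDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_43, new StandardAnalyzer(Version.LUCENE_43)));

        // Two documents flushed into a single segment.
        for (String id : new String[]{"1", "2"}) {
            Document d = new Document();
            d.add(new StringField("_id", id, Field.Store.NO));
            writer.addDocument(d);
        }
        writer.commit();
        DirectoryReader oldGen = DirectoryReader.open(dir);   // older generation: no deletions

        writer.deleteDocuments(new Term("_id", "1"));
        writer.commit();
        DirectoryReader newGen = DirectoryReader.open(dir);   // newer generation: one deletion

        AtomicReader oldLeaf = oldGen.getContext().leaves().get(0).reader();
        AtomicReader newLeaf = newGen.getContext().leaves().get(0).reader();

        // Same underlying segment core, hence the same cache key ...
        System.out.println(oldLeaf.getCoreCacheKey() == newLeaf.getCoreCacheKey()); // true
        // ... but different deletions: 0 for the old generation, 1 for the new one.
        System.out.println(oldLeaf.numDeletedDocs() + " vs " + newLeaf.numDeletedDocs());

        newGen.close();
        oldGen.close();
        writer.close();
        dir.close();
    }
}

Under the old behavior, a cache entry built against newLeaf (with live docs applied) would be missing a document that is still live in oldLeaf, which is exactly the under-counting the commit message describes.
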
@@ -180,7 +180,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
                     }

                     HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]);
-                    docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
+                    docsEnum = termsEnum.docs(null, docsEnum, 0);
                     for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                         typeBuilder.idToDoc.put(idAsBytes, docId);
                         typeBuilder.docToId[docId] = idAsBytes;
@@ -216,7 +216,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
                     HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]);
                     boolean added = false; // optimize for when all the docs are deleted for this id

-                    docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
+                    docsEnum = termsEnum.docs(null, docsEnum, 0);
                     for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                         if (!added) {
                             typeBuilder.parentIdsValues.add(idAsBytes);
@@ -334,7 +334,7 @@ public final class OrdinalsBuilder implements Closeable {
      * only full-precision terms.
      * </p>
      */
-    public BytesRefIterator buildFromTerms(final TermsEnum termsEnum, final Bits liveDocs) throws IOException {
+    public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
         return new BytesRefIterator() {
             private DocsEnum docsEnum = null;

@@ -342,7 +342,7 @@ public final class OrdinalsBuilder implements Closeable {
             public BytesRef next() throws IOException {
                 BytesRef ref;
                 if ((ref = termsEnum.next()) != null) {
-                    docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
+                    docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                     nextOrdinal();
                     int docId;
                     while((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
@@ -98,7 +98,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
         final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         try {
-            final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
+            final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
             BytesRef term;
             while ((term = iter.next()) != null) {
                 values.add(NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)));
@@ -78,7 +78,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
                 final int termOrd = builder.nextOrdinal();
                 assert termOrd > 0;
                 fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd);
-                docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
+                docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                 for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                     builder.addDoc(docId);
                 }
@@ -98,7 +98,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
         final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         try {
-            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
+            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
             BytesRef term;
             while ((term = iter.next()) != null) {
                 values.add(NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)));
@@ -91,7 +91,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         final CharsRef spare = new CharsRef();
         try {
-            BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());
+            BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null));
             BytesRef term;
             while ((term = iter.next()) != null) {
                 UnicodeUtil.UTF8toUTF16(term, spare);
@@ -113,7 +113,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
         final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         try {
-            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
+            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
             BytesRef term;
             assert !getNumericType().isFloatingPoint();
             final boolean indexedAsLong = getNumericType().requiredBits() > 32;
@@ -96,7 +96,7 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
                 final int termOrd = builder.nextOrdinal();
                 assert termOrd == termOrdToBytesOffset.size();
                 termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
-                docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
+                docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                 for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                     builder.addDoc(docId);
                 }
@@ -24,10 +24,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.*;
 import org.apache.lucene.store.RAMDirectory;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Strings;
@@ -75,6 +72,33 @@ import static org.hamcrest.Matchers.nullValue;
  */
 public class SimpleIdCacheTests {

+    @Test
+    public void testDeletedDocuments() throws Exception {
+        SimpleIdCache idCache = createSimpleIdCache(Tuple.tuple("child", "parent"));
+        IndexWriter writer = createIndexWriter();
+        // Begins with parent, ends with child docs
+        final Document parent = doc("parent", "1");
+        writer.addDocument(parent);
+        writer.addDocument(childDoc("child", "1", "parent", "1"));
+        writer.addDocument(childDoc("child", "2", "parent", "1"));
+        writer.addDocument(childDoc("child", "3", "parent", "1"));
+        writer.commit();
+
+        final String parentUid = parent.get("_uid");
+        assert parentUid != null;
+        writer.deleteDocuments(new Term("_uid", parentUid));
+
+        writer.close();
+        DirectoryReader topLevelReader = DirectoryReader.open(writer.getDirectory());
+        List<AtomicReaderContext> leaves = topLevelReader.getContext().leaves();
+        idCache.refresh(leaves);
+
+        assertThat(leaves.size(), equalTo(1));
+        IdReaderCache readerCache = idCache.reader(leaves.get(0).reader());
+        IdReaderTypeCache typeCache = readerCache.type("parent");
+        assertThat(typeCache.idByDoc(0).toUtf8(), equalTo("1"));
+    }
+
     @Test
     public void testRefresh() throws Exception {
         SimpleIdCache idCache = createSimpleIdCache(Tuple.tuple("child", "parent"));
@@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoubleField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
 import org.elasticsearch.index.fielddata.FieldDataType;

 /**
@@ -50,6 +51,21 @@ public class DoubleFieldDataTests extends NumericFieldDataTests {
         return "4.0";
     }

+    protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new DoubleField("value", 2.0d, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new DoubleField("value", 4.0d, Field.Store.NO));
+        writer.addDocument(d);
+
+        writer.commit();
+
+        writer.deleteDocuments(new Term("_id", "1"));
+    }
+
     @Override
     protected void fillSingleValueAllSet() throws Exception {
@@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
 import org.elasticsearch.index.fielddata.FieldDataType;

 /**
@@ -50,6 +51,21 @@ public class FloatFieldDataTests extends NumericFieldDataTests {
         return "4.0";
     }

+    protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new FloatField("value", 2.0f, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new FloatField("value", 4.0f, Field.Store.NO));
+        writer.addDocument(d);
+
+        writer.commit();
+
+        writer.deleteDocuments(new Term("_id", "1"));
+    }
+
     @Override
     protected void fillSingleValueAllSet() throws Exception {
@@ -28,6 +28,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData;
@@ -54,6 +55,22 @@ public class LongFieldDataTests extends NumericFieldDataTests {
         return new FieldDataType("long", ImmutableSettings.builder());
     }

+    protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new LongField("value", 2, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new LongField("value", 4, Field.Store.NO));
+        writer.addDocument(d);
+
+        writer.commit();
+
+        writer.deleteDocuments(new Term("_id", "1"));
+    }
+
     @Test
     public void testOptimizeTypeLong() throws Exception {
         Document d = new Document();
@@ -24,6 +24,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.HashedBytesRef;
@@ -81,6 +82,35 @@ public abstract class StringFieldDataTests extends AbstractFieldDataTests {
         writer.addDocument(d);
     }

+    protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new StringField("value", "2", Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new StringField("value", "4", Field.Store.NO));
+        writer.addDocument(d);
+
+        writer.commit();
+
+        writer.deleteDocuments(new Term("_id", "1"));
+    }
+
+    @Test
+    public void testDeletedDocs() throws Exception {
+        add2SingleValuedDocumentsAndDeleteOneOfThem();
+        IndexFieldData indexFieldData = getForField("value");
+        AtomicReaderContext readerContext = refreshReader();
+        AtomicFieldData fieldData = indexFieldData.load(readerContext);
+        BytesValues values = fieldData.getBytesValues();
+        assertThat(fieldData.getNumDocs(), equalTo(2));
+        for (int i = 0; i < fieldData.getNumDocs(); ++i) {
+            assertThat(values.hasValue(i), equalTo(true));
+        }
+    }
+
     @Test
     public void testSingleValueAllSet() throws Exception {
         fillSingleValueAllSet();