Ignore live docs when loading field data, the ID cache and filter caches.

Taking deleted documents into account when loading field data is dangerous
because a field data instance might be built for a given generation of a
segment and then served from the cache to an older generation of the same
segment, which has fewer deleted documents. Documents that are live in the
older generation would then appear to have no values, which could, for
example, lead to under-estimated facet counts. The same issue applies to the
ID cache and filter caches.

Close #3224
Adrien Grand 2013-06-26 23:10:48 +02:00
parent 477489ac82
commit 40cd549c37
14 changed files with 120 additions and 16 deletions
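The change follows one pattern throughout: per-segment structures (field data, ID cache entries, cached filters) are built over all documents, deleted ones included, so that one cached instance is valid for every generation of the segment, and deletions are only consulted at search time against the reader actually being searched. A minimal sketch of that pattern, using the Lucene 4.x APIs that appear in the diff below (the class and method names here are hypothetical, not code from this commit):

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

// Hypothetical helper, for illustration only.
class LiveDocsAtSearchTimeSketch {

    // Load time: pass null live docs on purpose so deleted documents are included
    // and the result can be shared by every generation of the segment.
    static FixedBitSet docsWithValue(AtomicReader reader, String field) throws IOException {
        final FixedBitSet docsWithValue = new FixedBitSet(reader.maxDoc());
        Terms terms = reader.terms(field);
        if (terms == null) {
            return docsWithValue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        while (termsEnum.next() != null) {
            docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
            for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                docsWithValue.set(docId);
            }
        }
        return docsWithValue;
    }

    // Search time: deletions come from the reader being searched, so an older
    // generation (which has fewer deletes) never sees missing values.
    static int countLiveDocsWithValue(AtomicReader reader, FixedBitSet docsWithValue) {
        final Bits liveDocs = reader.getLiveDocs(); // null when the segment has no deletions
        int count = 0;
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if ((liveDocs == null || liveDocs.get(docId)) && docsWithValue.get(docId)) {
                count++;
            }
        }
        return count;
    }
}

Two readers that share the same segment core can then share the loaded structure; only the cheap live-docs check at search time differs between generations.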


@@ -167,8 +167,9 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte
}
}
// we can't pass down acceptedDocs provided, because we are caching the result, and acceptedDocs
// might be specific to a query AST, we do pass down the live docs to make sure we optimize the execution
cacheValue = DocIdSets.toCacheable(context.reader(), filter.getDocIdSet(context, context.reader().getLiveDocs()));
// might be specific to a query. We don't pass the live docs either because a cache built for a specific
// generation of a segment might be reused by an older generation which has fewer deleted documents
cacheValue = DocIdSets.toCacheable(context.reader(), filter.getDocIdSet(context, null));
// we might put the same one concurrently, that's fine, it will be replaced and the removal
// will be called
ShardId shardId = ShardUtils.extractShardId(context.reader());
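Because the cached DocIdSet is now generation-agnostic, deletions have to be re-applied wherever the cached set is consumed, and that consumer side is not part of this hunk. A hedged sketch of the usual Lucene 4.x way to do it (class and method names are hypothetical):

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.Bits;

// Hypothetical consumer-side helper, for illustration only.
class CachedFilterSketch {

    // Intersect a generation-agnostic cached set with the deletions (or any
    // narrower acceptDocs) of the reader that is actually being searched.
    static DocIdSet applyLiveDocs(DocIdSet cached, AtomicReaderContext context, Bits acceptDocs) {
        Bits bits = acceptDocs != null ? acceptDocs : context.reader().getLiveDocs();
        if (cached == null || bits == null) {
            return cached; // no deletions and no restriction: nothing to filter out
        }
        return BitsFilteredDocIdSet.wrap(cached, bits);
    }
}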


@@ -180,7 +180,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
}
HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]);
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
docsEnum = termsEnum.docs(null, docsEnum, 0);
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
typeBuilder.idToDoc.put(idAsBytes, docId);
typeBuilder.docToId[docId] = idAsBytes;
@@ -216,7 +216,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]);
boolean added = false; // optimize for when all the docs are deleted for this id
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
docsEnum = termsEnum.docs(null, docsEnum, 0);
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
if (!added) {
typeBuilder.parentIdsValues.add(idAsBytes);


@@ -334,7 +334,7 @@ public final class OrdinalsBuilder implements Closeable {
* only full-precision terms.
* </p>
*/
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum, final Bits liveDocs) throws IOException {
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
return new BytesRefIterator() {
private DocsEnum docsEnum = null;
@@ -342,7 +342,7 @@ public final class OrdinalsBuilder implements Closeable {
public BytesRef next() throws IOException {
BytesRef ref;
if ((ref = termsEnum.next()) != null) {
docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
nextOrdinal();
int docId;
while((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {


@@ -98,7 +98,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
try {
final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
BytesRef term;
while ((term = iter.next()) != null) {
values.add(NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)));


@@ -78,7 +78,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
final int termOrd = builder.nextOrdinal();
assert termOrd > 0;
fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd);
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
builder.addDoc(docId);
}


@@ -98,7 +98,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
try {
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
BytesRef term;
while ((term = iter.next()) != null) {
values.add(NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)));


@@ -91,7 +91,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
final CharsRef spare = new CharsRef();
try {
BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());
BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null));
BytesRef term;
while ((term = iter.next()) != null) {
UnicodeUtil.UTF8toUTF16(term, spare);


@@ -113,7 +113,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
try {
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
BytesRef term;
assert !getNumericType().isFloatingPoint();
final boolean indexedAsLong = getNumericType().requiredBits() > 32;


@@ -96,7 +96,7 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
final int termOrd = builder.nextOrdinal();
assert termOrd == termOrdToBytesOffset.size();
termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
builder.addDoc(docId);
}


@@ -24,10 +24,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.*;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Strings;
@@ -75,6 +72,33 @@ import static org.hamcrest.Matchers.nullValue;
*/
public class SimpleIdCacheTests {
@Test
public void testDeletedDocuments() throws Exception {
SimpleIdCache idCache = createSimpleIdCache(Tuple.tuple("child", "parent"));
IndexWriter writer = createIndexWriter();
// Begins with parent, ends with child docs
final Document parent = doc("parent", "1");
writer.addDocument(parent);
writer.addDocument(childDoc("child", "1", "parent", "1"));
writer.addDocument(childDoc("child", "2", "parent", "1"));
writer.addDocument(childDoc("child", "3", "parent", "1"));
writer.commit();
final String parentUid = parent.get("_uid");
assert parentUid != null;
writer.deleteDocuments(new Term("_uid", parentUid));
writer.close();
DirectoryReader topLevelReader = DirectoryReader.open(writer.getDirectory());
List<AtomicReaderContext> leaves = topLevelReader.getContext().leaves();
idCache.refresh(leaves);
assertThat(leaves.size(), equalTo(1));
IdReaderCache readerCache = idCache.reader(leaves.get(0).reader());
IdReaderTypeCache typeCache = readerCache.type("parent");
assertThat(typeCache.idByDoc(0).toUtf8(), equalTo("1"));
}
@Test
public void testRefresh() throws Exception {
SimpleIdCache idCache = createSimpleIdCache(Tuple.tuple("child", "parent"));


@@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.elasticsearch.index.fielddata.FieldDataType;
/**
@@ -50,6 +51,21 @@ public class DoubleFieldDataTests extends NumericFieldDataTests {
return "4.0";
}
protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
Document d = new Document();
d.add(new StringField("_id", "1", Field.Store.NO));
d.add(new DoubleField("value", 2.0d, Field.Store.NO));
writer.addDocument(d);
d = new Document();
d.add(new StringField("_id", "2", Field.Store.NO));
d.add(new DoubleField("value", 4.0d, Field.Store.NO));
writer.addDocument(d);
writer.commit();
writer.deleteDocuments(new Term("_id", "1"));
}
@Override
protected void fillSingleValueAllSet() throws Exception {


@@ -23,6 +23,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.elasticsearch.index.fielddata.FieldDataType;
/**
@@ -50,6 +51,21 @@ public class FloatFieldDataTests extends NumericFieldDataTests {
return "4.0";
}
protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
Document d = new Document();
d.add(new StringField("_id", "1", Field.Store.NO));
d.add(new FloatField("value", 2.0f, Field.Store.NO));
writer.addDocument(d);
d = new Document();
d.add(new StringField("_id", "2", Field.Store.NO));
d.add(new FloatField("value", 4.0f, Field.Store.NO));
writer.addDocument(d);
writer.commit();
writer.deleteDocuments(new Term("_id", "1"));
}
@Override
protected void fillSingleValueAllSet() throws Exception {


@@ -28,6 +28,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData;
@@ -54,6 +55,22 @@ public class LongFieldDataTests extends NumericFieldDataTests {
return new FieldDataType("long", ImmutableSettings.builder());
}
protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
Document d = new Document();
d.add(new StringField("_id", "1", Field.Store.NO));
d.add(new LongField("value", 2, Field.Store.NO));
writer.addDocument(d);
d = new Document();
d.add(new StringField("_id", "2", Field.Store.NO));
d.add(new LongField("value", 4, Field.Store.NO));
writer.addDocument(d);
writer.commit();
writer.deleteDocuments(new Term("_id", "1"));
}
@Test
public void testOptimizeTypeLong() throws Exception {
Document d = new Document();


@@ -24,6 +24,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.HashedBytesRef;
@@ -81,6 +82,35 @@ public abstract class StringFieldDataTests extends AbstractFieldDataTests {
writer.addDocument(d);
}
protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
Document d = new Document();
d.add(new StringField("_id", "1", Field.Store.NO));
d.add(new StringField("value", "2", Field.Store.NO));
writer.addDocument(d);
d = new Document();
d.add(new StringField("_id", "2", Field.Store.NO));
d.add(new StringField("value", "4", Field.Store.NO));
writer.addDocument(d);
writer.commit();
writer.deleteDocuments(new Term("_id", "1"));
}
@Test
public void testDeletedDocs() throws Exception {
add2SingleValuedDocumentsAndDeleteOneOfThem();
IndexFieldData indexFieldData = getForField("value");
AtomicReaderContext readerContext = refreshReader();
AtomicFieldData fieldData = indexFieldData.load(readerContext);
BytesValues values = fieldData.getBytesValues();
assertThat(fieldData.getNumDocs(), equalTo(2));
for (int i = 0; i < fieldData.getNumDocs(); ++i) {
assertThat(values.hasValue(i), equalTo(true));
}
}
@Test
public void testSingleValueAllSet() throws Exception {
fillSingleValueAllSet();