LUCENE-8466: IndexWriter.deleteDocs(Query... query) incorrectly applies deletes on flush if the index is sorted

This commit is contained in:
Jim Ferenczi 2018-08-27 19:18:47 +02:00
parent 94503719ec
commit 5ef67e9f17
3 changed files with 81 additions and 5 deletions

View File

@ -220,6 +220,9 @@ Bug Fixes:
* LUCENE-8458: Adjust initialization condition of PendingSoftDeletes and ensures
it is initialized before accepting deletes (Simon Willnauer, Nhat Nguyen)
* LUCENE-8466: IndexWriter.deleteDocs(Query... query) incorrectly applies deletes on flush
if the index is sorted. (Adrien Grand, Jim Ferenczi)
Changes in Runtime Behavior:
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

View File

@ -707,11 +707,24 @@ final class FrozenBufferedUpdates {
final Scorer scorer = weight.scorer(readerContext);
if (scorer != null) {
final DocIdSetIterator it = scorer.iterator();
int docID;
while ((docID = it.nextDoc()) < limit) {
if (segState.rld.delete(docID)) {
delCount++;
if (segState.rld.sortMap != null && limit != Integer.MAX_VALUE) {
assert privateSegment != null;
// This segment was sorted on flush; we must apply seg-private deletes carefully in this case:
int docID;
while ((docID = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
// The limit is in the pre-sorted doc space:
if (segState.rld.sortMap.newToOld(docID) < limit) {
if (segState.rld.delete(docID)) {
delCount++;
}
}
}
} else {
int docID;
while ((docID = it.nextDoc()) < limit) {
if (segState.rld.delete(docID)) {
delCount++;
}
}
}
}

View File

@ -76,6 +76,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
@ -2512,4 +2513,63 @@ public class TestIndexSorting extends LuceneTestCase {
}
IOUtils.close(dir);
}
public void testDeleteByTermOrQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig();
config.setIndexSort(new Sort(new SortField("numeric", SortField.Type.LONG)));
IndexWriter w = new IndexWriter(dir, config);
Document doc = new Document();
int numDocs = random().nextInt(2000) + 5;
long[] expectedValues = new long[numDocs];
for (int i = 0; i < numDocs; i++) {
expectedValues[i] = random().nextInt(Integer.MAX_VALUE);
doc.clear();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("numeric", expectedValues[i]));
w.addDocument(doc);
}
int numDeleted = random().nextInt(numDocs) + 1;
for (int i = 0; i < numDeleted; i++) {
int idToDelete = random().nextInt(numDocs);
if (random().nextBoolean()) {
w.deleteDocuments(new TermQuery(new Term("id", Integer.toString(idToDelete))));
} else {
w.deleteDocuments(new Term("id", Integer.toString(idToDelete)));
}
expectedValues[idToDelete] = -random().nextInt(Integer.MAX_VALUE); // force a reordering
doc.clear();
doc.add(new StringField("id", Integer.toString(idToDelete), Store.YES));
doc.add(new NumericDocValuesField("numeric", expectedValues[idToDelete]));
w.addDocument(doc);
}
int docCount = 0;
try (IndexReader reader = DirectoryReader.open(w)) {
for (LeafReaderContext leafCtx : reader.leaves()) {
final Bits liveDocs = leafCtx.reader().getLiveDocs();
final NumericDocValues values = leafCtx.reader().getNumericDocValues("numeric");
if (values == null) {
continue;
}
for (int id = 0; id < leafCtx.reader().maxDoc(); id++) {
if (liveDocs != null && liveDocs.get(id) == false) {
continue;
}
if (values.advanceExact(id) == false) {
continue;
}
int globalId = Integer.parseInt(leafCtx.reader().document(id).getField("id").stringValue());
assertTrue(values.advanceExact(id));
assertEquals(expectedValues[globalId], values.longValue());
docCount ++;
}
}
assertEquals(docCount, numDocs);
}
w.close();
dir.close();
}
}