From 5ef67e9f17e11d77c1439f5320af47962a4451e6 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 27 Aug 2018 19:18:47 +0200 Subject: [PATCH] LUCENE-8466: IndexWriter.deleteDocs(Query... query) incorrectly applies deletes on flush if the index is sorted --- lucene/CHANGES.txt | 3 + .../lucene/index/FrozenBufferedUpdates.java | 23 +++++-- .../apache/lucene/index/TestIndexSorting.java | 60 +++++++++++++++++++ 3 files changed, 81 insertions(+), 5 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 21c6d237b76..587ed87cc88 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -220,6 +220,9 @@ Bug Fixes: * LUCENE-8458: Adjust initialization condition of PendingSoftDeletes and ensures it is initialized before accepting deletes (Simon Willnauer, Nhat Nguyen) +* LUCENE-8466: IndexWriter.deleteDocs(Query... query) incorrectly applies deletes on flush + if the index is sorted. (Adrien Grand, Jim Ferenczi) + Changes in Runtime Behavior: * LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 1e234b7ee5c..36834a39ff6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -707,11 +707,24 @@ final class FrozenBufferedUpdates { final Scorer scorer = weight.scorer(readerContext); if (scorer != null) { final DocIdSetIterator it = scorer.iterator(); - - int docID; - while ((docID = it.nextDoc()) < limit) { - if (segState.rld.delete(docID)) { - delCount++; + if (segState.rld.sortMap != null && limit != Integer.MAX_VALUE) { + assert privateSegment != null; + // This segment was sorted on flush; we must apply seg-private deletes carefully in this case: + int docID; + while ((docID = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + // The limit is in the pre-sorted doc space: + if (segState.rld.sortMap.newToOld(docID) < limit) { + if (segState.rld.delete(docID)) { + delCount++; + } + } + } + } else { + int docID; + while ((docID = it.nextDoc()) < limit) { + if (segState.rld.delete(docID)) { + delCount++; + } } } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 674972f9f97..3e62aad45b9 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -76,6 +76,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; @@ -2512,4 +2513,63 @@ public class TestIndexSorting extends LuceneTestCase { } IOUtils.close(dir); } + + public void testDeleteByTermOrQuery() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setIndexSort(new Sort(new SortField("numeric", SortField.Type.LONG))); + IndexWriter w = new IndexWriter(dir, config); + Document doc = new Document(); + int numDocs = random().nextInt(2000) + 5; + long[] expectedValues = new long[numDocs]; + + for (int i = 0; i < numDocs; i++) { + expectedValues[i] = random().nextInt(Integer.MAX_VALUE); + doc.clear(); + doc.add(new StringField("id", Integer.toString(i), Store.YES)); + doc.add(new NumericDocValuesField("numeric", expectedValues[i])); + w.addDocument(doc); + } + int numDeleted = random().nextInt(numDocs) + 1; + for (int i = 0; i < numDeleted; i++) { + int idToDelete = random().nextInt(numDocs); + if (random().nextBoolean()) { + w.deleteDocuments(new TermQuery(new Term("id", Integer.toString(idToDelete)))); + } else { + w.deleteDocuments(new Term("id", Integer.toString(idToDelete))); + } + + expectedValues[idToDelete] = -random().nextInt(Integer.MAX_VALUE); // force a reordering + doc.clear(); + doc.add(new StringField("id", Integer.toString(idToDelete), Store.YES)); + doc.add(new NumericDocValuesField("numeric", expectedValues[idToDelete])); + w.addDocument(doc); + } + + int docCount = 0; + try (IndexReader reader = DirectoryReader.open(w)) { + for (LeafReaderContext leafCtx : reader.leaves()) { + final Bits liveDocs = leafCtx.reader().getLiveDocs(); + final NumericDocValues values = leafCtx.reader().getNumericDocValues("numeric"); + if (values == null) { + continue; + } + for (int id = 0; id < leafCtx.reader().maxDoc(); id++) { + if (liveDocs != null && liveDocs.get(id) == false) { + continue; + } + if (values.advanceExact(id) == false) { + continue; + } + int globalId = Integer.parseInt(leafCtx.reader().document(id).getField("id").stringValue()); + assertTrue(values.advanceExact(id)); + assertEquals(expectedValues[globalId], values.longValue()); + docCount ++; + } + } + assertEquals(docCount, numDocs); + } + w.close(); + dir.close(); + } }