diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/idversion/TestIDVersionPostingsFormat.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/idversion/TestIDVersionPostingsFormat.java index 7f67e4767f6..a34fbfa6db5 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/idversion/TestIDVersionPostingsFormat.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/idversion/TestIDVersionPostingsFormat.java @@ -343,7 +343,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase { /** Returns docID if found, else -1. */ public int lookup(BytesRef id, long version) throws IOException { - for (int seg = 0; seg < numSegs; seg++) { + for (int seg = 0; seg < numEnums; seg++) { if (((IDVersionSegmentTermsEnum) termsEnums[seg]).seekExact(id, version)) { if (VERBOSE) { System.out.println(" found in seg=" + termsEnums[seg]); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/PerThreadPKLookup.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/PerThreadPKLookup.java index 5cbb9bc3f83..5db9a2409e8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/PerThreadPKLookup.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/PerThreadPKLookup.java @@ -18,8 +18,13 @@ package org.apache.lucene.tests.index; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.CacheHelper; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Terms; @@ -35,17 +40,29 @@ import org.apache.lucene.util.BytesRef; */ public class PerThreadPKLookup { + private final String idFieldName; protected final TermsEnum[] termsEnums; protected final PostingsEnum[] 
postingsEnums; protected final Bits[] liveDocs; protected final int[] docBases; - protected final int numSegs; + protected final int numEnums; protected final boolean hasDeletions; + private final Map<IndexReader.CacheKey, Integer> enumIndexes; - public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException { + public PerThreadPKLookup(IndexReader reader, String idFieldName) throws IOException { + this(reader, idFieldName, Collections.emptyMap(), null, null); + } - List<LeafReaderContext> leaves = new ArrayList<>(r.leaves()); + private PerThreadPKLookup( + IndexReader reader, + String idFieldName, + Map<IndexReader.CacheKey, Integer> prevEnumIndexes, + TermsEnum[] reusableTermsEnums, + PostingsEnum[] reusablePostingsEnums) + throws IOException { + this.idFieldName = idFieldName; + List<LeafReaderContext> leaves = new ArrayList<>(reader.leaves()); // Larger segments are more likely to have the id, so we sort largest to smallest by numDocs: leaves.sort((c1, c2) -> c2.reader().numDocs() - c1.reader().numDocs()); @@ -53,26 +70,50 @@ public class PerThreadPKLookup { postingsEnums = new PostingsEnum[leaves.size()]; liveDocs = new Bits[leaves.size()]; docBases = new int[leaves.size()]; - int numSegs = 0; + enumIndexes = new HashMap<>(); + int numEnums = 0; boolean hasDeletions = false; + for (int i = 0; i < leaves.size(); i++) { - Terms terms = leaves.get(i).reader().terms(idFieldName); - if (terms != null) { - termsEnums[numSegs] = terms.iterator(); - assert termsEnums[numSegs] != null; - docBases[numSegs] = leaves.get(i).docBase; - liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs(); - hasDeletions |= leaves.get(i).reader().hasDeletions(); - numSegs++; + LeafReaderContext context = leaves.get(i); + LeafReader leafReader = context.reader(); + CacheHelper cacheHelper = leafReader.getCoreCacheHelper(); + IndexReader.CacheKey cacheKey = cacheHelper == null ? null : cacheHelper.getKey(); + + if (cacheKey != null && prevEnumIndexes.containsKey(cacheKey)) { + // Reuse termsEnum, postingsEnum. 
+ int seg = prevEnumIndexes.get(cacheKey); + termsEnums[numEnums] = reusableTermsEnums[seg]; + postingsEnums[numEnums] = reusablePostingsEnums[seg]; + } else { + // New or empty segment. + Terms terms = leafReader.terms(idFieldName); + if (terms != null) { + termsEnums[numEnums] = terms.iterator(); + assert termsEnums[numEnums] != null; + } + } + + if (termsEnums[numEnums] != null) { + if (cacheKey != null) { + enumIndexes.put(cacheKey, numEnums); + } + + docBases[numEnums] = context.docBase; + liveDocs[numEnums] = leafReader.getLiveDocs(); + hasDeletions |= leafReader.hasDeletions(); + + numEnums++; } } - this.numSegs = numSegs; + + this.numEnums = numEnums; this.hasDeletions = hasDeletions; } /** Returns docID if found, else -1. */ public int lookup(BytesRef id) throws IOException { - for (int seg = 0; seg < numSegs; seg++) { + for (int seg = 0; seg < numEnums; seg++) { if (termsEnums[seg].seekExact(id)) { postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0); int docID = -1; @@ -88,5 +129,12 @@ public class PerThreadPKLookup { return -1; } - // TODO: add reopen method to carry over re-used enums...? + /** Reuse previous PerThreadPKLookup's termsEnum and postingsEnum. 
*/ + public PerThreadPKLookup reopen(IndexReader reader) throws IOException { + if (reader == null) { + return null; + } + return new PerThreadPKLookup( + reader, this.idFieldName, this.enumIndexes, this.termsEnums, this.postingsEnums); + } } diff --git a/lucene/test-framework/src/test/org/apache/lucene/tests/search/TestPerThreadPKLookup.java b/lucene/test-framework/src/test/org/apache/lucene/tests/search/TestPerThreadPKLookup.java index 2136727838c..7807626c462 100644 --- a/lucene/test-framework/src/test/org/apache/lucene/tests/search/TestPerThreadPKLookup.java +++ b/lucene/test-framework/src/test/org/apache/lucene/tests/search/TestPerThreadPKLookup.java @@ -31,6 +31,94 @@ import org.apache.lucene.tests.util.LuceneTestCase; public class TestPerThreadPKLookup extends LuceneTestCase { + public void testReopen() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = + new IndexWriter( + dir, + new IndexWriterConfig(new MockAnalyzer(random())) + .setMergePolicy(NoMergePolicy.INSTANCE)); + + Document doc; + doc = new Document(); + doc.add(new KeywordField("PK", "1", Field.Store.NO)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new KeywordField("PK", "2", Field.Store.NO)); + writer.addDocument(doc); + writer.flush(); + + // Terms in PK is null. + doc = new Document(); + doc.add(new KeywordField("PK2", "3", Field.Store.NO)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new KeywordField("PK2", "4", Field.Store.NO)); + writer.addDocument(doc); + writer.flush(); + + DirectoryReader reader1 = DirectoryReader.open(writer); + PerThreadPKLookup pkLookup1 = new PerThreadPKLookup(reader1, "PK"); + + doc = new Document(); + doc.add(new KeywordField("PK", "5", Field.Store.NO)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new KeywordField("PK", "6", Field.Store.NO)); + writer.addDocument(doc); + // Update liveDocs. + writer.deleteDocuments(new Term("PK", "1")); + writer.flush(); + + // Terms in PK is null. 
+ doc = new Document(); + doc.add(new KeywordField("PK2", "7", Field.Store.NO)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new KeywordField("PK2", "8", Field.Store.NO)); + writer.addDocument(doc); + writer.flush(); + + assertEquals(0, pkLookup1.lookup(newBytesRef("1"))); + assertEquals(1, pkLookup1.lookup(newBytesRef("2"))); + assertEquals(-1, pkLookup1.lookup(newBytesRef("5"))); + assertEquals(-1, pkLookup1.lookup(newBytesRef("8"))); + DirectoryReader reader2 = DirectoryReader.openIfChanged(reader1); + PerThreadPKLookup pkLookup2 = pkLookup1.reopen(reader2); + + assertEquals(-1, pkLookup2.lookup(newBytesRef("1"))); + assertEquals(1, pkLookup2.lookup(newBytesRef("2"))); + assertEquals(4, pkLookup2.lookup(newBytesRef("5"))); + assertEquals(-1, pkLookup2.lookup(newBytesRef("8"))); + + doc = new Document(); + doc.add(new KeywordField("PK", "9", Field.Store.NO)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new KeywordField("PK", "10", Field.Store.NO)); + writer.addDocument(doc); + writer.flush(); + + assertEquals(-1, pkLookup2.lookup(newBytesRef("9"))); + DirectoryReader reader3 = DirectoryReader.openIfChanged(reader2); + PerThreadPKLookup pkLookup3 = pkLookup2.reopen(reader3); + assertEquals(8, pkLookup3.lookup(newBytesRef("9"))); + + DirectoryReader reader4 = DirectoryReader.openIfChanged(reader3); + assertNull(pkLookup3.reopen(reader4)); + + writer.close(); + reader1.close(); + reader2.close(); + reader3.close(); + dir.close(); + } + public void testPKLookupWithUpdate() throws Exception { Directory dir = newDirectory(); IndexWriter writer =