mirror of https://github.com/apache/lucene.git
Add reopen method in PerThreadPKLookup (#13596)
Co-authored-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
parent
250bb03aac
commit
9a3467d352
|
@ -343,7 +343,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
|
||||
/** Returns docID if found, else -1. */
|
||||
public int lookup(BytesRef id, long version) throws IOException {
|
||||
for (int seg = 0; seg < numSegs; seg++) {
|
||||
for (int seg = 0; seg < numEnums; seg++) {
|
||||
if (((IDVersionSegmentTermsEnum) termsEnums[seg]).seekExact(id, version)) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" found in seg=" + termsEnums[seg]);
|
||||
|
|
|
@ -18,8 +18,13 @@ package org.apache.lucene.tests.index;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.CacheHelper;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -35,17 +40,29 @@ import org.apache.lucene.util.BytesRef;
|
|||
*/
|
||||
public class PerThreadPKLookup {
|
||||
|
||||
private final String idFieldName;
|
||||
protected final TermsEnum[] termsEnums;
|
||||
protected final PostingsEnum[] postingsEnums;
|
||||
protected final Bits[] liveDocs;
|
||||
protected final int[] docBases;
|
||||
protected final int numSegs;
|
||||
protected final int numEnums;
|
||||
protected final boolean hasDeletions;
|
||||
private final Map<IndexReader.CacheKey, Integer> enumIndexes;
|
||||
|
||||
public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {
|
||||
/**
 * Creates a lookup over the leaves of {@code reader} for the given id field.
 *
 * <p>Delegates to the private constructor with an empty map of previous enum indexes and no
 * reusable enum arrays, so no TermsEnum or PostingsEnum is carried over from an earlier instance.
 *
 * @param reader the reader whose leaves are searched
 * @param idFieldName the name of the field whose terms are looked up
 * @throws IOException if the delegated constructor throws it
 */
public PerThreadPKLookup(IndexReader reader, String idFieldName) throws IOException {
|
||||
this(reader, idFieldName, Collections.emptyMap(), null, null);
|
||||
}
|
||||
|
||||
List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());
|
||||
private PerThreadPKLookup(
|
||||
IndexReader reader,
|
||||
String idFieldName,
|
||||
Map<IndexReader.CacheKey, Integer> prevEnumIndexes,
|
||||
TermsEnum[] reusableTermsEnums,
|
||||
PostingsEnum[] reusablePostingsEnums)
|
||||
throws IOException {
|
||||
this.idFieldName = idFieldName;
|
||||
|
||||
List<LeafReaderContext> leaves = new ArrayList<>(reader.leaves());
|
||||
// Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
|
||||
leaves.sort((c1, c2) -> c2.reader().numDocs() - c1.reader().numDocs());
|
||||
|
||||
|
@ -53,26 +70,50 @@ public class PerThreadPKLookup {
|
|||
postingsEnums = new PostingsEnum[leaves.size()];
|
||||
liveDocs = new Bits[leaves.size()];
|
||||
docBases = new int[leaves.size()];
|
||||
int numSegs = 0;
|
||||
enumIndexes = new HashMap<>();
|
||||
int numEnums = 0;
|
||||
boolean hasDeletions = false;
|
||||
|
||||
for (int i = 0; i < leaves.size(); i++) {
|
||||
Terms terms = leaves.get(i).reader().terms(idFieldName);
|
||||
if (terms != null) {
|
||||
termsEnums[numSegs] = terms.iterator();
|
||||
assert termsEnums[numSegs] != null;
|
||||
docBases[numSegs] = leaves.get(i).docBase;
|
||||
liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
|
||||
hasDeletions |= leaves.get(i).reader().hasDeletions();
|
||||
numSegs++;
|
||||
LeafReaderContext context = leaves.get(i);
|
||||
LeafReader leafReader = context.reader();
|
||||
CacheHelper cacheHelper = leafReader.getCoreCacheHelper();
|
||||
IndexReader.CacheKey cacheKey = cacheHelper == null ? null : cacheHelper.getKey();
|
||||
|
||||
if (cacheKey != null && prevEnumIndexes.containsKey(cacheKey)) {
|
||||
// Reuse termsEnum, postingsEnum.
|
||||
int seg = prevEnumIndexes.get(cacheKey);
|
||||
termsEnums[numEnums] = reusableTermsEnums[seg];
|
||||
postingsEnums[numEnums] = reusablePostingsEnums[seg];
|
||||
} else {
|
||||
// New or empty segment.
|
||||
Terms terms = leafReader.terms(idFieldName);
|
||||
if (terms != null) {
|
||||
termsEnums[numEnums] = terms.iterator();
|
||||
assert termsEnums[numEnums] != null;
|
||||
}
|
||||
}
|
||||
|
||||
if (termsEnums[numEnums] != null) {
|
||||
if (cacheKey != null) {
|
||||
enumIndexes.put(cacheKey, numEnums);
|
||||
}
|
||||
|
||||
docBases[numEnums] = context.docBase;
|
||||
liveDocs[numEnums] = leafReader.getLiveDocs();
|
||||
hasDeletions |= leafReader.hasDeletions();
|
||||
|
||||
numEnums++;
|
||||
}
|
||||
}
|
||||
this.numSegs = numSegs;
|
||||
|
||||
this.numEnums = numEnums;
|
||||
this.hasDeletions = hasDeletions;
|
||||
}
|
||||
|
||||
/** Returns docID if found, else -1. */
|
||||
public int lookup(BytesRef id) throws IOException {
|
||||
for (int seg = 0; seg < numSegs; seg++) {
|
||||
for (int seg = 0; seg < numEnums; seg++) {
|
||||
if (termsEnums[seg].seekExact(id)) {
|
||||
postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0);
|
||||
int docID = -1;
|
||||
|
@ -88,5 +129,12 @@ public class PerThreadPKLookup {
|
|||
return -1;
|
||||
}
|
||||
|
||||
// TODO: add reopen method to carry over re-used enums...?
|
||||
/**
 * Creates a new PerThreadPKLookup over {@code reader}, handing this instance's enum index map,
 * termsEnums and postingsEnums to the new instance so that it can reuse them instead of creating
 * fresh ones.
 *
 * <p>The arrays are passed by reference, not copied, so any enum that is reused is the same
 * object in both instances. NOTE(review): presumably this instance should no longer be used once
 * the returned one is in use -- confirm with callers.
 *
 * @param reader the reader to build the new lookup on; may be {@code null}
 * @return a new lookup over {@code reader}, or {@code null} if {@code reader} is {@code null}
 * @throws IOException if constructing the new lookup throws it
 */
|
||||
public PerThreadPKLookup reopen(IndexReader reader) throws IOException {
|
||||
if (reader == null) {
|
||||
// No reader to reopen against: hand the null straight back to the caller.
return null;
|
||||
}
|
||||
return new PerThreadPKLookup(
|
||||
reader, this.idFieldName, this.enumIndexes, this.termsEnums, this.postingsEnums);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,94 @@ import org.apache.lucene.tests.util.LuceneTestCase;
|
|||
|
||||
public class TestPerThreadPKLookup extends LuceneTestCase {
|
||||
|
||||
/**
 * Checks PerThreadPKLookup.reopen across successive readers opened from the same writer.
 *
 * <p>Documents are indexed two at a time and flushed, alternating between the looked-up field
 * "PK" and an unrelated field "PK2". Each lookup is expected to see only what its own reader
 * sees; a reopened lookup is expected to see newly flushed documents and deletes, and reopen is
 * expected to return null when there is no new reader.
 */
public void testReopen() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer =
|
||||
new IndexWriter(
|
||||
dir,
|
||||
new IndexWriterConfig(new MockAnalyzer(random()))
|
||||
// NOTE(review): presumably merging is disabled so flushed segments stay separate and the
// docIDs asserted below stay predictable -- confirm.
.setMergePolicy(NoMergePolicy.INSTANCE));
|
||||
|
||||
Document doc;
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK", "1", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK", "2", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
writer.flush();
|
||||
|
||||
// These two documents only carry the PK2 field, so this flush has no terms for PK.
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK2", "3", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK2", "4", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
writer.flush();
|
||||
|
||||
DirectoryReader reader1 = DirectoryReader.open(writer);
|
||||
// Baseline lookup, built from scratch over reader1.
PerThreadPKLookup pkLookup1 = new PerThreadPKLookup(reader1, "PK");
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK", "5", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK", "6", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
// Delete a document that was flushed before reader1 was opened, so the reopened lookup has to
// observe updated liveDocs for it.
|
||||
writer.deleteDocuments(new Term("PK", "1"));
|
||||
writer.flush();
|
||||
|
||||
// These two documents only carry the PK2 field, so this flush has no terms for PK.
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK2", "7", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK2", "8", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
writer.flush();
|
||||
|
||||
// pkLookup1 is still bound to reader1: the delete of "1" and the new id "5" are not visible,
// and "8" was indexed under PK2, so it is never found under PK.
assertEquals(0, pkLookup1.lookup(newBytesRef("1")));
|
||||
assertEquals(1, pkLookup1.lookup(newBytesRef("2")));
|
||||
assertEquals(-1, pkLookup1.lookup(newBytesRef("5")));
|
||||
assertEquals(-1, pkLookup1.lookup(newBytesRef("8")));
|
||||
DirectoryReader reader2 = DirectoryReader.openIfChanged(reader1);
|
||||
PerThreadPKLookup pkLookup2 = pkLookup1.reopen(reader2);
|
||||
|
||||
// After reopening on reader2 the delete of "1" is visible and "5" is found at docID 4.
assertEquals(-1, pkLookup2.lookup(newBytesRef("1")));
|
||||
assertEquals(1, pkLookup2.lookup(newBytesRef("2")));
|
||||
assertEquals(4, pkLookup2.lookup(newBytesRef("5")));
|
||||
assertEquals(-1, pkLookup2.lookup(newBytesRef("8")));
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK", "9", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new KeywordField("PK", "10", Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
writer.flush();
|
||||
|
||||
// "9" was flushed after reader2 was opened, so pkLookup2 must not find it yet.
assertEquals(-1, pkLookup2.lookup(newBytesRef("9")));
|
||||
DirectoryReader reader3 = DirectoryReader.openIfChanged(reader2);
|
||||
PerThreadPKLookup pkLookup3 = pkLookup2.reopen(reader3);
|
||||
assertEquals(8, pkLookup3.lookup(newBytesRef("9")));
|
||||
|
||||
// Nothing was written since reader3 was opened. The assertion below only holds when reader4 is
// null, because reopen returns null only for a null reader.
DirectoryReader reader4 = DirectoryReader.openIfChanged(reader3);
|
||||
assertNull(pkLookup3.reopen(reader4));
|
||||
|
||||
// reader4 is not closed here; the assertNull above implies it is null.
writer.close();
|
||||
reader1.close();
|
||||
reader2.close();
|
||||
reader3.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testPKLookupWithUpdate() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer =
|
||||
|
|
Loading…
Reference in New Issue