From a7e43403c3655727b57c51df113849ab966a0cb0 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 29 Oct 2013 17:33:04 +0000 Subject: [PATCH] LUCENE-5306: DocumentExpressionDictionary now accepts composite readers git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1536826 13f79535-47bb-0310-9956-ffa450edef68 --- .../suggest/DocumentExpressionDictionary.java | 37 ++++++++++++++----- .../DocumentExpressionDictionaryTest.java | 24 ++++-------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java index e9c6cb2212c..ea494e1cfd2 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java @@ -28,8 +28,8 @@ import org.apache.lucene.expressions.Expression; import org.apache.lucene.expressions.SimpleBindings; import org.apache.lucene.expressions.js.JavascriptCompiler; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.CompositeReader; // javadocs import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; @@ -49,9 +49,6 @@ import org.apache.lucene.util.BytesRefIterator; * The term and (optionally) payload fields supplied * are required for ALL documents and has to be stored * - *
  • - * {@link CompositeReader} is not supported. - *
  • * */ public class DocumentExpressionDictionary extends DocumentDictionary { @@ -100,21 +97,41 @@ public class DocumentExpressionDictionary extends DocumentDictionary { final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator { - private FunctionValues weightValues; + private FunctionValues currentWeightValues; + private int currentLeafIndex = 0; + private final List leaves; + + private final int[] starts; public DocumentExpressionInputIterator(boolean hasPayloads) throws IOException { super(hasPayloads); - List leaves = reader.leaves(); - if (leaves.size() > 1) { - throw new IllegalArgumentException("CompositeReader is not supported"); + leaves = reader.leaves(); + if (leaves.size() == 0) { + throw new IllegalArgumentException("Reader has to have at least one leaf"); } - weightValues = weightsValueSource.getValues(new HashMap(), leaves.get(0)); + starts = new int[leaves.size() + 1]; + for (int i = 0; i < leaves.size(); i++) { + starts[i] = leaves.get(i).docBase; + } + starts[leaves.size()] = reader.maxDoc(); + + currentLeafIndex = 0; + currentWeightValues = weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)); } @Override protected long getWeight(int docId) { - return weightValues.longVal(docId); + int subIndex = ReaderUtil.subIndex(docId, starts); + if (subIndex != currentLeafIndex) { + currentLeafIndex = subIndex; + try { + currentWeightValues = weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)); + } catch (IOException e) { + throw new RuntimeException(); + } + } + return currentWeightValues.longVal(docId - starts[subIndex]); } } diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java index a8c28df2eee..4cc719662a4 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java @@ -36,7 +36,6 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.search.SortField; import org.apache.lucene.search.spell.Dictionary; @@ -78,16 +77,14 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(10); + Map docs = generateIndexDocuments(atLeast(10)); for(Document doc: docs.values()) { writer.addDocument(doc); } writer.commit(); writer.close(); - // TODO: once we fix DocumentExpressionDictionary to - // accept readers with more than one segment, we can - // remove this wrapping: - IndexReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); + + IndexReader ir = DirectoryReader.open(dir); Set sortFields = new HashSet(); sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); @@ -115,16 +112,14 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(10); + Map docs = generateIndexDocuments(atLeast(10)); for(Document doc: docs.values()) { writer.addDocument(doc); } writer.commit(); writer.close(); - // TODO: once we fix DocumentExpressionDictionary to - // accept readers with more than one segment, we can - // remove this wrapping: - IndexReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); + + IndexReader ir = DirectoryReader.open(dir); Set sortFields = new HashSet(); sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); @@ -152,7 +147,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(10); + Map docs = generateIndexDocuments(atLeast(10)); Random rand = random(); List termsToDel = new ArrayList<>(); for(Document doc : docs.values()) { @@ -178,10 +173,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase { assertTrue(null!=docs.remove(termToDel)); } - // TODO: once we fix DocumentExpressionDictionary to - // accept readers with more than one segment, we can - // remove this wrapping: - IndexReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); + IndexReader ir = DirectoryReader.open(dir); assertEquals(ir.numDocs(), docs.size()); Set sortFields = new HashSet(); sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));