LUCENE-5306: DocumentExpressionDictionary now accepts composite readers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1536826 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2013-10-29 17:33:04 +00:00
parent 1255539501
commit a7e43403c3
2 changed files with 35 additions and 26 deletions
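
In short: DocumentExpressionDictionary previously threw IllegalArgumentException for readers with more than one leaf, forcing callers to wrap composite readers in SlowCompositeReaderWrapper. The iterator now records each leaf's docBase in a starts[] array and, in getWeight, uses ReaderUtil.subIndex to locate the leaf containing a global docId, pulling fresh FunctionValues whenever the docId crosses into a new leaf. The tests drop the wrapper and index atLeast(10) documents instead of a fixed 10.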

DocumentExpressionDictionary.java

@@ -28,8 +28,8 @@ import org.apache.lucene.expressions.Expression;
 import org.apache.lucene.expressions.SimpleBindings;
 import org.apache.lucene.expressions.js.JavascriptCompiler;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CompositeReader; // javadocs
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
@@ -49,9 +49,6 @@ import org.apache.lucene.util.BytesRefIterator;
  * The term and (optionally) payload fields supplied
  * are required for ALL documents and has to be stored
  * </li>
- * <li>
- * {@link CompositeReader} is not supported.
- * </li>
  * </ul>
  */
 public class DocumentExpressionDictionary extends DocumentDictionary {
@@ -100,21 +97,41 @@ public class DocumentExpressionDictionary extends DocumentDictionary {
   final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator {
 
-    private FunctionValues weightValues;
+    private FunctionValues currentWeightValues;
+    private int currentLeafIndex = 0;
+    private final List<AtomicReaderContext> leaves;
+    private final int[] starts;
 
     public DocumentExpressionInputIterator(boolean hasPayloads)
         throws IOException {
       super(hasPayloads);
-      List<AtomicReaderContext> leaves = reader.leaves();
-      if (leaves.size() > 1) {
-        throw new IllegalArgumentException("CompositeReader is not supported");
+      leaves = reader.leaves();
+      if (leaves.size() == 0) {
+        throw new IllegalArgumentException("Reader has to have at least one leaf");
       }
-      weightValues = weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(0));
+      starts = new int[leaves.size() + 1];
+      for (int i = 0; i < leaves.size(); i++) {
+        starts[i] = leaves.get(i).docBase;
+      }
+      starts[leaves.size()] = reader.maxDoc();
+      currentLeafIndex = 0;
+      currentWeightValues = weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(currentLeafIndex));
     }
 
     @Override
     protected long getWeight(int docId) {
-      return weightValues.longVal(docId);
+      int subIndex = ReaderUtil.subIndex(docId, starts);
+      if (subIndex != currentLeafIndex) {
+        currentLeafIndex = subIndex;
+        try {
+          currentWeightValues = weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(currentLeafIndex));
+        } catch (IOException e) {
+          throw new RuntimeException();
+        }
+      }
+      return currentWeightValues.longVal(docId - starts[subIndex]);
     }
   }
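
The new getWeight follows the standard Lucene pattern for mapping a top-level docId onto its containing leaf: starts[] holds each leaf's docBase plus a maxDoc() sentinel, and ReaderUtil.subIndex binary-searches that array. Below is a minimal standalone sketch of the same mapping; the subIndex stand-in is hypothetical and simplified (it ignores the duplicate-docBase case from empty segments that the real ReaderUtil handles):

import java.util.Arrays;

public class DocBaseMappingSketch {

  // Simplified stand-in for ReaderUtil.subIndex: find the leaf whose
  // [docBase, nextDocBase) range contains docId. Assumes strictly
  // increasing starts, i.e. no empty segments.
  static int subIndex(int docId, int[] starts) {
    int idx = Arrays.binarySearch(starts, docId);
    // Exact hit: docId is the first doc of leaf idx.
    // Miss: binarySearch returns -(insertionPoint) - 1, and the leaf
    // containing docId is the one just before the insertion point.
    return idx >= 0 ? idx : -idx - 2;
  }

  public static void main(String[] args) {
    // Three leaves with 5, 3 and 7 docs: docBases 0, 5, 8; sentinel maxDoc = 15.
    int[] starts = { 0, 5, 8, 15 };
    for (int docId : new int[] { 0, 4, 5, 7, 8, 14 }) {
      int leaf = subIndex(docId, starts);
      // Same arithmetic as the patch: local docId = docId - starts[leaf].
      System.out.printf("global doc %d -> leaf %d, local doc %d%n",
          docId, leaf, docId - starts[leaf]);
    }
  }
}

Caching currentLeafIndex and only re-pulling FunctionValues on a leaf change keeps the common case cheap, presumably because the dictionary's iterator visits documents in increasing docId order, so leaf switches happen at most once per segment.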

DocumentExpressionDictionaryTest.java

@@ -36,7 +36,6 @@ import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.spell.Dictionary;
@@ -78,16 +77,14 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
     writer.commit();
     writer.close();
-    // TODO: once we fix DocumentExpressionDictionary to
-    // accept readers with more than one segment, we can
-    // remove this wrapping:
-    IndexReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
+    IndexReader ir = DirectoryReader.open(dir);
     Set<SortField> sortFields = new HashSet<SortField>();
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
@@ -115,16 +112,14 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
     writer.commit();
     writer.close();
-    // TODO: once we fix DocumentExpressionDictionary to
-    // accept readers with more than one segment, we can
-    // remove this wrapping:
-    IndexReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
+    IndexReader ir = DirectoryReader.open(dir);
     Set<SortField> sortFields = new HashSet<SortField>();
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
@@ -152,7 +147,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
     Random rand = random();
     List<String> termsToDel = new ArrayList<>();
     for(Document doc : docs.values()) {
@@ -178,10 +173,7 @@ public class DocumentExpressionDictionaryTest extends LuceneTestCase {
       assertTrue(null!=docs.remove(termToDel));
     }
 
-    // TODO: once we fix DocumentExpressionDictionary to
-    // accept readers with more than one segment, we can
-    // remove this wrapping:
-    IndexReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
+    IndexReader ir = DirectoryReader.open(dir);
     assertEquals(ir.numDocs(), docs.size());
     Set<SortField> sortFields = new HashSet<SortField>();
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
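
For context on where weightsValueSource comes from: judging by the JavascriptCompiler and SimpleBindings imports in the main class, the dictionary compiles a JavaScript expression over the weight fields and binds each field via a SortField. A sketch of that wiring against the 4.x expressions API; the field names are hypothetical stand-ins for the test's WEIGHT_FIELD_NAME_* constants:

import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;

public class WeightExpressionSketch {
  public static void main(String[] args) throws Exception {
    // Combine two numeric weight fields into one suggestion weight.
    Expression expr = JavascriptCompiler.compile("weight_field_1 + weight_field_2");

    SimpleBindings bindings = new SimpleBindings();
    bindings.add(new SortField("weight_field_1", SortField.Type.LONG));
    bindings.add(new SortField("weight_field_2", SortField.Type.LONG));

    // This is the kind of ValueSource the patched iterator consumes:
    // per leaf it calls getValues(context, leaf), then longVal(localDocId).
    ValueSource weights = expr.getValueSource(bindings);
    System.out.println("compiled weight source: " + weights);
  }
}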