LUCENE-6766: don't throw exc from MultiXXX if there is an index sort

This commit is contained in:
Mike McCandless 2016-05-07 11:45:59 -04:00
parent 849fd54f2c
commit 54650eccf3
8 changed files with 30 additions and 172 deletions

View File

@ -78,9 +78,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
NumericDocValues v = context.reader().getNormValues(field);
if (v == null) {
v = DocValues.emptyNumeric();
@ -123,9 +120,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
NumericDocValues v = context.reader().getNumericDocValues(field);
if (v == null) {
v = DocValues.emptyNumeric();
@ -171,9 +165,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
Bits v = context.reader().getDocsWithField(field);
if (v == null) {
v = new Bits.MatchNoBits(context.reader().maxDoc());
@ -219,9 +210,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
BinaryDocValues v = context.reader().getBinaryDocValues(field);
if (v == null) {
v = DocValues.emptyBinary();
@ -266,9 +254,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
SortedNumericDocValues v = context.reader().getSortedNumericDocValues(field);
if (v == null) {
v = DocValues.emptySortedNumeric(context.reader().maxDoc());
@ -327,9 +312,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
SortedDocValues v = context.reader().getSortedDocValues(field);
if (v == null) {
v = DocValues.emptySorted();
@ -370,9 +352,6 @@ public class MultiDocValues {
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
if (context.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader());
}
SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
if (v == null) {
v = DocValues.emptySortedSet();

View File

@ -51,8 +51,6 @@ public final class MultiFields extends Fields {
private final ReaderSlice[] subSlices;
private final Map<String,Terms> terms = new ConcurrentHashMap<>();
// nocommit make test for sorted fields
/** Returns a single {@link Fields} instance for this
* reader, merging fields/terms/docs/positions on the
* fly. This method will return null if the reader
@ -72,9 +70,6 @@ public final class MultiFields extends Fields {
final List<ReaderSlice> slices = new ArrayList<>(leaves.size());
for (final LeafReaderContext ctx : leaves) {
final LeafReader r = ctx.reader();
if (r.getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + r);
}
final Fields f = r.fields();
fields.add(f);
slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1));
@ -110,10 +105,6 @@ public final class MultiFields extends Fields {
for (int i = 0; i < size; i++) {
// record all liveDocs, even if they are null
final LeafReaderContext ctx = leaves.get(i);
if (ctx.reader().getIndexSort() != null) {
throw new IllegalArgumentException("cannot handle index sort: reader=" + ctx.reader());
}
liveDocs[i] = ctx.reader().getLiveDocs();
starts[i] = ctx.docBase;
}

View File

@ -123,7 +123,7 @@ final class MultiSorter {
private static CrossReaderComparator getComparator(List<CodecReader> readers, SortField sortField) throws IOException {
switch(sortField.getType()) {
// TODO: use global ords for string sort
// ncommit: use segment-local ords for string sort
case INT:
{
List<NumericDocValues> values = new ArrayList<>();

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
@ -802,35 +803,23 @@ public class IndexSearcher {
* @lucene.experimental
*/
public CollectionStatistics collectionStatistics(String field) throws IOException {
int docCount = 0;
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
final int docCount;
final long sumTotalTermFreq;
final long sumDocFreq;
assert field != null;
for(LeafReaderContext ctx : reader.leaves()) {
Terms terms = ctx.reader().fields().terms(field);
if (terms != null) {
int subDocCount = terms.getDocCount();
if (subDocCount == -1) {
docCount = -1;
} else if (docCount != -1) {
docCount += subDocCount;
}
long subSumDocFreq = terms.getSumDocFreq();
if (subSumDocFreq == -1) {
sumDocFreq = -1;
} else if (sumDocFreq != -1) {
sumDocFreq += subSumDocFreq;
}
long subSumTotalTermFreq = terms.getSumTotalTermFreq();
if (subSumTotalTermFreq == -1) {
sumTotalTermFreq = -1;
} else if (sumTotalTermFreq != -1) {
sumTotalTermFreq += subSumTotalTermFreq;
}
}
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {
docCount = 0;
sumTotalTermFreq = 0;
sumDocFreq = 0;
} else {
docCount = terms.getDocCount();
sumTotalTermFreq = terms.getSumTotalTermFreq();
sumDocFreq = terms.getSumDocFreq();
}
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}

View File

@ -137,7 +137,7 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(0, topDocs.totalHits);
} else {
assertEquals(1, topDocs.totalHits);
assertEquals(i, getNumericDocValue(reader, "id", topDocs.scoreDocs[0].doc));
assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc));
Document document = reader.document(topDocs.scoreDocs[0].doc);
assertEquals(Integer.toString(i), document.get("id"));
}
@ -148,14 +148,6 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
private static long getNumericDocValue(IndexReader reader, String field, int docID) throws IOException {
// We can't use MultiDocValues because it gets angry about the sorting:
List<LeafReaderContext> leaves = reader.leaves();
int sub = ReaderUtil.subIndex(docID, leaves);
LeafReaderContext leaf = leaves.get(sub);
return leaf.reader().getNumericDocValues(field).get(docID - leaf.docBase);
}
public void testSortOnMerge() throws IOException {
testSortOnMerge(false);
}
@ -249,7 +241,7 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(0, topDocs.totalHits);
} else {
assertEquals(1, topDocs.totalHits);
assertEquals(values.get(i).longValue(), getNumericDocValue(reader, "foo", topDocs.scoreDocs[0].doc));
assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc));
}
}
reader.close();
@ -343,7 +335,7 @@ public class TestIndexSorting extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) {
final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
assertEquals(1, topDocs.totalHits);
assertEquals(values.get(i).longValue(), getNumericDocValue(reader, "foo", topDocs.scoreDocs[0].doc));
assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc));
}
reader.close();
w.close();
@ -388,8 +380,8 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(topDocs.totalHits, topDocs2.totalHits);
if (topDocs.totalHits == 1) {
assertEquals(
getNumericDocValue(reader, "foo", topDocs.scoreDocs[0].doc),
getNumericDocValue(reader2, "foo", topDocs2.scoreDocs[0].doc));
MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc),
MultiDocValues.getNumericValues(reader2, "foo").get(topDocs2.scoreDocs[0].doc));
}
}

View File

@ -26,8 +26,6 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -414,59 +412,4 @@ public class TestMultiDocValues extends LuceneTestCase {
ir2.close();
dir.close();
}
public void testNoIndexSort() throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, iwc);
w.addDocument(new Document());
DirectoryReader.open(w).close();
w.addDocument(new Document());
// this makes a sorted segment:
w.forceMerge(1);
// this makes another segment, so that MultiDocValues isn't just a no-op:
w.addDocument(new Document());
IndexReader r = DirectoryReader.open(w);
String message = expectThrows(IllegalArgumentException.class, () -> {
MultiDocValues.getDocsWithField(r, "foo");
}).getMessage();
assertTrue(message.contains("cannot handle index sort"));
assertTrue(message.contains("indexSort=<int: \"foo\">"));
message = expectThrows(IllegalArgumentException.class, () -> {
MultiDocValues.getNumericValues(r, "foo");
}).getMessage();
assertTrue(message.contains("cannot handle index sort"));
assertTrue(message.contains("indexSort=<int: \"foo\">"));
message = expectThrows(IllegalArgumentException.class, () -> {
MultiDocValues.getBinaryValues(r, "foo");
}).getMessage();
assertTrue(message.contains("cannot handle index sort"));
assertTrue(message.contains("indexSort=<int: \"foo\">"));
message = expectThrows(IllegalArgumentException.class, () -> {
MultiDocValues.getSortedValues(r, "foo");
}).getMessage();
assertTrue(message.contains("cannot handle index sort"));
assertTrue(message.contains("indexSort=<int: \"foo\">"));
message = expectThrows(IllegalArgumentException.class, () -> {
MultiDocValues.getSortedSetValues(r, "foo");
}).getMessage();
assertTrue(message.contains("cannot handle index sort"));
assertTrue(message.contains("indexSort=<int: \"foo\">"));
message = expectThrows(IllegalArgumentException.class, () -> {
MultiDocValues.getSortedNumericValues(r, "foo");
}).getMessage();
assertTrue(message.contains("cannot handle index sort"));
assertTrue(message.contains("indexSort=<int: \"foo\">"));
r.close();
w.close();
dir.close();
}
}

View File

@ -29,8 +29,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -201,28 +199,4 @@ public class TestMultiFields extends LuceneTestCase {
r.close();
dir.close();
}
public void testNoIndexSort() throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, iwc);
w.addDocument(new Document());
DirectoryReader.open(w).close();
w.addDocument(new Document());
// this makes a sorted segment:
w.forceMerge(1);
// this makes another segment, so that MultiFields.getFields isn't just a no-op:
w.addDocument(new Document());
IndexReader r = DirectoryReader.open(w);
Exception e = expectThrows(IllegalArgumentException.class, () -> {
MultiFields.getFields(r);
});
assertTrue(e.getMessage().contains("cannot handle index sort"));
assertTrue(e.getMessage().contains("indexSort=<int: \"foo\">"));
r.close();
w.close();
dir.close();
}
}

View File

@ -48,7 +48,6 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
@ -608,19 +607,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
return results;
}
private static BytesRef getBinaryDocValue(IndexReader reader, String field, int docID) throws IOException {
// We can't use MultiDocValues because it gets angry about the sorting:
List<LeafReaderContext> leaves = reader.leaves();
int sub = ReaderUtil.subIndex(docID, leaves);
LeafReaderContext leaf = leaves.get(sub);
BinaryDocValues bdv = leaf.reader().getBinaryDocValues(field);
if (bdv == null) {
return null;
} else {
return bdv.get(docID - leaf.docBase);
}
}
/**
* Create the results based on the search hits.
* Can be overridden by subclass to add particular behavior (e.g. weight transformation).
@ -635,20 +621,24 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
boolean doHighlight, Set<String> matchedTokens, String prefixToken)
throws IOException {
BinaryDocValues textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
// This will just be null if app didn't pass payloads to build():
// TODO: maybe just stored fields? they compress...
BinaryDocValues payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
List<LookupResult> results = new ArrayList<>();
for (int i=0;i<hits.scoreDocs.length;i++) {
FieldDoc fd = (FieldDoc) hits.scoreDocs[i];
BytesRef term = getBinaryDocValue(searcher.getIndexReader(), TEXT_FIELD_NAME, fd.doc);
BytesRef term = textDV.get(fd.doc);
String text = term.utf8ToString();
long score = (Long) fd.fields[0];
BytesRef payload = getBinaryDocValue(searcher.getIndexReader(), "payloads", fd.doc);
if (payload != null) {
payload = BytesRef.deepCopyOf(payload);
BytesRef payload;
if (payloadsDV != null) {
payload = BytesRef.deepCopyOf(payloadsDV.get(fd.doc));
} else {
payload = null;
}
// Must look up sorted-set by segment: