From 54650eccf397b20788bd08abaa09cef217b8cd3c Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Sat, 7 May 2016 11:45:59 -0400 Subject: [PATCH] LUCENE-6766: don't throw exc from MultiXXX if there is an index sort --- .../apache/lucene/index/MultiDocValues.java | 21 ------- .../org/apache/lucene/index/MultiFields.java | 9 --- .../org/apache/lucene/index/MultiSorter.java | 2 +- .../apache/lucene/search/IndexSearcher.java | 41 +++++-------- .../apache/lucene/index/TestIndexSorting.java | 18 ++---- .../lucene/index/TestMultiDocValues.java | 57 ------------------- .../apache/lucene/index/TestMultiFields.java | 26 --------- .../analyzing/AnalyzingInfixSuggester.java | 28 +++------ 8 files changed, 30 insertions(+), 172 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index ae61183527b..33947974bce 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -78,9 +78,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } NumericDocValues v = context.reader().getNormValues(field); if (v == null) { v = DocValues.emptyNumeric(); @@ -123,9 +120,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } NumericDocValues v = context.reader().getNumericDocValues(field); if (v == null) { v = DocValues.emptyNumeric(); @@ -171,9 +165,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } Bits v = context.reader().getDocsWithField(field); if (v == null) { v = new Bits.MatchNoBits(context.reader().maxDoc()); @@ -219,9 +210,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } BinaryDocValues v = context.reader().getBinaryDocValues(field); if (v == null) { v = DocValues.emptyBinary(); @@ -266,9 +254,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } SortedNumericDocValues v = context.reader().getSortedNumericDocValues(field); if (v == null) { v = DocValues.emptySortedNumeric(context.reader().maxDoc()); @@ -327,9 +312,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } SortedDocValues v = context.reader().getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); @@ -370,9 +352,6 @@ public class MultiDocValues { final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { LeafReaderContext context = leaves.get(i); - if (context.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + context.reader()); - } SortedSetDocValues v = context.reader().getSortedSetDocValues(field); if (v == null) { v = DocValues.emptySortedSet(); diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java index d8e79acad77..1736bace115 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java @@ -51,8 +51,6 @@ public final class MultiFields extends Fields { private final ReaderSlice[] subSlices; private final Map terms = new ConcurrentHashMap<>(); - // nocommit make test for sorted fields - /** Returns a single {@link Fields} instance for this * reader, merging fields/terms/docs/positions on the * fly. This method will return null if the reader @@ -72,9 +70,6 @@ public final class MultiFields extends Fields { final List slices = new ArrayList<>(leaves.size()); for (final LeafReaderContext ctx : leaves) { final LeafReader r = ctx.reader(); - if (r.getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + r); - } final Fields f = r.fields(); fields.add(f); slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1)); @@ -110,10 +105,6 @@ public final class MultiFields extends Fields { for (int i = 0; i < size; i++) { // record all liveDocs, even if they are null final LeafReaderContext ctx = leaves.get(i); - if (ctx.reader().getIndexSort() != null) { - throw new IllegalArgumentException("cannot handle index sort: reader=" + ctx.reader()); - } - liveDocs[i] = ctx.reader().getLiveDocs(); starts[i] = ctx.docBase; } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java index 062dde9dc5d..7f71eb55880 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java @@ -123,7 +123,7 @@ final class MultiSorter { private static CrossReaderComparator getComparator(List readers, SortField sortField) throws IOException { switch(sortField.getType()) { - // TODO: use global ords for string sort + // ncommit: use segment-local ords for string sort case INT: { List values = new ArrayList<>(); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index c91fc773408..b81b8079a1c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; // javadocs import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; @@ -802,35 +803,23 @@ public class IndexSearcher { * @lucene.experimental */ public CollectionStatistics collectionStatistics(String field) throws IOException { - int docCount = 0; - long sumTotalTermFreq = 0; - long sumDocFreq = 0; + final int docCount; + final long sumTotalTermFreq; + final long sumDocFreq; assert field != null; - - for(LeafReaderContext ctx : reader.leaves()) { - Terms terms = ctx.reader().fields().terms(field); - if (terms != null) { - int subDocCount = terms.getDocCount(); - if (subDocCount == -1) { - docCount = -1; - } else if (docCount != -1) { - docCount += subDocCount; - } - long subSumDocFreq = terms.getSumDocFreq(); - if (subSumDocFreq == -1) { - sumDocFreq = -1; - } else if (sumDocFreq != -1) { - sumDocFreq += subSumDocFreq; - } - long subSumTotalTermFreq = terms.getSumTotalTermFreq(); - if (subSumTotalTermFreq == -1) { - sumTotalTermFreq = -1; - } else if (sumTotalTermFreq != -1) { - sumTotalTermFreq += subSumTotalTermFreq; - } - } + + Terms terms = MultiFields.getTerms(reader, field); + if (terms == null) { + docCount = 0; + sumTotalTermFreq = 0; + sumDocFreq = 0; + } else { + docCount = terms.getDocCount(); + sumTotalTermFreq = terms.getSumTotalTermFreq(); + sumDocFreq = terms.getSumDocFreq(); } + return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 6e93986c415..8df81bab264 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -137,7 +137,7 @@ public class TestIndexSorting extends LuceneTestCase { assertEquals(0, topDocs.totalHits); } else { assertEquals(1, topDocs.totalHits); - assertEquals(i, getNumericDocValue(reader, "id", topDocs.scoreDocs[0].doc)); + assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc)); Document document = reader.document(topDocs.scoreDocs[0].doc); assertEquals(Integer.toString(i), document.get("id")); } @@ -148,14 +148,6 @@ public class TestIndexSorting extends LuceneTestCase { dir.close(); } - private static long getNumericDocValue(IndexReader reader, String field, int docID) throws IOException { - // We can't use MultiDocValues because it gets angry about the sorting: - List leaves = reader.leaves(); - int sub = ReaderUtil.subIndex(docID, leaves); - LeafReaderContext leaf = leaves.get(sub); - return leaf.reader().getNumericDocValues(field).get(docID - leaf.docBase); - } - public void testSortOnMerge() throws IOException { testSortOnMerge(false); } @@ -249,7 +241,7 @@ public class TestIndexSorting extends LuceneTestCase { assertEquals(0, topDocs.totalHits); } else { assertEquals(1, topDocs.totalHits); - assertEquals(values.get(i).longValue(), getNumericDocValue(reader, "foo", topDocs.scoreDocs[0].doc)); + assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc)); } } reader.close(); @@ -343,7 +335,7 @@ public class TestIndexSorting extends LuceneTestCase { for (int i = 0; i < numDocs; ++i) { final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); assertEquals(1, topDocs.totalHits); - assertEquals(values.get(i).longValue(), getNumericDocValue(reader, "foo", topDocs.scoreDocs[0].doc)); + assertEquals(values.get(i).longValue(), MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc)); } reader.close(); w.close(); @@ -388,8 +380,8 @@ public class TestIndexSorting extends LuceneTestCase { assertEquals(topDocs.totalHits, topDocs2.totalHits); if (topDocs.totalHits == 1) { assertEquals( - getNumericDocValue(reader, "foo", topDocs.scoreDocs[0].doc), - getNumericDocValue(reader2, "foo", topDocs2.scoreDocs[0].doc)); + MultiDocValues.getNumericValues(reader, "foo").get(topDocs.scoreDocs[0].doc), + MultiDocValues.getNumericValues(reader2, "foo").get(topDocs2.scoreDocs[0].doc)); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java index 5ab54483f19..5b70c38a7fa 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java @@ -26,8 +26,6 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -414,59 +412,4 @@ public class TestMultiDocValues extends LuceneTestCase { ir2.close(); dir.close(); } - - public void testNoIndexSort() throws Exception { - IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, iwc); - w.addDocument(new Document()); - DirectoryReader.open(w).close(); - w.addDocument(new Document()); - // this makes a sorted segment: - w.forceMerge(1); - // this makes another segment, so that MultiDocValues isn't just a no-op: - w.addDocument(new Document()); - IndexReader r = DirectoryReader.open(w); - - String message = expectThrows(IllegalArgumentException.class, () -> { - MultiDocValues.getDocsWithField(r, "foo"); - }).getMessage(); - assertTrue(message.contains("cannot handle index sort")); - assertTrue(message.contains("indexSort=")); - - message = expectThrows(IllegalArgumentException.class, () -> { - MultiDocValues.getNumericValues(r, "foo"); - }).getMessage(); - assertTrue(message.contains("cannot handle index sort")); - assertTrue(message.contains("indexSort=")); - - message = expectThrows(IllegalArgumentException.class, () -> { - MultiDocValues.getBinaryValues(r, "foo"); - }).getMessage(); - assertTrue(message.contains("cannot handle index sort")); - assertTrue(message.contains("indexSort=")); - - message = expectThrows(IllegalArgumentException.class, () -> { - MultiDocValues.getSortedValues(r, "foo"); - }).getMessage(); - assertTrue(message.contains("cannot handle index sort")); - assertTrue(message.contains("indexSort=")); - - message = expectThrows(IllegalArgumentException.class, () -> { - MultiDocValues.getSortedSetValues(r, "foo"); - }).getMessage(); - assertTrue(message.contains("cannot handle index sort")); - assertTrue(message.contains("indexSort=")); - - message = expectThrows(IllegalArgumentException.class, () -> { - MultiDocValues.getSortedNumericValues(r, "foo"); - }).getMessage(); - assertTrue(message.contains("cannot handle index sort")); - assertTrue(message.contains("indexSort=")); - - r.close(); - w.close(); - dir.close(); - } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java index 0aae9a1be90..27f2f1a3699 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java @@ -29,8 +29,6 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -201,28 +199,4 @@ public class TestMultiFields extends LuceneTestCase { r.close(); dir.close(); } - - public void testNoIndexSort() throws Exception { - IndexWriterConfig iwc = newIndexWriterConfig(); - iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT))); - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, iwc); - w.addDocument(new Document()); - DirectoryReader.open(w).close(); - w.addDocument(new Document()); - // this makes a sorted segment: - w.forceMerge(1); - // this makes another segment, so that MultiFields.getFields isn't just a no-op: - w.addDocument(new Document()); - IndexReader r = DirectoryReader.open(w); - - Exception e = expectThrows(IllegalArgumentException.class, () -> { - MultiFields.getFields(r); - }); - assertTrue(e.getMessage().contains("cannot handle index sort")); - assertTrue(e.getMessage().contains("indexSort=")); - r.close(); - w.close(); - dir.close(); - } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 7124aae0180..16e9406310f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -48,7 +48,6 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; @@ -608,19 +607,6 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { return results; } - private static BytesRef getBinaryDocValue(IndexReader reader, String field, int docID) throws IOException { - // We can't use MultiDocValues because it gets angry about the sorting: - List leaves = reader.leaves(); - int sub = ReaderUtil.subIndex(docID, leaves); - LeafReaderContext leaf = leaves.get(sub); - BinaryDocValues bdv = leaf.reader().getBinaryDocValues(field); - if (bdv == null) { - return null; - } else { - return bdv.get(docID - leaf.docBase); - } - } - /** * Create the results based on the search hits. * Can be overridden by subclass to add particular behavior (e.g. weight transformation). @@ -635,20 +621,24 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { boolean doHighlight, Set matchedTokens, String prefixToken) throws IOException { + BinaryDocValues textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME); + // This will just be null if app didn't pass payloads to build(): // TODO: maybe just stored fields? they compress... + BinaryDocValues payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads"); List leaves = searcher.getIndexReader().leaves(); List results = new ArrayList<>(); for (int i=0;i