mirror of https://github.com/apache/lucene.git
LUCENE-6529: Removed an optimization in UninvertingReader that was causing incorrect results for Numeric fields using precisionStep
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1684704 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
507ebfc247
commit
40f4e7de5e
|
@ -81,6 +81,10 @@ API Changes
|
||||||
SpanPayloadNearCheckQuery have moved from the .spans package to the .payloads
|
SpanPayloadNearCheckQuery have moved from the .spans package to the .payloads
|
||||||
package. (Alan Woodward, David Smiley, Paul Elschot, Robert Muir)
|
package. (Alan Woodward, David Smiley, Paul Elschot, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-6529: Removed an optimization in UninvertingReader that was causing
|
||||||
|
incorrect results for Numeric fields using precisionStep
|
||||||
|
(hossman, Robert Muir)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-6500: ParallelCompositeReader did not always call
|
* LUCENE-6500: ParallelCompositeReader did not always call
|
||||||
|
|
|
@ -67,13 +67,11 @@ import org.apache.lucene.util.StringHelper;
|
||||||
* are also de-dup'd (ie if doc has same term more than once
|
* are also de-dup'd (ie if doc has same term more than once
|
||||||
* in this field, you'll only get that ord back once).
|
* in this field, you'll only get that ord back once).
|
||||||
*
|
*
|
||||||
* This class tests whether the provided reader is able to
|
* This class
|
||||||
* retrieve terms by ord (ie, it's single segment, and it
|
|
||||||
* uses an ord-capable terms index). If not, this class
|
|
||||||
* will create its own term index internally, allowing to
|
* will create its own term index internally, allowing to
|
||||||
* create a wrapped TermsEnum that can handle ord. The
|
* create a wrapped TermsEnum that can handle ord. The
|
||||||
* {@link #getOrdTermsEnum} method then provides this
|
* {@link #getOrdTermsEnum} method then provides this
|
||||||
* wrapped enum, if necessary.
|
* wrapped enum.
|
||||||
*
|
*
|
||||||
* The RAM consumption of this class can be high!
|
* The RAM consumption of this class can be high!
|
||||||
*
|
*
|
||||||
|
@ -152,7 +150,7 @@ public class DocTermOrds implements Accountable {
|
||||||
protected long sizeOfIndexedStrings;
|
protected long sizeOfIndexedStrings;
|
||||||
|
|
||||||
/** Holds the indexed (by default every 128th) terms. */
|
/** Holds the indexed (by default every 128th) terms. */
|
||||||
protected BytesRef[] indexedTermsArray;
|
protected BytesRef[] indexedTermsArray = new BytesRef[0];
|
||||||
|
|
||||||
/** If non-null, only terms matching this prefix were
|
/** If non-null, only terms matching this prefix were
|
||||||
* indexed. */
|
* indexed. */
|
||||||
|
@ -219,27 +217,27 @@ public class DocTermOrds implements Accountable {
|
||||||
indexInterval = 1 << indexIntervalBits;
|
indexInterval = 1 << indexIntervalBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a TermsEnum that implements ord. If the
|
/**
|
||||||
* provided reader supports ord, we just return its
|
* Returns a TermsEnum that implements ord, or null if no terms in field.
|
||||||
* TermsEnum; if it does not, we build a "private" terms
|
* <p>
|
||||||
|
* we build a "private" terms
|
||||||
* index internally (WARNING: consumes RAM) and use that
|
* index internally (WARNING: consumes RAM) and use that
|
||||||
* index to implement ord. This also enables ord on top
|
* index to implement ord. This also enables ord on top
|
||||||
* of a composite reader. The returned TermsEnum is
|
* of a composite reader. The returned TermsEnum is
|
||||||
* unpositioned. This returns null if there are no terms.
|
* unpositioned. This returns null if there are no terms.
|
||||||
*
|
* </p>
|
||||||
* <p><b>NOTE</b>: you must pass the same reader that was
|
* <p><b>NOTE</b>: you must pass the same reader that was
|
||||||
* used when creating this class */
|
* used when creating this class
|
||||||
|
*/
|
||||||
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
|
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
|
||||||
if (indexedTermsArray == null) {
|
// NOTE: see LUCENE-6529 before attempting to optimize this method to
|
||||||
//System.out.println("GET normal enum");
|
// return a TermsEnum directly from the reader if it already supports ord().
|
||||||
final Terms terms = reader.terms(field);
|
|
||||||
if (terms == null) {
|
assert null != indexedTermsArray;
|
||||||
|
|
||||||
|
if (0 == indexedTermsArray.length) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return terms.iterator();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
//System.out.println("GET wrapped enum ordBase=" + ordBase);
|
|
||||||
return new OrdWrappedTermsEnum(reader);
|
return new OrdWrappedTermsEnum(reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -297,12 +295,9 @@ public class DocTermOrds implements Accountable {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we need our "term index wrapper", these will be
|
// For our "term index wrapper"
|
||||||
// init'd below:
|
final List<BytesRef> indexedTerms = new ArrayList<>();
|
||||||
List<BytesRef> indexedTerms = null;
|
final PagedBytes indexedTermsBytes = new PagedBytes(15);
|
||||||
PagedBytes indexedTermsBytes = null;
|
|
||||||
|
|
||||||
boolean testedOrd = false;
|
|
||||||
|
|
||||||
// we need a minimum of 9 bytes, but round up to 12 since the space would
|
// we need a minimum of 9 bytes, but round up to 12 since the space would
|
||||||
// be wasted with most allocators anyway.
|
// be wasted with most allocators anyway.
|
||||||
|
@ -336,23 +331,9 @@ public class DocTermOrds implements Accountable {
|
||||||
}
|
}
|
||||||
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
|
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
|
||||||
|
|
||||||
if (!testedOrd) {
|
|
||||||
try {
|
|
||||||
ordBase = (int) te.ord();
|
|
||||||
//System.out.println("got ordBase=" + ordBase);
|
|
||||||
} catch (UnsupportedOperationException uoe) {
|
|
||||||
// Reader cannot provide ord support, so we wrap
|
|
||||||
// our own support by creating our own terms index:
|
|
||||||
indexedTerms = new ArrayList<>();
|
|
||||||
indexedTermsBytes = new PagedBytes(15);
|
|
||||||
//System.out.println("NO ORDS");
|
|
||||||
}
|
|
||||||
testedOrd = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
visitTerm(te, termNum);
|
visitTerm(te, termNum);
|
||||||
|
|
||||||
if (indexedTerms != null && (termNum & indexIntervalMask) == 0) {
|
if ((termNum & indexIntervalMask) == 0) {
|
||||||
// Index this term
|
// Index this term
|
||||||
sizeOfIndexedStrings += t.length;
|
sizeOfIndexedStrings += t.length;
|
||||||
BytesRef indexedTerm = new BytesRef();
|
BytesRef indexedTerm = new BytesRef();
|
||||||
|
@ -547,9 +528,7 @@ public class DocTermOrds implements Accountable {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (indexedTerms != null) {
|
|
||||||
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
|
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
|
||||||
}
|
|
||||||
|
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
@ -598,9 +577,10 @@ public class DocTermOrds implements Accountable {
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Only used if original IndexReader doesn't implement
|
/**
|
||||||
* ord; in this case we "wrap" our own terms index
|
* "wrap" our own terms index around the original IndexReader.
|
||||||
* around it. */
|
* Only valid if there are terms for this field rom the original reader
|
||||||
|
*/
|
||||||
private final class OrdWrappedTermsEnum extends TermsEnum {
|
private final class OrdWrappedTermsEnum extends TermsEnum {
|
||||||
private final TermsEnum termsEnum;
|
private final TermsEnum termsEnum;
|
||||||
private BytesRef term;
|
private BytesRef term;
|
||||||
|
@ -608,6 +588,7 @@ public class DocTermOrds implements Accountable {
|
||||||
|
|
||||||
public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
|
public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
|
||||||
assert indexedTermsArray != null;
|
assert indexedTermsArray != null;
|
||||||
|
assert 0 != indexedTermsArray.length;
|
||||||
termsEnum = reader.fields().terms(field).iterator();
|
termsEnum = reader.fields().terms(field).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,36 @@ import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestDocTermOrds extends LuceneTestCase {
|
public class TestDocTermOrds extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testEmptyIndex() throws IOException {
|
||||||
|
final Directory dir = newDirectory();
|
||||||
|
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
final DirectoryReader ir = DirectoryReader.open(dir);
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
|
||||||
|
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||||
|
TestUtil.checkReader(composite);
|
||||||
|
|
||||||
|
// check the leaves
|
||||||
|
// (normally there are none for an empty index, so this is really just future
|
||||||
|
// proofing in case that changes for some reason)
|
||||||
|
for (LeafReaderContext rc : ir.leaves()) {
|
||||||
|
final LeafReader r = rc.reader();
|
||||||
|
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
|
||||||
|
assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
|
||||||
|
assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
// check the composite
|
||||||
|
final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
|
||||||
|
assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
|
||||||
|
assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());
|
||||||
|
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testSimple() throws Exception {
|
public void testSimple() throws Exception {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||||
|
@ -82,6 +112,7 @@ public class TestDocTermOrds extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(ar);
|
||||||
final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
|
final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
|
||||||
SortedSetDocValues iter = dto.iterator(ar);
|
SortedSetDocValues iter = dto.iterator(ar);
|
||||||
|
|
||||||
|
@ -185,6 +216,7 @@ public class TestDocTermOrds extends LuceneTestCase {
|
||||||
System.out.println("TEST: top reader");
|
System.out.println("TEST: top reader");
|
||||||
}
|
}
|
||||||
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(slowR);
|
||||||
verify(slowR, idToOrds, termsArray, null);
|
verify(slowR, idToOrds, termsArray, null);
|
||||||
|
|
||||||
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
|
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
|
||||||
|
@ -270,6 +302,7 @@ public class TestDocTermOrds extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(slowR);
|
||||||
for(String prefix : prefixesArray) {
|
for(String prefix : prefixesArray) {
|
||||||
|
|
||||||
final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
|
final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
|
||||||
|
|
|
@ -119,6 +119,7 @@ public class TestFieldCache extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
IndexReader r = writer.getReader();
|
IndexReader r = writer.getReader();
|
||||||
reader = SlowCompositeReaderWrapper.wrap(r);
|
reader = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(reader);
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,6 +294,7 @@ public class TestFieldCache extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexReader r = DirectoryReader.open(dir);
|
IndexReader r = DirectoryReader.open(dir);
|
||||||
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
|
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(reader);
|
||||||
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
|
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
|
||||||
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
|
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
|
||||||
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
|
||||||
|
|
|
@ -192,6 +192,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(ar);
|
||||||
|
|
||||||
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||||
for(int docID=0;docID<docBytes.size();docID++) {
|
for(int docID=0;docID<docBytes.size();docID++) {
|
||||||
|
@ -265,6 +266,8 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(ar
|
||||||
|
);
|
||||||
|
|
||||||
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||||
for(int docID=0;docID<docBytes.size();docID++) {
|
for(int docID=0;docID<docBytes.size();docID++) {
|
||||||
|
|
|
@ -18,15 +18,23 @@ package org.apache.lucene.uninverting;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.EnumSet;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.IntField;
|
import org.apache.lucene.document.IntField;
|
||||||
import org.apache.lucene.document.LongField;
|
import org.apache.lucene.document.LongField;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||||
|
@ -201,4 +209,133 @@ public class TestUninvertingReader extends LuceneTestCase {
|
||||||
ir.close();
|
ir.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
|
||||||
|
public void testSortedSetIntegerManyValues() throws IOException {
|
||||||
|
final Directory dir = newDirectory();
|
||||||
|
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||||
|
|
||||||
|
final FieldType NO_TRIE_TYPE = new FieldType(IntField.TYPE_NOT_STORED);
|
||||||
|
NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);
|
||||||
|
|
||||||
|
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
|
||||||
|
UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
|
||||||
|
UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
|
||||||
|
UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
|
||||||
|
UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
|
||||||
|
final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
|
||||||
|
MULTI_VALUES.add("trie_multi");
|
||||||
|
MULTI_VALUES.add("notrie_multi");
|
||||||
|
|
||||||
|
|
||||||
|
final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
|
||||||
|
final int MIN = TestUtil.nextInt(random(), 10, 100);
|
||||||
|
final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
|
||||||
|
final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;
|
||||||
|
|
||||||
|
{ // (at least) one doc should have every value, so that at least one segment has every value
|
||||||
|
final Document doc = new Document();
|
||||||
|
for (int i = MIN; i <= MAX; i++) {
|
||||||
|
doc.add(new IntField("trie_multi", i, Field.Store.NO));
|
||||||
|
doc.add(new IntField("notrie_multi", i, NO_TRIE_TYPE));
|
||||||
|
}
|
||||||
|
iw.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// now add some more random docs (note: starting at i=1 because of previously added doc)
|
||||||
|
for (int i = 1; i < NUM_DOCS; i++) {
|
||||||
|
final Document doc = new Document();
|
||||||
|
if (0 != TestUtil.nextInt(random(), 0, 9)) {
|
||||||
|
int val = TestUtil.nextInt(random(), MIN, MAX);
|
||||||
|
doc.add(new IntField("trie_single", val, Field.Store.NO));
|
||||||
|
doc.add(new IntField("notrie_single", val, NO_TRIE_TYPE));
|
||||||
|
}
|
||||||
|
if (0 != TestUtil.nextInt(random(), 0, 9)) {
|
||||||
|
int numMulti = atLeast(1);
|
||||||
|
while (0 < numMulti--) {
|
||||||
|
int val = TestUtil.nextInt(random(), MIN, MAX);
|
||||||
|
doc.add(new IntField("trie_multi", val, Field.Store.NO));
|
||||||
|
doc.add(new IntField("notrie_multi", val, NO_TRIE_TYPE));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iw.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
|
||||||
|
final int NUM_LEAVES = ir.leaves().size();
|
||||||
|
|
||||||
|
// check the leaves: no more then total set size
|
||||||
|
for (LeafReaderContext rc : ir.leaves()) {
|
||||||
|
final LeafReader ar = rc.reader();
|
||||||
|
for (String f : UNINVERT_MAP.keySet()) {
|
||||||
|
final SortedSetDocValues v = ar.getSortedSetDocValues(f);
|
||||||
|
final long valSetSize = v.getValueCount();
|
||||||
|
assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " +
|
||||||
|
valSetSize + " from: " + ar.toString(),
|
||||||
|
valSetSize <= EXPECTED_VALSET_SIZE);
|
||||||
|
|
||||||
|
if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
|
||||||
|
// tighter check on multi fields in single segment index since we know one doc has all of them
|
||||||
|
assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
|
||||||
|
EXPECTED_VALSET_SIZE, valSetSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check the composite of all leaves: exact expectation of set size
|
||||||
|
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||||
|
TestUtil.checkReader(composite);
|
||||||
|
|
||||||
|
for (String f : MULTI_VALUES) {
|
||||||
|
final SortedSetDocValues v = composite.getSortedSetDocValues(f);
|
||||||
|
final long valSetSize = v.getValueCount();
|
||||||
|
assertEquals(f + ": Composite reader value set should have had exactly expected size",
|
||||||
|
EXPECTED_VALSET_SIZE, valSetSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetEmptyIndex() throws IOException {
|
||||||
|
final Directory dir = newDirectory();
|
||||||
|
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
|
||||||
|
for (Type t : EnumSet.allOf(Type.class)) {
|
||||||
|
UNINVERT_MAP.put(t.name(), t);
|
||||||
|
}
|
||||||
|
|
||||||
|
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
|
||||||
|
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
|
||||||
|
TestUtil.checkReader(composite);
|
||||||
|
|
||||||
|
for (String f : UNINVERT_MAP.keySet()) {
|
||||||
|
// check the leaves
|
||||||
|
// (normally there are none for an empty index, so this is really just future
|
||||||
|
// proofing in case that changes for some reason)
|
||||||
|
for (LeafReaderContext rc : ir.leaves()) {
|
||||||
|
final LeafReader ar = rc.reader();
|
||||||
|
assertNull(f + ": Expected no doc values from empty index (leaf)",
|
||||||
|
ar.getSortedSetDocValues(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
// check the composite
|
||||||
|
assertNull(f + ": Expected no doc values from empty index (composite)",
|
||||||
|
composite.getSortedSetDocValues(f));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,201 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.schema.TrieIntField;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
public class TestTrieFacet extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
final static int MIN_VALUE = 20;
|
||||||
|
final static int MAX_VALUE = 60;
|
||||||
|
|
||||||
|
final static String TRIE_INT_P8_S_VALUED = "foo_ti1";
|
||||||
|
final static String TRIE_INT_P8_M_VALUED = "foo_ti";
|
||||||
|
|
||||||
|
final static String TRIE_INT_P0_S_VALUED = "foo_i1";
|
||||||
|
final static String TRIE_INT_P0_M_VALUED = "foo_i";
|
||||||
|
|
||||||
|
final static String[] M_VALUED = new String[] { TRIE_INT_P0_M_VALUED, TRIE_INT_P8_M_VALUED };
|
||||||
|
final static String[] S_VALUED = new String[] { TRIE_INT_P0_S_VALUED, TRIE_INT_P8_S_VALUED };
|
||||||
|
|
||||||
|
final static String[] P0 = new String[] { TRIE_INT_P0_M_VALUED, TRIE_INT_P0_S_VALUED };
|
||||||
|
final static String[] P8 = new String[] { TRIE_INT_P8_M_VALUED, TRIE_INT_P8_S_VALUED };
|
||||||
|
|
||||||
|
static int NUM_DOCS;
|
||||||
|
|
||||||
|
private static TrieIntField assertCastFieldType(SchemaField f) {
|
||||||
|
assertTrue("who changed the schema? test isn't valid: " + f.getName(),
|
||||||
|
f.getType() instanceof TrieIntField);
|
||||||
|
return (TrieIntField) f.getType();
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
|
||||||
|
initCore("solrconfig-tlog.xml","schema.xml");
|
||||||
|
|
||||||
|
// don't break the test
|
||||||
|
assertTrue("min value must be less then max value", MIN_VALUE < MAX_VALUE);
|
||||||
|
assertTrue("min value must be greater then zero", 0 < MIN_VALUE);
|
||||||
|
|
||||||
|
// sanity check no one breaks the schema out from under us...
|
||||||
|
for (String f : M_VALUED) {
|
||||||
|
SchemaField sf = h.getCore().getLatestSchema().getField(f);
|
||||||
|
assertTrue("who changed the schema? test isn't valid: " + f, sf.multiValued());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String f : S_VALUED) {
|
||||||
|
SchemaField sf = h.getCore().getLatestSchema().getField(f);
|
||||||
|
assertFalse("who changed the schema? test isn't valid: " + f, sf.multiValued());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String f : P0) {
|
||||||
|
SchemaField sf = h.getCore().getLatestSchema().getField(f);
|
||||||
|
assertEquals("who changed the schema? test isn't valid: " + f,
|
||||||
|
0, assertCastFieldType(sf).getPrecisionStep());
|
||||||
|
}
|
||||||
|
for (String f : P8) {
|
||||||
|
SchemaField sf = h.getCore().getLatestSchema().getField(f);
|
||||||
|
assertEquals("who changed the schema? test isn't valid: " + f,
|
||||||
|
8, assertCastFieldType(sf).getPrecisionStep());
|
||||||
|
}
|
||||||
|
|
||||||
|
// we don't need a lot of docs -- at least one failure only had ~1000
|
||||||
|
NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
|
||||||
|
|
||||||
|
{ // ensure at least one doc has every valid value in the multivalued fields
|
||||||
|
SolrInputDocument doc = sdoc("id", "0");
|
||||||
|
for (int val = MIN_VALUE; val <= MAX_VALUE; val++) {
|
||||||
|
for (String f : M_VALUED) {
|
||||||
|
doc.addField(f, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertU(adoc(doc));
|
||||||
|
}
|
||||||
|
|
||||||
|
// randomized docs (note: starting at i=1)
|
||||||
|
for (int i=1; i < NUM_DOCS; i++) {
|
||||||
|
SolrInputDocument doc = sdoc("id", i+"");
|
||||||
|
if (useField()) {
|
||||||
|
int val = TestUtil.nextInt(random(), MIN_VALUE, MAX_VALUE);
|
||||||
|
for (String f : S_VALUED) {
|
||||||
|
doc.addField(f, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (useField()) {
|
||||||
|
int numMulti = atLeast(1);
|
||||||
|
while (0 < numMulti--) {
|
||||||
|
int val = TestUtil.nextInt(random(), MIN_VALUE, MAX_VALUE);
|
||||||
|
for (String f: M_VALUED) {
|
||||||
|
doc.addField(f, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertU(adoc(doc));
|
||||||
|
}
|
||||||
|
assertU(commit());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Similar to usually() but we want it to happen just as often regardless
|
||||||
|
* of test multiplier and nightly status
|
||||||
|
*/
|
||||||
|
private static boolean useField() {
|
||||||
|
return 0 != TestUtil.nextInt(random(), 0, 9);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void doTestNoZeros(final String field, final String method) throws Exception {
|
||||||
|
|
||||||
|
assertQ("sanity check # docs in index: " + NUM_DOCS,
|
||||||
|
req("q", "*:*", "rows", "0")
|
||||||
|
,"//result[@numFound="+NUM_DOCS+"]");
|
||||||
|
assertQ("sanity check that no docs match 0 failed",
|
||||||
|
req("q", field+":0", "rows", "0")
|
||||||
|
,"//result[@numFound=0]");
|
||||||
|
assertQ("sanity check that no docs match [0 TO 0] failed",
|
||||||
|
req("q", field+":[0 TO 0]", "rows", "0")
|
||||||
|
,"//result[@numFound=0]");
|
||||||
|
|
||||||
|
assertQ("*:* facet with mincount 0 found unexpected 0 value",
|
||||||
|
req("q", "*:*"
|
||||||
|
,"rows", "0"
|
||||||
|
,"indent","true"
|
||||||
|
,"facet", "true"
|
||||||
|
,"facet.field", field
|
||||||
|
,"facet.limit", "-1"
|
||||||
|
,"facet.mincount", "0"
|
||||||
|
,"facet.method", method
|
||||||
|
)
|
||||||
|
// trivial sanity check we're at least getting facet counts in output
|
||||||
|
,"*[count(//lst[@name='facet_fields']/lst[@name='"+field+"']/int)!=0]"
|
||||||
|
// main point of test
|
||||||
|
,"*[count(//lst[@name='facet_fields']/lst[@name='"+field+"']/int[@name='0'])=0]"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// enum
|
||||||
|
public void testSingleValuedTrieP0_enum() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P0_S_VALUED, "enum");
|
||||||
|
}
|
||||||
|
public void testMultiValuedTrieP0_enum() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P0_M_VALUED, "enum");
|
||||||
|
}
|
||||||
|
public void testSingleValuedTrieP8_enum() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P8_S_VALUED, "enum");
|
||||||
|
}
|
||||||
|
public void testMultiValuedTrieP8_enum() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P8_M_VALUED, "enum");
|
||||||
|
}
|
||||||
|
|
||||||
|
// fc
|
||||||
|
public void testSingleValuedTrieP0_fc() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P0_S_VALUED, "fc");
|
||||||
|
}
|
||||||
|
public void testMultiValuedTrieP0_fc() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P0_M_VALUED, "fc");
|
||||||
|
}
|
||||||
|
public void testSingleValuedTrieP8_fc() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P8_S_VALUED, "fc");
|
||||||
|
}
|
||||||
|
public void testMultiValuedTrieP8_fc() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P8_M_VALUED, "fc");
|
||||||
|
}
|
||||||
|
|
||||||
|
// fcs
|
||||||
|
public void testSingleValuedTrieP0_fcs() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P0_S_VALUED, "fcs");
|
||||||
|
}
|
||||||
|
public void testMultiValuedTrieP0_fcs() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P0_M_VALUED, "fcs");
|
||||||
|
}
|
||||||
|
public void testSingleValuedTrieP8_fcs() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P8_S_VALUED, "fcs");
|
||||||
|
}
|
||||||
|
public void testMultiValuedTrieP8_fcs() throws Exception {
|
||||||
|
doTestNoZeros(TRIE_INT_P8_M_VALUED, "fcs");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue