LUCENE-5666: Add UninvertingReader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1595259 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-05-16 16:39:56 +00:00
commit e2ba693a6e
204 changed files with 7193 additions and 6121 deletions

View File

@ -65,6 +65,18 @@ API Changes
as tokens anymore, and now iterates cells on-demand during indexing instead of
building a collection. RPT now has more setters. (David Smiley)
* LUCENE-5666: Change uninverted access (sorting, faceting, grouping, etc)
to use the DocValues API instead of FieldCache. For FieldCache functionality,
use UninvertingReader in lucene/misc (or implement your own FilterReader).
UninvertingReader is more efficient: supports multi-valued numeric fields,
detects when a multi-valued field is single-valued, reuses caches
of compatible types (e.g. SORTED also supports BINARY and SORTED_SET access
without insanity). "Insanity" is no longer possible unless you explicitly want it.
Rename FieldCache* and DocTermOrds* classes in the search package to DocValues*.
Move SortedSetSortField to core and add SortedSetFieldSource to queries/, which
takes the same selectors. Add helper methods to DocValues.java that are better
suited for search code (never return null, etc). (Mike McCandless, Robert Muir)
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect

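The LUCENE-5666 entry above replaces FieldCache-based access with the DocValues API. Below is a minimal sketch of the migration path for an index whose fields carry no doc values, assuming the UninvertingReader API added in lucene/misc (the org.apache.lucene.uninverting package, the wrap method and the Type enum are taken from that module and are not shown in this excerpt):

import java.util.Collections;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.uninverting.UninvertingReader;

public class UninvertingExample {
  // Sort on an indexed-but-not-doc-valued string field by uninverting it on the fly.
  static void sortByCountry(Directory dir) throws Exception {
    Map<String, UninvertingReader.Type> mapping =
        Collections.singletonMap("country", UninvertingReader.Type.SORTED);
    DirectoryReader reader = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      searcher.search(new MatchAllDocsQuery(), 10,
          new Sort(new SortField("country", SortField.Type.STRING)));
    } finally {
      reader.close();
    }
  }
}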
View File

@ -0,0 +1,70 @@
package org.apache.lucene.collation;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.text.Collator;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.search.DocValuesRangeFilter;
import org.apache.lucene.util.BytesRef;
/**
* Indexes collation keys as a single-valued {@link SortedDocValuesField}.
* <p>
* This is more efficient than {@link CollationKeyAnalyzer} if the field
* only has one value: no uninversion is necessary to sort on the field,
* locale-sensitive range queries can still work via {@link DocValuesRangeFilter},
* and the underlying data structures built at index-time are likely more efficient
* and use less memory than FieldCache.
*/
public final class CollationDocValuesField extends Field {
private final String name;
private final Collator collator;
private final BytesRef bytes = new BytesRef();
/**
* Create a new CollationDocValuesField.
* <p>
* NOTE: you should not create a new one for each document, instead
* just make one and reuse it during your indexing process, setting
* the value via {@link #setStringValue(String)}.
* @param name field name
* @param collator Collator for generating collation keys.
*/
// TODO: can we make this trap-free? maybe just synchronize on the collator
// instead?
public CollationDocValuesField(String name, Collator collator) {
super(name, SortedDocValuesField.TYPE);
this.name = name;
this.collator = (Collator) collator.clone();
fieldsData = bytes; // so wrong setters cannot be called
}
@Override
public String name() {
return name;
}
@Override
public void setStringValue(String value) {
bytes.bytes = collator.getCollationKey(value).toByteArray();
bytes.offset = 0;
bytes.length = bytes.bytes.length;
}
}
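A condensed sketch of the intended indexing pattern, assuming an already-open IndexWriter; the test in the next file exercises the full round trip including sorting and range filtering:

import java.text.Collator;
import java.util.Locale;

import org.apache.lucene.collation.CollationDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;

class CollatedIndexing {
  static void addDocs(IndexWriter writer, String... values) throws Exception {
    Document doc = new Document();
    Field stored = new StringField("field", "", Field.Store.YES);
    // One reusable instance per indexing thread, as the constructor note above advises.
    CollationDocValuesField collated =
        new CollationDocValuesField("collated", Collator.getInstance(Locale.ENGLISH));
    doc.add(stored);
    doc.add(collated);
    for (String value : values) {
      stored.setStringValue(value);
      collated.setStringValue(value); // stores the collation key as SORTED doc values
      writer.addDocument(doc);
    }
  }
}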

View File

@ -0,0 +1,143 @@
package org.apache.lucene.collation;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.text.Collator;
import java.util.Locale;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesRangeFilter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/**
* trivial test of CollationDocValuesField
*/
@SuppressCodecs("Lucene3x")
public class TestCollationDocValuesField extends LuceneTestCase {
public void testBasic() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field field = newField("field", "", StringField.TYPE_STORED);
CollationDocValuesField collationField = new CollationDocValuesField("collated", Collator.getInstance(Locale.ENGLISH));
doc.add(field);
doc.add(collationField);
field.setStringValue("ABC");
collationField.setStringValue("ABC");
iw.addDocument(doc);
field.setStringValue("abc");
collationField.setStringValue("abc");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.shutdown();
IndexSearcher is = newSearcher(ir);
SortField sortField = new SortField("collated", SortField.Type.STRING);
TopDocs td = is.search(new MatchAllDocsQuery(), 5, new Sort(sortField));
assertEquals("abc", ir.document(td.scoreDocs[0].doc).get("field"));
assertEquals("ABC", ir.document(td.scoreDocs[1].doc).get("field"));
ir.close();
dir.close();
}
public void testRanges() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field field = newField("field", "", StringField.TYPE_STORED);
Collator collator = Collator.getInstance(Locale.getDefault()); // uses -Dtests.locale
if (random().nextBoolean()) {
collator.setStrength(Collator.PRIMARY);
}
CollationDocValuesField collationField = new CollationDocValuesField("collated", collator);
doc.add(field);
doc.add(collationField);
int numDocs = atLeast(500);
for (int i = 0; i < numDocs; i++) {
String value = TestUtil.randomSimpleString(random());
field.setStringValue(value);
collationField.setStringValue(value);
iw.addDocument(doc);
}
IndexReader ir = iw.getReader();
iw.shutdown();
IndexSearcher is = newSearcher(ir);
int numChecks = atLeast(100);
for (int i = 0; i < numChecks; i++) {
String start = TestUtil.randomSimpleString(random());
String end = TestUtil.randomSimpleString(random());
BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray());
BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray());
Query query = new ConstantScoreQuery(DocValuesRangeFilter.newBytesRefRange("collated", lowerVal, upperVal, true, true));
doTestRanges(is, start, end, query, collator);
}
ir.close();
dir.close();
}
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, Query query, Collator collator) throws Exception {
QueryUtils.check(query);
// positive test
TopDocs docs = is.search(query, is.getIndexReader().maxDoc());
for (ScoreDoc doc : docs.scoreDocs) {
String value = is.doc(doc.doc).get("field");
assertTrue(collator.compare(value, startPoint) >= 0);
assertTrue(collator.compare(value, endPoint) <= 0);
}
// negative test
BooleanQuery bq = new BooleanQuery();
bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
bq.add(query, Occur.MUST_NOT);
docs = is.search(bq, is.getIndexReader().maxDoc());
for (ScoreDoc doc : docs.scoreDocs) {
String value = is.doc(doc.doc).get("field");
assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0);
}
}
}

View File

@ -60,23 +60,6 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
secondRangeBeginning, secondRangeEnd);
}
public void testCollationKeySort() throws Exception {
Analyzer usAnalyzer
= new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
Analyzer franceAnalyzer
= new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
Analyzer swedenAnalyzer
= new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
Analyzer denmarkAnalyzer
= new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
// The ICU Collator and Sun java.text.Collator implementations differ in their
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
}
public void testThreadSafe() throws Exception {
int iters = 20 * RANDOM_MULTIPLIER;
for (int i = 0; i < iters; i++) {

View File

@ -19,7 +19,7 @@ package org.apache.lucene.collation;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.search.FieldCacheRangeFilter;
import org.apache.lucene.search.DocValuesRangeFilter;
import org.apache.lucene.util.BytesRef;
import com.ibm.icu.text.Collator;
@ -30,7 +30,7 @@ import com.ibm.icu.text.RawCollationKey;
* <p>
* This is more efficient than {@link ICUCollationKeyAnalyzer} if the field
* only has one value: no uninversion is necessary to sort on the field,
* locale-sensitive range queries can still work via {@link FieldCacheRangeFilter},
* locale-sensitive range queries can still work via {@link DocValuesRangeFilter},
* and the underlying data structures built at index-time are likely more efficient
* and use less memory than FieldCache.
*/

View File

@ -24,7 +24,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldCacheRangeFilter;
import org.apache.lucene.search.DocValuesRangeFilter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
@ -111,7 +111,7 @@ public class TestICUCollationDocValuesField extends LuceneTestCase {
String end = TestUtil.randomSimpleString(random());
BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray());
BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray());
Query query = new ConstantScoreQuery(FieldCacheRangeFilter.newBytesRefRange("collated", lowerVal, upperVal, true, true));
Query query = new ConstantScoreQuery(DocValuesRangeFilter.newBytesRefRange("collated", lowerVal, upperVal, true, true));
doTestRanges(is, start, end, query, collator);
}

View File

@ -56,29 +56,6 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
secondRangeBeginning, secondRangeEnd);
}
// Test using various international locales with accented characters (which
// sort differently depending on locale)
//
// Copied (and slightly modified) from
// org.apache.lucene.search.TestSort.testInternationalSort()
//
public void testCollationKeySort() throws Exception {
Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
(TEST_VERSION_CURRENT, Collator.getInstance(Locale.ROOT));
Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
Analyzer denmarkAnalyzer = new ICUCollationKeyAnalyzer
(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
// The ICU Collator and java.text.Collator implementations differ in their
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.ROOT.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
}
public void testThreadSafe() throws Exception {
int iters = 20 * RANDOM_MULTIPLIER;
for (int i = 0; i < iters; i++) {

View File

@ -31,6 +31,7 @@ import org.junit.AfterClass;
import org.junit.BeforeClass;
/** Base class for all Benchmark unit tests. */
@SuppressSysoutChecks(bugUrl = "very noisy")
public abstract class BenchmarkTestCase extends LuceneTestCase {
private static File WORKDIR;

View File

@ -52,17 +52,12 @@ import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
/**
* Test very simply that perf tasks - simple algorithms - are doing what they should.
@ -328,7 +323,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
"content.source.forever=true",
"directory=RAMDirectory",
"doc.reuse.fields=false",
"doc.stored=false",
"doc.stored=true",
"doc.tokenized=false",
"doc.index.props=true",
"# ----- alg ",
@ -344,11 +339,11 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
Benchmark benchmark = execBenchmark(algLines);
DirectoryReader r = DirectoryReader.open(benchmark.getRunData().getDirectory());
SortedDocValues idx = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(r), "country");
final int maxDoc = r.maxDoc();
assertEquals(1000, maxDoc);
for(int i=0;i<1000;i++) {
assertTrue("doc " + i + " has null country", idx.getOrd(i) != -1);
assertNotNull("doc " + i + " has null country", r.document(i).getField("country"));
}
r.close();
}

View File

@ -42,6 +42,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import conf.ConfLoader;
/** Test very simply that perf tasks are parsed as expected. */
@SuppressSysoutChecks(bugUrl = "very noisy")
public class TestPerfTasksParse extends LuceneTestCase {
static final String NEW_LINE = System.getProperty("line.separator");

View File

@ -31,7 +31,6 @@
"/>
<property name="forbidden-rue-excludes" value="
org/apache/lucene/search/FieldCache$CacheEntry.class
org/apache/lucene/util/RamUsageEstimator.class
org/apache/lucene/search/CachingWrapperFilter.class
"/>

View File

@ -18,14 +18,13 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.index.AtomicReader; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs
/**
* Syntactic sugar for encoding doubles as NumericDocValues
* via {@link Double#doubleToRawLongBits(double)}.
* <p>
* Per-document double values can be retrieved via
* {@link FieldCache#getDoubles(AtomicReader, String, boolean)}.
* {@link AtomicReader#getNumericDocValues(String)}.
* <p>
* <b>NOTE</b>: In almost all cases this will be rather inefficient,
* requiring eight bytes per document. Consider encoding double

View File

@ -18,8 +18,8 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.analysis.NumericTokenStream; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.util.NumericUtils;
@ -57,7 +57,7 @@ import org.apache.lucene.util.NumericUtils;
* NumericRangeFilter}. To sort according to a
* <code>DoubleField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#DOUBLE}. <code>DoubleField</code>
* values can also be loaded directly from {@link FieldCache}.</p>
* values can also be loaded directly from {@link AtomicReader#getNumericDocValues}.</p>
*
* <p>You may add the same field name as a <code>DoubleField</code> to
* the same document more than once. Range querying and

View File

@ -18,14 +18,13 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.index.AtomicReader; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs
/**
* Syntactic sugar for encoding floats as NumericDocValues
* via {@link Float#floatToRawIntBits(float)}.
* <p>
* Per-document floating point values can be retrieved via
* {@link FieldCache#getFloats(AtomicReader, String, boolean)}.
* {@link AtomicReader#getNumericDocValues(String)}.
* <p>
* <b>NOTE</b>: In almost all cases this will be rather inefficient,
* requiring four bytes per document. Consider encoding floating

View File

@ -18,8 +18,8 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.analysis.NumericTokenStream; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.util.NumericUtils;
@ -57,7 +57,7 @@ import org.apache.lucene.util.NumericUtils;
* NumericRangeFilter}. To sort according to a
* <code>FloatField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#FLOAT}. <code>FloatField</code>
* values can also be loaded directly from {@link FieldCache}.</p>
* values can also be loaded directly from {@link AtomicReader#getNumericDocValues}.</p>
*
* <p>You may add the same field name as a <code>FloatField</code> to
* the same document more than once. Range querying and

View File

@ -18,8 +18,8 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.analysis.NumericTokenStream; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.util.NumericUtils;
@ -57,7 +57,7 @@ import org.apache.lucene.util.NumericUtils;
* NumericRangeFilter}. To sort according to a
* <code>IntField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#INT}. <code>IntField</code>
* values can also be loaded directly from {@link FieldCache}.</p>
* values can also be loaded directly from {@link AtomicReader#getNumericDocValues}.</p>
*
* <p>You may add the same field name as an <code>IntField</code> to
* the same document more than once. Range querying and

View File

@ -18,8 +18,8 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.analysis.NumericTokenStream; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.util.NumericUtils;
@ -67,7 +67,7 @@ import org.apache.lucene.util.NumericUtils;
* NumericRangeFilter}. To sort according to a
* <code>LongField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#LONG}. <code>LongField</code>
* values can also be loaded directly from {@link FieldCache}.</p>
* values can also be loaded directly from {@link AtomicReader#getNumericDocValues}.</p>
*
* <p>You may add the same field name as a <code>LongField</code> to
* the same document more than once. Range querying and

View File

@ -17,6 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -159,4 +161,72 @@ public final class DocValues {
}
};
}
// some helpers, for transition from fieldcache apis.
// as opposed to the AtomicReader apis (which must be strict for consistency), these are lenient
/**
* Returns NumericDocValues for the reader, or {@link #EMPTY_NUMERIC} if it has none.
*/
public static NumericDocValues getNumeric(AtomicReader in, String field) throws IOException {
NumericDocValues dv = in.getNumericDocValues(field);
if (dv == null) {
return EMPTY_NUMERIC;
} else {
return dv;
}
}
/**
* Returns BinaryDocValues for the reader, or {@link #EMPTY_BINARY} if it has none.
*/
public static BinaryDocValues getBinary(AtomicReader in, String field) throws IOException {
BinaryDocValues dv = in.getBinaryDocValues(field);
if (dv == null) {
dv = in.getSortedDocValues(field);
if (dv == null) {
return EMPTY_BINARY;
}
}
return dv;
}
/**
* Returns SortedDocValues for the reader, or {@link #EMPTY_SORTED} if it has none.
*/
public static SortedDocValues getSorted(AtomicReader in, String field) throws IOException {
SortedDocValues dv = in.getSortedDocValues(field);
if (dv == null) {
return EMPTY_SORTED;
} else {
return dv;
}
}
/**
* Returns SortedSetDocValues for the reader, or {@link #EMPTY_SORTED_SET} if it has none.
*/
public static SortedSetDocValues getSortedSet(AtomicReader in, String field) throws IOException {
SortedSetDocValues dv = in.getSortedSetDocValues(field);
if (dv == null) {
SortedDocValues sorted = in.getSortedDocValues(field);
if (sorted == null) {
return EMPTY_SORTED_SET;
}
return singleton(sorted);
}
return dv;
}
/**
* Returns Bits for the reader, or {@link Bits} matching nothing if it has none.
*/
public static Bits getDocsWithField(AtomicReader in, String field) throws IOException {
Bits dv = in.getDocsWithField(field);
if (dv == null) {
return new Bits.MatchNoBits(in.maxDoc());
} else {
return dv;
}
}
}
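A short sketch contrasting the strict reader accessors with the new lenient helpers above; the helpers never return null, so search code can drop its null checks (the field names here are illustrative):

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

class LenientAccess {
  static long strictRead(AtomicReader reader, int doc) throws Exception {
    NumericDocValues dv = reader.getNumericDocValues("count");
    return dv == null ? 0 : dv.get(doc); // caller handles the missing-field case itself
  }

  static long lenientRead(AtomicReader reader, int doc) throws Exception {
    // Never null: an empty instance is returned when the field has no doc values.
    return DocValues.getNumeric(reader, "count").get(doc);
  }

  static void iterateOrds(AtomicReader reader, int doc) throws Exception {
    // SORTED fields are transparently exposed through the SORTED_SET view (singleton above).
    SortedSetDocValues ords = DocValues.getSortedSet(reader, "tags");
    BytesRef scratch = new BytesRef();
    ords.setDocument(doc);
    for (long ord = ords.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ords.nextOrd()) {
      ords.lookupOrd(ord, scratch); // resolve the ordinal to its term when needed
    }
  }
}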

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -38,8 +37,8 @@ import org.apache.lucene.util.BytesRef;
* to override {@link #numDocs()} as well and vice-versa.
* <p><b>NOTE</b>: If this {@link FilterAtomicReader} does not change the
* content of the contained reader, you could consider overriding
* {@link #getCoreCacheKey()} so that {@link FieldCache} and
* {@link CachingWrapperFilter} share the same entries for this atomic reader
* {@link #getCoreCacheKey()} so that
* {@link CachingWrapperFilter} shares the same entries for this atomic reader
* and the wrapped one. {@link #getCombinedCoreAndDeletesKey()} could be
* overridden as well if the {@link #getLiveDocs() live docs} are not changed
* either.

View File

@ -426,7 +426,7 @@ public abstract class IndexReader implements Closeable {
return getContext().leaves();
}
/** Expert: Returns a key for this IndexReader, so FieldCache/CachingWrapperFilter can find
/** Expert: Returns a key for this IndexReader, so CachingWrapperFilter can find
* it again.
* This key must not have equals()/hashCode() methods, so &quot;equals&quot; means &quot;identical&quot;. */
public Object getCoreCacheKey() {
@ -436,7 +436,7 @@ public abstract class IndexReader implements Closeable {
}
/** Expert: Returns a key for this IndexReader that also includes deletions,
* so FieldCache/CachingWrapperFilter can find it again.
* so CachingWrapperFilter can find it again.
* This key must not have equals()/hashCode() methods, so &quot;equals&quot; means &quot;identical&quot;. */
public Object getCombinedCoreAndDeletesKey() {
// Don't call ensureOpen since FC calls this (to evict)

View File

@ -34,7 +34,7 @@ import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@ -361,7 +361,7 @@ public final class SegmentReader extends AtomicReader {
// This is necessary so that cloned SegmentReaders (which
// share the underlying postings data) will map to the
// same entry in the FieldCache. See LUCENE-1579.
// same entry for CachingWrapperFilter. See LUCENE-1579.
@Override
public Object getCoreCacheKey() {
// NOTE: if this ever changes, be sure to fix
@ -525,7 +525,7 @@ public final class SegmentReader extends AtomicReader {
* sharing the same core are closed. At this point it
* is safe for apps to evict this reader from any caches
* keyed on {@link #getCoreCacheKey}. This is the same
* interface that {@link FieldCache} uses, internally,
* interface that {@link CachingWrapperFilter} uses, internally,
* to evict entries.</p>
*
* @lucene.experimental

View File

@ -23,8 +23,7 @@ import org.apache.lucene.util.BytesRef;
* Exposes multi-valued view over a single-valued instance.
* <p>
* This can be used if you want to have one multi-valued implementation
* against e.g. FieldCache.getDocTermOrds that also works for single-valued
* fields.
* that works for single or multi-valued types.
*/
final class SingletonSortedSetDocValues extends SortedSetDocValues {
private final SortedDocValues in;

View File

@ -18,15 +18,17 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/**
* A range filter built on top of a cached multi-valued term field (in {@link FieldCache}).
* A range filter built on top of a cached multi-valued term field (from {@link AtomicReader#getSortedSetDocValues}).
*
* <p>Like {@link FieldCacheRangeFilter}, this is just a specialized range query versus
* <p>Like {@link DocValuesRangeFilter}, this is just a specialized range query versus
* using a TermRangeQuery with {@link DocTermOrdsRewriteMethod}: it will only do
* two ordinal to term lookups.</p>
*/
@ -51,7 +53,7 @@ public abstract class DocTermOrdsRangeFilter extends Filter {
public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException;
/**
* Creates a BytesRef range filter using {@link FieldCache#getTermsIndex}. This works with all
* Creates a BytesRef range filter using {@link AtomicReader#getSortedSetDocValues}. This works with all
* fields containing zero or one term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
@ -59,7 +61,7 @@ public abstract class DocTermOrdsRangeFilter extends Filter {
return new DocTermOrdsRangeFilter(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
final SortedSetDocValues docTermOrds = DocValues.getSortedSet(context.reader(), field);
final long lowerPoint = lowerVal == null ? -1 : docTermOrds.lookupTerm(lowerVal);
final long upperPoint = upperVal == null ? -1 : docTermOrds.lookupTerm(upperVal);
@ -95,7 +97,7 @@ public abstract class DocTermOrdsRangeFilter extends Filter {
assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0;
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
docTermOrds.setDocument(doc);

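A hedged usage sketch of the filter above over a multi-valued SORTED_SET doc values field, using the newBytesRefRange factory described in its javadoc (the field name is illustrative):

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocTermOrdsRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

class MultiValuedRange {
  // Matches documents whose multi-valued "tags" field has at least one value in [lower, upper].
  static Query tagRange(String lower, String upper) {
    return new ConstantScoreQuery(
        DocTermOrdsRangeFilter.newBytesRefRange(
            "tags", new BytesRef(lower), new BytesRef(upper), true, true));
  }
}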
View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
@ -83,7 +84,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
*/
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
final SortedSetDocValues docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), query.field);
final SortedSetDocValues docTermOrds = DocValues.getSortedSet(context.reader(), query.field);
// Cannot use FixedBitSet because we require long index (ord):
final LongBitSet termSet = new LongBitSet(docTermOrds.getValueCount());
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@ -144,7 +145,7 @@ public final class DocTermOrdsRewriteMethod extends MultiTermQuery.RewriteMethod
return null;
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
docTermOrds.setDocument(doc);

View File

@ -22,7 +22,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
/**
* Base class for DocIdSet to be used with FieldCache. The implementation
* Base class for DocIdSet to be used with DocValues. The implementation
* of its iterator is very stupid and slow if the implementation of the
* {@link #matchDoc} method is not optimized, as iterators simply increment
* the document id until {@code matchDoc(int)} returns true. Because of this
@ -30,12 +30,12 @@ import org.apache.lucene.util.OpenBitSet;
* I/O.
* @lucene.internal
*/
public abstract class FieldCacheDocIdSet extends DocIdSet {
public abstract class DocValuesDocIdSet extends DocIdSet {
protected final int maxDoc;
protected final Bits acceptDocs;
public FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
public DocValuesDocIdSet(int maxDoc, Bits acceptDocs) {
this.maxDoc = maxDoc;
this.acceptDocs = acceptDocs;
}
@ -123,7 +123,7 @@ public abstract class FieldCacheDocIdSet extends DocIdSet {
return new FilteredDocIdSetIterator(((DocIdSet) acceptDocs).iterator()) {
@Override
protected boolean match(int doc) {
return FieldCacheDocIdSet.this.matchDoc(doc);
return DocValuesDocIdSet.this.matchDoc(doc);
}
};
} else {

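A hedged sketch of subclassing the renamed base class in a custom filter; the "status" field and the equality test are illustrative only:

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocValuesDocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

class StatusFilter extends Filter {
  private final long wanted;

  StatusFilter(long wanted) {
    this.wanted = wanted;
  }

  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final NumericDocValues values = DocValues.getNumeric(context.reader(), "status");
    return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
      @Override
      protected boolean matchDoc(int doc) {
        // Keep this cheap: the default iterator calls it for every candidate document.
        return values.get(doc) == wanted;
      }
    };
  }
}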
View File

@ -24,19 +24,22 @@ import org.apache.lucene.document.IntField; // for javadocs
import org.apache.lucene.document.LongField; // for javadocs
import org.apache.lucene.index.AtomicReader; // for javadocs
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
/**
* A range filter built on top of a cached single term field (in {@link FieldCache}).
* A range filter built on top of numeric doc values field
* (from {@link AtomicReader#getNumericDocValues(String)}).
*
* <p>{@code FieldCacheRangeFilter} builds a single cache for the field the first time it is used.
* Each subsequent {@code FieldCacheRangeFilter} on the same field then reuses this cache,
* <p>{@code DocValuesRangeFilter} builds a single cache for the field the first time it is used.
* Each subsequent {@code DocValuesRangeFilter} on the same field then reuses this cache,
* even if the range itself changes.
*
* <p>This means that {@code FieldCacheRangeFilter} is much faster (sometimes more than 100x as fast)
* <p>This means that {@code DocValuesRangeFilter} is much faster (sometimes more than 100x as fast)
* as building a {@link TermRangeFilter}, if using a {@link #newStringRange}.
* However, if the range never changes it is slower (around 2x as slow) than building
* a CachingWrapperFilter on top of a single {@link TermRangeFilter}.
@ -47,9 +50,10 @@ import org.apache.lucene.util.NumericUtils;
* LongField} or {@link DoubleField}. But
* it has the problem that it only works with exactly one value per document (see below).
*
* <p>As with all {@link FieldCache} based functionality, {@code FieldCacheRangeFilter} is only valid for
* <p>As with all {@link AtomicReader#getNumericDocValues} based functionality,
* {@code DocValuesRangeFilter} is only valid for
* fields which have exactly one term for each document (except for {@link #newStringRange}
* where 0 terms are also allowed). Due to a restriction of {@link FieldCache}, for numeric ranges
* where 0 terms are also allowed). Due to historical reasons, for numeric ranges
* a value of 0 is assumed for all documents that do not have a numeric value.
*
* <p>Thus it works on dates, prices and other single value fields but will not work on
@ -57,20 +61,18 @@ import org.apache.lucene.util.NumericUtils;
* there is only a single term.
*
* <p>This class does not have a constructor; use one of the static factory methods available,
* that create a correct instance for different data types supported by {@link FieldCache}.
* that create a correct instance for different data types.
*/
public abstract class FieldCacheRangeFilter<T> extends Filter {
// TODO: use docsWithField to handle empty properly
public abstract class DocValuesRangeFilter<T> extends Filter {
final String field;
final FieldCache.Parser parser;
final T lowerVal;
final T upperVal;
final boolean includeLower;
final boolean includeUpper;
private FieldCacheRangeFilter(String field, FieldCache.Parser parser, T lowerVal, T upperVal, boolean includeLower, boolean includeUpper) {
private DocValuesRangeFilter(String field, T lowerVal, T upperVal, boolean includeLower, boolean includeUpper) {
this.field = field;
this.parser = parser;
this.lowerVal = lowerVal;
this.upperVal = upperVal;
this.includeLower = includeLower;
@ -82,15 +84,15 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException;
/**
* Creates a string range filter using {@link FieldCache#getTermsIndex}. This works with all
* Creates a string range filter using {@link AtomicReader#getSortedDocValues(String)}. This works with all
* fields containing zero or one term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<String> newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<String>(field, null, lowerVal, upperVal, includeLower, includeUpper) {
public static DocValuesRangeFilter<String> newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
return new DocValuesRangeFilter<String>(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field);
final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(new BytesRef(lowerVal));
final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(new BytesRef(upperVal));
@ -126,7 +128,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0;
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
final int docOrd = fcsi.getOrd(doc);
@ -138,16 +140,16 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
}
/**
* Creates a BytesRef range filter using {@link FieldCache#getTermsIndex}. This works with all
* Creates a BytesRef range filter using {@link AtomicReader#getSortedDocValues(String)}. This works with all
* fields containing zero or one term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
// TODO: bogus that newStringRange doesnt share this code... generics hell
public static FieldCacheRangeFilter<BytesRef> newBytesRefRange(String field, BytesRef lowerVal, BytesRef upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<BytesRef>(field, null, lowerVal, upperVal, includeLower, includeUpper) {
public static DocValuesRangeFilter<BytesRef> newBytesRefRange(String field, BytesRef lowerVal, BytesRef upperVal, boolean includeLower, boolean includeUpper) {
return new DocValuesRangeFilter<BytesRef>(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field);
final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(lowerVal);
final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(upperVal);
@ -183,7 +185,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0;
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
final int docOrd = fcsi.getOrd(doc);
@ -195,21 +197,12 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
}
/**
* Creates a numeric range filter using {@link FieldCache#getInts(AtomicReader,String,boolean)}. This works with all
* Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all
* int fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Integer> newIntRange(String field, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) {
return newIntRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
}
/**
* Creates a numeric range filter using {@link FieldCache#getInts(AtomicReader,String,FieldCache.IntParser,boolean)}. This works with all
* int fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Integer> newIntRange(String field, FieldCache.IntParser parser, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Integer>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
public static DocValuesRangeFilter<Integer> newIntRange(String field, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) {
return new DocValuesRangeFilter<Integer>(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final int inclusiveLowerPoint, inclusiveUpperPoint;
@ -233,11 +226,11 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return null;
final FieldCache.Ints values = FieldCache.DEFAULT.getInts(context.reader(), field, (FieldCache.IntParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
final NumericDocValues values = DocValues.getNumeric(context.reader(), field);
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final int value = values.get(doc);
final int value = (int) values.get(doc);
return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint;
}
};
@ -246,21 +239,12 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
}
/**
* Creates a numeric range filter using {@link FieldCache#getLongs(AtomicReader,String,boolean)}. This works with all
* Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all
* long fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Long> newLongRange(String field, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) {
return newLongRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
}
/**
* Creates a numeric range filter using {@link FieldCache#getLongs(AtomicReader,String,FieldCache.LongParser,boolean)}. This works with all
* long fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Long> newLongRange(String field, FieldCache.LongParser parser, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Long>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
public static DocValuesRangeFilter<Long> newLongRange(String field, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) {
return new DocValuesRangeFilter<Long>(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final long inclusiveLowerPoint, inclusiveUpperPoint;
@ -284,8 +268,8 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return null;
final FieldCache.Longs values = FieldCache.DEFAULT.getLongs(context.reader(), field, (FieldCache.LongParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
final NumericDocValues values = DocValues.getNumeric(context.reader(), field);
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final long value = values.get(doc);
@ -297,21 +281,12 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
}
/**
* Creates a numeric range filter using {@link FieldCache#getFloats(AtomicReader,String,boolean)}. This works with all
* Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all
* float fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Float> newFloatRange(String field, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) {
return newFloatRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
}
/**
* Creates a numeric range filter using {@link FieldCache#getFloats(AtomicReader,String,FieldCache.FloatParser,boolean)}. This works with all
* float fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Float> newFloatRange(String field, FieldCache.FloatParser parser, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Float>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
public static DocValuesRangeFilter<Float> newFloatRange(String field, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) {
return new DocValuesRangeFilter<Float>(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
@ -339,11 +314,11 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return null;
final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), field, (FieldCache.FloatParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
final NumericDocValues values = DocValues.getNumeric(context.reader(), field);
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final float value = values.get(doc);
final float value = Float.intBitsToFloat((int)values.get(doc));
return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint;
}
};
@ -352,21 +327,12 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
}
/**
* Creates a numeric range filter using {@link FieldCache#getDoubles(AtomicReader,String,boolean)}. This works with all
* Creates a numeric range filter using {@link AtomicReader#getNumericDocValues(String)}. This works with all
* double fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Double> newDoubleRange(String field, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) {
return newDoubleRange(field, null, lowerVal, upperVal, includeLower, includeUpper);
}
/**
* Creates a numeric range filter using {@link FieldCache#getDoubles(AtomicReader,String,FieldCache.DoubleParser,boolean)}. This works with all
* double fields containing exactly one numeric term in the field. The range can be half-open by setting one
* of the values to <code>null</code>.
*/
public static FieldCacheRangeFilter<Double> newDoubleRange(String field, FieldCache.DoubleParser parser, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Double>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
public static DocValuesRangeFilter<Double> newDoubleRange(String field, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) {
return new DocValuesRangeFilter<Double>(field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
@ -394,12 +360,12 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return null;
final FieldCache.Doubles values = FieldCache.DEFAULT.getDoubles(context.reader(), field, (FieldCache.DoubleParser) parser, false);
final NumericDocValues values = DocValues.getNumeric(context.reader(), field);
// ignore deleted docs if range doesn't contain 0
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final double value = values.get(doc);
final double value = Double.longBitsToDouble(values.get(doc));
return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint;
}
};
@ -422,8 +388,8 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
@SuppressWarnings({"rawtypes"})
public final boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof FieldCacheRangeFilter)) return false;
FieldCacheRangeFilter other = (FieldCacheRangeFilter) o;
if (!(o instanceof DocValuesRangeFilter)) return false;
DocValuesRangeFilter other = (DocValuesRangeFilter) o;
if (!this.field.equals(other.field)
|| this.includeLower != other.includeLower
@ -431,7 +397,6 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
) { return false; }
if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
if (this.parser != null ? !this.parser.equals(other.parser) : other.parser != null) return false;
return true;
}
@ -441,7 +406,6 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
h ^= (lowerVal != null) ? lowerVal.hashCode() : 550356204;
h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper
h ^= (upperVal != null) ? upperVal.hashCode() : -1674416163;
h ^= (parser != null) ? parser.hashCode() : -1572457324;
h ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 1721088258 : 1948649653);
return h;
}
@ -460,7 +424,4 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
/** Returns the upper value of this range filter */
public T getUpperVal() { return upperVal; }
/** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
public FieldCache.Parser getParser() { return parser; }
}
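A brief sketch of the simplified numeric factories shown above (the parser-taking overloads are removed); a null bound keeps the range half-open, and the field name is illustrative:

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesRangeFilter;
import org.apache.lucene.search.Query;

class PriceRange {
  // Lower bound inclusive, no upper bound: matches documents with price >= minPriceCents.
  static Query atLeast(long minPriceCents) {
    return new ConstantScoreQuery(
        DocValuesRangeFilter.newLongRange("price", minPriceCents, null, true, false));
  }
}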

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
@ -28,28 +29,28 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LongBitSet;
/**
* Rewrites MultiTermQueries into a filter, using the FieldCache for term enumeration.
* Rewrites MultiTermQueries into a filter, using DocValues for term enumeration.
* <p>
* This can be used to perform these queries against an unindexed docvalues field.
* @lucene.experimental
*/
public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod {
public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public Query rewrite(IndexReader reader, MultiTermQuery query) {
Query result = new ConstantScoreQuery(new MultiTermQueryFieldCacheWrapperFilter(query));
Query result = new ConstantScoreQuery(new MultiTermQueryDocValuesWrapperFilter(query));
result.setBoost(query.getBoost());
return result;
}
static class MultiTermQueryFieldCacheWrapperFilter extends Filter {
static class MultiTermQueryDocValuesWrapperFilter extends Filter {
protected final MultiTermQuery query;
/**
* Wrap a {@link MultiTermQuery} as a Filter.
*/
protected MultiTermQueryFieldCacheWrapperFilter(MultiTermQuery query) {
protected MultiTermQueryDocValuesWrapperFilter(MultiTermQuery query) {
this.query = query;
}
@ -64,7 +65,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
if (o==this) return true;
if (o==null) return false;
if (this.getClass().equals(o.getClass())) {
return this.query.equals( ((MultiTermQueryFieldCacheWrapperFilter)o).query );
return this.query.equals( ((MultiTermQueryDocValuesWrapperFilter)o).query );
}
return false;
}
@ -83,7 +84,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
*/
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), query.field);
final SortedDocValues fcsi = DocValues.getSorted(context.reader(), query.field);
// Cannot use FixedBitSet because we require long index (ord):
final LongBitSet termSet = new LongBitSet(fcsi.getValueCount());
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@ -147,7 +148,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
return null;
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
int ord = fcsi.getOrd(doc);

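A hedged sketch of pointing a multi-term query at a doc values field via the renamed rewrite method above; the field name and prefix are illustrative:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesRewriteMethod;
import org.apache.lucene.search.PrefixQuery;

class DocValuesRewriteExample {
  static PrefixQuery categoryPrefix(String prefix) {
    // Term enumeration runs against the field's SORTED doc values rather than the postings,
    // so this works even if "category" is not indexed as terms.
    PrefixQuery query = new PrefixQuery(new Term("category", prefix));
    query.setRewriteMethod(new DocValuesRewriteMethod());
    return query;
  }
}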
View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsEnum; // javadoc @link
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedDocValues;
@ -41,17 +42,6 @@ import org.apache.lucene.util.FixedBitSet;
* also have different performance characteristics, as
* described below.
*
* <p/>
*
* The first invocation of this filter on a given field will
* be slower, since a {@link SortedDocValues} must be
* created. Subsequent invocations using the same field
* will re-use this cache. However, as with all
* functionality based on {@link FieldCache}, persistent RAM
* is consumed to hold the cache, and is not freed until the
* {@link IndexReader} is closed. In contrast, TermsFilter
* has no persistent RAM consumption.
*
*
* <p/>
*
@ -97,29 +87,25 @@ import org.apache.lucene.util.FixedBitSet;
* Which filter is best is very application dependent.
*/
public class FieldCacheTermsFilter extends Filter {
public class DocValuesTermsFilter extends Filter {
private String field;
private BytesRef[] terms;
public FieldCacheTermsFilter(String field, BytesRef... terms) {
public DocValuesTermsFilter(String field, BytesRef... terms) {
this.field = field;
this.terms = terms;
}
public FieldCacheTermsFilter(String field, String... terms) {
public DocValuesTermsFilter(String field, String... terms) {
this.field = field;
this.terms = new BytesRef[terms.length];
for (int i = 0; i < terms.length; i++)
this.terms[i] = new BytesRef(terms[i]);
}
public FieldCache getFieldCache() {
return FieldCache.DEFAULT;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final SortedDocValues fcsi = getFieldCache().getTermsIndex(context.reader(), field);
final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field);
final FixedBitSet bits = new FixedBitSet(fcsi.getValueCount());
for (int i=0;i<terms.length;i++) {
int ord = fcsi.lookupTerm(terms[i]);
@ -127,7 +113,7 @@ public class FieldCacheTermsFilter extends Filter {
bits.set(ord);
}
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
int ord = fcsi.getOrd(doc);

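A minimal sketch of the renamed terms filter above; as its javadoc discusses, it is best suited to a small fixed set of terms over a single-valued SORTED field (the field and terms here are illustrative):

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesTermsFilter;
import org.apache.lucene.search.Query;

class CountryFilterExample {
  static Query europeanSubset() {
    // Matches documents whose "country" doc value is one of the listed terms.
    return new ConstantScoreQuery(new DocValuesTermsFilter("country", "DE", "FR", "SE"));
  }
}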
View File

@ -19,13 +19,12 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.FieldCache.DoubleParser;
import org.apache.lucene.search.FieldCache.FloatParser;
import org.apache.lucene.search.FieldCache.IntParser;
import org.apache.lucene.search.FieldCache.LongParser;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -82,7 +81,7 @@ import org.apache.lucene.util.BytesRef;
* when the search is switching to the next segment.
* You may need to update internal state of the
* comparator, for example retrieving new values from
* the {@link FieldCache}.
* DocValues.
*
* <li> {@link #value} Return the sort value stored in
* the specified slot. This is only called at the end
@ -236,7 +235,7 @@ public abstract class FieldComparator<T> {
@Override
public FieldComparator<T> setNextReader(AtomicReaderContext context) throws IOException {
if (missingValue != null) {
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
docsWithField = DocValues.getDocsWithField(context.reader(), field);
// optimization to remove unneeded checks on the bit interface:
if (docsWithField instanceof Bits.MatchAllBits) {
docsWithField = null;
@ -249,18 +248,16 @@ public abstract class FieldComparator<T> {
}
/** Parses field's values as double (using {@link
* FieldCache#getDoubles} and sorts by ascending value */
* AtomicReader#getNumericDocValues} and sorts by ascending value */
public static final class DoubleComparator extends NumericComparator<Double> {
private final double[] values;
private final DoubleParser parser;
private FieldCache.Doubles currentReaderValues;
private NumericDocValues currentReaderValues;
private double bottom;
private double topValue;
DoubleComparator(int numHits, String field, FieldCache.Parser parser, Double missingValue) {
DoubleComparator(int numHits, String field, Double missingValue) {
super(field, missingValue);
values = new double[numHits];
this.parser = (DoubleParser) parser;
}
@Override
@ -270,7 +267,7 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
double v2 = currentReaderValues.get(doc);
double v2 = Double.longBitsToDouble(currentReaderValues.get(doc));
// Test for v2 == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && v2 == 0 && !docsWithField.get(doc)) {
@ -282,7 +279,7 @@ public abstract class FieldComparator<T> {
@Override
public void copy(int slot, int doc) {
double v2 = currentReaderValues.get(doc);
double v2 = Double.longBitsToDouble(currentReaderValues.get(doc));
// Test for v2 == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && v2 == 0 && !docsWithField.get(doc)) {
@ -294,9 +291,7 @@ public abstract class FieldComparator<T> {
@Override
public FieldComparator<Double> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getDoubles(context.reader(), field, parser, missingValue != null);
currentReaderValues = DocValues.getNumeric(context.reader(), field);
return super.setNextReader(context);
}
@ -317,7 +312,7 @@ public abstract class FieldComparator<T> {
@Override
public int compareTop(int doc) {
double docValue = currentReaderValues.get(doc);
double docValue = Double.longBitsToDouble(currentReaderValues.get(doc));
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
@ -328,18 +323,16 @@ public abstract class FieldComparator<T> {
}
/** Parses field's values as float (using {@link
* FieldCache#getFloats} and sorts by ascending value */
* AtomicReader#getNumericDocValues(String)} and sorts by ascending value */
public static final class FloatComparator extends NumericComparator<Float> {
private final float[] values;
private final FloatParser parser;
private FieldCache.Floats currentReaderValues;
private NumericDocValues currentReaderValues;
private float bottom;
private float topValue;
FloatComparator(int numHits, String field, FieldCache.Parser parser, Float missingValue) {
FloatComparator(int numHits, String field, Float missingValue) {
super(field, missingValue);
values = new float[numHits];
this.parser = (FloatParser) parser;
}
@Override
@ -350,7 +343,7 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
// TODO: are there sneaky non-branch ways to compute sign of float?
float v2 = currentReaderValues.get(doc);
float v2 = Float.intBitsToFloat((int)currentReaderValues.get(doc));
// Test for v2 == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && v2 == 0 && !docsWithField.get(doc)) {
@ -362,7 +355,7 @@ public abstract class FieldComparator<T> {
@Override
public void copy(int slot, int doc) {
float v2 = currentReaderValues.get(doc);
float v2 = Float.intBitsToFloat((int)currentReaderValues.get(doc));
// Test for v2 == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && v2 == 0 && !docsWithField.get(doc)) {
@ -374,9 +367,7 @@ public abstract class FieldComparator<T> {
@Override
public FieldComparator<Float> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getFloats(context.reader(), field, parser, missingValue != null);
currentReaderValues = DocValues.getNumeric(context.reader(), field);
return super.setNextReader(context);
}
@ -397,7 +388,7 @@ public abstract class FieldComparator<T> {
@Override
public int compareTop(int doc) {
float docValue = currentReaderValues.get(doc);
float docValue = Float.intBitsToFloat((int)currentReaderValues.get(doc));
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
@ -408,18 +399,16 @@ public abstract class FieldComparator<T> {
}
/** Parses field's values as int (using {@link
* FieldCache#getInts} and sorts by ascending value */
* AtomicReader#getNumericDocValues(String)} and sorts by ascending value */
public static final class IntComparator extends NumericComparator<Integer> {
private final int[] values;
private final IntParser parser;
private FieldCache.Ints currentReaderValues;
private NumericDocValues currentReaderValues;
private int bottom; // Value of bottom of queue
private int topValue;
IntComparator(int numHits, String field, FieldCache.Parser parser, Integer missingValue) {
IntComparator(int numHits, String field, Integer missingValue) {
super(field, missingValue);
values = new int[numHits];
this.parser = (IntParser) parser;
}
@Override
@ -429,7 +418,7 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
int v2 = currentReaderValues.get(doc);
int v2 = (int) currentReaderValues.get(doc);
// Test for v2 == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && v2 == 0 && !docsWithField.get(doc)) {
@ -441,7 +430,7 @@ public abstract class FieldComparator<T> {
@Override
public void copy(int slot, int doc) {
int v2 = currentReaderValues.get(doc);
int v2 = (int) currentReaderValues.get(doc);
// Test for v2 == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && v2 == 0 && !docsWithField.get(doc)) {
@ -453,9 +442,7 @@ public abstract class FieldComparator<T> {
@Override
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getInts(context.reader(), field, parser, missingValue != null);
currentReaderValues = DocValues.getNumeric(context.reader(), field);
return super.setNextReader(context);
}
@ -476,7 +463,7 @@ public abstract class FieldComparator<T> {
@Override
public int compareTop(int doc) {
int docValue = currentReaderValues.get(doc);
int docValue = (int) currentReaderValues.get(doc);
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
@ -487,18 +474,16 @@ public abstract class FieldComparator<T> {
}
/** Parses field's values as long (using {@link
* FieldCache#getLongs} and sorts by ascending value */
* AtomicReader#getNumericDocValues(String)} and sorts by ascending value */
public static final class LongComparator extends NumericComparator<Long> {
private final long[] values;
private final LongParser parser;
private FieldCache.Longs currentReaderValues;
private NumericDocValues currentReaderValues;
private long bottom;
private long topValue;
LongComparator(int numHits, String field, FieldCache.Parser parser, Long missingValue) {
LongComparator(int numHits, String field, Long missingValue) {
super(field, missingValue);
values = new long[numHits];
this.parser = (LongParser) parser;
}
@Override
@ -534,9 +519,7 @@ public abstract class FieldComparator<T> {
@Override
public FieldComparator<Long> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getLongs(context.reader(), field, parser, missingValue != null);
currentReaderValues = DocValues.getNumeric(context.reader(), field);
return super.setNextReader(context);
}
@ -712,7 +695,7 @@ public abstract class FieldComparator<T> {
* ordinals. This is functionally equivalent to {@link
 * org.apache.lucene.search.FieldComparator.TermValComparator}, but it first resolves the strings
* to their relative ordinal positions (using the index
* returned by {@link FieldCache#getTermsIndex}), and
* returned by {@link AtomicReader#getSortedDocValues(String)}), and
* does most comparisons using the ordinals. For medium
* to large results, this comparator will be much faster
* than {@link org.apache.lucene.search.FieldComparator.TermValComparator}. For very small
@ -856,7 +839,7 @@ public abstract class FieldComparator<T> {
/** Retrieves the SortedDocValues for the field in this segment */
protected SortedDocValues getSortedDocValues(AtomicReaderContext context, String field) throws IOException {
return FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
return DocValues.getSorted(context.reader(), field);
}
@Override
@ -1029,8 +1012,8 @@ public abstract class FieldComparator<T> {
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
docTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, true);
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
docTerms = DocValues.getBinary(context.reader(), field);
docsWithField = DocValues.getDocsWithField(context.reader(), field);
return this;
}
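The raw-bits decoding shown in the numeric comparators above applies anywhere per-document real values are read from NumericDocValues; a hedged sketch (the "price" field name is an assumption):

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;

// Sketch: NumericDocValues stores raw long bits, so a double field indexed
// via DoubleDocValuesField is decoded with Double.longBitsToDouble.
static double priceOf(AtomicReader reader, int docID) throws IOException {
  NumericDocValues values = DocValues.getNumeric(reader, "price");
  return Double.longBitsToDouble(values.get(docID));
}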

View File

@ -18,15 +18,17 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchAllBits;
import org.apache.lucene.util.Bits.MatchNoBits;
/**
* A {@link Filter} that accepts all documents that have one or more values in a
* given field. This {@link Filter} request {@link Bits} from the
* {@link FieldCache} and build the bits if not present.
 * given field. This {@link Filter} requests {@link Bits} from
 * {@link AtomicReader#getDocsWithField}.
*/
public class FieldValueFilter extends Filter {
private final String field;
@ -76,13 +78,13 @@ public class FieldValueFilter extends Filter {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(
final Bits docsWithField = DocValues.getDocsWithField(
context.reader(), field);
if (negate) {
if (docsWithField instanceof MatchAllBits) {
return null;
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
return !docsWithField.get(doc);
@ -97,7 +99,7 @@ public class FieldValueFilter extends Filter {
// :-)
return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
return docsWithField.get(doc);
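A short sketch of this filter in use (the "timestamp" field name and the caller-supplied searcher/query are assumptions):

import java.io.IOException;
import org.apache.lucene.search.FieldValueFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

// Sketch: keep only documents that have a value for "timestamp";
// new FieldValueFilter("timestamp", true) would instead keep the docs missing it.
static TopDocs withTimestamp(IndexSearcher searcher, Query query) throws IOException {
  Filter hasValue = new FieldValueFilter("timestamp");
  return searcher.search(query, hasValue, 20);
}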

View File

@ -23,13 +23,10 @@ import org.apache.lucene.util.PriorityQueue;
/**
* Expert: A hit queue for sorting by hits by terms in more than one field.
* Uses <code>FieldCache.DEFAULT</code> for maintaining
* internal term lookup tables.
*
* @lucene.experimental
* @since 2.9
* @see IndexSearcher#search(Query,Filter,int,Sort)
* @see FieldCache
*/
public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {

View File

@ -94,7 +94,6 @@ public class SortField {
private String field;
private Type type; // defaults to determining type dynamically
boolean reverse = false; // defaults to natural order
private FieldCache.Parser parser;
// Used for CUSTOM sort
private FieldComparatorSource comparatorSource;
@ -124,44 +123,6 @@ public class SortField {
this.reverse = reverse;
}
/** Creates a sort by terms in the given field, parsed
* to numeric values using a custom {@link FieldCache.Parser}.
* @param field Name of field to sort by. Must not be null.
* @param parser Instance of a {@link FieldCache.Parser},
* which must subclass one of the existing numeric
* parsers from {@link FieldCache}. Sort type is inferred
* by testing which numeric parser the parser subclasses.
* @throws IllegalArgumentException if the parser fails to
* subclass an existing numeric parser, or field is null
*/
public SortField(String field, FieldCache.Parser parser) {
this(field, parser, false);
}
/** Creates a sort, possibly in reverse, by terms in the given field, parsed
* to numeric values using a custom {@link FieldCache.Parser}.
* @param field Name of field to sort by. Must not be null.
* @param parser Instance of a {@link FieldCache.Parser},
* which must subclass one of the existing numeric
* parsers from {@link FieldCache}. Sort type is inferred
* by testing which numeric parser the parser subclasses.
* @param reverse True if natural order should be reversed.
* @throws IllegalArgumentException if the parser fails to
* subclass an existing numeric parser, or field is null
*/
public SortField(String field, FieldCache.Parser parser, boolean reverse) {
if (parser instanceof FieldCache.IntParser) initFieldType(field, Type.INT);
else if (parser instanceof FieldCache.FloatParser) initFieldType(field, Type.FLOAT);
else if (parser instanceof FieldCache.LongParser) initFieldType(field, Type.LONG);
else if (parser instanceof FieldCache.DoubleParser) initFieldType(field, Type.DOUBLE);
else {
throw new IllegalArgumentException("Parser instance does not subclass existing numeric parser from FieldCache (got " + parser + ")");
}
this.reverse = reverse;
this.parser = parser;
}
/** Pass this to {@link #setMissingValue} to have missing
* string values sort first. */
public final static Object STRING_FIRST = new Object() {
@ -239,14 +200,6 @@ public class SortField {
return type;
}
/** Returns the instance of a {@link FieldCache} parser that fits to the given sort type.
* May return <code>null</code> if no parser was specified. Sorting is using the default parser then.
* @return An instance of a {@link FieldCache} parser, or <code>null</code>.
*/
public FieldCache.Parser getParser() {
return parser;
}
/** Returns whether the sort should be reversed.
* @return True if natural order should be reversed.
*/
@ -320,8 +273,7 @@ public class SortField {
}
/** Returns true if <code>o</code> is equal to this. If a
* {@link FieldComparatorSource} or {@link
* FieldCache.Parser} was provided, it must properly
* {@link FieldComparatorSource} was provided, it must properly
* implement equals (unless a singleton is always used). */
@Override
public boolean equals(Object o) {
@ -337,8 +289,7 @@ public class SortField {
}
/** Returns a hash code for this instance. If a
* {@link FieldComparatorSource} or {@link
* FieldCache.Parser} was provided, it must properly
* {@link FieldComparatorSource} was provided, it must properly
* implement hashCode (unless a singleton is always
* used). */
@Override
@ -381,16 +332,16 @@ public class SortField {
return new FieldComparator.DocComparator(numHits);
case INT:
return new FieldComparator.IntComparator(numHits, field, parser, (Integer) missingValue);
return new FieldComparator.IntComparator(numHits, field, (Integer) missingValue);
case FLOAT:
return new FieldComparator.FloatComparator(numHits, field, parser, (Float) missingValue);
return new FieldComparator.FloatComparator(numHits, field, (Float) missingValue);
case LONG:
return new FieldComparator.LongComparator(numHits, field, parser, (Long) missingValue);
return new FieldComparator.LongComparator(numHits, field, (Long) missingValue);
case DOUBLE:
return new FieldComparator.DoubleComparator(numHits, field, parser, (Double) missingValue);
return new FieldComparator.DoubleComparator(numHits, field, (Double) missingValue);
case CUSTOM:
assert comparatorSource != null;
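With the parser-based constructors removed, numeric sorting is driven purely by the declared sort type and the field's doc values; a hedged sketch of the replacement path (the "price" field is assumed to be indexed as a NumericDocValuesField):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

// Sketch: sort descending by a long NumericDocValuesField; no parser needed.
static TopDocs byPrice(IndexSearcher searcher, Query query) throws IOException {
  Sort sort = new Sort(new SortField("price", SortField.Type.LONG, true));
  return searcher.search(query, 10, sort);
}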

View File

@ -0,0 +1,228 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
/** Selects a value from the document's set to use as the representative value */
public class SortedSetSelector {
/**
* Type of selection to perform.
* <p>
* Limitations:
* <ul>
* <li>Fields containing {@link Integer#MAX_VALUE} or more unique values
* are unsupported.
* <li>Selectors other than ({@link Type#MIN}) require
* optional codec support. However several codecs provided by Lucene,
* including the current default codec, support this.
* </ul>
*/
public enum Type {
/**
* Selects the minimum value in the set
*/
MIN,
/**
* Selects the maximum value in the set
*/
MAX,
/**
* Selects the middle value in the set.
* <p>
* If the set has an even number of values, the lower of the middle two is chosen.
*/
MIDDLE_MIN,
/**
* Selects the middle value in the set.
* <p>
 * If the set has an even number of values, the higher of the middle two is chosen.
*/
MIDDLE_MAX
}
/** Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
}
SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet);
if (singleton != null) {
// it's actually single-valued in practice, but indexed as multi-valued,
// so just sort on the underlying single-valued dv directly.
// regardless of selector type, this optimization is safe!
return singleton;
} else if (selector == Type.MIN) {
return new MinValue(sortedSet);
} else {
if (sortedSet instanceof RandomAccessOrds == false) {
throw new UnsupportedOperationException("codec does not support random access ordinals, cannot use selector: " + selector);
}
RandomAccessOrds randomOrds = (RandomAccessOrds) sortedSet;
switch(selector) {
case MAX: return new MaxValue(randomOrds);
case MIDDLE_MIN: return new MiddleMinValue(randomOrds);
case MIDDLE_MAX: return new MiddleMaxValue(randomOrds);
case MIN:
default:
throw new AssertionError();
}
}
}
/** Wraps a SortedSetDocValues and returns the first ordinal (min) */
static class MinValue extends SortedDocValues {
final SortedSetDocValues in;
MinValue(SortedSetDocValues in) {
this.in = in;
}
@Override
public int getOrd(int docID) {
in.setDocument(docID);
return (int) in.nextOrd();
}
@Override
public void lookupOrd(int ord, BytesRef result) {
in.lookupOrd(ord, result);
}
@Override
public int getValueCount() {
return (int) in.getValueCount();
}
@Override
public int lookupTerm(BytesRef key) {
return (int) in.lookupTerm(key);
}
}
/** Wraps a SortedSetDocValues and returns the last ordinal (max) */
static class MaxValue extends SortedDocValues {
final RandomAccessOrds in;
MaxValue(RandomAccessOrds in) {
this.in = in;
}
@Override
public int getOrd(int docID) {
in.setDocument(docID);
final int count = in.cardinality();
if (count == 0) {
return -1;
} else {
return (int) in.ordAt(count-1);
}
}
@Override
public void lookupOrd(int ord, BytesRef result) {
in.lookupOrd(ord, result);
}
@Override
public int getValueCount() {
return (int) in.getValueCount();
}
@Override
public int lookupTerm(BytesRef key) {
return (int) in.lookupTerm(key);
}
}
/** Wraps a SortedSetDocValues and returns the middle ordinal (or min of the two) */
static class MiddleMinValue extends SortedDocValues {
final RandomAccessOrds in;
MiddleMinValue(RandomAccessOrds in) {
this.in = in;
}
@Override
public int getOrd(int docID) {
in.setDocument(docID);
final int count = in.cardinality();
if (count == 0) {
return -1;
} else {
return (int) in.ordAt((count-1) >>> 1);
}
}
@Override
public void lookupOrd(int ord, BytesRef result) {
in.lookupOrd(ord, result);
}
@Override
public int getValueCount() {
return (int) in.getValueCount();
}
@Override
public int lookupTerm(BytesRef key) {
return (int) in.lookupTerm(key);
}
}
/** Wraps a SortedSetDocValues and returns the middle ordinal (or max of the two) */
static class MiddleMaxValue extends SortedDocValues {
final RandomAccessOrds in;
MiddleMaxValue(RandomAccessOrds in) {
this.in = in;
}
@Override
public int getOrd(int docID) {
in.setDocument(docID);
final int count = in.cardinality();
if (count == 0) {
return -1;
} else {
return (int) in.ordAt(count >>> 1);
}
}
@Override
public void lookupOrd(int ord, BytesRef result) {
in.lookupOrd(ord, result);
}
@Override
public int getValueCount() {
return (int) in.getValueCount();
}
@Override
public int lookupTerm(BytesRef key) {
return (int) in.lookupTerm(key);
}
}
}
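A minimal sketch of wrapping a multi-valued field with a selector (the "authors" field name is an assumption; most callers go through SortedSetSortField below rather than calling wrap directly):

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.SortedSetSelector;

// Sketch: view the maximum value of each document's "authors" set as a
// single-valued field (requires a codec exposing RandomAccessOrds).
static SortedDocValues maxAuthor(AtomicReader reader) throws IOException {
  SortedSetDocValues set = DocValues.getSortedSet(reader, "authors");
  return SortedSetSelector.wrap(set, SortedSetSelector.Type.MAX);
}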

View File

@ -0,0 +1,133 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.SortField;
/**
* SortField for {@link SortedSetDocValues}.
* <p>
* A SortedSetDocValues contains multiple values for a field, so sorting with
* this technique "selects" a value as the representative sort value for the document.
* <p>
* By default, the minimum value in the set is selected as the sort value, but
* this can be customized. Selectors other than the default do have some limitations
* to ensure that all selections happen in constant-time for performance.
* <p>
* Like sorting by string, this also supports sorting missing values as first or last,
* via {@link #setMissingValue(Object)}.
* <p>
* @see SortedSetSelector
*/
public class SortedSetSortField extends SortField {
private final SortedSetSelector.Type selector;
/**
* Creates a sort, possibly in reverse, by the minimum value in the set
* for the document.
* @param field Name of field to sort by. Must not be null.
* @param reverse True if natural order should be reversed.
*/
public SortedSetSortField(String field, boolean reverse) {
this(field, reverse, SortedSetSelector.Type.MIN);
}
/**
* Creates a sort, possibly in reverse, specifying how the sort value from
* the document's set is selected.
* @param field Name of field to sort by. Must not be null.
* @param reverse True if natural order should be reversed.
* @param selector custom selector type for choosing the sort value from the set.
* <p>
* NOTE: selectors other than {@link SortedSetSelector.Type#MIN} require optional codec support.
*/
public SortedSetSortField(String field, boolean reverse, SortedSetSelector.Type selector) {
super(field, SortField.Type.CUSTOM, reverse);
if (selector == null) {
throw new NullPointerException();
}
this.selector = selector;
}
/** Returns the selector in use for this sort */
public SortedSetSelector.Type getSelector() {
return selector;
}
@Override
public int hashCode() {
return 31 * super.hashCode() + selector.hashCode();
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (!super.equals(obj)) return false;
if (getClass() != obj.getClass()) return false;
SortedSetSortField other = (SortedSetSortField) obj;
if (selector != other.selector) return false;
return true;
}
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("<sortedset" + ": \"").append(getField()).append("\">");
if (getReverse()) buffer.append('!');
if (missingValue != null) {
buffer.append(" missingValue=");
buffer.append(missingValue);
}
buffer.append(" selector=");
buffer.append(selector);
return buffer.toString();
}
/**
* Set how missing values (the empty set) are sorted.
* <p>
* Note that this must be {@link #STRING_FIRST} or {@link #STRING_LAST}.
*/
@Override
public void setMissingValue(Object missingValue) {
if (missingValue != STRING_FIRST && missingValue != STRING_LAST) {
throw new IllegalArgumentException("For SORTED_SET type, missing value must be either STRING_FIRST or STRING_LAST");
}
this.missingValue = missingValue;
}
@Override
public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) {
@Override
protected SortedDocValues getSortedDocValues(AtomicReaderContext context, String field) throws IOException {
SortedSetDocValues sortedSet = DocValues.getSortedSet(context.reader(), field);
return SortedSetSelector.wrap(sortedSet, selector);
}
};
}
}
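A usage sketch (the "authors" field is assumed to be indexed with SortedSetDocValuesField):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TopDocs;

// Sketch: sort by the minimum value in each document's "authors" set,
// placing documents with an empty set last.
static TopDocs byFirstAuthor(IndexSearcher searcher, Query query) throws IOException {
  SortedSetSortField sortField = new SortedSetSortField("authors", false);
  sortField.setMissingValue(SortField.STRING_LAST);
  return searcher.search(query, 10, new Sort(sortField));
}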

View File

@ -29,7 +29,7 @@ import org.apache.lucene.util.BytesRef;
* for numerical ranges; use {@link NumericRangeFilter} instead.
*
* <p>If you construct a large number of range filters with different ranges but on the
* same field, {@link FieldCacheRangeFilter} may have significantly better performance.
* same field, {@link DocValuesRangeFilter} may have significantly better performance.
* @since 2.9
*/
public class TermRangeFilter extends MultiTermQueryWrapperFilter<TermRangeQuery> {

View File

@ -337,8 +337,8 @@ extend by plugging in a different component (e.g. term frequency normalizer).
<p>
Finally, you can extend the low level {@link org.apache.lucene.search.similarities.Similarity Similarity} directly
to implement a new retrieval model, or to use external scoring factors particular to your application. For example,
a custom Similarity can access per-document values via {@link org.apache.lucene.search.FieldCache FieldCache} or
{@link org.apache.lucene.index.NumericDocValues} and integrate them into the score.
a custom Similarity can access per-document values via {@link org.apache.lucene.index.NumericDocValues} and
integrate them into the score.
</p>
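A hedged sketch of the core of such a Similarity extension (the "boost" field, assumed to be indexed with FloatDocValuesField, and the baseScore argument are illustrative only):

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;

// Sketch: fold a per-document float boost stored in doc values into a score.
static float boostedScore(AtomicReaderContext context, int doc, float baseScore) throws IOException {
  NumericDocValues boosts = DocValues.getNumeric(context.reader(), "boost");
  return Float.intBitsToFloat((int) boosts.get(doc)) * baseScore;
}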
<p>
See the {@link org.apache.lucene.search.similarities} package documentation for information

View File

@ -46,7 +46,6 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
@ -64,6 +63,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
@ -877,17 +877,18 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
assertEquals("wrong number of hits", 34, hits.length);
// check decoding into field cache
FieldCache.Ints fci = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(searcher.getIndexReader()), "trieInt", false);
int maxDoc = searcher.getIndexReader().maxDoc();
for(int doc=0;doc<maxDoc;doc++) {
int val = fci.get(doc);
// check decoding of terms
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "trieInt");
TermsEnum termsEnum = NumericUtils.filterPrefixCodedInts(terms.iterator(null));
while (termsEnum.next() != null) {
int val = NumericUtils.prefixCodedToInt(termsEnum.term());
assertTrue("value in id bounds", val >= 0 && val < 35);
}
FieldCache.Longs fcl = FieldCache.DEFAULT.getLongs(SlowCompositeReaderWrapper.wrap(searcher.getIndexReader()), "trieLong", false);
for(int doc=0;doc<maxDoc;doc++) {
long val = fcl.get(doc);
terms = MultiFields.getTerms(searcher.getIndexReader(), "trieLong");
termsEnum = NumericUtils.filterPrefixCodedLongs(terms.iterator(null));
while (termsEnum.next() != null) {
long val = NumericUtils.prefixCodedToLong(termsEnum.term());
assertTrue("value in id bounds", val >= 0L && val < 35L);
}

View File

@ -32,13 +32,11 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.Bits;
@ -753,44 +751,6 @@ public void testFilesOpenClose() throws IOException {
dir.close();
}
// LUCENE-1579: Ensure that on a reopened reader, that any
// shared segments reuse the doc values arrays in
// FieldCache
public void testFieldCacheReuseAfterReopen() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMergePolicy(newLogMergePolicy(10))
);
Document doc = new Document();
doc.add(new IntField("number", 17, Field.Store.NO));
writer.addDocument(doc);
writer.commit();
// Open reader1
DirectoryReader r = DirectoryReader.open(dir);
AtomicReader r1 = getOnlySegmentReader(r);
final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(r1, "number", false);
assertEquals(17, ints.get(0));
// Add new segment
writer.addDocument(doc);
writer.commit();
// Reopen reader1 --> reader2
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
assertNotNull(r2);
r.close();
AtomicReader sub0 = r2.leaves().get(0).reader();
final FieldCache.Ints ints2 = FieldCache.DEFAULT.getInts(sub0, "number", false);
r2.close();
assertTrue(ints == ints2);
writer.shutdown();
dir.close();
}
// LUCENE-1586: getUniqueTermCount
public void testUniqueTermCount() throws Exception {
Directory dir = newDirectory();

View File

@ -32,7 +32,6 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -109,7 +108,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
DirectoryReader r = w.getReader();
w.shutdown();
assertEquals(17, FieldCache.DEFAULT.getInts(getOnlySegmentReader(r), "field", false).get(0));
assertEquals(17, DocValues.getNumeric(getOnlySegmentReader(r), "field").get(0));
r.close();
d.close();
}
@ -133,7 +132,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
DirectoryReader r = w.getReader();
w.shutdown();
assertEquals(17, FieldCache.DEFAULT.getInts(getOnlySegmentReader(r), "field", false).get(0));
assertEquals(17, DocValues.getNumeric(getOnlySegmentReader(r), "field").get(0));
r.close();
d.close();
}
@ -176,7 +175,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = w.getReader();
BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field", false);
BinaryDocValues s = DocValues.getSorted(getOnlySegmentReader(r), "field");
BytesRef bytes1 = new BytesRef();
s.get(0, bytes1);
@ -783,7 +782,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
AtomicReader subR = r.leaves().get(0).reader();
assertEquals(2, subR.numDocs());
Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
Bits bits = DocValues.getDocsWithField(subR, "dv");
assertTrue(bits.get(0));
assertTrue(bits.get(1));
r.close();

View File

@ -55,7 +55,6 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
@ -1751,11 +1750,6 @@ public class TestIndexWriter extends LuceneTestCase {
w.shutdown();
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
SortedDocValues dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
assertEquals(4, dti.getValueCount());
BytesRef br = new BytesRef();
dti.lookupOrd(2, br);
assertEquals(bigTermBytesRef, br);
reader.close();
dir.close();
}

View File

@ -33,11 +33,11 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.English;
@ -240,6 +240,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
for(int docCount=0;docCount<numDocs;docCount++) {
Document doc = new Document();
doc.add(new IntField("id", docCount, Field.Store.YES));
doc.add(new NumericDocValuesField("id", docCount));
List<Token> tokens = new ArrayList<>();
final int numTokens = atLeast(100);
//final int numTokens = atLeast(20);
@ -296,7 +297,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
DocsEnum docs = null;
DocsAndPositionsEnum docsAndPositions = null;
DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(sub, "id", false);
final NumericDocValues docIDToID = DocValues.getNumeric(sub, "id");
for(String term : terms) {
//System.out.println(" term=" + term);
if (termsEnum.seekExact(new BytesRef(term))) {
@ -305,7 +306,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
int doc;
//System.out.println(" doc/freq");
while((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
final List<Token> expected = actualTokens.get(term).get(docIDToID.get(doc));
final List<Token> expected = actualTokens.get(term).get((int) docIDToID.get(doc));
//System.out.println(" doc=" + docIDToID.get(doc) + " docID=" + doc + " " + expected.size() + " freq");
assertNotNull(expected);
assertEquals(expected.size(), docs.freq());
@ -316,7 +317,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
assertNotNull(docsAndPositions);
//System.out.println(" doc/freq/pos");
while((doc = docsAndPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
final List<Token> expected = actualTokens.get(term).get(docIDToID.get(doc));
final List<Token> expected = actualTokens.get(term).get((int) docIDToID.get(doc));
//System.out.println(" doc=" + docIDToID.get(doc) + " " + expected.size() + " freq");
assertNotNull(expected);
assertEquals(expected.size(), docsAndPositions.freq());
@ -331,7 +332,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
assertNotNull(docsAndPositionsAndOffsets);
//System.out.println(" doc/freq/pos/offs");
while((doc = docsAndPositionsAndOffsets.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
final List<Token> expected = actualTokens.get(term).get(docIDToID.get(doc));
final List<Token> expected = actualTokens.get(term).get((int) docIDToID.get(doc));
//System.out.println(" doc=" + docIDToID.get(doc) + " " + expected.size() + " freq");
assertNotNull(expected);
assertEquals(expected.size(), docsAndPositionsAndOffsets.freq());

View File

@ -24,8 +24,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -160,6 +160,7 @@ public class TestTermsEnum extends LuceneTestCase {
private void addDoc(RandomIndexWriter w, Collection<String> terms, Map<BytesRef,Integer> termToID, int id) throws IOException {
Document doc = new Document();
doc.add(new IntField("id", id, Field.Store.YES));
doc.add(new NumericDocValuesField("id", id));
if (VERBOSE) {
System.out.println("TEST: addDoc id:" + id + " terms=" + terms);
}
@ -227,8 +228,7 @@ public class TestTermsEnum extends LuceneTestCase {
final IndexReader r = w.getReader();
w.shutdown();
// NOTE: intentional insanity!!
final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(r), "id", false);
final NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {

View File

@ -28,10 +28,13 @@ import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
@ -120,19 +123,33 @@ public class BaseTestRangeFilter extends LuceneTestCase {
Document doc = new Document();
Field idField = newStringField(random, "id", "", Field.Store.YES);
Field idDVField = new SortedDocValuesField("id", new BytesRef());
Field intIdField = new IntField("id_int", 0, Store.YES);
Field intDVField = new NumericDocValuesField("id_int", 0);
Field floatIdField = new FloatField("id_float", 0, Store.YES);
Field floatDVField = new NumericDocValuesField("id_float", 0);
Field longIdField = new LongField("id_long", 0, Store.YES);
Field longDVField = new NumericDocValuesField("id_long", 0);
Field doubleIdField = new DoubleField("id_double", 0, Store.YES);
Field doubleDVField = new NumericDocValuesField("id_double", 0);
Field randField = newStringField(random, "rand", "", Field.Store.YES);
Field randDVField = new SortedDocValuesField("rand", new BytesRef());
Field bodyField = newStringField(random, "body", "", Field.Store.NO);
Field bodyDVField = new SortedDocValuesField("body", new BytesRef());
doc.add(idField);
doc.add(idDVField);
doc.add(intIdField);
doc.add(intDVField);
doc.add(floatIdField);
doc.add(floatDVField);
doc.add(longIdField);
doc.add(longDVField);
doc.add(doubleIdField);
doc.add(doubleDVField);
doc.add(randField);
doc.add(randDVField);
doc.add(bodyField);
doc.add(bodyDVField);
RandomIndexWriter writer = new RandomIndexWriter(random, index.index,
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random))
@ -146,10 +163,15 @@ public class BaseTestRangeFilter extends LuceneTestCase {
for (int d = minId; d <= maxId; d++) {
idField.setStringValue(pad(d));
idDVField.setBytesValue(new BytesRef(pad(d)));
intIdField.setIntValue(d);
intDVField.setLongValue(d);
floatIdField.setFloatValue(d);
floatDVField.setLongValue(Float.floatToRawIntBits(d));
longIdField.setLongValue(d);
longDVField.setLongValue(d);
doubleIdField.setDoubleValue(d);
doubleDVField.setLongValue(Double.doubleToRawLongBits(d));
int r = index.allowNegativeRandomInts ? random.nextInt() : random
.nextInt(Integer.MAX_VALUE);
if (index.maxR < r) {
@ -166,7 +188,9 @@ public class BaseTestRangeFilter extends LuceneTestCase {
minCount++;
}
randField.setStringValue(pad(r));
randDVField.setBytesValue(new BytesRef(pad(r)));
bodyField.setStringValue("body");
bodyDVField.setBytesValue(new BytesRef("body"));
writer.addDocument(doc);
}
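The encoding used above generalizes to any real-valued field; a hedged sketch of indexing a double alongside its doc values form (the "price" field name is an assumption):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;

// Sketch: NumericDocValuesField holds a long, so real values are stored as raw
// bit patterns and decoded with Double.longBitsToDouble at read time.
static Document priceDoc(double price) {
  Document doc = new Document();
  doc.add(new DoubleField("price", price, Field.Store.NO));
  doc.add(new NumericDocValuesField("price", Double.doubleToRawLongBits(price)));
  return doc;
}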

View File

@ -95,34 +95,6 @@ final class JustCompileSearch {
}
}
static final class JustCompileExtendedFieldCacheLongParser implements FieldCache.LongParser {
@Override
public long parseLong(BytesRef string) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public TermsEnum termsEnum(Terms terms) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileExtendedFieldCacheDoubleParser implements FieldCache.DoubleParser {
@Override
public double parseDouble(BytesRef term) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public TermsEnum termsEnum(Terms terms) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileFieldComparator extends FieldComparator<Object> {
@Override

View File

@ -254,7 +254,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
// returns default empty docidset, always cacheable:
assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", Integer.valueOf(10000), Integer.valueOf(-10000), true, true), true);
// is cacheable:
assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true);
assertDocIdSetCacheable(reader, DocValuesRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true);
// a fixedbitset filter is always cacheable
assertDocIdSetCacheable(reader, new Filter() {
@Override

View File

@ -20,11 +20,13 @@ package org.apache.lucene.search;
import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StoredDocument;
@ -110,6 +112,7 @@ public class TestDateSort extends LuceneTestCase {
String dateTimeString = DateTools.timeToString(time, DateTools.Resolution.SECOND);
Field dateTimeField = newStringField(DATE_TIME_FIELD, dateTimeString, Field.Store.YES);
document.add(dateTimeField);
document.add(new SortedDocValuesField(DATE_TIME_FIELD, new BytesRef(dateTimeString)));
return document;
}

View File

@ -33,12 +33,14 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
/**
* Tests the DocTermOrdsRangeFilter
*/
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // needs SORTED_SET
public class TestDocTermOrdsRangeFilter extends LuceneTestCase {
protected IndexSearcher searcher1;
protected IndexSearcher searcher2;
@ -63,10 +65,7 @@ public class TestDocTermOrdsRangeFilter extends LuceneTestCase {
for (int j = 0; j < numTerms; j++) {
String s = TestUtil.randomUnicodeString(random());
doc.add(newStringField(fieldName, s, Field.Store.NO));
// if the default codec doesn't support sortedset, we will uninvert at search time
if (defaultCodecSupportsSortedSet()) {
doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
}
doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
terms.add(s);
}
writer.addDocument(doc);

View File

@ -34,6 +34,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.UnicodeUtil;
@ -41,6 +42,7 @@ import org.apache.lucene.util.UnicodeUtil;
/**
* Tests the DocTermOrdsRewriteMethod
*/
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // needs SORTED_SET
public class TestDocTermOrdsRewriteMethod extends LuceneTestCase {
protected IndexSearcher searcher1;
protected IndexSearcher searcher2;
@ -65,10 +67,7 @@ public class TestDocTermOrdsRewriteMethod extends LuceneTestCase {
for (int j = 0; j < numTerms; j++) {
String s = TestUtil.randomUnicodeString(random());
doc.add(newStringField(fieldName, s, Field.Store.NO));
// if the default codec doesn't support sortedset, we will uninvert at search time
if (defaultCodecSupportsSortedSet()) {
doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
}
doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
terms.add(s);
}
writer.addDocument(doc);

View File

@ -23,8 +23,10 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
@ -158,12 +160,12 @@ public class TestDocValuesScoring extends LuceneTestCase {
@Override
public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
final SimScorer sub = sim.simScorer(stats, context);
final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);
final NumericDocValues values = DocValues.getNumeric(context.reader(), boostField);
return new SimScorer() {
@Override
public float score(int doc, float freq) {
return values.get(doc) * sub.score(doc, freq);
return Float.intBitsToFloat((int)values.get(doc)) * sub.score(doc, freq);
}
@Override
@ -178,7 +180,7 @@ public class TestDocValuesScoring extends LuceneTestCase {
@Override
public Explanation explain(int doc, Explanation freq) {
Explanation boostExplanation = new Explanation(values.get(doc), "indexDocValue(" + boostField + ")");
Explanation boostExplanation = new Explanation(Float.intBitsToFloat((int)values.get(doc)), "indexDocValue(" + boostField + ")");
Explanation simExplanation = sub.explain(doc, freq);
Explanation expl = new Explanation(boostExplanation.getValue() * simExplanation.getValue(), "product of:");
expl.addDetail(boostExplanation);

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.similarities.DefaultSimilarity;
@ -126,6 +127,9 @@ public class TestElevationComparator extends LuceneTestCase {
Document doc = new Document();
for (int i = 0; i < vals.length - 2; i += 2) {
doc.add(newTextField(vals[i], vals[i + 1], Field.Store.YES));
if (vals[i].equals("id")) {
doc.add(new SortedDocValuesField(vals[i], new BytesRef(vals[i+1])));
}
}
return doc;
}
@ -185,7 +189,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
@Override
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
idIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), fieldname);
idIndex = DocValues.getSorted(context.reader(), fieldname);
return this;
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -30,6 +31,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -73,6 +75,7 @@ public class TestExplanations extends LuceneTestCase {
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(newStringField(KEY, ""+i, Field.Store.NO));
doc.add(new SortedDocValuesField(KEY, new BytesRef(""+i)));
Field f = newTextField(FIELD, docFields[i], Field.Store.NO);
f.setBoost(i);
doc.add(f);
@ -110,7 +113,7 @@ public class TestExplanations extends LuceneTestCase {
/**
 * Convenience subclass of DocValuesTermsFilter
*/
public static class ItemizedFilter extends FieldCacheTermsFilter {
public static class ItemizedFilter extends DocValuesTermsFilter {
private static String[] int2str(int [] terms) {
String [] out = new String[terms.length];
for (int i = 0; i < terms.length; i++) {
@ -118,9 +121,6 @@ public class TestExplanations extends LuceneTestCase {
}
return out;
}
public ItemizedFilter(String keyField, int [] keys) {
super(keyField, int2str(keys));
}
public ItemizedFilter(int [] keys) {
super(KEY, int2str(keys));
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -63,67 +64,67 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
Query q = new TermQuery(new Term("body","body"));
// test id, bounded on both ends
result = search.search(q, FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs;
result = search.search(q, DocValuesRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,maxIP,T,F), numDocs).scoreDocs;
assertEquals("all but last", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,maxIP,F,T), numDocs).scoreDocs;
assertEquals("all but first", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,maxIP,F,F), numDocs).scoreDocs;
assertEquals("all but ends", numDocs-2, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,maxIP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",medIP,maxIP,T,T), numDocs).scoreDocs;
assertEquals("med and up", 1+ maxId-medId, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,medIP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,medIP,T,T), numDocs).scoreDocs;
assertEquals("up to med", 1+ medId-minId, result.length);
// unbounded id
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,null,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",null,null,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,null,T,F), numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,maxIP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",null,maxIP,F,T), numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,null,F,F), numDocs).scoreDocs;
assertEquals("not min, but up", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,maxIP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",null,maxIP,F,F), numDocs).scoreDocs;
assertEquals("not max, but down", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,maxIP,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",medIP,maxIP,T,F), numDocs).scoreDocs;
assertEquals("med and up, not max", maxId-medId, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,medIP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,medIP,F,T), numDocs).scoreDocs;
assertEquals("not min, up to med", medId-minId, result.length);
// very small sets
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,minIP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,minIP,F,F), numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,medIP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",medIP,medIP,F,F), numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",maxIP,maxIP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",maxIP,maxIP,F,F), numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,minIP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",minIP,minIP,T,T), numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,minIP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",null,minIP,F,T), numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",maxIP,maxIP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",maxIP,maxIP,T,T), numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",maxIP,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",maxIP,null,T,F), numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,medIP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("id",medIP,medIP,T,T), numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
}
@@ -145,47 +146,47 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
// test extremes, bounded on both ends
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,maxRP,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,maxRP,T,F), numDocs).scoreDocs;
assertEquals("all but biggest", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,maxRP,F,T), numDocs).scoreDocs;
assertEquals("all but smallest", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,maxRP,F,F), numDocs).scoreDocs;
assertEquals("all but extremes", numDocs-2, result.length);
// unbounded
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,null,T,F), numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",null,maxRP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",null,maxRP,F,T), numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,null,F,F), numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",null,maxRP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",null,maxRP,F,F), numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs-1, result.length);
// very small sets
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,minRP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,minRP,F,F), numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,maxRP,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",maxRP,maxRP,F,F), numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,minRP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",minRP,minRP,T,T), numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",null,minRP,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",null,minRP,F,T), numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,maxRP,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",maxRP,maxRP,T,T), numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newStringRange("rand",maxRP,null,T,F), numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
}
@@ -208,75 +209,75 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
// test id, bounded on both ends
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,maxIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,maxIdO,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,maxIdO,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,maxIdO,T,F), numDocs).scoreDocs;
assertEquals("all but last", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,maxIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,maxIdO,F,T), numDocs).scoreDocs;
assertEquals("all but first", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,maxIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,maxIdO,F,F), numDocs).scoreDocs;
assertEquals("all but ends", numDocs-2, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",medIdO,maxIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",medIdO,maxIdO,T,T), numDocs).scoreDocs;
assertEquals("med and up", 1+ maxId-medId, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,medIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,medIdO,T,T), numDocs).scoreDocs;
assertEquals("up to med", 1+ medId-minId, result.length);
// unbounded id
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",null,null,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",null,null,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,null,T,F), numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",null,maxIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",null,maxIdO,F,T), numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,null,F,F), numDocs).scoreDocs;
assertEquals("not min, but up", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",null,maxIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",null,maxIdO,F,F), numDocs).scoreDocs;
assertEquals("not max, but down", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",medIdO,maxIdO,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",medIdO,maxIdO,T,F), numDocs).scoreDocs;
assertEquals("med and up, not max", maxId-medId, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,medIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,medIdO,F,T), numDocs).scoreDocs;
assertEquals("not min, up to med", medId-minId, result.length);
// very small sets
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,minIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,minIdO,F,F), numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",medIdO,medIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",medIdO,medIdO,F,F), numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",maxIdO,maxIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",maxIdO,maxIdO,F,F), numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",minIdO,minIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",minIdO,minIdO,T,T), numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",null,minIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",null,minIdO,F,T), numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",maxIdO,maxIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",maxIdO,maxIdO,T,T), numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",maxIdO,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",maxIdO,null,T,F), numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",medIdO,medIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",medIdO,medIdO,T,T), numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
// special cases
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",Integer.valueOf(Integer.MAX_VALUE),null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",Integer.valueOf(Integer.MAX_VALUE),null,F,F), numDocs).scoreDocs;
assertEquals("overflow special case", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",null,Integer.valueOf(Integer.MIN_VALUE),F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",null,Integer.valueOf(Integer.MIN_VALUE),F,F), numDocs).scoreDocs;
assertEquals("overflow special case", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",maxIdO,minIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",maxIdO,minIdO,T,T), numDocs).scoreDocs;
assertEquals("inverse range", 0, result.length);
}
@@ -299,75 +300,75 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
// test id, bounded on both ends
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,maxIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,maxIdO,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,maxIdO,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,maxIdO,T,F), numDocs).scoreDocs;
assertEquals("all but last", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,maxIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,maxIdO,F,T), numDocs).scoreDocs;
assertEquals("all but first", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,maxIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,maxIdO,F,F), numDocs).scoreDocs;
assertEquals("all but ends", numDocs-2, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",medIdO,maxIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",medIdO,maxIdO,T,T), numDocs).scoreDocs;
assertEquals("med and up", 1+ maxId-medId, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,medIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,medIdO,T,T), numDocs).scoreDocs;
assertEquals("up to med", 1+ medId-minId, result.length);
// unbounded id
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",null,null,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",null,null,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,null,T,F), numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",null,maxIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",null,maxIdO,F,T), numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,null,F,F), numDocs).scoreDocs;
assertEquals("not min, but up", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",null,maxIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",null,maxIdO,F,F), numDocs).scoreDocs;
assertEquals("not max, but down", numDocs-1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",medIdO,maxIdO,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",medIdO,maxIdO,T,F), numDocs).scoreDocs;
assertEquals("med and up, not max", maxId-medId, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,medIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,medIdO,F,T), numDocs).scoreDocs;
assertEquals("not min, up to med", medId-minId, result.length);
// very small sets
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,minIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,minIdO,F,F), numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",medIdO,medIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",medIdO,medIdO,F,F), numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",maxIdO,maxIdO,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",maxIdO,maxIdO,F,F), numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",minIdO,minIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",minIdO,minIdO,T,T), numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",null,minIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",null,minIdO,F,T), numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",maxIdO,maxIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",maxIdO,maxIdO,T,T), numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",maxIdO,null,T,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",maxIdO,null,T,F), numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",medIdO,medIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",medIdO,medIdO,T,T), numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
// special cases
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",Long.valueOf(Long.MAX_VALUE),null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",Long.valueOf(Long.MAX_VALUE),null,F,F), numDocs).scoreDocs;
assertEquals("overflow special case", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",null,Long.valueOf(Long.MIN_VALUE),F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",null,Long.valueOf(Long.MIN_VALUE),F,F), numDocs).scoreDocs;
assertEquals("overflow special case", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id_long",maxIdO,minIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newLongRange("id_long",maxIdO,minIdO,T,T), numDocs).scoreDocs;
assertEquals("inverse range", 0, result.length);
}
@@ -386,19 +387,19 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
ScoreDoc[] result;
Query q = new TermQuery(new Term("body","body"));
result = search.search(q,FieldCacheRangeFilter.newFloatRange("id_float",minIdO,medIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newFloatRange("id_float",minIdO,medIdO,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs/2, result.length);
int count = 0;
result = search.search(q,FieldCacheRangeFilter.newFloatRange("id_float",null,medIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newFloatRange("id_float",null,medIdO,F,T), numDocs).scoreDocs;
count += result.length;
result = search.search(q,FieldCacheRangeFilter.newFloatRange("id_float",medIdO,null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newFloatRange("id_float",medIdO,null,F,F), numDocs).scoreDocs;
count += result.length;
assertEquals("sum of two concenatted ranges", numDocs, count);
result = search.search(q,FieldCacheRangeFilter.newFloatRange("id_float",null,null,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newFloatRange("id_float",null,null,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newFloatRange("id_float",Float.valueOf(Float.POSITIVE_INFINITY),null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newFloatRange("id_float",Float.valueOf(Float.POSITIVE_INFINITY),null,F,F), numDocs).scoreDocs;
assertEquals("infinity special case", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newFloatRange("id_float",null,Float.valueOf(Float.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newFloatRange("id_float",null,Float.valueOf(Float.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs;
assertEquals("infinity special case", 0, result.length);
}
@@ -415,19 +416,19 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
ScoreDoc[] result;
Query q = new TermQuery(new Term("body","body"));
result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id_double",minIdO,medIdO,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newDoubleRange("id_double",minIdO,medIdO,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs/2, result.length);
int count = 0;
result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id_double",null,medIdO,F,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newDoubleRange("id_double",null,medIdO,F,T), numDocs).scoreDocs;
count += result.length;
result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id_double",medIdO,null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newDoubleRange("id_double",medIdO,null,F,F), numDocs).scoreDocs;
count += result.length;
assertEquals("sum of two concenatted ranges", numDocs, count);
result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id_double",null,null,T,T), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newDoubleRange("id_double",null,null,T,T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id_double",Double.valueOf(Double.POSITIVE_INFINITY),null,F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newDoubleRange("id_double",Double.valueOf(Double.POSITIVE_INFINITY),null,F,F), numDocs).scoreDocs;
assertEquals("infinity special case", 0, result.length);
result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id_double",null, Double.valueOf(Double.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newDoubleRange("id_double",null, Double.valueOf(Double.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs;
assertEquals("infinity special case", 0, result.length);
}
@@ -440,6 +441,7 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
for (int d = -20; d <= 20; d++) {
Document doc = new Document();
doc.add(new IntField("id_int", d, Field.Store.NO));
doc.add(new NumericDocValuesField("id_int", d));
doc.add(newStringField("body", "body", Field.Store.NO));
writer.addDocument(doc);
}
@@ -457,19 +459,19 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
ScoreDoc[] result;
Query q = new TermQuery(new Term("body","body"));
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",-20,20,T,T), 100).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",-20,20,T,T), 100).scoreDocs;
assertEquals("find all", 40, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",0,20,T,T), 100).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",0,20,T,T), 100).scoreDocs;
assertEquals("find all", 20, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",-20,0,T,T), 100).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",-20,0,T,T), 100).scoreDocs;
assertEquals("find all", 20, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",10,20,T,T), 100).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",10,20,T,T), 100).scoreDocs;
assertEquals("find all", 11, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id_int",-20,-10,T,T), 100).scoreDocs;
result = search.search(q,DocValuesRangeFilter.newIntRange("id_int",-20,-10,T,T), 100).scoreDocs;
assertEquals("find all", 11, result.length);
reader.close();
dir.close();
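
The edits above all follow one pattern: each test now indexes a docvalues field alongside the inverted field, and the filter factory calls move from FieldCacheRangeFilter to DocValuesRangeFilter with identical arguments. A minimal sketch of that pattern, using illustrative values rather than the test's own data:

// index time: pair the indexed field with a docvalues field of the same name
Document doc = new Document();
doc.add(new IntField("id_int", 42, Field.Store.NO));      // inverted, for queries
doc.add(new NumericDocValuesField("id_int", 42));         // docvalues, for the filter
writer.addDocument(doc);

// search time: same factory signature as the old FieldCacheRangeFilter
Filter range = DocValuesRangeFilter.newIntRange("id_int", 0, 100, true, true);
TopDocs hits = searcher.search(new MatchAllDocsQuery(), range, 10);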

View File

@@ -31,7 +31,7 @@ public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
@Override
protected void assertSame(String regexp) throws IOException {
RegexpQuery fieldCache = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
fieldCache.setRewriteMethod(new FieldCacheRewriteMethod());
fieldCache.setRewriteMethod(new DocValuesRewriteMethod());
RegexpQuery filter = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
filter.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
@@ -49,9 +49,9 @@ public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
assertEquals(a1, a2);
assertFalse(a1.equals(b));
a1.setRewriteMethod(new FieldCacheRewriteMethod());
a2.setRewriteMethod(new FieldCacheRewriteMethod());
b.setRewriteMethod(new FieldCacheRewriteMethod());
a1.setRewriteMethod(new DocValuesRewriteMethod());
a2.setRewriteMethod(new DocValuesRewriteMethod());
b.setRewriteMethod(new DocValuesRewriteMethod());
assertEquals(a1, a2);
assertFalse(a1.equals(b));
QueryUtils.check(a1);
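
For the rewrite-method change, the multi-term query itself is untouched; only the rewrite method is swapped, and the target field has to carry sorted docvalues (the TestRegexpRandom2 change later in this commit adds exactly that). A hedged sketch, with an illustrative field name and pattern:

RegexpQuery q = new RegexpQuery(new Term("field", "ab.*"), RegExp.NONE);
q.setRewriteMethod(new DocValuesRewriteMethod());  // terms are matched against the field's docvalues
TopDocs hits = searcher.search(q, 10);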

View File

@@ -18,6 +18,8 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.document.Document;
@@ -31,10 +33,11 @@ import java.util.List;
/**
* A basic unit test for FieldCacheTermsFilter
*
* @see org.apache.lucene.search.FieldCacheTermsFilter
* @see org.apache.lucene.search.DocValuesTermsFilter
*/
public class TestFieldCacheTermsFilter extends LuceneTestCase {
public void testMissingTerms() throws Exception {
assumeTrue("requires support for missing values", defaultCodecSupportsMissingDocValues());
String fieldName = "field1";
Directory rd = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), rd);
@@ -42,6 +45,7 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase {
Document doc = new Document();
int term = i * 10; //terms are units of 10;
doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
w.addDocument(doc);
}
IndexReader reader = w.getReader();
@@ -54,18 +58,18 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase {
List<String> terms = new ArrayList<>();
terms.add("5");
results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
results = searcher.search(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match nothing", 0, results.length);
terms = new ArrayList<>();
terms.add("10");
results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
results = searcher.search(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match 1", 1, results.length);
terms = new ArrayList<>();
terms.add("10");
terms.add("20");
results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
results = searcher.search(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match 2", 2, results.length);
reader.close();

View File

@@ -21,16 +21,20 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/**
*
*/
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // suppress codecs without missing
public class TestFieldValueFilter extends LuceneTestCase {
public void testFieldValueFilterNoValue() throws IOException {
@@ -96,9 +100,12 @@ public class TestFieldValueFilter extends LuceneTestCase {
if (random().nextBoolean()) {
docStates[i] = 1;
doc.add(newTextField("some", "value", Field.Store.YES));
doc.add(new SortedDocValuesField("some", new BytesRef("value")));
}
doc.add(newTextField("all", "test", Field.Store.NO));
doc.add(new SortedDocValuesField("all", new BytesRef("test")));
doc.add(newTextField("id", "" + i, Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef("" + i)));
writer.addDocument(doc);
}
writer.commit();
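
This test only gains docvalues mirrors of its text fields, because FieldValueFilter now answers "does this document have a value for this field" from docvalues rather than from FieldCache's docs-with-field bits. A sketch of how such a filter is typically exercised (the constructor arguments reflect the pre-existing FieldValueFilter API and are otherwise an assumption):

Filter hasValue = new FieldValueFilter("some");        // documents that have a value for "some"
Filter noValue  = new FieldValueFilter("some", true);  // negated: documents without a value
TopDocs hits = searcher.search(new TermQuery(new Term("all", "test")), hasValue, 10);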

View File

@@ -565,46 +565,6 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
testFloatRange(2);
}
private void testSorting(int precisionStep) throws Exception {
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should return descending documents
int num = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < num; i++) {
int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
int upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
int a=lower; lower=upper; upper=a;
}
Query tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
for (int j=1; j<sd.length; j++) {
int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
@Test
public void testSorting_8bit() throws Exception {
testSorting(8);
}
@Test
public void testSorting_4bit() throws Exception {
testSorting(4);
}
@Test
public void testSorting_2bit() throws Exception {
testSorting(2);
}
@Test
public void testEqualsAndHash() throws Exception {
QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test1", 4, 10, 20, true, true));
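
The deleted testSorting methods exercised FieldCache-backed sorting on the plain indexed numeric field; after this change, sort values must come from docvalues (the TestSortDocValues tests further down show the full pattern). A minimal sketch, assuming the index pairs each IntField with a NumericDocValuesField of the same name:

doc.add(new IntField("field8", 1234, Field.Store.NO));
doc.add(new NumericDocValuesField("field8", 1234));
// ...
Sort sort = new Sort(new SortField("field8", SortField.Type.INT, true)); // reverse order
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 10, sort);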

View File

@@ -38,7 +38,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestNumericUtils; // NaN arrays
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -608,51 +607,6 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
testDoubleRange(2);
}
private void testSorting(int precisionStep) throws Exception {
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should return descending documents
int num = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < num; i++) {
long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
long a=lower; lower=upper; upper=a;
}
Query tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
for (int j=1; j<sd.length; j++) {
long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
@Test
public void testSorting_8bit() throws Exception {
testSorting(8);
}
@Test
public void testSorting_6bit() throws Exception {
testSorting(6);
}
@Test
public void testSorting_4bit() throws Exception {
testSorting(4);
}
@Test
public void testSorting_2bit() throws Exception {
testSorting(2);
}
@Test
public void testEqualsAndHash() throws Exception {
QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test1", 4, 10L, 20L, true, true));

View File

@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
@@ -66,11 +67,14 @@ public class TestRegexpRandom2 extends LuceneTestCase {
Document doc = new Document();
Field field = newStringField(fieldName, "", Field.Store.NO);
doc.add(field);
Field dvField = new SortedDocValuesField(fieldName, new BytesRef());
doc.add(dvField);
List<String> terms = new ArrayList<>();
int num = atLeast(200);
for (int i = 0; i < num; i++) {
String s = TestUtil.randomUnicodeString(random());
field.setStringValue(s);
dvField.setBytesValue(new BytesRef(s));
terms.add(s);
writer.addDocument(doc);
}

File diff suppressed because it is too large

View File

@@ -1,804 +0,0 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/** Tests basic sorting on docvalues fields.
* These are mostly like TestSort's tests, except each test
* indexes the field up-front as docvalues, and checks no fieldcaches were made */
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // avoid codecs that don't support "missing"
public class TestSortDocValues extends LuceneTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
// ensure there is nothing in fieldcache before test starts
FieldCache.DEFAULT.purgeAllCaches();
}
private void assertNoFieldCaches() {
// docvalues sorting should NOT create any fieldcache entries!
assertEquals(0, FieldCache.DEFAULT.getCacheEntries().length);
}
/** Tests sorting on type string */
public void testString() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("foo")));
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'bar' comes before 'foo'
assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests reverse sorting on type string */
public void testStringReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("foo")));
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'foo' comes after 'bar' in reverse order
assertEquals("foo", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type string_val */
public void testStringVal() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new BinaryDocValuesField("value", new BytesRef("foo")));
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new BinaryDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING_VAL));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'bar' comes before 'foo'
assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests reverse sorting on type string_val */
public void testStringValReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new BinaryDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new BinaryDocValuesField("value", new BytesRef("foo")));
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING_VAL, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'foo' comes after 'bar' in reverse order
assertEquals("foo", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type string_val, but with a SortedDocValuesField */
public void testStringValSorted() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("foo")));
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING_VAL));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'bar' comes before 'foo'
assertEquals("bar", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("foo", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests reverse sorting on type string_val, but with a SortedDocValuesField */
public void testStringValReverseSorted() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("value", "bar", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("value", new BytesRef("foo")));
doc.add(newStringField("value", "foo", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING_VAL, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'foo' comes after 'bar' in reverse order
assertEquals("foo", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("bar", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type int */
public void testInt() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new NumericDocValuesField("value", 300000));
doc.add(newStringField("value", "300000", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.INT));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// numeric order
assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("300000", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type int in reverse */
public void testIntReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new NumericDocValuesField("value", 300000));
doc.add(newStringField("value", "300000", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.INT, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// reverse numeric order
assertEquals("300000", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("-1", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type int with a missing value */
public void testIntMissing() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.INT));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null is treated as a 0
assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type int, specifying the missing value should be treated as Integer.MAX_VALUE */
public void testIntMissingLast() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
SortField sortField = new SortField("value", SortField.Type.INT);
sortField.setMissingValue(Integer.MAX_VALUE);
Sort sort = new Sort(sortField);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null is treated as a Integer.MAX_VALUE
assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type long */
public void testLong() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new NumericDocValuesField("value", 3000000000L));
doc.add(newStringField("value", "3000000000", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// numeric order
assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("3000000000", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type long in reverse */
public void testLongReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new NumericDocValuesField("value", 3000000000L));
doc.add(newStringField("value", "3000000000", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.LONG, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// reverse numeric order
assertEquals("3000000000", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("-1", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type long with a missing value */
public void testLongMissing() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null is treated as 0
assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type long, specifying the missing value should be treated as Long.MAX_VALUE */
public void testLongMissingLast() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", -1));
doc.add(newStringField("value", "-1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("value", 4));
doc.add(newStringField("value", "4", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
SortField sortField = new SortField("value", SortField.Type.LONG);
sortField.setMissingValue(Long.MAX_VALUE);
Sort sort = new Sort(sortField);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null is treated as Long.MAX_VALUE
assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type float */
public void testFloat() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FloatDocValuesField("value", 30.1F));
doc.add(newStringField("value", "30.1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", -1.3F));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", 4.2F));
doc.add(newStringField("value", "4.2", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// numeric order
assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("30.1", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type float in reverse */
public void testFloatReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FloatDocValuesField("value", 30.1F));
doc.add(newStringField("value", "30.1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", -1.3F));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", 4.2F));
doc.add(newStringField("value", "4.2", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// reverse numeric order
assertEquals("30.1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("-1.3", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type float with a missing value */
public void testFloatMissing() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", -1.3F));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", 4.2F));
doc.add(newStringField("value", "4.2", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null is treated as 0
assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4.2", searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type float, specifying the missing value should be treated as Float.MAX_VALUE */
public void testFloatMissingLast() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", -1.3F));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new FloatDocValuesField("value", 4.2F));
doc.add(newStringField("value", "4.2", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
SortField sortField = new SortField("value", SortField.Type.FLOAT);
sortField.setMissingValue(Float.MAX_VALUE);
Sort sort = new Sort(sortField);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(3, td.totalHits);
// null is treated as Float.MAX_VALUE
assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4.2", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type double */
public void testDouble() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new DoubleDocValuesField("value", 30.1));
doc.add(newStringField("value", "30.1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", -1.3));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333333));
doc.add(newStringField("value", "4.2333333333333", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333332));
doc.add(newStringField("value", "4.2333333333332", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(4, td.totalHits);
// numeric order
assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertEquals("30.1", searcher.doc(td.scoreDocs[3].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type double with +/- zero */
public void testDoubleSignedZero() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new DoubleDocValuesField("value", +0D));
doc.add(newStringField("value", "+0", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", -0D));
doc.add(newStringField("value", "-0", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// numeric order
assertEquals("-0", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("+0", searcher.doc(td.scoreDocs[1].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type double in reverse */
public void testDoubleReverse() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new DoubleDocValuesField("value", 30.1));
doc.add(newStringField("value", "30.1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", -1.3));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333333));
doc.add(newStringField("value", "4.2333333333333", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333332));
doc.add(newStringField("value", "4.2333333333332", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE, true));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(4, td.totalHits);
// numeric order
assertEquals("30.1", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertEquals("-1.3", searcher.doc(td.scoreDocs[3].doc).get("value"));
assertNoFieldCaches();
ir.close();
dir.close();
}
/** Tests sorting on type double with a missing value */
public void testDoubleMissing() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", -1.3));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333333));
doc.add(newStringField("value", "4.2333333333333", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333332));
doc.add(newStringField("value", "4.2333333333332", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(4, td.totalHits);
// null is treated as 0
assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[3].doc).get("value"));
ir.close();
dir.close();
}
/** Tests sorting on type double, specifying that the missing value should be treated as Double.MAX_VALUE */
public void testDoubleMissingLast() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", -1.3));
doc.add(newStringField("value", "-1.3", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333333));
doc.add(newStringField("value", "4.2333333333333", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleDocValuesField("value", 4.2333333333332));
doc.add(newStringField("value", "4.2333333333332", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
writer.shutdown();
IndexSearcher searcher = newSearcher(ir);
SortField sortField = new SortField("value", SortField.Type.DOUBLE);
sortField.setMissingValue(Double.MAX_VALUE);
Sort sort = new Sort(sortField);
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(4, td.totalHits);
// null treated as Double.MAX_VALUE
assertEquals("-1.3", searcher.doc(td.scoreDocs[0].doc).get("value"));
assertEquals("4.2333333333332", searcher.doc(td.scoreDocs[1].doc).get("value"));
assertEquals("4.2333333333333", searcher.doc(td.scoreDocs[2].doc).get("value"));
assertNull(searcher.doc(td.scoreDocs[3].doc).get("value"));
ir.close();
dir.close();
}
}

View File

@ -32,7 +32,9 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
@ -87,7 +89,6 @@ public class TestSortRandom extends LuceneTestCase {
br = new BytesRef(s);
doc.add(new SortedDocValuesField("stringdv", br));
doc.add(newStringField("string", s, Field.Store.NO));
docValues.add(br);
} else {
@ -124,17 +125,12 @@ public class TestSortRandom extends LuceneTestCase {
final SortField sf;
final boolean sortMissingLast;
final boolean missingIsNull;
if (random.nextBoolean()) {
sf = new SortField("stringdv", SortField.Type.STRING, reverse);
// Can only use sort missing if the DVFormat
// supports docsWithField:
sortMissingLast = defaultCodecSupportsDocsWithField() && random().nextBoolean();
missingIsNull = defaultCodecSupportsDocsWithField();
} else {
sf = new SortField("string", SortField.Type.STRING, reverse);
sortMissingLast = random().nextBoolean();
missingIsNull = true;
}
sf = new SortField("stringdv", SortField.Type.STRING, reverse);
// Can only use sort missing if the DVFormat
// supports docsWithField:
sortMissingLast = defaultCodecSupportsDocsWithField() && random().nextBoolean();
missingIsNull = defaultCodecSupportsDocsWithField();
if (sortMissingLast) {
sf.setMissingValue(SortField.STRING_LAST);
}
@ -264,14 +260,14 @@ public class TestSortRandom extends LuceneTestCase {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final int maxDoc = context.reader().maxDoc();
final FieldCache.Ints idSource = FieldCache.DEFAULT.getInts(context.reader(), "id", false);
final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
assertNotNull(idSource);
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID<maxDoc;docID++) {
if (random.nextFloat() <= density && (acceptDocs == null || acceptDocs.get(docID))) {
bits.set(docID);
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
matchValues.add(docValues.get(idSource.get(docID)));
matchValues.add(docValues.get((int) idSource.get(docID)));
}
}
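For readers following the FieldCache-to-DocValues migration shown in this hunk, a minimal before/after sketch of the access pattern (assumes the "id" field was indexed with NumericDocValuesField; variable names are illustrative):
// Before (FieldCache, removed by this change):
//   FieldCache.Ints ids = FieldCache.DEFAULT.getInts(context.reader(), "id", false);
//   int id = ids.get(docID);
// After (DocValues API; values come back as long, so cast where an int is expected):
NumericDocValues ids = DocValues.getNumeric(context.reader(), "id");
int id = (int) ids.get(docID);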

View File

@ -1,4 +1,4 @@
package org.apache.lucene.sandbox.queries;
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -26,7 +26,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
@ -41,7 +40,7 @@ import org.junit.BeforeClass;
/** Tests for SortedSetSortField selectors other than MIN,
* these require optional codec support (random access to ordinals) */
public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
public class TestSortedSetSelector extends LuceneTestCase {
static Codec savedCodec;
@BeforeClass
@ -61,18 +60,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
Codec.setDefault(savedCodec);
}
@Override
public void setUp() throws Exception {
super.setUp();
// ensure there is nothing in fieldcache before test starts
FieldCache.DEFAULT.purgeAllCaches();
}
private void assertNoFieldCaches() {
// docvalues sorting should NOT create any fieldcache entries!
assertEquals(0, FieldCache.DEFAULT.getCacheEntries().length);
}
public void testMax() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@ -91,14 +78,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSortField.Selector.MAX));
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSelector.Type.MAX));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'baz' comes before 'foo'
assertEquals("2", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -122,14 +108,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", true, SortedSetSortField.Selector.MAX));
Sort sort = new Sort(new SortedSetSortField("value", true, SortedSetSelector.Type.MAX));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'baz' comes before 'foo'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -156,7 +141,7 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
SortField sortField = new SortedSetSortField("value", false, SortedSetSortField.Selector.MAX);
SortField sortField = new SortedSetSortField("value", false, SortedSetSelector.Type.MAX);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
@ -167,7 +152,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// 'baz' comes before 'foo'
assertEquals("3", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[2].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -194,7 +178,7 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
SortField sortField = new SortedSetSortField("value", false, SortedSetSortField.Selector.MAX);
SortField sortField = new SortedSetSortField("value", false, SortedSetSelector.Type.MAX);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
@ -205,7 +189,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
// null comes last
assertEquals("1", searcher.doc(td.scoreDocs[2].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -227,14 +210,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSortField.Selector.MAX));
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSelector.Type.MAX));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'bar' comes before 'baz'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -259,14 +241,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MIN));
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MIN));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'b' comes before 'c'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -291,14 +272,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", true, SortedSetSortField.Selector.MIDDLE_MIN));
Sort sort = new Sort(new SortedSetSortField("value", true, SortedSetSelector.Type.MIDDLE_MIN));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'b' comes before 'c'
assertEquals("2", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -326,7 +306,7 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
SortField sortField = new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MIN);
SortField sortField = new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MIN);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
@ -337,7 +317,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// 'b' comes before 'c'
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[2].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -365,7 +344,7 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
SortField sortField = new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MIN);
SortField sortField = new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MIN);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
@ -376,7 +355,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
// null comes last
assertEquals("3", searcher.doc(td.scoreDocs[2].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -398,14 +376,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MIN));
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MIN));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'bar' comes before 'baz'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -430,14 +407,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MAX));
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MAX));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'b' comes before 'c'
assertEquals("2", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -462,14 +438,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", true, SortedSetSortField.Selector.MIDDLE_MAX));
Sort sort = new Sort(new SortedSetSortField("value", true, SortedSetSelector.Type.MIDDLE_MAX));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'b' comes before 'c'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -497,7 +472,7 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
SortField sortField = new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MAX);
SortField sortField = new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MAX);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort sort = new Sort(sortField);
@ -508,7 +483,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// 'b' comes before 'c'
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertEquals("1", searcher.doc(td.scoreDocs[2].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -536,7 +510,7 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
SortField sortField = new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MAX);
SortField sortField = new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MAX);
sortField.setMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sortField);
@ -547,7 +521,6 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
// null comes last
assertEquals("3", searcher.doc(td.scoreDocs[2].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();
@ -569,14 +542,13 @@ public class TestSortedSetSortFieldSelectors extends LuceneTestCase {
// slow wrapper does not support random access ordinals (there is no need for that!)
IndexSearcher searcher = newSearcher(ir, false);
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSortField.Selector.MIDDLE_MAX));
Sort sort = new Sort(new SortedSetSortField("value", false, SortedSetSelector.Type.MIDDLE_MAX));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits);
// 'bar' comes before 'baz'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertNoFieldCaches();
ir.close();
dir.close();

View File

@ -1,4 +1,4 @@
package org.apache.lucene.sandbox.queries;
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -19,6 +19,7 @@ package org.apache.lucene.sandbox.queries;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
@ -31,21 +32,57 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/** Simple tests for SortedSetSortField */
/** Simple tests for SortedSetSortField, indexing the sortedset up front */
@SuppressCodecs({"Lucene40", "Lucene41"}) // avoid codecs that don't support sortedset
public class TestSortedSetSortField extends LuceneTestCase {
public void testEmptyIndex() throws Exception {
IndexSearcher empty = newSearcher(new MultiReader());
Query query = new TermQuery(new Term("contents", "foo"));
Sort sort = new Sort();
sort.setSort(new SortedSetSortField("sortedset", false));
TopDocs td = empty.search(query, null, 10, sort, true, true);
assertEquals(0, td.totalHits);
// for an empty index, any selector should work
for (SortedSetSelector.Type v : SortedSetSelector.Type.values()) {
sort.setSort(new SortedSetSortField("sortedset", false, v));
td = empty.search(query, null, 10, sort, true, true);
assertEquals(0, td.totalHits);
}
}
public void testEquals() throws Exception {
SortField sf = new SortedSetSortField("a", false);
assertFalse(sf.equals(null));
assertEquals(sf, sf);
SortField sf2 = new SortedSetSortField("a", false);
assertEquals(sf, sf2);
assertEquals(sf.hashCode(), sf2.hashCode());
assertFalse(sf.equals(new SortedSetSortField("a", true)));
assertFalse(sf.equals(new SortedSetSortField("b", false)));
assertFalse(sf.equals(new SortedSetSortField("a", false, SortedSetSelector.Type.MAX)));
assertFalse(sf.equals("foo"));
}
public void testForward() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newStringField("value", "baz", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
doc.add(newStringField("id", "2", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.NO));
doc.add(newStringField("value", "bar", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("foo")));
doc.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("id", "1", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
@ -68,12 +105,12 @@ public class TestSortedSetSortField extends LuceneTestCase {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.NO));
doc.add(newStringField("value", "bar", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("foo")));
doc.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("id", "1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "baz", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
doc.add(newStringField("id", "2", Field.Store.YES));
writer.addDocument(doc);
@ -88,7 +125,7 @@ public class TestSortedSetSortField extends LuceneTestCase {
// 'bar' comes before 'baz'
assertEquals("2", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
ir.close();
dir.close();
}
@ -97,12 +134,12 @@ public class TestSortedSetSortField extends LuceneTestCase {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newStringField("value", "baz", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
doc.add(newStringField("id", "2", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.NO));
doc.add(newStringField("value", "bar", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("foo")));
doc.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("id", "1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
@ -123,7 +160,7 @@ public class TestSortedSetSortField extends LuceneTestCase {
assertEquals("3", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(td.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[2].doc).get("id"));
ir.close();
dir.close();
}
@ -132,12 +169,12 @@ public class TestSortedSetSortField extends LuceneTestCase {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newStringField("value", "baz", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
doc.add(newStringField("id", "2", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "foo", Field.Store.NO));
doc.add(newStringField("value", "bar", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("foo")));
doc.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("id", "1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
@ -158,7 +195,7 @@ public class TestSortedSetSortField extends LuceneTestCase {
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
// null comes last
assertEquals("3", searcher.doc(td.scoreDocs[2].doc).get("id"));
ir.close();
dir.close();
}
@ -167,11 +204,11 @@ public class TestSortedSetSortField extends LuceneTestCase {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newStringField("value", "baz", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
doc.add(newStringField("id", "2", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(newStringField("value", "bar", Field.Store.NO));
doc.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("id", "1", Field.Store.YES));
writer.addDocument(doc);
IndexReader ir = writer.getReader();
@ -185,41 +222,8 @@ public class TestSortedSetSortField extends LuceneTestCase {
// 'bar' comes before 'baz'
assertEquals("1", searcher.doc(td.scoreDocs[0].doc).get("id"));
assertEquals("2", searcher.doc(td.scoreDocs[1].doc).get("id"));
ir.close();
dir.close();
}
public void testEmptyIndex() throws Exception {
IndexSearcher empty = newSearcher(new MultiReader());
Query query = new TermQuery(new Term("contents", "foo"));
Sort sort = new Sort();
sort.setSort(new SortedSetSortField("sortedset", false));
TopDocs td = empty.search(query, null, 10, sort, true, true);
assertEquals(0, td.totalHits);
// for an empty index, any selector should work
for (SortedSetSortField.Selector v : SortedSetSortField.Selector.values()) {
sort.setSort(new SortedSetSortField("sortedset", false, v));
td = empty.search(query, null, 10, sort, true, true);
assertEquals(0, td.totalHits);
}
}
public void testEquals() throws Exception {
SortField sf = new SortedSetSortField("a", false);
assertFalse(sf.equals(null));
assertEquals(sf, sf);
SortField sf2 = new SortedSetSortField("a", false);
assertEquals(sf, sf2);
assertEquals(sf.hashCode(), sf2.hashCode());
assertFalse(sf.equals(new SortedSetSortField("a", true)));
assertFalse(sf.equals(new SortedSetSortField("b", false)));
assertFalse(sf.equals(new SortedSetSortField("a", false, SortedSetSortField.Selector.MAX)));
assertFalse(sf.equals("foo"));
}
}

View File

@ -1,80 +0,0 @@
package org.apache.lucene.util.junitcompat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
public class TestFailOnFieldCacheInsanity extends WithNestedTests {
public TestFailOnFieldCacheInsanity() {
super(true);
}
public static class Nested1 extends WithNestedTests.AbstractNestedTest {
private Directory d;
private IndexReader r;
private AtomicReader subR;
private void makeIndex() throws Exception {
// we use RAMDirectory here, because we don't want to leave open files around on Windows:
d = new RAMDirectory();
@SuppressWarnings("resource") RandomIndexWriter w =
new RandomIndexWriter(random(), d);
Document doc = new Document();
doc.add(newField("ints", "1", StringField.TYPE_NOT_STORED));
w.addDocument(doc);
w.forceMerge(1);
r = w.getReader();
w.shutdown();
subR = r.leaves().get(0).reader();
}
public void testDummy() throws Exception {
makeIndex();
assertNotNull(FieldCache.DEFAULT.getTermsIndex(subR, "ints"));
assertNotNull(FieldCache.DEFAULT.getTerms(subR, "ints", false));
// NOTE: do not close reader/directory, else it
// purges FC entries
}
}
@Test
public void testFailOnFieldCacheInsanity() {
Result r = JUnitCore.runClasses(Nested1.class);
boolean insane = false;
for(Failure f : r.getFailures()) {
if (f.getMessage().indexOf("Insane") != -1) {
insane = true;
break;
}
}
Assert.assertTrue(insane);
}
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
@ -92,19 +93,26 @@ public class DistanceFacetsExample implements Closeable {
// TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
// Add documents with latitude/longitude location:
// we index these both as DoubleFields (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
Document doc = new Document();
doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
writer.addDocument(doc);
doc = new Document();
doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
writer.addDocument(doc);
// Open near-real-time searcher
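Aside: a short sketch (not part of this example class) of how the raw long bits written above are turned back into doubles at search time; atomicReader and docID are assumed to be in scope:
NumericDocValues latitude = DocValues.getNumeric(atomicReader, "latitude");
double lat = Double.longBitsToDouble(latitude.get(docID)); // inverse of Double.doubleToRawLongBits used at index time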

View File

@ -25,10 +25,6 @@ import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.FieldCache.DoubleParser;
import org.apache.lucene.search.FieldCache.FloatParser;
import org.apache.lucene.search.FieldCache.IntParser;
import org.apache.lucene.search.FieldCache.LongParser;
import org.apache.lucene.search.SortField;
/**
@ -87,13 +83,13 @@ public final class SimpleBindings extends Bindings {
SortField field = (SortField) o;
switch(field.getType()) {
case INT:
return new IntFieldSource(field.getField(), (IntParser) field.getParser());
return new IntFieldSource(field.getField());
case LONG:
return new LongFieldSource(field.getField(), (LongParser) field.getParser());
return new LongFieldSource(field.getField());
case FLOAT:
return new FloatFieldSource(field.getField(), (FloatParser) field.getParser());
return new FloatFieldSource(field.getField());
case DOUBLE:
return new DoubleFieldSource(field.getField(), (DoubleParser) field.getParser());
return new DoubleFieldSource(field.getField());
case SCORE:
return getScoreValueSource();
default:

View File

@ -1,7 +1,6 @@
package org.apache.lucene.expressions;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.expressions.js.JavascriptCompiler;
@ -53,24 +52,24 @@ public class TestDemoExpressions extends LuceneTestCase {
doc.add(newStringField("id", "1", Field.Store.YES));
doc.add(newTextField("body", "some contents and more contents", Field.Store.NO));
doc.add(new NumericDocValuesField("popularity", 5));
doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "2", Field.Store.YES));
doc.add(newTextField("body", "another document with different contents", Field.Store.NO));
doc.add(new NumericDocValuesField("popularity", 20));
doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "3", Field.Store.YES));
doc.add(newTextField("body", "crappy contents", Field.Store.NO));
doc.add(new NumericDocValuesField("popularity", 2));
doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
iw.addDocument(doc);
reader = iw.getReader();
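A hedged sketch of how these fields are typically consumed through the expressions module (the query, constants and variable names are illustrative; searcher is assumed to be in scope, and JavascriptCompiler.compile can throw ParseException):
Expression distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
SimpleBindings bindings = new SimpleBindings();
// SortField.Type.DOUBLE makes SimpleBindings read the NumericDocValues and
// reinterpret the raw bits as a double (matching doubleToRawLongBits above).
bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
Sort sort = new Sort(distance.getSortField(bindings, false));
TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10, sort);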

View File

@ -30,6 +30,7 @@ import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
@ -497,6 +498,7 @@ public class TestDrillSideways extends FacetTestCase {
for(Doc rawDoc : docs) {
Document doc = new Document();
doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
if (VERBOSE) {

View File

@ -80,27 +80,27 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
// Reused across documents, to add the necessary facet
// fields:
Document doc = new Document();
doc.add(new IntField("num", 10, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 10));
doc.add(new FacetField("Author", "Bob"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new IntField("num", 20, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 20));
doc.add(new FacetField("Author", "Lisa"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new IntField("num", 30, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 30));
doc.add(new FacetField("Author", "Lisa"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new IntField("num", 40, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 40));
doc.add(new FacetField("Author", "Susan"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new IntField("num", 45, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 45));
doc.add(new FacetField("Author", "Frank"));
writer.addDocument(config.build(taxoWriter, doc));
@ -145,7 +145,7 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.add(new IntField("num", 10, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 10));
doc.add(new FacetField("a", "foo1"));
writer.addDocument(config.build(taxoWriter, doc));
@ -154,7 +154,7 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
}
doc = new Document();
doc.add(new IntField("num", 20, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 20));
doc.add(new FacetField("a", "foo2"));
doc.add(new FacetField("b", "bar1"));
writer.addDocument(config.build(taxoWriter, doc));
@ -164,7 +164,7 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
}
doc = new Document();
doc.add(new IntField("num", 30, Field.Store.NO));
doc.add(new NumericDocValuesField("num", 30));
doc.add(new FacetField("a", "foo3"));
doc.add(new FacetField("b", "bar2"));
doc.add(new FacetField("c", "baz1"));

View File

@ -300,7 +300,7 @@ public class BlockGroupingCollector extends SimpleCollector {
* This is normally not a problem, as you can obtain the
* value just like you obtain other values for each
* matching document (eg, via stored fields, via
* FieldCache, etc.)
* DocValues, etc.)
*
* @param withinGroupSort The {@link Sort} used to sort
* documents within each group. Passing null is

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search.grouping;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
@ -78,7 +77,7 @@ public class GroupingSearch {
private Bits matchingGroupHeads;
/**
* Constructs a <code>GroupingSearch</code> instance that groups documents by index terms using the {@link FieldCache}.
* Constructs a <code>GroupingSearch</code> instance that groups documents by index terms using DocValues.
* The group field can only have one token per document. This means that the field must not be analysed.
*
* @param groupField The name of the field to group by.
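For orientation, a minimal usage sketch under the DocValues-based contract described above (field names, query and limits are illustrative, not taken from this patch; searcher is assumed to be an IndexSearcher over an index whose group field was written as shown):
// Index side: the group field is single-valued and indexed as a SortedDocValuesField.
doc.add(new SortedDocValuesField("author", new BytesRef("lisa")));
// Search side:
GroupingSearch groupingSearch = new GroupingSearch("author");
groupingSearch.setGroupSort(Sort.RELEVANCE);
groupingSearch.setGroupDocsLimit(3);
TopGroups<BytesRef> groups = groupingSearch.search(searcher, new TermQuery(new Term("content", "random")), 0, 10);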

View File

@ -80,8 +80,7 @@ field fall into a single group.</p>
<p>Known limitations:</p>
<ul>
<li> For the two-pass grouping search, the group field must be a
single-valued indexed field (or indexed as a {@link org.apache.lucene.document.SortedDocValuesField}).
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.index.SortedDocValues} for this field.
field indexed as a {@link org.apache.lucene.document.SortedDocValuesField}.
<li> Although Solr supports grouping by function and this module has an abstraction of what a group is, there are currently only
implementations for grouping based on terms.
<li> Sharding is not directly supported, though is not too

View File

@ -18,9 +18,8 @@ package org.apache.lucene.search.grouping.term;
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
@ -161,7 +160,7 @@ public abstract class TermAllGroupHeadsCollector<GH extends AbstractAllGroupHead
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
groupIndex = DocValues.getSorted(context.reader(), groupField);
for (GroupHead groupHead : groups.values()) {
for (int i = 0; i < groupHead.comparators.length; i++) {
@ -276,13 +275,13 @@ public abstract class TermAllGroupHeadsCollector<GH extends AbstractAllGroupHead
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
groupIndex = DocValues.getSorted(context.reader(), groupField);
for (int i = 0; i < fields.length; i++) {
if (fields[i].getType() == SortField.Type.SCORE) {
continue;
}
sortsIndex[i] = FieldCache.DEFAULT.getTermsIndex(context.reader(), fields[i].getField());
sortsIndex[i] = DocValues.getSorted(context.reader(), fields[i].getField());
}
// Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
@ -444,9 +443,9 @@ public abstract class TermAllGroupHeadsCollector<GH extends AbstractAllGroupHead
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
groupIndex = DocValues.getSorted(context.reader(), groupField);
for (int i = 0; i < fields.length; i++) {
sortsIndex[i] = FieldCache.DEFAULT.getTermsIndex(context.reader(), fields[i].getField());
sortsIndex[i] = DocValues.getSorted(context.reader(), fields[i].getField());
}
// Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
@ -587,7 +586,7 @@ public abstract class TermAllGroupHeadsCollector<GH extends AbstractAllGroupHead
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
groupIndex = DocValues.getSorted(context.reader(), groupField);
// Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
ordSet.clear();
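The per-segment pattern behind the ordSet handling above, as a sketch (knownGroups is a hypothetical collection of previously collected group values; everything else follows the SortedDocValues API used in this hunk):
SortedDocValues groupIndex = DocValues.getSorted(context.reader(), groupField);
ordSet.clear();
for (BytesRef groupValue : knownGroups) {
  // Map the global group value into this segment's ord space; a negative result means "not present here".
  int ord = groupValue == null ? -1 : groupIndex.lookupTerm(groupValue);
  if (groupValue == null || ord >= 0) {
    ordSet.put(ord);
  }
}
// Per matching document, the group ord is then just: groupIndex.getOrd(docID)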

View File

@ -18,9 +18,9 @@ package org.apache.lucene.search.grouping.term;
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
@ -105,7 +105,7 @@ public class TermAllGroupsCollector extends AbstractAllGroupsCollector<BytesRef>
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
index = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
index = DocValues.getSorted(context.reader(), groupField);
// Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
ordSet.clear();

View File

@ -18,9 +18,9 @@ package org.apache.lucene.search.grouping.term;
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.grouping.AbstractDistinctValuesCollector;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.util.BytesRef;
@ -109,8 +109,8 @@ public class TermDistinctValuesCollector extends AbstractDistinctValuesCollector
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
groupFieldTermIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
countFieldTermIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), countField);
groupFieldTermIndex = DocValues.getSorted(context.reader(), groupField);
countFieldTermIndex = DocValues.getSorted(context.reader(), countField);
ordSet.clear();
for (GroupCount group : groups) {
int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.lookupTerm(group.groupValue);

View File

@ -20,9 +20,9 @@ package org.apache.lucene.search.grouping.term;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractFirstPassGroupingCollector;
import org.apache.lucene.util.BytesRef;
@ -46,7 +46,7 @@ public class TermFirstPassGroupingCollector extends AbstractFirstPassGroupingCol
*
* @param groupField The field used to group
* documents. This field must be single-valued and
* indexed (FieldCache is used to access its value
* indexed (DocValues is used to access its value
* per-document).
* @param groupSort The {@link Sort} used to sort the
* groups. The top sorted document within each group
@ -88,6 +88,6 @@ public class TermFirstPassGroupingCollector extends AbstractFirstPassGroupingCol
@Override
protected void doSetNextReader(AtomicReaderContext readerContext) throws IOException {
super.doSetNextReader(readerContext);
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), groupField);
index = DocValues.getSorted(readerContext.reader(), groupField);
}
}

View File

@ -18,11 +18,11 @@ package org.apache.lucene.search.grouping.term;
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
@ -34,7 +34,7 @@ import java.util.List;
/**
* An implementation of {@link AbstractGroupFacetCollector} that computes grouped facets based on the indexed terms
* from the {@link FieldCache}.
* from DocValues.
*
* @lucene.experimental
*/
@ -128,8 +128,8 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
segmentResults.add(createSegmentResult());
}
groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
facetFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), facetField);
groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
facetFieldTermsIndex = DocValues.getSorted(context.reader(), facetField);
// 1+ to allow for the -1 "not set":
segmentFacetCounts = new int[facetFieldTermsIndex.getValueCount()+1];
@ -283,8 +283,8 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
segmentResults.add(createSegmentResult());
}
groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField);
facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
if (facetFieldNumTerms == 0) {
facetOrdTermsEnum = null;

View File

@ -21,9 +21,9 @@ import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.SearchGroup;
@ -56,7 +56,7 @@ public class TermSecondPassGroupingCollector extends AbstractSecondPassGroupingC
@Override
protected void doSetNextReader(AtomicReaderContext readerContext) throws IOException {
super.doSetNextReader(readerContext);
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), groupField);
index = DocValues.getSorted(readerContext.reader(), groupField);
// Rebuild ordSet
ordSet.clear();

View File

@ -16,6 +16,6 @@
-->
<html>
<body>
Support for grouping by indexed terms via {@link org.apache.lucene.search.FieldCache}.
Support for grouping by indexed terms via {@link org.apache.lucene.index.DocValues}.
</body>
</html>

View File

@ -17,23 +17,35 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
@ -48,22 +60,8 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
public class AllGroupHeadsCollectorTest extends LuceneTestCase {
private static final DocValuesType[] vts = new DocValuesType[]{
DocValuesType.BINARY, DocValuesType.SORTED
};
public void testBasic() throws Exception {
final String groupField = "author";
Directory dir = newDirectory();
@ -72,30 +70,30 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
DocValuesType valueType = vts[random().nextInt(vts.length)];
DocValuesType valueType = DocValuesType.SORTED;
// 0
Document doc = new Document();
addGroupField(doc, groupField, "author1", valueType);
doc.add(newTextField("content", "random text", Field.Store.NO));
doc.add(new IntField("id_1", 1, Field.Store.NO));
doc.add(newStringField("id_2", "1", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 1));
doc.add(new SortedDocValuesField("id_2", new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
addGroupField(doc, groupField, "author1", valueType);
doc.add(newTextField("content", "some more random text blob", Field.Store.NO));
doc.add(new IntField("id_1", 2, Field.Store.NO));
doc.add(newStringField("id_2", "2", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 2));
doc.add(new SortedDocValuesField("id_2", new BytesRef("2")));
w.addDocument(doc);
// 2
doc = new Document();
addGroupField(doc, groupField, "author1", valueType);
doc.add(newTextField("content", "some more random textual data", Field.Store.NO));
doc.add(new IntField("id_1", 3, Field.Store.NO));
doc.add(newStringField("id_2", "3", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 3));
doc.add(new SortedDocValuesField("id_2", new BytesRef("3")));
w.addDocument(doc);
w.commit(); // To ensure a second segment
@ -103,38 +101,38 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
doc = new Document();
addGroupField(doc, groupField, "author2", valueType);
doc.add(newTextField("content", "some random text", Field.Store.NO));
doc.add(new IntField("id_1", 4, Field.Store.NO));
doc.add(newStringField("id_2", "4", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 4));
doc.add(new SortedDocValuesField("id_2", new BytesRef("4")));
w.addDocument(doc);
// 4
doc = new Document();
addGroupField(doc, groupField, "author3", valueType);
doc.add(newTextField("content", "some more random text", Field.Store.NO));
doc.add(new IntField("id_1", 5, Field.Store.NO));
doc.add(newStringField("id_2", "5", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 5));
doc.add(new SortedDocValuesField("id_2", new BytesRef("5")));
w.addDocument(doc);
// 5
doc = new Document();
addGroupField(doc, groupField, "author3", valueType);
doc.add(newTextField("content", "random blob", Field.Store.NO));
doc.add(new IntField("id_1", 6, Field.Store.NO));
doc.add(newStringField("id_2", "6", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 6));
doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
w.addDocument(doc);
// 6 -- no author field
doc = new Document();
doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
doc.add(new IntField("id_1", 6, Field.Store.NO));
doc.add(newStringField("id_2", "6", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 6));
doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
w.addDocument(doc);
// 7 -- no author field
doc = new Document();
doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
doc.add(new IntField("id_1", 7, Field.Store.NO));
doc.add(newStringField("id_2", "7", Field.Store.NO));
doc.add(new NumericDocValuesField("id_1", 7));
doc.add(new SortedDocValuesField("id_2", new BytesRef("7")));
w.addDocument(doc);
IndexReader reader = w.getReader();
@ -198,6 +196,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
// B/c of DV based impl we can't see the difference between an empty string and a null value.
// For that reason we don't generate empty string groups.
randomValue = TestUtil.randomRealisticUnicodeString(random());
//randomValue = TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
groups.add(new BytesRef(randomValue));
}
@ -224,31 +223,20 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())));
DocValuesType valueType = vts[random().nextInt(vts.length)];
DocValuesType valueType = DocValuesType.SORTED;
Document doc = new Document();
Document docNoGroup = new Document();
Field group = newStringField("group", "", Field.Store.NO);
doc.add(group);
Field valuesField = null;
switch(valueType) {
case BINARY:
valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
break;
case SORTED:
valuesField = new SortedDocValuesField("group_dv", new BytesRef());
break;
default:
fail("unhandled type");
}
valuesField = new SortedDocValuesField("group", new BytesRef());
doc.add(valuesField);
Field sort1 = newStringField("sort1", "", Field.Store.NO);
Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
doc.add(sort1);
docNoGroup.add(sort1);
Field sort2 = newStringField("sort2", "", Field.Store.NO);
Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
doc.add(sort2);
docNoGroup.add(sort2);
Field sort3 = newStringField("sort3", "", Field.Store.NO);
Field sort3 = new SortedDocValuesField("sort3", new BytesRef());
doc.add(sort3);
docNoGroup.add(sort3);
Field content = newTextField("content", "", Field.Store.NO);
@ -257,6 +245,9 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
IntField id = new IntField("id", 0, Field.Store.NO);
doc.add(id);
docNoGroup.add(id);
NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
doc.add(idDV);
docNoGroup.add(idDV);
final GroupDoc[] groupDocs = new GroupDoc[numDocs];
for (int i = 0; i < numDocs; i++) {
final BytesRef groupValue;
@ -283,14 +274,14 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
groupDocs[i] = groupDoc;
if (groupDoc.group != null) {
group.setStringValue(groupDoc.group.utf8ToString());
valuesField.setBytesValue(new BytesRef(groupDoc.group.utf8ToString()));
}
sort1.setStringValue(groupDoc.sort1.utf8ToString());
sort2.setStringValue(groupDoc.sort2.utf8ToString());
sort3.setStringValue(groupDoc.sort3.utf8ToString());
sort1.setBytesValue(groupDoc.sort1);
sort2.setBytesValue(groupDoc.sort2);
sort3.setBytesValue(groupDoc.sort3);
content.setStringValue(groupDoc.content);
id.setIntValue(groupDoc.id);
idDV.setLongValue(groupDoc.id);
if (groupDoc.group == null) {
w.addDocument(docNoGroup);
} else {
@ -301,91 +292,86 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
final DirectoryReader r = w.getReader();
w.shutdown();
// NOTE: intentional but temporary field cache insanity!
final FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(r), "id", false);
final NumericDocValues docIdToFieldId = MultiDocValues.getNumericValues(r, "id");
final int[] fieldIdToDocID = new int[numDocs];
for (int i = 0; i < numDocs; i++) {
int fieldId = docIdToFieldId.get(i);
int fieldId = (int) docIdToFieldId.get(i);
fieldIdToDocID[fieldId] = i;
}
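Editorial aside: the composite doc-values view obtained above replaces the old FieldCache.getInts lookup; values come back as long, hence the int casts. A rough sketch of the access pattern, assuming the DirectoryReader r opened above:
// Sketch only: getNumericValues returns null if no document in the index has the field.
NumericDocValues ids = MultiDocValues.getNumericValues(r, "id");
for (int docID = 0; docID < r.maxDoc(); docID++) {
  int fieldId = (int) ids.get(docID); // the value written via NumericDocValuesField
}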
try {
final IndexSearcher s = newSearcher(r);
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
final GroupDoc gd = groupDocs[docIdToFieldId.get(hit.doc)];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
int docId = gd.id;
assertEquals(docId, docIdToFieldId.get(hit.doc));
}
final IndexSearcher s = newSearcher(r);
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
final GroupDoc gd = groupDocs[(int) docIdToFieldId.get(hit.doc)];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
int docId = gd.id;
assertEquals(docId, docIdToFieldId.get(hit.doc));
}
for (GroupDoc gd : groupDocs) {
assertTrue(gd.score != 0.0);
}
for (int searchIter = 0; searchIter < 100; searchIter++) {
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
boolean sortByScoreOnly = random().nextBoolean();
Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
AbstractAllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
// The actual group heads contain Lucene ids. Need to change them into our id values.
for (int i = 0; i < actualGroupHeads.length; i++) {
actualGroupHeads[i] = docIdToFieldId.get(actualGroupHeads[i]);
}
// Allows us to easily iterate and assert the actual and expected results.
Arrays.sort(expectedGroupHeads);
Arrays.sort(actualGroupHeads);
if (VERBOSE) {
System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
System.out.println("Sort within group: " + sortWithinGroup);
System.out.println("Num group: " + numGroups);
System.out.println("Num doc: " + numDocs);
System.out.println("\n=== Expected: \n");
for (int expectedDocId : expectedGroupHeads) {
GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
System.out.println(
String.format(Locale.ROOT,
"Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d",
expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(),
expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId
)
);
}
System.out.println("\n=== Actual: \n");
for (int actualDocId : actualGroupHeads) {
GroupDoc actualGroupDoc = groupDocs[actualDocId];
String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
System.out.println(
String.format(Locale.ROOT,
"Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d",
actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(),
actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId
)
);
}
System.out.println("\n===================================================================================");
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
} finally {
QueryUtils.purgeFieldCache(r);
}
for (GroupDoc gd : groupDocs) {
assertTrue(gd.score != 0.0);
}
for (int searchIter = 0; searchIter < 100; searchIter++) {
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
boolean sortByScoreOnly = random().nextBoolean();
Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
AbstractAllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
// The actual group heads contain Lucene ids. Need to change them into our id values.
for (int i = 0; i < actualGroupHeads.length; i++) {
actualGroupHeads[i] = (int) docIdToFieldId.get(actualGroupHeads[i]);
}
// Allows us to easily iterate and assert the actual and expected results.
Arrays.sort(expectedGroupHeads);
Arrays.sort(actualGroupHeads);
if (VERBOSE) {
System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
System.out.println("Sort within group: " + sortWithinGroup);
System.out.println("Num group: " + numGroups);
System.out.println("Num doc: " + numDocs);
System.out.println("\n=== Expected: \n");
for (int expectedDocId : expectedGroupHeads) {
GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
System.out.println(
String.format(Locale.ROOT,
"Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d",
expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(),
expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId
)
);
}
System.out.println("\n=== Actual: \n");
for (int actualDocId : actualGroupHeads) {
GroupDoc actualGroupDoc = groupDocs[actualDocId];
String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
System.out.println(
String.format(Locale.ROOT,
"Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d",
actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(),
actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId
)
);
}
System.out.println("\n===================================================================================");
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
r.close();
dir.close();
}
@ -542,14 +528,13 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
}
private void addGroupField(Document doc, String groupField, String value, DocValuesType valueType) {
doc.add(new TextField(groupField, value, Field.Store.NO));
Field valuesField = null;
switch(valueType) {
case BINARY:
valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
valuesField = new BinaryDocValuesField(groupField, new BytesRef(value));
break;
case SORTED:
valuesField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
valuesField = new SortedDocValuesField(groupField, new BytesRef(value));
break;
default:
fail("unhandled type");

View File

@ -17,34 +17,6 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.grouping.function.FunctionDistinctValuesCollector;
import org.apache.lucene.search.grouping.function.FunctionFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.term.TermDistinctValuesCollector;
import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueStr;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@ -59,74 +31,92 @@ import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.grouping.function.FunctionDistinctValuesCollector;
import org.apache.lucene.search.grouping.function.FunctionFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.term.TermDistinctValuesCollector;
import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueStr;
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // we need missing support... i think?
public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
private final static NullComparator nullComparator = new NullComparator();
private final String groupField = "author";
private final String dvGroupField = "author_dv";
private final String countField = "publisher";
private final String dvCountField = "publisher_dv";
public void testSimple() throws Exception {
Random random = random();
DocValuesType[] dvTypes = new DocValuesType[]{
DocValuesType.NUMERIC,
DocValuesType.BINARY,
DocValuesType.SORTED,
};
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random,
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
boolean canUseDV = true;
DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null;
Document doc = new Document();
addField(doc, groupField, "1", dvType);
addField(doc, countField, "1", dvType);
addField(doc, groupField, "1");
addField(doc, countField, "1");
doc.add(new TextField("content", "random text", Field.Store.NO));
doc.add(new StringField("id", "1", Field.Store.NO));
w.addDocument(doc);
// 1
doc = new Document();
addField(doc, groupField, "1", dvType);
addField(doc, countField, "1", dvType);
addField(doc, groupField, "1");
addField(doc, countField, "1");
doc.add(new TextField("content", "some more random text blob", Field.Store.NO));
doc.add(new StringField("id", "2", Field.Store.NO));
w.addDocument(doc);
// 2
doc = new Document();
addField(doc, groupField, "1", dvType);
addField(doc, countField, "2", dvType);
addField(doc, groupField, "1");
addField(doc, countField, "2");
doc.add(new TextField("content", "some more random textual data", Field.Store.NO));
doc.add(new StringField("id", "3", Field.Store.NO));
w.addDocument(doc);
w.commit(); // To ensure a second segment
// 3
// 3 -- no count field
doc = new Document();
addField(doc, groupField, "2", dvType);
addField(doc, groupField, "2");
doc.add(new TextField("content", "some random text", Field.Store.NO));
doc.add(new StringField("id", "4", Field.Store.NO));
w.addDocument(doc);
// 4
doc = new Document();
addField(doc, groupField, "3", dvType);
addField(doc, countField, "1", dvType);
addField(doc, groupField, "3");
addField(doc, countField, "1");
doc.add(new TextField("content", "some more random text", Field.Store.NO));
doc.add(new StringField("id", "5", Field.Store.NO));
w.addDocument(doc);
// 5
doc = new Document();
addField(doc, groupField, "3", dvType);
addField(doc, countField, "1", dvType);
addField(doc, groupField, "3");
addField(doc, countField, "1");
doc.add(new TextField("content", "random blob", Field.Store.NO));
doc.add(new StringField("id", "6", Field.Store.NO));
w.addDocument(doc);
@ -134,7 +124,7 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
// 6 -- no author field
doc = new Document();
doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
addField(doc, countField, "1", dvType);
addField(doc, countField, "1");
doc.add(new StringField("id", "6", Field.Store.NO));
w.addDocument(doc);
@ -160,13 +150,13 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
};
// === Search for content:random
AbstractFirstPassGroupingCollector<Comparable<Object>> firstCollector = createRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
AbstractFirstPassGroupingCollector<Comparable<Object>> firstCollector = createRandomFirstPassCollector(new Sort(), groupField, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), firstCollector);
AbstractDistinctValuesCollector<? extends AbstractDistinctValuesCollector.GroupCount<Comparable<Object>>> distinctValuesCollector
= createDistinctCountCollector(firstCollector, groupField, countField, dvType);
= createDistinctCountCollector(firstCollector, groupField, countField);
indexSearcher.search(new TermQuery(new Term("content", "random")), distinctValuesCollector);
List<? extends AbstractDistinctValuesCollector.GroupCount<Comparable<Object>>> gcs = distinctValuesCollector.getGroups();
List<? extends AbstractDistinctValuesCollector.GroupCount<Comparable<Object>>> gcs = distinctValuesCollector.getGroups();
Collections.sort(gcs, cmp);
assertEquals(4, gcs.size());
@ -193,9 +183,9 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
compare("1", countValues.get(0));
// === Search for content:some
firstCollector = createRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
firstCollector = createRandomFirstPassCollector(new Sort(), groupField, 10);
indexSearcher.search(new TermQuery(new Term("content", "some")), firstCollector);
distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType);
distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField);
indexSearcher.search(new TermQuery(new Term("content", "some")), distinctValuesCollector);
gcs = distinctValuesCollector.getGroups();
@ -220,9 +210,9 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
compare("1", countValues.get(0));
// === Search for content:blob
firstCollector = createRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
firstCollector = createRandomFirstPassCollector(new Sort(), groupField, 10);
indexSearcher.search(new TermQuery(new Term("content", "blob")), firstCollector);
distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType);
distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField);
indexSearcher.search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);
gcs = distinctValuesCollector.getGroups();
@ -251,18 +241,16 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
IndexContext context = createIndexContext();
for (int searchIter = 0; searchIter < 100; searchIter++) {
final IndexSearcher searcher = newSearcher(context.indexReader);
boolean useDv = context.dvType != null && random.nextBoolean();
DocValuesType dvType = useDv ? context.dvType : null;
String term = context.contentStrings[random.nextInt(context.contentStrings.length)];
Sort groupSort = new Sort(new SortField("id", SortField.Type.STRING));
int topN = 1 + random.nextInt(10);
List<AbstractDistinctValuesCollector.GroupCount<Comparable<?>>> expectedResult = createExpectedResult(context, term, groupSort, topN);
AbstractFirstPassGroupingCollector<Comparable<?>> firstCollector = createRandomFirstPassCollector(dvType, groupSort, groupField, topN);
AbstractFirstPassGroupingCollector<Comparable<?>> firstCollector = createRandomFirstPassCollector(groupSort, groupField, topN);
searcher.search(new TermQuery(new Term("content", term)), firstCollector);
AbstractDistinctValuesCollector<? extends AbstractDistinctValuesCollector.GroupCount<Comparable<?>>> distinctValuesCollector
= createDistinctCountCollector(firstCollector, groupField, countField, dvType);
= createDistinctCountCollector(firstCollector, groupField, countField);
searcher.search(new TermQuery(new Term("content", term)), distinctValuesCollector);
@SuppressWarnings("unchecked")
List<AbstractDistinctValuesCollector.GroupCount<Comparable<?>>> actualResult = (List<AbstractDistinctValuesCollector.GroupCount<Comparable<?>>>) distinctValuesCollector.getGroups();
@ -273,7 +261,6 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
System.out.println("1st pass collector class name=" + firstCollector.getClass().getName());
System.out.println("2nd pass collector class name=" + distinctValuesCollector.getClass().getName());
System.out.println("Search term=" + term);
System.out.println("DVType=" + dvType);
System.out.println("1st pass groups=" + firstCollector.getTopGroups(0, false));
System.out.println("Expected:");
printGroups(expectedResult);
@ -363,33 +350,14 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
}
}
private void addField(Document doc, String field, String value, DocValuesType type) {
doc.add(new StringField(field, value, Field.Store.YES));
if (type == null) {
return;
}
String dvField = field + "_dv";
Field valuesField = null;
switch (type) {
case NUMERIC:
valuesField = new NumericDocValuesField(dvField, Integer.parseInt(value));
break;
case BINARY:
valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
break;
case SORTED:
valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
break;
}
doc.add(valuesField);
private void addField(Document doc, String field, String value) {
doc.add(new SortedDocValuesField(field, new BytesRef(value)));
}
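Editorial aside: addField() now writes the group and count values straight into a SortedDocValuesField, so consumers read them per segment as ordinals. A rough per-segment sketch, assuming a DirectoryReader named reader over this test's index (local names are illustrative):
// Sketch only: per-segment access to the sorted doc values written by addField().
for (AtomicReaderContext leaf : reader.leaves()) {
  SortedDocValues groups = leaf.reader().getSortedDocValues("author");
  if (groups == null) {
    continue; // this segment has no values for the field
  }
  for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
    int ord = groups.getOrd(doc); // -1 when the document has no value
  }
}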
@SuppressWarnings({"unchecked","rawtypes"})
private <T extends Comparable> AbstractDistinctValuesCollector<AbstractDistinctValuesCollector.GroupCount<T>> createDistinctCountCollector(AbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
String groupField,
String countField,
DocValuesType dvType) {
String countField) {
Random random = random();
Collection<SearchGroup<T>> searchGroups = firstPassGroupingCollector.getTopGroups(0, false);
if (FunctionFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
@ -400,20 +368,12 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
}
@SuppressWarnings({"unchecked","rawtypes"})
private <T> AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector(DocValuesType dvType, Sort groupSort, String groupField, int topNGroups) throws IOException {
private <T> AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector(Sort groupSort, String groupField, int topNGroups) throws IOException {
Random random = random();
if (dvType != null) {
if (random.nextBoolean()) {
return (AbstractFirstPassGroupingCollector<T>) new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new HashMap<>(), groupSort, topNGroups);
} else {
return (AbstractFirstPassGroupingCollector<T>) new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
}
if (random.nextBoolean()) {
return (AbstractFirstPassGroupingCollector<T>) new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new HashMap<>(), groupSort, topNGroups);
} else {
if (random.nextBoolean()) {
return (AbstractFirstPassGroupingCollector<T>) new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new HashMap<>(), groupSort, topNGroups);
} else {
return (AbstractFirstPassGroupingCollector<T>) new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
}
return (AbstractFirstPassGroupingCollector<T>) new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
}
}
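Editorial aside: with the DocValuesType parameter gone, the term-based branch above is all that is needed for plain grouping. A minimal usage sketch, mirroring the calls this test makes (searcher and index assumed to exist):
// Sketch only: first-pass grouping on the "author" field, top 10 groups by relevance.
TermFirstPassGroupingCollector firstPass = new TermFirstPassGroupingCollector("author", new Sort(), 10);
searcher.search(new TermQuery(new Term("content", "random")), firstPass);
Collection<SearchGroup<BytesRef>> topGroups = firstPass.getTopGroups(0, true);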
@ -444,10 +404,6 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
private IndexContext createIndexContext() throws Exception {
Random random = random();
DocValuesType[] dvTypes = new DocValuesType[]{
DocValuesType.BINARY,
DocValuesType.SORTED
};
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
@ -457,9 +413,6 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())
);
boolean canUseDV = true;
DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null;
int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
String[] groupValues = new String[numDocs / 5];
String[] countValues = new String[numDocs / 10];
@ -492,10 +445,10 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
Document doc = new Document();
doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.YES));
if (groupValue != null) {
addField(doc, groupField, groupValue, dvType);
addField(doc, groupField, groupValue);
}
if (countValue != null) {
addField(doc, countField, countValue, dvType);
addField(doc, countField, countValue);
}
doc.add(new TextField("content", content, Field.Store.YES));
w.addDocument(doc);
@ -510,22 +463,20 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase {
}
w.shutdown();
return new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.toArray(new String[contentStrings.size()]));
return new IndexContext(dir, reader, searchTermToGroupCounts, contentStrings.toArray(new String[contentStrings.size()]));
}
private static class IndexContext {
final Directory directory;
final DirectoryReader indexReader;
final DocValuesType dvType;
final Map<String, Map<String, Set<String>>> searchTermToGroupCounts;
final String[] contentStrings;
IndexContext(Directory directory, DirectoryReader indexReader, DocValuesType dvType,
IndexContext(Directory directory, DirectoryReader indexReader,
Map<String, Map<String, Set<String>>> searchTermToGroupCounts, String[] contentStrings) {
this.directory = directory;
this.indexReader = indexReader;
this.dvType = dvType;
this.searchTermToGroupCounts = searchTermToGroupCounts;
this.contentStrings = contentStrings;
}

View File

@ -17,24 +17,6 @@ package org.apache.lucene.search.grouping;
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.grouping.term.TermGroupFacetCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@ -49,6 +31,29 @@ import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.grouping.term.TermGroupFacetCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
// Need SSDV
@SuppressCodecs({"Lucene40", "Lucene41"})
public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
public void testSimple() throws Exception {
@ -62,7 +67,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
boolean useDv = random().nextBoolean();
boolean useDv = true;
// 0
Document doc = new Document();
@ -287,7 +292,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
boolean useDv = false;
boolean useDv = true;
// Cannot assert this since we use NoMergePolicy:
w.setDoRandomForceMergeAssert(false);
@ -300,7 +305,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
// 1
doc = new Document();
addField(doc, groupField, "a", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
w.commit();
@ -309,32 +314,32 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
// 2
doc = new Document();
addField(doc, groupField, "a", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
// 3
doc = new Document();
addField(doc, groupField, "a", useDv);
doc.add(new StringField("airport", "dus", Field.Store.NO));
doc.add(new SortedSetDocValuesField("airport", new BytesRef("dus")));
w.addDocument(doc);
// 4
doc = new Document();
addField(doc, groupField, "b", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
// 5
doc = new Document();
addField(doc, groupField, "b", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
// 6
doc = new Document();
addField(doc, groupField, "b", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
doc.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
w.addDocument(doc);
w.commit();
@ -346,7 +351,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
w.shutdown();
IndexSearcher indexSearcher = newSearcher(DirectoryReader.open(dir));
AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true);
AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField + "_dv", "airport", null, true);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(3, airportResult.getTotalCount());
@ -364,10 +369,8 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
}
private void addField(Document doc, String field, String value, boolean canUseIDV) {
doc.add(new StringField(field, value, Field.Store.NO));
if (canUseIDV) {
doc.add(new SortedDocValuesField(field + "_dv", new BytesRef(value)));
}
assert canUseIDV;
doc.add(new SortedDocValuesField(field + "_dv", new BytesRef(value)));
}
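Editorial aside: the facet field in this test is now a SortedSetDocValuesField, which, unlike SORTED, allows several values per document. A small sketch of writing and reading such a field (writer/reader names are illustrative):
// Sketch only: a document may carry multiple values for the same sorted-set field.
Document d = new Document();
d.add(new SortedSetDocValuesField("airport", new BytesRef("ams")));
d.add(new SortedSetDocValuesField("airport", new BytesRef("dus")));
w.addDocument(d);
// Reading side: iterate the ordinals recorded for one document.
int docID = 0;
SortedSetDocValues airports = MultiDocValues.getSortedSetValues(reader, "airport");
airports.setDocument(docID);
long ord;
while ((ord = airports.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
  // each ord identifies one distinct airport value for this doc
}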
public void testRandom() throws Exception {
@ -386,7 +389,6 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
boolean useDv = !multipleFacetsPerDocument && context.useDV && random.nextBoolean();
String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)];
int limit = random.nextInt(context.facetValues.size());
int offset = random.nextInt(context.facetValues.size() - limit);
@ -409,7 +411,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
}
GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix);
AbstractGroupFacetCollector groupFacetCollector = createRandomCollector(useDv ? "group_dv" : "group", useDv ? "facet_dv" : "facet", facetPrefix, multipleFacetsPerDocument);
AbstractGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument);
searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector);
TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount);
@ -417,7 +419,6 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
List<TermGroupFacetCollector.FacetEntry> actualFacetEntries = actualFacetResult.getFacetEntries(offset, limit);
if (VERBOSE) {
System.out.println("Use DV: " + useDv);
System.out.println("Collector: " + groupFacetCollector.getClass().getSimpleName());
System.out.println("Num group: " + context.numGroups);
System.out.println("Num doc: " + context.numDocs);
@ -514,35 +515,29 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
new MockAnalyzer(random)
)
);
boolean canUseDV = true;
boolean useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();
Document doc = new Document();
Document docNoGroup = new Document();
Document docNoFacet = new Document();
Document docNoGroupNoFacet = new Document();
Field group = newStringField("group", "", Field.Store.NO);
Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
if (useDv) {
doc.add(groupDc);
docNoFacet.add(groupDc);
}
Field groupDc = new SortedDocValuesField("group", new BytesRef());
doc.add(groupDc);
docNoFacet.add(groupDc);
doc.add(group);
docNoFacet.add(group);
Field[] facetFields;
if (useDv) {
assert !multipleFacetValuesPerDocument;
if (multipleFacetValuesPerDocument == false) {
facetFields = new Field[2];
facetFields[0] = newStringField("facet", "", Field.Store.NO);
doc.add(facetFields[0]);
docNoGroup.add(facetFields[0]);
facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
facetFields[1] = new SortedDocValuesField("facet", new BytesRef());
doc.add(facetFields[1]);
docNoGroup.add(facetFields[1]);
} else {
facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
for (int i = 0; i < facetFields.length; i++) {
facetFields[i] = newStringField("facet", "", Field.Store.NO);
facetFields[i] = new SortedSetDocValuesField("facet", new BytesRef());
doc.add(facetFields[i]);
docNoGroup.add(facetFields[i]);
}
@ -576,11 +571,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
if (random.nextInt(24) == 17) {
// So we test the "doc doesn't have the group'd
// field" case:
if (useDv) {
groupValue = "";
} else {
groupValue = null;
}
groupValue = "";
} else {
groupValue = groups.get(random.nextInt(groups.size()));
}
@ -592,8 +583,22 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);
List<String> facetVals = new ArrayList<>();
if (useDv || random.nextInt(24) != 18) {
if (useDv) {
if (multipleFacetValuesPerDocument == false) {
String facetValue = facetValues.get(random.nextInt(facetValues.size()));
uniqueFacetValues.add(facetValue);
if (!facetToGroups.containsKey(facetValue)) {
facetToGroups.put(facetValue, new HashSet<String>());
}
Set<String> groupsInFacet = facetToGroups.get(facetValue);
groupsInFacet.add(groupValue);
if (groupsInFacet.size() > facetWithMostGroups) {
facetWithMostGroups = groupsInFacet.size();
}
facetFields[0].setStringValue(facetValue);
facetFields[1].setBytesValue(new BytesRef(facetValue));
facetVals.add(facetValue);
} else {
for (Field facetField : facetFields) {
String facetValue = facetValues.get(random.nextInt(facetValues.size()));
uniqueFacetValues.add(facetValue);
if (!facetToGroups.containsKey(facetValue)) {
@ -604,34 +609,8 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
if (groupsInFacet.size() > facetWithMostGroups) {
facetWithMostGroups = groupsInFacet.size();
}
facetFields[0].setStringValue(facetValue);
facetFields[1].setBytesValue(new BytesRef(facetValue));
facetField.setBytesValue(new BytesRef(facetValue));
facetVals.add(facetValue);
} else {
for (Field facetField : facetFields) {
String facetValue = facetValues.get(random.nextInt(facetValues.size()));
uniqueFacetValues.add(facetValue);
if (!facetToGroups.containsKey(facetValue)) {
facetToGroups.put(facetValue, new HashSet<String>());
}
Set<String> groupsInFacet = facetToGroups.get(facetValue);
groupsInFacet.add(groupValue);
if (groupsInFacet.size() > facetWithMostGroups) {
facetWithMostGroups = groupsInFacet.size();
}
facetField.setStringValue(facetValue);
facetVals.add(facetValue);
}
}
} else {
uniqueFacetValues.add(null);
if (!facetToGroups.containsKey(null)) {
facetToGroups.put(null, new HashSet<String>());
}
Set<String> groupsInFacet = facetToGroups.get(null);
groupsInFacet.add(groupValue);
if (groupsInFacet.size() > facetWithMostGroups) {
facetWithMostGroups = groupsInFacet.size();
}
}
@ -640,11 +619,10 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
}
if (groupValue != null) {
if (useDv) {
groupDc.setBytesValue(new BytesRef(groupValue));
}
groupDc.setBytesValue(new BytesRef(groupValue));
group.setStringValue(groupValue);
} else if (useDv) {
} else {
// TODO: not true
// DV cannot have missing values:
groupDc.setBytesValue(new BytesRef());
}
@ -663,7 +641,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
DirectoryReader reader = writer.getReader();
writer.shutdown();
return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv);
return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues);
}
private GroupedFacetResult createExpectedFacetResult(String searchTerm, IndexContext context, int offset, int limit, int minCount, final boolean orderByCount, String facetPrefix) {
@ -738,8 +716,6 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix);
// DocValues cannot be multi-valued:
assert !multipleFacetsPerDocument || !groupField.endsWith("_dv");
return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random().nextInt(1024));
}
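Editorial aside: the DocValues-only collector returned above is driven the same way as before. A short sketch of the full call sequence, using only calls that appear elsewhere in this test (searcher assumed):
// Sketch only: collect grouped facet counts and read the merged result.
AbstractGroupFacetCollector collector =
    TermGroupFacetCollector.createTermGroupFacetCollector("group", "facet", true, null, 128);
searcher.search(new MatchAllDocsQuery(), collector);
TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults(10, 0, false);
int totalCount = result.getTotalCount();
List<TermGroupFacetCollector.FacetEntry> entries = result.getFacetEntries(0, 10);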
@ -764,10 +740,9 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
final int facetWithMostGroups;
final int numGroups;
final String[] contentStrings;
final boolean useDV;
public IndexContext(Map<String, Map<String, Set<String>>> searchTermToFacetGroups, DirectoryReader r,
int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues, boolean useDV) {
int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues) {
this.searchTermToFacetGroups = searchTermToFacetGroups;
this.indexReader = r;
this.numDocs = numDocs;
@ -776,7 +751,6 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
this.numGroups = numGroups;
this.contentStrings = contentStrings;
this.facetValues = facetValues;
this.useDV = useDV;
}
}

View File

@ -21,6 +21,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
@ -39,6 +41,7 @@ import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueStr;
@ -52,6 +55,7 @@ import java.util.*;
// - test ties
// - test compound sort
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // we need missing support... i think?
public class TestGrouping extends LuceneTestCase {
public void testBasic() throws Exception {
@ -120,10 +124,6 @@ public class TestGrouping extends LuceneTestCase {
final Sort groupSort = Sort.RELEVANCE;
if (random().nextBoolean()) {
groupField += "_dv";
}
final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
@ -172,8 +172,7 @@ public class TestGrouping extends LuceneTestCase {
}
private void addGroupField(Document doc, String groupField, String value) {
doc.add(new TextField(groupField, value, Field.Store.YES));
doc.add(new SortedDocValuesField(groupField + "_dv", new BytesRef(value)));
doc.add(new SortedDocValuesField(groupField, new BytesRef(value)));
}
private AbstractFirstPassGroupingCollector<?> createRandomFirstPassCollector(String groupField, Sort groupSort, int topDocs) throws IOException {
@ -569,10 +568,14 @@ public class TestGrouping extends LuceneTestCase {
docs.add(doc);
if (groupValue.group != null) {
doc.add(newStringField("group", groupValue.group.utf8ToString(), Field.Store.YES));
doc.add(new SortedDocValuesField("group", BytesRef.deepCopyOf(groupValue.group)));
}
doc.add(newStringField("sort1", groupValue.sort1.utf8ToString(), Field.Store.NO));
doc.add(new SortedDocValuesField("sort1", BytesRef.deepCopyOf(groupValue.sort1)));
doc.add(newStringField("sort2", groupValue.sort2.utf8ToString(), Field.Store.NO));
doc.add(new SortedDocValuesField("sort2", BytesRef.deepCopyOf(groupValue.sort2)));
doc.add(new IntField("id", groupValue.id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", groupValue.id));
doc.add(newTextField("content", groupValue.content, Field.Store.NO));
//System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id);
}
@ -642,7 +645,7 @@ public class TestGrouping extends LuceneTestCase {
// For that reason we don't generate empty string
// groups.
randomValue = TestUtil.randomRealisticUnicodeString(random());
//randomValue = _TestUtil.randomSimpleString(random());
//randomValue = TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
groups.add(new BytesRef(randomValue));
@ -670,22 +673,18 @@ public class TestGrouping extends LuceneTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())));
boolean canUseIDV = true;
Document doc = new Document();
Document docNoGroup = new Document();
Field idvGroupField = new SortedDocValuesField("group_dv", new BytesRef());
if (canUseIDV) {
doc.add(idvGroupField);
docNoGroup.add(idvGroupField);
}
Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
doc.add(idvGroupField);
docNoGroup.add(idvGroupField);
Field group = newStringField("group", "", Field.Store.NO);
doc.add(group);
Field sort1 = newStringField("sort1", "", Field.Store.NO);
Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
doc.add(sort1);
docNoGroup.add(sort1);
Field sort2 = newStringField("sort2", "", Field.Store.NO);
Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
doc.add(sort2);
docNoGroup.add(sort2);
Field content = newTextField("content", "", Field.Store.NO);
@ -693,7 +692,10 @@ public class TestGrouping extends LuceneTestCase {
docNoGroup.add(content);
IntField id = new IntField("id", 0, Field.Store.NO);
doc.add(id);
NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
doc.add(idDV);
docNoGroup.add(id);
docNoGroup.add(idDV);
final GroupDoc[] groupDocs = new GroupDoc[numDocs];
for(int i=0;i<numDocs;i++) {
final BytesRef groupValue;
@ -716,19 +718,19 @@ public class TestGrouping extends LuceneTestCase {
groupDocs[i] = groupDoc;
if (groupDoc.group != null) {
group.setStringValue(groupDoc.group.utf8ToString());
if (canUseIDV) {
idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
}
} else if (canUseIDV) {
idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
} else {
// TODO: not true
// Must explicitly set empty string, else eg if
// the segment has all docs missing the field then
// we get null back instead of empty BytesRef:
idvGroupField.setBytesValue(new BytesRef());
}
sort1.setStringValue(groupDoc.sort1.utf8ToString());
sort2.setStringValue(groupDoc.sort2.utf8ToString());
sort1.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort1));
sort2.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort2));
content.setStringValue(groupDoc.content);
id.setIntValue(groupDoc.id);
idDV.setLongValue(groupDoc.id);
if (groupDoc.group == null) {
w.addDocument(docNoGroup);
} else {
@ -742,405 +744,387 @@ public class TestGrouping extends LuceneTestCase {
final DirectoryReader r = w.getReader();
w.shutdown();
// NOTE: intentional but temporary field cache insanity!
final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(r), "id", false);
final NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
DirectoryReader rBlocks = null;
Directory dirBlocks = null;
try {
final IndexSearcher s = newSearcher(r);
if (VERBOSE) {
System.out.println("\nTEST: searcher=" + s);
}
if (SlowCompositeReaderWrapper.class.isAssignableFrom(s.getIndexReader().getClass())) {
canUseIDV = false;
} else {
canUseIDV = true;
}
final ShardState shards = new ShardState(s);
for(int contentID=0;contentID<3;contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
final GroupDoc gd = groupDocs[docIDToID.get(hit.doc)];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
assertEquals(gd.id, docIDToID.get(hit.doc));
}
}
for(GroupDoc gd : groupDocs) {
assertTrue(gd.score != 0.0);
}
// Build 2nd index, where docs are added in blocks by
// group, so we can use single pass collector
dirBlocks = newDirectory();
rBlocks = getDocBlockReader(dirBlocks, groupDocs);
final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
final FieldCache.Ints docIDToIDBlocks = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(rBlocks), "id", false);
final IndexSearcher sBlocks = newSearcher(rBlocks);
final ShardState shardsBlocks = new ShardState(sBlocks);
// ReaderBlocks only increases maxDoc() vs reader, which
// means a monotonic shift in scores, so we can
// reliably remap them w/ Map:
final Map<String,Map<Float,Float>> scoreMap = new HashMap<>();
// Tricky: must separately set .score2, because the doc
// block index was created with possible deletions!
//System.out.println("fixup score2");
for(int contentID=0;contentID<3;contentID++) {
//System.out.println(" term=real" + contentID);
final Map<Float,Float> termScoreMap = new HashMap<>();
scoreMap.put("real"+contentID, termScoreMap);
//System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
//" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
final GroupDoc gd = groupDocsByID[docIDToIDBlocks.get(hit.doc)];
assertTrue(gd.score2 == 0.0);
gd.score2 = hit.score;
assertEquals(gd.id, docIDToIDBlocks.get(hit.doc));
//System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks.get(hit.doc));
termScoreMap.put(gd.score, gd.score2);
}
}
for(int searchIter=0;searchIter<100;searchIter++) {
if (VERBOSE) {
System.out.println("\nTEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
final boolean fillFields = random().nextBoolean();
boolean getScores = random().nextBoolean();
final boolean getMaxScores = random().nextBoolean();
final Sort groupSort = getRandomSort();
//final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
// TODO: also test null (= sort by relevance)
final Sort docSort = getRandomSort();
for(SortField sf : docSort.getSort()) {
if (sf.getType() == SortField.Type.SCORE) {
getScores = true;
break;
}
}
for(SortField sf : groupSort.getSort()) {
if (sf.getType() == SortField.Type.SCORE) {
getScores = true;
break;
}
}
final int topNGroups = TestUtil.nextInt(random(), 1, 30);
//final int topNGroups = 10;
final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
//final int groupOffset = 0;
final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
//final int docOffset = 0;
final boolean doCache = random().nextBoolean();
final boolean doAllGroups = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) +" dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
}
String groupField = "group";
if (canUseIDV && random().nextBoolean()) {
groupField += "_dv";
}
if (VERBOSE) {
System.out.println(" groupField=" + groupField);
}
final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
final AbstractAllGroupsCollector<?> allGroupsCollector;
if (doAllGroups) {
allGroupsCollector = createAllGroupsCollector(c1, groupField);
} else {
allGroupsCollector = null;
}
final boolean useWrappingCollector = random().nextBoolean();
if (doCache) {
final double maxCacheMB = random().nextDouble();
if (VERBOSE) {
System.out.println("TEST: maxCacheMB=" + maxCacheMB);
}
if (useWrappingCollector) {
if (doAllGroups) {
cCache = CachingCollector.create(c1, true, maxCacheMB);
c = MultiCollector.wrap(cCache, allGroupsCollector);
} else {
c = cCache = CachingCollector.create(c1, true, maxCacheMB);
}
} else {
// Collect only into cache, then replay multiple times:
c = cCache = CachingCollector.create(false, true, maxCacheMB);
}
} else {
cCache = null;
if (doAllGroups) {
c = MultiCollector.wrap(c1, allGroupsCollector);
} else {
c = c1;
}
}
// Search top reader:
final Query query = new TermQuery(new Term("content", searchTerm));
s.search(query, c);
if (doCache && !useWrappingCollector) {
if (cCache.isCached()) {
// Replay for first-pass grouping
cCache.replay(c1);
if (doAllGroups) {
// Replay for all groups:
cCache.replay(allGroupsCollector);
}
} else {
// Replay by re-running search:
s.search(query, c1);
if (doAllGroups) {
s.search(query, allGroupsCollector);
}
}
}
// Get 1st pass top groups
final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
final TopGroups<BytesRef> groupsResult;
if (VERBOSE) {
System.out.println("TEST: first pass topGroups");
if (topGroups == null) {
System.out.println(" null");
} else {
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
}
// Get 1st pass top groups using shards
ValueHolder<Boolean> idvBasedImplsUsedSharded = new ValueHolder<>(false);
final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort,
groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, canUseIDV, false, idvBasedImplsUsedSharded);
final AbstractSecondPassGroupingCollector<?> c2;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
c2 = createSecondPassCollector(c1, groupField, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
System.out.println("TEST: cache is intact");
}
cCache.replay(c2);
} else {
if (VERBOSE) {
System.out.println("TEST: cache was too large");
}
s.search(query, c2);
}
} else {
s.search(query, c2);
}
if (doAllGroups) {
TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = getTopGroups(c2, docOffset);
}
} else {
c2 = null;
groupsResult = null;
if (VERBOSE) {
System.out.println("TEST: no results");
}
}
final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) {
if (expectedGroups == null) {
System.out.println("TEST: no expected groups");
} else {
System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc + " score=" + sd.score);
}
}
}
if (groupsResult == null) {
System.out.println("TEST: no matched groups");
} else {
System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : groupsResult.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToID.get(sd.doc) + " score=" + sd.score);
}
}
if (searchIter == 14) {
for(int docIDX=0;docIDX<s.getIndexReader().maxDoc();docIDX++) {
System.out.println("ID=" + docIDToID.get(docIDX) + " explain=" + s.explain(query, docIDX));
}
}
}
if (topGroupsShards == null) {
System.out.println("TEST: no matched-merged groups");
} else {
System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : topGroupsShards.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToID.get(sd.doc) + " score=" + sd.score);
}
}
}
}
assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, groupField.endsWith("_dv"));
// Confirm merged shards match:
assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, idvBasedImplsUsedSharded.value);
if (topGroupsShards != null) {
verifyShards(shards.docStarts, topGroupsShards);
}
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
final TermAllGroupsCollector allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
// NOTE: must be "group" and not "group_dv"
// (groupField) because we didn't index doc
// values in the block index:
allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
allGroupsCollector2 = null;
c4 = c3;
}
// Get block grouping result:
sBlocks.search(query, c4);
@SuppressWarnings({"unchecked","rawtypes"})
final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
final TopGroups<BytesRef> groupsResultBlocks;
if (doAllGroups && tempTopGroupsBlocks != null) {
assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
} else {
groupsResultBlocks = tempTopGroupsBlocks;
}
if (VERBOSE) {
if (groupsResultBlocks == null) {
System.out.println("TEST: no block groups");
} else {
System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
boolean first = true;
for(GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToIDBlocks.get(sd.doc) + " score=" + sd.score);
if (first) {
System.out.println("explain: " + sBlocks.explain(query, sd.doc));
first = false;
}
}
}
}
}
// Get shard'd block grouping result:
// Block index does not index DocValues so we pass
// false for canUseIDV:
final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query,
groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false, new ValueHolder<>(false));
if (expectedGroups != null) {
// Fixup scores for reader2
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
for(ScoreDoc hit : groupDocsHits.scoreDocs) {
final GroupDoc gd = groupDocsByID[hit.doc];
assertEquals(gd.id, hit.doc);
//System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
hit.score = gd.score2;
}
}
final SortField[] sortFields = groupSort.getSort();
final Map<Float,Float> termScoreMap = scoreMap.get(searchTerm);
for(int groupSortIDX=0;groupSortIDX<sortFields.length;groupSortIDX++) {
if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
if (groupDocsHits.groupSortValues != null) {
//System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
}
}
}
}
final SortField[] docSortFields = docSort.getSort();
for(int docSortIDX=0;docSortIDX<docSortFields.length;docSortIDX++) {
if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
for(ScoreDoc _hit : groupDocsHits.scoreDocs) {
FieldDoc hit = (FieldDoc) _hit;
if (hit.fields != null) {
hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
assertNotNull(hit.fields[docSortIDX]);
}
}
}
}
}
}
assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
}
} finally {
QueryUtils.purgeFieldCache(r);
if (rBlocks != null) {
QueryUtils.purgeFieldCache(rBlocks);
final IndexSearcher s = newSearcher(r);
if (VERBOSE) {
System.out.println("\nTEST: searcher=" + s);
}
final ShardState shards = new ShardState(s);
for(int contentID=0;contentID<3;contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
final GroupDoc gd = groupDocs[(int) docIDToID.get(hit.doc)];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
assertEquals(gd.id, docIDToID.get(hit.doc));
}
}
for(GroupDoc gd : groupDocs) {
assertTrue(gd.score != 0.0);
}
// Build 2nd index, where docs are added in blocks by
// group, so we can use single pass collector
dirBlocks = newDirectory();
rBlocks = getDocBlockReader(dirBlocks, groupDocs);
final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
final NumericDocValues docIDToIDBlocks = MultiDocValues.getNumericValues(rBlocks, "id");
assertNotNull(docIDToIDBlocks);
final IndexSearcher sBlocks = newSearcher(rBlocks);
final ShardState shardsBlocks = new ShardState(sBlocks);
// ReaderBlocks only increases maxDoc() vs reader, which
// means a monotonic shift in scores, so we can
// reliably remap them w/ Map:
final Map<String,Map<Float,Float>> scoreMap = new HashMap<>();
// Tricky: must separately set .score2, because the doc
// block index was created with possible deletions!
//System.out.println("fixup score2");
for(int contentID=0;contentID<3;contentID++) {
//System.out.println(" term=real" + contentID);
final Map<Float,Float> termScoreMap = new HashMap<>();
scoreMap.put("real"+contentID, termScoreMap);
//System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
//" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
final GroupDoc gd = groupDocsByID[(int) docIDToIDBlocks.get(hit.doc)];
assertTrue(gd.score2 == 0.0);
gd.score2 = hit.score;
assertEquals(gd.id, docIDToIDBlocks.get(hit.doc));
//System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks.get(hit.doc));
termScoreMap.put(gd.score, gd.score2);
}
}
for(int searchIter=0;searchIter<100;searchIter++) {
if (VERBOSE) {
System.out.println("\nTEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
final boolean fillFields = random().nextBoolean();
boolean getScores = random().nextBoolean();
final boolean getMaxScores = random().nextBoolean();
final Sort groupSort = getRandomSort();
//final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
// TODO: also test null (= sort by relevance)
final Sort docSort = getRandomSort();
for(SortField sf : docSort.getSort()) {
if (sf.getType() == SortField.Type.SCORE) {
getScores = true;
break;
}
}
for(SortField sf : groupSort.getSort()) {
if (sf.getType() == SortField.Type.SCORE) {
getScores = true;
break;
}
}
final int topNGroups = TestUtil.nextInt(random(), 1, 30);
//final int topNGroups = 10;
final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
//final int groupOffset = 0;
final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
//final int docOffset = 0;
final boolean doCache = random().nextBoolean();
final boolean doAllGroups = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) +" dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
}
String groupField = "group";
if (VERBOSE) {
System.out.println(" groupField=" + groupField);
}
final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
final AbstractAllGroupsCollector<?> allGroupsCollector;
if (doAllGroups) {
allGroupsCollector = createAllGroupsCollector(c1, groupField);
} else {
allGroupsCollector = null;
}
final boolean useWrappingCollector = random().nextBoolean();
if (doCache) {
final double maxCacheMB = random().nextDouble();
if (VERBOSE) {
System.out.println("TEST: maxCacheMB=" + maxCacheMB);
}
if (useWrappingCollector) {
if (doAllGroups) {
cCache = CachingCollector.create(c1, true, maxCacheMB);
c = MultiCollector.wrap(cCache, allGroupsCollector);
} else {
c = cCache = CachingCollector.create(c1, true, maxCacheMB);
}
} else {
// Collect only into cache, then replay multiple times:
c = cCache = CachingCollector.create(false, true, maxCacheMB);
}
} else {
cCache = null;
if (doAllGroups) {
c = MultiCollector.wrap(c1, allGroupsCollector);
} else {
c = c1;
}
}
// Search top reader:
final Query query = new TermQuery(new Term("content", searchTerm));
s.search(query, c);
if (doCache && !useWrappingCollector) {
if (cCache.isCached()) {
// Replay for first-pass grouping
cCache.replay(c1);
if (doAllGroups) {
// Replay for all groups:
cCache.replay(allGroupsCollector);
}
} else {
// Replay by re-running search:
s.search(query, c1);
if (doAllGroups) {
s.search(query, allGroupsCollector);
}
}
}
// Get 1st pass top groups
final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
final TopGroups<BytesRef> groupsResult;
if (VERBOSE) {
System.out.println("TEST: first pass topGroups");
if (topGroups == null) {
System.out.println(" null");
} else {
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
}
// Get 1st pass top groups using shards
final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort,
groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, false);
final AbstractSecondPassGroupingCollector<?> c2;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
c2 = createSecondPassCollector(c1, groupField, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
System.out.println("TEST: cache is intact");
}
cCache.replay(c2);
} else {
if (VERBOSE) {
System.out.println("TEST: cache was too large");
}
s.search(query, c2);
}
} else {
s.search(query, c2);
}
if (doAllGroups) {
TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = getTopGroups(c2, docOffset);
}
} else {
c2 = null;
groupsResult = null;
if (VERBOSE) {
System.out.println("TEST: no results");
}
}
final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) {
if (expectedGroups == null) {
System.out.println("TEST: no expected groups");
} else {
System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc + " score=" + sd.score);
}
}
}
if (groupsResult == null) {
System.out.println("TEST: no matched groups");
} else {
System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : groupsResult.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToID.get(sd.doc) + " score=" + sd.score);
}
}
if (searchIter == 14) {
for(int docIDX=0;docIDX<s.getIndexReader().maxDoc();docIDX++) {
System.out.println("ID=" + docIDToID.get(docIDX) + " explain=" + s.explain(query, docIDX));
}
}
}
if (topGroupsShards == null) {
System.out.println("TEST: no matched-merged groups");
} else {
System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : topGroupsShards.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToID.get(sd.doc) + " score=" + sd.score);
}
}
}
}
assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
// Confirm merged shards match:
assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
if (topGroupsShards != null) {
verifyShards(shards.docStarts, topGroupsShards);
}
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
final TermAllGroupsCollector allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
// NOTE: must be "group" and not "group_dv"
// (groupField) because we didn't index doc
// values in the block index:
allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
allGroupsCollector2 = null;
c4 = c3;
}
// Get block grouping result:
sBlocks.search(query, c4);
@SuppressWarnings({"unchecked","rawtypes"})
final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
final TopGroups<BytesRef> groupsResultBlocks;
if (doAllGroups && tempTopGroupsBlocks != null) {
assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
} else {
groupsResultBlocks = tempTopGroupsBlocks;
}
if (VERBOSE) {
if (groupsResultBlocks == null) {
System.out.println("TEST: no block groups");
} else {
System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
boolean first = true;
for(GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToIDBlocks.get(sd.doc) + " score=" + sd.score);
if (first) {
System.out.println("explain: " + sBlocks.explain(query, sd.doc));
first = false;
}
}
}
}
}
// Get shard'd block grouping result:
final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query,
groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
if (expectedGroups != null) {
// Fixup scores for reader2
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
for(ScoreDoc hit : groupDocsHits.scoreDocs) {
final GroupDoc gd = groupDocsByID[hit.doc];
assertEquals(gd.id, hit.doc);
//System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
hit.score = gd.score2;
}
}
final SortField[] sortFields = groupSort.getSort();
final Map<Float,Float> termScoreMap = scoreMap.get(searchTerm);
for(int groupSortIDX=0;groupSortIDX<sortFields.length;groupSortIDX++) {
if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
if (groupDocsHits.groupSortValues != null) {
//System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
}
}
}
}
final SortField[] docSortFields = docSort.getSort();
for(int docSortIDX=0;docSortIDX<docSortFields.length;docSortIDX++) {
if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
for(ScoreDoc _hit : groupDocsHits.scoreDocs) {
FieldDoc hit = (FieldDoc) _hit;
if (hit.fields != null) {
hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
assertNotNull(hit.fields[docSortIDX]);
}
}
}
}
}
}
assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
}
r.close();
dir.close();
rBlocks.close();
dirBlocks.close();
}
@ -1158,7 +1142,7 @@ public class TestGrouping extends LuceneTestCase {
}
private TopGroups<BytesRef> searchShards(IndexSearcher topSearcher, ShardSearcher[] subSearchers, Query query, Sort groupSort, Sort docSort, int groupOffset, int topNGroups, int docOffset,
int topNDocs, boolean getScores, boolean getMaxScores, boolean canUseIDV, boolean preFlex, ValueHolder<Boolean> usedIdvBasedImpl) throws Exception {
int topNDocs, boolean getScores, boolean getMaxScores, boolean canUseIDV, boolean preFlex) throws Exception {
// TODO: swap in caching, all groups collector here
// too...
@ -1182,10 +1166,6 @@ public class TestGrouping extends LuceneTestCase {
}
String groupField = "group";
if (shardsCanUseIDV && random().nextBoolean()) {
groupField += "_dv";
usedIdvBasedImpl.value = true;
}
for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
@ -1257,7 +1237,7 @@ public class TestGrouping extends LuceneTestCase {
}
}
private void assertEquals(FieldCache.Ints docIDtoID, TopGroups<BytesRef> expected, TopGroups<BytesRef> actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores, boolean idvBasedImplsUsed) {
private void assertEquals(NumericDocValues docIDtoID, TopGroups<BytesRef> expected, TopGroups<BytesRef> actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores, boolean idvBasedImplsUsed) {
if (expected == null) {
assertNull(actual);
return;
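The signature change above shows how this test's docID-to-id mapping migrates: assertEquals now receives a NumericDocValues taken from the reader's doc values instead of a FieldCache.Ints array. A minimal sketch of that lookup, using only calls visible in these hunks (the class and method names below are illustrative, not part of the patch):

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;

class DocIdMapSketch {
  /** Resolves the test's "id" value for a Lucene docID; replaces the old FieldCache.Ints#get lookup. */
  static long idOf(IndexReader reader, int docID) throws IOException {
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(reader, "id");
    if (docIDToID == null) {
      // getNumericValues returns null when no document has the field; the test asserts non-null instead.
      throw new IllegalStateException("field \"id\" has no numeric doc values");
    }
    return docIDToID.get(docID);
  }
}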

View File

@ -21,10 +21,10 @@ import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BytesRef;
@ -85,7 +85,7 @@ abstract class TermsCollector extends SimpleCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
docTermOrds = DocValues.getSortedSet(context.reader(), field);
}
}
@ -107,7 +107,7 @@ abstract class TermsCollector extends SimpleCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false);
fromDocTerms = DocValues.getBinary(context.reader(), field);
}
}
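The TermsCollector overrides above swap FieldCache uninversion for the DocValues helper methods. A short, self-contained sketch of the per-segment access pattern the migrated collectors in this patch rely on (the class and method names are illustrative; getDocsWithField appears in later hunks of this commit rather than in this file):

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Bits;

class DocValuesAccessSketch {

  /** Multi-valued field (was FieldCache.DEFAULT.getDocTermOrds(reader, field)). */
  static SortedSetDocValues ordinals(AtomicReader reader, String field) throws IOException {
    return DocValues.getSortedSet(reader, field); // returns an empty instance, not null, for a missing field
  }

  /** Single-valued field (was FieldCache.DEFAULT.getTerms(reader, field, false)). */
  static BinaryDocValues terms(AtomicReader reader, String field) throws IOException {
    return DocValues.getBinary(reader, field);
  }

  /** Per-document "has a value" bits (was FieldCache.DEFAULT.getDocsWithField(reader, field)). */
  static Bits docsWithValue(AtomicReader reader, String field) throws IOException {
    return DocValues.getDocsWithField(reader, field);
  }
}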

View File

@ -21,10 +21,10 @@ import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.ArrayUtil;
@ -131,7 +131,7 @@ abstract class TermsWithScoreCollector extends SimpleCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false);
fromDocTerms = DocValues.getBinary(context.reader(), field);
}
static class Avg extends SV {
@ -217,7 +217,7 @@ abstract class TermsWithScoreCollector extends SimpleCollector {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
fromDocTermOrds = DocValues.getSortedSet(context.reader(), field);
}
static class Avg extends MV {

View File

@ -17,6 +17,13 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
@ -27,13 +34,6 @@ import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
public class TestBlockJoin extends LuceneTestCase {
// One resume...
@ -451,7 +451,7 @@ public class TestBlockJoin extends LuceneTestCase {
final String[] values = fields[fieldID] = new String[valueCount];
for(int i=0;i<valueCount;i++) {
values[i] = TestUtil.randomRealisticUnicodeString(random());
//values[i] = _TestUtil.randomSimpleString(random);
//values[i] = TestUtil.randomSimpleString(random());
}
}
@ -511,9 +511,18 @@ public class TestBlockJoin extends LuceneTestCase {
parentDoc.add(id);
parentJoinDoc.add(id);
parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
id = new NumericDocValuesField("parentID", parentDocID);
parentDoc.add(id);
parentJoinDoc.add(id);
parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
for(int field=0;field<parentFields.length;field++) {
if (random().nextDouble() < 0.9) {
Field f = newStringField("parent" + field, parentFields[field][random().nextInt(parentFields[field].length)], Field.Store.NO);
String s = parentFields[field][random().nextInt(parentFields[field].length)];
Field f = newStringField("parent" + field, s, Field.Store.NO);
parentDoc.add(f);
parentJoinDoc.add(f);
f = new SortedDocValuesField("parent" + field, new BytesRef(s));
parentDoc.add(f);
parentJoinDoc.add(f);
}
@ -548,10 +557,18 @@ public class TestBlockJoin extends LuceneTestCase {
Field childID = new IntField("childID", childDocID, Field.Store.YES);
childDoc.add(childID);
joinChildDoc.add(childID);
childID = new NumericDocValuesField("childID", childDocID);
childDoc.add(childID);
joinChildDoc.add(childID);
for(int childFieldID=0;childFieldID<childFields.length;childFieldID++) {
if (random().nextDouble() < 0.9) {
Field f = newStringField("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)], Field.Store.NO);
String s = childFields[childFieldID][random().nextInt(childFields[childFieldID].length)];
Field f = newStringField("child" + childFieldID, s, Field.Store.NO);
childDoc.add(f);
joinChildDoc.add(f);
f = new SortedDocValuesField("child" + childFieldID, new BytesRef(s));
childDoc.add(f);
joinChildDoc.add(f);
}
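These TestBlockJoin hunks follow one pattern: the ids and the sortable string fields now also get a doc-values field next to their inverted counterparts, presumably because the sort and lookup paths in this commit read doc values rather than uninverting via FieldCache. A self-contained sketch of that dual-field indexing (the helper class and method names are illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.util.BytesRef;

class DualFieldSketch {
  /** Numeric id: stored/indexed for retrieval plus NUMERIC doc values for per-document reads. */
  static void addId(Document doc, String name, int value) {
    doc.add(new IntField(name, value, Field.Store.YES));
    doc.add(new NumericDocValuesField(name, value));
  }

  /** Sortable string: inverted for querying plus SORTED doc values for sorting. */
  static void addSortableString(Document doc, String name, String value) {
    doc.add(new StringField(name, value, Field.Store.NO));
    doc.add(new SortedDocValuesField(name, new BytesRef(value)));
  }
}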
@ -727,7 +744,7 @@ public class TestBlockJoin extends LuceneTestCase {
parentAndChildSort);
if (VERBOSE) {
System.out.println("\nTEST: normal index gets " + results.totalHits + " hits");
System.out.println("\nTEST: normal index gets " + results.totalHits + " hits; sort=" + parentAndChildSort);
final ScoreDoc[] hits = results.scoreDocs;
for(int hitIDX=0;hitIDX<hits.length;hitIDX++) {
final StoredDocument doc = s.doc(hits[hitIDX].doc);
@ -735,7 +752,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println(" parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
FieldDoc fd = (FieldDoc) hits[hitIDX];
if (fd.fields != null) {
System.out.print(" ");
System.out.print(" " + fd.fields.length + " sort values: ");
for(Object o : fd.fields) {
if (o instanceof BytesRef) {
System.out.print(((BytesRef) o).utf8ToString() + " ");

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.join;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.NoMergePolicy;
@ -58,14 +59,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
List<Document> docs = new ArrayList<>();
Document document = new Document();
document.add(new StringField("field2", "a", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("a")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "b", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("b")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "c", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("c")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
@ -78,14 +82,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
docs.clear();
document = new Document();
document.add(new StringField("field2", "c", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("c")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "d", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("d")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "e", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("e")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
@ -97,14 +104,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
docs.clear();
document = new Document();
document.add(new StringField("field2", "e", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("e")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "f", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("f")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "g", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("g")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
@ -116,14 +126,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
docs.clear();
document = new Document();
document.add(new StringField("field2", "g", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("g")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "h", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("h")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "i", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("i")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
@ -136,14 +149,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
docs.clear();
document = new Document();
document.add(new StringField("field2", "i", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("i")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "j", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("j")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "k", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("k")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
@ -155,14 +171,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
docs.clear();
document = new Document();
document.add(new StringField("field2", "k", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("k")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "l", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("l")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "m", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("m")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
@ -180,14 +199,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
docs.clear();
document = new Document();
document.add(new StringField("field2", "m", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("m")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "n", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("n")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "o", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("o")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();

View File

@ -34,10 +34,13 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
@ -53,7 +56,6 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
@ -68,9 +70,11 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"}) // we need SortedSet, docsWithField
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
@ -89,20 +93,25 @@ public class TestJoinUtil extends LuceneTestCase {
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("2")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("3")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 3
@ -110,6 +119,7 @@ public class TestJoinUtil extends LuceneTestCase {
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new TextField(idField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("4")));
w.addDocument(doc);
w.commit();
@ -117,14 +127,18 @@ public class TestJoinUtil extends LuceneTestCase {
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("5")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("6")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
@ -180,16 +194,18 @@ public class TestJoinUtil extends LuceneTestCase {
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "0", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("0")));
w.addDocument(doc);
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
for(int i=0;i<300;i++){
doc.add(new TextField(toField, ""+i, Field.Store.NO));
if(!multipleValues){
w.addDocument(doc);
doc.removeFields(toField);
if (multipleValues) {
for(int i=0;i<300;i++) {
doc.add(new SortedSetDocValuesField(toField, new BytesRef(""+i)));
}
} else {
doc.add(new SortedDocValuesField(toField, new BytesRef("0")));
}
w.addDocument(doc);
@ -317,20 +333,25 @@ public class TestJoinUtil extends LuceneTestCase {
doc.add(new TextField("description", "A random movie", Field.Store.NO));
doc.add(new TextField("name", "Movie 1", Field.Store.NO));
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("2")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("3")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 3
@ -338,6 +359,7 @@ public class TestJoinUtil extends LuceneTestCase {
doc.add(new TextField("description", "A second random movie", Field.Store.NO));
doc.add(new TextField("name", "Movie 2", Field.Store.NO));
doc.add(new TextField(idField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("4")));
w.addDocument(doc);
w.commit();
@ -345,14 +367,18 @@ public class TestJoinUtil extends LuceneTestCase {
doc = new Document();
doc.add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("5")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("6")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
@ -572,6 +598,11 @@ public class TestJoinUtil extends LuceneTestCase {
context.fromDocuments.get(linkValue).add(docs[i]);
context.randomValueFromDocs.get(value).add(docs[i]);
document.add(newTextField(random(), "from", linkValue, Field.Store.NO));
if (multipleValuesPerDocument) {
document.add(new SortedSetDocValuesField("from", new BytesRef(linkValue)));
} else {
document.add(new SortedDocValuesField("from", new BytesRef(linkValue)));
}
} else {
if (!context.toDocuments.containsKey(linkValue)) {
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
@ -583,6 +614,11 @@ public class TestJoinUtil extends LuceneTestCase {
context.toDocuments.get(linkValue).add(docs[i]);
context.randomValueToDocs.get(value).add(docs[i]);
document.add(newTextField(random(), "to", linkValue, Field.Store.NO));
if (multipleValuesPerDocument) {
document.add(new SortedSetDocValuesField("to", new BytesRef(linkValue)));
} else {
document.add(new SortedDocValuesField("to", new BytesRef(linkValue)));
}
}
}
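The random-index builder in TestJoinUtil now mirrors every link value into doc values, choosing SORTED_SET when a document may carry several links and SORTED when it carries exactly one. A compact sketch of that choice (the helper class and method are illustrative, not part of the patch):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.util.BytesRef;

class JoinLinkFieldSketch {
  /** Adds a join link both as an indexed field and as the matching doc-values type. */
  static void addLink(Document doc, String field, String linkValue, boolean multipleValuesPerDocument) {
    doc.add(new TextField(field, linkValue, Field.Store.NO));
    if (multipleValuesPerDocument) {
      doc.add(new SortedSetDocValuesField(field, new BytesRef(linkValue))); // may be added repeatedly per doc
    } else {
      doc.add(new SortedDocValuesField(field, new BytesRef(linkValue)));    // exactly one value per doc
    }
  }
}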
@ -644,7 +680,7 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
docTermOrds = DocValues.getSortedSet(context.reader(), fromField);
}
@Override
@ -682,8 +718,8 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
terms = DocValues.getBinary(context.reader(), fromField);
docsWithField = DocValues.getDocsWithField(context.reader(), fromField);
}
@Override
@ -753,7 +789,7 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
docTermOrds = DocValues.getSortedSet(context.reader(), toField);
}
@Override
@ -781,7 +817,7 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
protected void doSetNextReader(AtomicReaderContext context) throws IOException {
terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
terms = DocValues.getBinary(context.reader(), toField);
docBase = context.docBase;
}

View File

@ -32,6 +32,10 @@
org/apache/lucene/misc/IndexMergeTool.class
"/>
<property name="forbidden-rue-excludes" value="
org/apache/lucene/uninverting/FieldCache$CacheEntry.class
"/>
<import file="../module-build.xml"/>
<target name="install-cpptasks" unless="cpptasks.uptodate" depends="ivy-availability-check,ivy-fail,ivy-configure">

View File

@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.index;
package org.apache.lucene.uninverting;
import java.io.IOException;
import java.util.ArrayList;
@ -23,6 +23,15 @@ import java.util.Arrays;
import java.util.List;
import org.apache.lucene.codecs.PostingsFormat; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
@ -176,6 +185,8 @@ public class DocTermOrds {
public DocTermOrds(AtomicReader reader, Bits liveDocs, String field) throws IOException {
this(reader, liveDocs, field, null, Integer.MAX_VALUE);
}
// TODO: instead of all these ctors and options, take termsenum!
/** Inverts only terms starting w/ prefix */
public DocTermOrds(AtomicReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search;
package org.apache.lucene.uninverting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -28,8 +28,8 @@ import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
@ -45,67 +45,11 @@ import org.apache.lucene.util.RamUsageEstimator;
* <p>Created: May 19, 2004 11:13:14 AM
*
* @since lucene 1.4
* @see org.apache.lucene.util.FieldCacheSanityChecker
* @see FieldCacheSanityChecker
*
* @lucene.internal
*/
public interface FieldCache {
/** Field values as 32-bit signed integers */
public static abstract class Ints {
/** Return an integer representation of this field's value. */
public abstract int get(int docID);
/** Zero value for every document */
public static final Ints EMPTY = new Ints() {
@Override
public int get(int docID) {
return 0;
}
};
}
/** Field values as 64-bit signed long integers */
public static abstract class Longs {
/** Return a long representation of this field's value. */
public abstract long get(int docID);
/** Zero value for every document */
public static final Longs EMPTY = new Longs() {
@Override
public long get(int docID) {
return 0;
}
};
}
/** Field values as 32-bit floats */
public static abstract class Floats {
/** Return a float representation of this field's value. */
public abstract float get(int docID);
/** Zero value for every document */
public static final Floats EMPTY = new Floats() {
@Override
public float get(int docID) {
return 0;
}
};
}
/** Field values as 64-bit doubles */
public static abstract class Doubles {
/** Return a double representation of this field's value. */
public abstract double get(int docID);
/** Zero value for every document */
public static final Doubles EMPTY = new Doubles() {
@Override
public double get(int docID) {
return 0;
}
};
}
interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
@ -115,9 +59,7 @@ public interface FieldCache {
}
/**
* Marker interface as super-interface to all parsers. It
* is used to specify a custom parser to {@link
* SortField#SortField(String, FieldCache.Parser)}.
* interface to all parsers. It is used to parse different numeric types.
*/
public interface Parser {
@ -130,38 +72,9 @@ public interface FieldCache {
* @throws IOException if an {@link IOException} occurs
*/
public TermsEnum termsEnum(Terms terms) throws IOException;
}
/** Interface to parse ints from document fields.
* @see FieldCache#getInts(AtomicReader, String, FieldCache.IntParser, boolean)
*/
public interface IntParser extends Parser {
/** Return an integer representation of this field's value. */
public int parseInt(BytesRef term);
}
/** Interface to parse floats from document fields.
* @see FieldCache#getFloats(AtomicReader, String, FieldCache.FloatParser, boolean)
*/
public interface FloatParser extends Parser {
/** Return a float representation of this field's value. */
public float parseFloat(BytesRef term);
}
/** Interface to parse longs from document fields.
* @see FieldCache#getLongs(AtomicReader, String, FieldCache.LongParser, boolean)
*/
public interface LongParser extends Parser {
/** Return a long representation of this field's value. */
public long parseLong(BytesRef term);
}
/** Interface to parse doubles from document fields.
* @see FieldCache#getDoubles(AtomicReader, String, FieldCache.DoubleParser, boolean)
*/
public interface DoubleParser extends Parser {
/** Return a double representation of this field's value. */
public double parseDouble(BytesRef term);
/** Parses this field's value */
public long parseValue(BytesRef term);
}
/** Expert: The cache used internally by sorting and range query classes. */
@ -171,9 +84,9 @@ public interface FieldCache {
* A parser instance for int values encoded by {@link NumericUtils}, e.g. when indexed
* via {@link IntField}/{@link NumericTokenStream}.
*/
public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
public static final Parser NUMERIC_UTILS_INT_PARSER = new Parser() {
@Override
public int parseInt(BytesRef term) {
public long parseValue(BytesRef term) {
return NumericUtils.prefixCodedToInt(term);
}
@ -192,11 +105,14 @@ public interface FieldCache {
* A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed
* via {@link FloatField}/{@link NumericTokenStream}.
*/
public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
public static final Parser NUMERIC_UTILS_FLOAT_PARSER = new Parser() {
@Override
public float parseFloat(BytesRef term) {
return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term));
public long parseValue(BytesRef term) {
int val = NumericUtils.prefixCodedToInt(term);
if (val<0) val ^= 0x7fffffff;
return val;
}
@Override
public String toString() {
return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER";
@ -212,9 +128,9 @@ public interface FieldCache {
* A parser instance for long values encoded by {@link NumericUtils}, e.g. when indexed
* via {@link LongField}/{@link NumericTokenStream}.
*/
public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
public static final Parser NUMERIC_UTILS_LONG_PARSER = new Parser() {
@Override
public long parseLong(BytesRef term) {
public long parseValue(BytesRef term) {
return NumericUtils.prefixCodedToLong(term);
}
@Override
@ -232,10 +148,12 @@ public interface FieldCache {
* A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed
* via {@link DoubleField}/{@link NumericTokenStream}.
*/
public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
public static final Parser NUMERIC_UTILS_DOUBLE_PARSER = new Parser() {
@Override
public double parseDouble(BytesRef term) {
return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term));
public long parseValue(BytesRef term) {
long val = NumericUtils.prefixCodedToLong(term);
if (val<0) val ^= 0x7fffffffffffffffL;
return val;
}
@Override
public String toString() {
@ -256,83 +174,7 @@ public interface FieldCache {
public Bits getDocsWithField(AtomicReader reader, String field) throws IOException;
/**
* Returns an {@link Ints} over the values found in documents in the given
* field.
*
* @see #getInts(AtomicReader, String, IntParser, boolean)
*/
public Ints getInts(AtomicReader reader, String field, boolean setDocsWithField) throws IOException;
/**
* Returns an {@link Ints} over the values found in documents in the given
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
* uses {@link AtomicReader#getNumericDocValues(String)} to read the values.
* Otherwise, it checks the internal cache for an appropriate entry, and if
* none is found, reads the terms in <code>field</code> as ints and returns
* an array of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
*
* @param reader
* Used to get field values.
* @param field
* Which field contains the ints.
* @param parser
* Computes int for string values. May be {@code null} if the
* requested field was indexed as {@link NumericDocValuesField} or
* {@link IntField}.
* @param setDocsWithField
* If true then {@link #getDocsWithField} will also be computed and
* stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException
* If any error occurs.
*/
public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField) throws IOException;
/**
* Returns a {@link Floats} over the values found in documents in the given
* field.
*
* @see #getFloats(AtomicReader, String, FloatParser, boolean)
*/
public Floats getFloats(AtomicReader reader, String field, boolean setDocsWithField) throws IOException;
/**
* Returns a {@link Floats} over the values found in documents in the given
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
* uses {@link AtomicReader#getNumericDocValues(String)} to read the values.
* Otherwise, it checks the internal cache for an appropriate entry, and if
* none is found, reads the terms in <code>field</code> as floats and returns
* an array of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
*
* @param reader
* Used to get field values.
* @param field
* Which field contains the floats.
* @param parser
* Computes float for string values. May be {@code null} if the
* requested field was indexed as {@link NumericDocValuesField} or
* {@link FloatField}.
* @param setDocsWithField
* If true then {@link #getDocsWithField} will also be computed and
* stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException
* If any error occurs.
*/
public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField) throws IOException;
/**
* Returns a {@link Longs} over the values found in documents in the given
* field.
*
* @see #getLongs(AtomicReader, String, LongParser, boolean)
*/
public Longs getLongs(AtomicReader reader, String field, boolean setDocsWithField) throws IOException;
/**
* Returns a {@link Longs} over the values found in documents in the given
* Returns a {@link NumericDocValues} over the values found in documents in the given
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
* uses {@link AtomicReader#getNumericDocValues(String)} to read the values.
* Otherwise, it checks the internal cache for an appropriate entry, and if
@ -355,41 +197,7 @@ public interface FieldCache {
* @throws IOException
* If any error occurs.
*/
public Longs getLongs(AtomicReader reader, String field, LongParser parser, boolean setDocsWithField) throws IOException;
/**
* Returns a {@link Doubles} over the values found in documents in the given
* field.
*
* @see #getDoubles(AtomicReader, String, DoubleParser, boolean)
*/
public Doubles getDoubles(AtomicReader reader, String field, boolean setDocsWithField) throws IOException;
/**
* Returns a {@link Doubles} over the values found in documents in the given
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
* uses {@link AtomicReader#getNumericDocValues(String)} to read the values.
* Otherwise, it checks the internal cache for an appropriate entry, and if
* none is found, reads the terms in <code>field</code> as doubles and returns
* an array of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
*
* @param reader
* Used to get field values.
* @param field
* Which field contains the doubles.
* @param parser
* Computes double for string values. May be {@code null} if the
* requested field was indexed as {@link NumericDocValuesField} or
* {@link DoubleField}.
* @param setDocsWithField
* If true then {@link #getDocsWithField} will also be computed and
* stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException
* If any error occurs.
*/
public Doubles getDoubles(AtomicReader reader, String field, DoubleParser parser, boolean setDocsWithField) throws IOException;
public NumericDocValues getNumerics(AtomicReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none
* is found, reads the term values in <code>field</code>
@ -431,6 +239,11 @@ public interface FieldCache {
* subsequent calls will share the same cache entry. */
public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException;
/** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { NumericUtils.SHIFT_START_INT });
/** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { NumericUtils.SHIFT_START_LONG });
/**
* Checks the internal cache for an appropriate entry, and if none is found, reads the term values
* in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
@ -438,10 +251,13 @@ public interface FieldCache {
*
* @param reader Used to build a {@link DocTermOrds} instance
* @param field Which field contains the strings.
* @param prefix prefix for a subset of the terms which should be uninverted. Can be null or
* {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
*
* @return a {@link DocTermOrds} instance
* @throws IOException If any error occurs.
*/
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException;
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field, BytesRef prefix) throws IOException;
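The two uninverting entry points above collapse the old per-type getters into getNumerics plus a prefix-aware getDocTermOrds. A sketch of how code inside the new org.apache.lucene.uninverting package could call them; the wrapper class is illustrative, and the package declaration is needed only because the reworked FieldCache interface is package-private:

package org.apache.lucene.uninverting;

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;

class UninvertingAccessSketch {

  /** One call replaces getInts/getFloats/getLongs/getDoubles; the parser decodes indexed terms to long bits. */
  static NumericDocValues ints(AtomicReader reader, String field) throws IOException {
    return FieldCache.DEFAULT.getNumerics(reader, field, FieldCache.NUMERIC_UTILS_INT_PARSER, false);
  }

  /** Uninverts only the 64-bit trie terms of a multi-valued numeric field. */
  static SortedSetDocValues longOrds(AtomicReader reader, String field) throws IOException {
    return FieldCache.DEFAULT.getDocTermOrds(reader, field, FieldCache.INT64_TERM_PREFIX);
  }
}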
/**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
@ -562,7 +378,7 @@ public interface FieldCache {
/**
* If non-null, FieldCacheImpl will warn whenever
* entries are created that are not sane according to
* {@link org.apache.lucene.util.FieldCacheSanityChecker}.
* {@link FieldCacheSanityChecker}.
*/
public void setInfoStream(PrintStream stream);

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search;
package org.apache.lucene.uninverting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -27,7 +27,6 @@ import java.util.WeakHashMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
@ -38,9 +37,9 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FieldCacheSanityChecker;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
@ -61,11 +60,8 @@ class FieldCacheImpl implements FieldCache {
}
private synchronized void init() {
caches = new HashMap<>(9);
caches.put(Integer.TYPE, new IntCache(this));
caches.put(Float.TYPE, new FloatCache(this));
caches = new HashMap<>(6);
caches.put(Long.TYPE, new LongCache(this));
caches.put(Double.TYPE, new DoubleCache(this));
caches.put(BinaryDocValues.class, new BinaryDocValuesCache(this));
caches.put(SortedDocValues.class, new SortedDocValuesCache(this));
caches.put(DocTermOrds.class, new DocTermOrdsCache(this));
@ -352,54 +348,6 @@ class FieldCacheImpl implements FieldCache {
caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, null), bits);
}
@Override
public Ints getInts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getInts(reader, field, null, setDocsWithField);
}
@Override
public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField)
throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Ints() {
@Override
public int get(int docID) {
return (int) valuesIn.get(docID);
}
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return Ints.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Ints.EMPTY;
}
return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
}
static class IntsFromArray extends Ints {
private final PackedInts.Reader values;
private final int minValue;
public IntsFromArray(PackedInts.Reader values, int minValue) {
assert values.getBitsPerValue() <= 32;
this.values = values;
this.minValue = minValue;
}
@Override
public int get(int docID) {
final long delta = values.get(docID);
return minValue + (int) delta;
}
}
private static class HoldsOneThing<T> {
private T it;
@ -421,79 +369,6 @@ class FieldCacheImpl implements FieldCache {
public long minValue;
}
static final class IntCache extends Cache {
IntCache(FieldCacheImpl wrapper) {
super(wrapper);
}
@Override
protected Object createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException {
final IntParser parser = (IntParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = NUMERIC_UTILS_INT_PARSER) so
// cache key includes NUMERIC_UTILS_INT_PARSER:
return wrapper.getInts(reader, key.field, NUMERIC_UTILS_INT_PARSER, setDocsWithField);
}
final HoldsOneThing<GrowableWriterAndMinValue> valuesRef = new HoldsOneThing<>();
Uninvert u = new Uninvert() {
private int minValue;
private int currentValue;
private GrowableWriter values;
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseInt(term);
if (values == null) {
// Lazy alloc so for the numeric field case
// (which will hit a NumberFormatException
// when we first try the DEFAULT_INT_PARSER),
// we don't double-alloc:
int startBitsPerValue;
// Make sure that missing values (0) can be stored without resizing
if (currentValue < 0) {
minValue = currentValue;
startBitsPerValue = PackedInts.bitsRequired((-minValue) & 0xFFFFFFFFL);
} else {
minValue = 0;
startBitsPerValue = PackedInts.bitsRequired(currentValue);
}
values = new GrowableWriter(startBitsPerValue, reader.maxDoc(), PackedInts.FAST);
if (minValue != 0) {
values.fill(0, values.size(), (-minValue) & 0xFFFFFFFFL); // default value must be 0
}
valuesRef.set(new GrowableWriterAndMinValue(values, minValue));
}
}
@Override
public void visitDoc(int docID) {
values.set(docID, (currentValue - minValue) & 0xFFFFFFFFL);
}
@Override
protected TermsEnum termsEnum(Terms terms) throws IOException {
return parser.termsEnum(terms);
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
GrowableWriterAndMinValue values = valuesRef.get();
if (values == null) {
return new IntsFromArray(new PackedInts.NullReader(reader.maxDoc()), 0);
}
return new IntsFromArray(values.writer.getMutable(), (int) values.minValue);
}
}
public Bits getDocsWithField(AtomicReader reader, String field) throws IOException {
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
@ -563,145 +438,31 @@ class FieldCacheImpl implements FieldCache {
return res;
}
}
@Override
public Floats getFloats (AtomicReader reader, String field, boolean setDocsWithField)
throws IOException {
return getFloats(reader, field, null, setDocsWithField);
}
@Override
public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField)
throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Floats() {
@Override
public float get(int docID) {
return Float.intBitsToFloat((int) valuesIn.get(docID));
}
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return Floats.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Floats.EMPTY;
}
return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
}
static class FloatsFromArray extends Floats {
private final float[] values;
public FloatsFromArray(float[] values) {
this.values = values;
}
@Override
public float get(int docID) {
return values[docID];
}
}
static final class FloatCache extends Cache {
FloatCache(FieldCacheImpl wrapper) {
super(wrapper);
}
@Override
protected Object createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException {
final FloatParser parser = (FloatParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = NUMERIC_UTILS_FLOAT_PARSER) so
// cache key includes NUMERIC_UTILS_FLOAT_PARSER:
return wrapper.getFloats(reader, key.field, NUMERIC_UTILS_FLOAT_PARSER, setDocsWithField);
}
final HoldsOneThing<float[]> valuesRef = new HoldsOneThing<>();
Uninvert u = new Uninvert() {
private float currentValue;
private float[] values;
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseFloat(term);
if (values == null) {
// Lazy alloc so for the numeric field case
// (which will hit a NumberFormatException
// when we first try the DEFAULT_INT_PARSER),
// we don't double-alloc:
values = new float[reader.maxDoc()];
valuesRef.set(values);
}
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
@Override
protected TermsEnum termsEnum(Terms terms) throws IOException {
return parser.termsEnum(terms);
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
float[] values = valuesRef.get();
if (values == null) {
values = new float[reader.maxDoc()];
}
return new FloatsFromArray(values);
}
}
@Override
public Longs getLongs(AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getLongs(reader, field, null, setDocsWithField);
}
@Override
public Longs getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField)
throws IOException {
public NumericDocValues getNumerics(AtomicReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException {
if (parser == null) {
throw new NullPointerException();
}
final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Longs() {
@Override
public long get(int docID) {
return valuesIn.get(docID);
}
};
return valuesIn;
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return Longs.EMPTY;
return DocValues.EMPTY_NUMERIC;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Longs.EMPTY;
return DocValues.EMPTY_NUMERIC;
}
return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
return (NumericDocValues) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
}
static class LongsFromArray extends Longs {
static class LongsFromArray extends NumericDocValues {
private final PackedInts.Reader values;
private final long minValue;
@ -725,13 +486,7 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException {
final LongParser parser = (LongParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = NUMERIC_UTILS_LONG_PARSER) so
// cache key includes NUMERIC_UTILS_LONG_PARSER:
return wrapper.getLongs(reader, key.field, NUMERIC_UTILS_LONG_PARSER, setDocsWithField);
}
final Parser parser = (Parser) key.custom;
final HoldsOneThing<GrowableWriterAndMinValue> valuesRef = new HoldsOneThing<>();
@ -742,7 +497,7 @@ class FieldCacheImpl implements FieldCache {
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseLong(term);
currentValue = parser.parseValue(term);
if (values == null) {
// Lazy alloc so for the numeric field case
// (which will hit a NumberFormatException
@ -789,111 +544,6 @@ class FieldCacheImpl implements FieldCache {
}
}
@Override
public Doubles getDoubles(AtomicReader reader, String field, boolean setDocsWithField)
throws IOException {
return getDoubles(reader, field, null, setDocsWithField);
}
@Override
public Doubles getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField)
throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Doubles() {
@Override
public double get(int docID) {
return Double.longBitsToDouble(valuesIn.get(docID));
}
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return Doubles.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Doubles.EMPTY;
}
return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
}
static class DoublesFromArray extends Doubles {
private final double[] values;
public DoublesFromArray(double[] values) {
this.values = values;
}
@Override
public double get(int docID) {
return values[docID];
}
}
static final class DoubleCache extends Cache {
DoubleCache(FieldCacheImpl wrapper) {
super(wrapper);
}
@Override
protected Object createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException {
final DoubleParser parser = (DoubleParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = NUMERIC_UTILS_DOUBLE_PARSER) so
// cache key includes NUMERIC_UTILS_DOUBLE_PARSER:
return wrapper.getDoubles(reader, key.field, NUMERIC_UTILS_DOUBLE_PARSER, setDocsWithField);
}
final HoldsOneThing<double[]> valuesRef = new HoldsOneThing<>();
Uninvert u = new Uninvert() {
private double currentValue;
private double[] values;
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseDouble(term);
if (values == null) {
// Lazy alloc so for the numeric field case
// (which will hit a NumberFormatException
// when we first try the DEFAULT_INT_PARSER),
// we don't double-alloc:
values = new double[reader.maxDoc()];
valuesRef.set(values);
}
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
@Override
protected TermsEnum termsEnum(Terms terms) throws IOException {
return parser.termsEnum(terms);
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
double[] values = valuesRef.get();
if (values == null) {
values = new double[reader.maxDoc()];
}
return new DoublesFromArray(values);
}
}
public static class SortedDocValuesImpl extends SortedDocValues {
private final PagedBytes.Reader bytes;
private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
@ -1190,7 +840,10 @@ class FieldCacheImpl implements FieldCache {
// TODO: if DocTermsIndex was already created, we
// should share it...
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field, BytesRef prefix) throws IOException {
// not a general purpose filtering mechanism...
assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX;
SortedSetDocValues dv = reader.getSortedSetDocValues(field);
if (dv != null) {
return dv;
@ -1210,7 +863,22 @@ class FieldCacheImpl implements FieldCache {
return DocValues.EMPTY_SORTED_SET;
}
DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
// ok we need to uninvert. check if we can optimize a bit.
Terms terms = reader.terms(field);
if (terms == null) {
return DocValues.EMPTY_SORTED_SET;
} else {
// if #postings = #docswithfield we know that the field is "single valued enough".
// it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency.
// it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf
long numPostings = terms.getSumDocFreq();
if (numPostings != -1 && numPostings == terms.getDocCount()) {
return DocValues.singleton(getTermsIndex(reader, field));
}
}
DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix), false);
return dto.iterator(reader);
}
@ -1222,7 +890,8 @@ class FieldCacheImpl implements FieldCache {
@Override
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
throws IOException {
return new DocTermOrds(reader, null, key.field);
BytesRef prefix = (BytesRef) key.custom;
return new DocTermOrds(reader, null, key.field, prefix);
}
}
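Since getInts/getFloats/getLongs/getDoubles collapse into the single getNumerics entry point above, callers now receive raw long values and decode float/double fields from their bit patterns themselves. A rough sketch, assuming an AtomicReader named reader with illustrative "weight" (FloatField) and "ratio" (DoubleField) fields:

NumericDocValues weights = FieldCache.DEFAULT.getNumerics(reader, "weight", FieldCache.NUMERIC_UTILS_FLOAT_PARSER, false);
// float fields come back as int bits widened to long
float w = Float.intBitsToFloat((int) weights.get(0));

NumericDocValues ratios = FieldCache.DEFAULT.getNumerics(reader, "ratio", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
// double fields come back as raw long bits
double r = Double.longBitsToDouble(ratios.get(0));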

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util;
package org.apache.lucene.uninverting;
/**
* Copyright 2009 The Apache Software Foundation
*
@ -23,12 +23,12 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.CacheEntry;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.uninverting.FieldCache.CacheEntry;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.MapOfSets;
/**
* Provides methods for sanity checking that entries in the FieldCache
@ -52,7 +52,7 @@ import org.apache.lucene.store.AlreadyClosedException;
* @see FieldCacheSanityChecker.Insanity
* @see FieldCacheSanityChecker.InsanityType
*/
public final class FieldCacheSanityChecker {
final class FieldCacheSanityChecker {
private boolean estimateRam;

View File

@ -0,0 +1,326 @@
package org.apache.lucene.uninverting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.document.IntField; // javadocs
import org.apache.lucene.document.LongField; // javadocs
import org.apache.lucene.document.FloatField; // javadocs
import org.apache.lucene.document.DoubleField; // javadocs
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
import org.apache.lucene.document.SortedDocValuesField; // javadocs
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.document.StringField; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.uninverting.FieldCache.CacheEntry;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
/**
* A FilterReader that exposes <i>indexed</i> values as if they also had
* docvalues.
* <p>
* This is accomplished by "inverting the inverted index" or "uninversion".
* <p>
* The uninversion process happens lazily: upon the first request for the
* field's docvalues (e.g. via {@link AtomicReader#getNumericDocValues(String)}
* or similar), it will create the docvalues on-the-fly if needed and cache them,
* based on the core cache key of the wrapped AtomicReader.
*/
public class UninvertingReader extends FilterAtomicReader {
/**
* Specifies the type of uninversion to apply for the field.
*/
public static enum Type {
/**
* Single-valued Integer, (e.g. indexed with {@link IntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
INTEGER,
/**
* Single-valued Long, (e.g. indexed with {@link LongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
LONG,
/**
* Single-valued Float, (e.g. indexed with {@link FloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
FLOAT,
/**
* Single-valued Double, (e.g. indexed with {@link DoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
DOUBLE,
/**
* Single-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link BinaryDocValuesField}.
*/
BINARY,
/**
* Single-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedDocValuesField}.
*/
SORTED,
/**
* Multi-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_BINARY,
/**
* Multi-valued Integer, (e.g. indexed with {@link IntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_INTEGER,
/**
* Multi-valued Float, (e.g. indexed with {@link FloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_FLOAT,
/**
* Multi-valued Long, (e.g. indexed with {@link LongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_LONG,
/**
* Multi-valued Double, (e.g. indexed with {@link DoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_DOUBLE
}
/**
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
* can be used normally, e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)},
* and so on.
*/
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) {
return new UninvertingDirectoryReader(in, mapping);
}
static class UninvertingDirectoryReader extends FilterDirectoryReader {
final Map<String,Type> mapping;
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public AtomicReader wrap(AtomicReader reader) {
return new UninvertingReader(reader, mapping);
}
});
this.mapping = mapping;
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
return new UninvertingDirectoryReader(in, mapping);
}
}
final Map<String,Type> mapping;
final FieldInfos fieldInfos;
/**
* Create a new UninvertingReader with the specified mapping
* <p>
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
* instead.
*
* @lucene.internal
*/
public UninvertingReader(AtomicReader in, Map<String,Type> mapping) {
super(in);
this.mapping = mapping;
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
for (FieldInfo fi : in.getFieldInfos()) {
FieldInfo.DocValuesType type = fi.getDocValuesType();
if (fi.isIndexed() && !fi.hasDocValues()) {
Type t = mapping.get(fi.name);
if (t != null) {
switch(t) {
case INTEGER:
case LONG:
case FLOAT:
case DOUBLE:
type = FieldInfo.DocValuesType.NUMERIC;
break;
case BINARY:
type = FieldInfo.DocValuesType.BINARY;
break;
case SORTED:
type = FieldInfo.DocValuesType.SORTED;
break;
case SORTED_SET_BINARY:
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
case SORTED_SET_LONG:
case SORTED_SET_DOUBLE:
type = FieldInfo.DocValuesType.SORTED_SET;
break;
default:
throw new AssertionError();
}
}
}
filteredInfos.add(new FieldInfo(fi.name, fi.isIndexed(), fi.number, fi.hasVectors(), fi.omitsNorms(),
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getNormType(), null));
}
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
}
@Override
public FieldInfos getFieldInfos() {
return fieldInfos;
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v != null) {
switch (v) {
case INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_INT_PARSER, true);
case FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true);
case LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_LONG_PARSER, true);
case DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
default:
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
}
}
return super.getNumericDocValues(field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v == Type.BINARY) {
return FieldCache.DEFAULT.getTerms(in, field, true);
} else if (v != null && v != Type.SORTED) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
} else {
return in.getBinaryDocValues(field);
}
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v == Type.SORTED) {
return FieldCache.DEFAULT.getTermsIndex(in, field);
} else if (v != null) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
} else {
return in.getSortedDocValues(field);
}
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v != null) {
switch (v) {
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
case SORTED_SET_LONG:
case SORTED_SET_DOUBLE:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
case SORTED_SET_BINARY:
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
default:
if (v != Type.SORTED) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
}
}
}
return in.getSortedSetDocValues(field);
}
@Override
public Bits getDocsWithField(String field) throws IOException {
if (mapping.containsKey(field)) {
return FieldCache.DEFAULT.getDocsWithField(in, field);
} else {
return in.getDocsWithField(field);
}
}
@Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@Override
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
@Override
public String toString() {
return "Uninverting(" + in.toString() + ")";
}
/**
* Return information about the backing cache
* @lucene.internal
*/
public static String[] getUninvertedStats() {
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
String[] info = new String[entries.length];
for (int i = 0; i < entries.length; i++) {
info[i] = entries[i].toString();
}
return info;
}
}
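Typical usage of the wrapper, sketched under the assumption of a DirectoryReader named in whose "id" field was indexed as a single-valued IntField and whose "tags" field as a multi-valued StringField (the field names and surrounding setup are illustrative only):

Map<String,UninvertingReader.Type> mapping = new HashMap<>();
mapping.put("id",   UninvertingReader.Type.INTEGER);           // single-valued IntField
mapping.put("tags", UninvertingReader.Type.SORTED_SET_BINARY); // multi-valued StringField
DirectoryReader uninverting = UninvertingReader.wrap(in, mapping);
IndexSearcher searcher = new IndexSearcher(uninverting);
// sorting (and other uninverted access) now goes through the regular DocValues-based APIs:
TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("id", SortField.Type.INT)));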

View File

@ -0,0 +1,21 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
Support for creating docvalues on-the-fly from the inverted index at runtime.
</body>
</html>

View File

@ -1,4 +1,4 @@
package org.apache.lucene.index;
package org.apache.lucene.uninverting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -31,12 +31,28 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
@ -306,7 +322,7 @@ public class TestDocTermOrds extends LuceneTestCase {
TestUtil.nextInt(random(), 2, 10));
final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(r, "id", false);
final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
/*
for(int docID=0;docID<subR.maxDoc();docID++) {
System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
@ -362,7 +378,7 @@ public class TestDocTermOrds extends LuceneTestCase {
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
}
iter.setDocument(docID);
final int[] answers = idToOrds[docIDToID.get(docID)];
final int[] answers = idToOrds[(int) docIDToID.get(docID)];
int upto = 0;
long ord;
while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
@ -387,6 +403,8 @@ public class TestDocTermOrds extends LuceneTestCase {
doc = new Document();
doc.add(newStringField("foo", "baz", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(newStringField("foo", "car", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader r1 = DirectoryReader.open(iw, true);
@ -394,10 +412,10 @@ public class TestDocTermOrds extends LuceneTestCase {
iw.deleteDocuments(new Term("foo", "baz"));
DirectoryReader r2 = DirectoryReader.open(iw, true);
FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo");
FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo", null);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo");
assertEquals(2, v.getValueCount());
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo", null);
assertEquals(3, v.getValueCount());
v.setDocument(1);
assertEquals(1, v.nextOrd());
@ -407,6 +425,90 @@ public class TestDocTermOrds extends LuceneTestCase {
dir.close();
}
public void testNumericEncoded32() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
Document doc = new Document();
doc.add(new IntField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new IntField("foo", 5, Field.Store.NO));
doc.add(new IntField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
DirectoryReader ir = DirectoryReader.open(dir);
AtomicReader ar = getOnlySegmentReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = new BytesRef();
v.lookupOrd(0, value);
assertEquals(-3, NumericUtils.prefixCodedToInt(value));
v.lookupOrd(1, value);
assertEquals(5, NumericUtils.prefixCodedToInt(value));
ir.close();
dir.close();
}
public void testNumericEncoded64() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
Document doc = new Document();
doc.add(new LongField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LongField("foo", 5, Field.Store.NO));
doc.add(new LongField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
DirectoryReader ir = DirectoryReader.open(dir);
AtomicReader ar = getOnlySegmentReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = new BytesRef();
v.lookupOrd(0, value);
assertEquals(-3, NumericUtils.prefixCodedToLong(value));
v.lookupOrd(1, value);
assertEquals(5, NumericUtils.prefixCodedToLong(value));
ir.close();
dir.close();
}
public void testSortedTermsEnum() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
@ -420,6 +522,8 @@ public class TestDocTermOrds extends LuceneTestCase {
doc = new Document();
doc.add(new StringField("field", "world", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(new StringField("field", "hello", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
@ -431,7 +535,7 @@ public class TestDocTermOrds extends LuceneTestCase {
iwriter.shutdown();
AtomicReader ar = getOnlySegmentReader(ireader);
SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field");
SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
@ -478,4 +582,62 @@ public class TestDocTermOrds extends LuceneTestCase {
ireader.close();
directory.close();
}
public void testActuallySingleValued() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
iwconfig.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwconfig);
Document doc = new Document();
doc.add(new StringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "baz", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "baz", Field.Store.NO));
doc.add(new StringField("foo", "baz", Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
DirectoryReader ir = DirectoryReader.open(dir);
AtomicReader ar = getOnlySegmentReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(0, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(2);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(3);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = new BytesRef();
v.lookupOrd(0, value);
assertEquals("bar", value.utf8ToString());
v.lookupOrd(1, value);
assertEquals("baz", value.utf8ToString());
ir.close();
dir.close();
}
}
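The testActuallySingleValued case above exercises the optimization added in FieldCacheImpl: when sumDocFreq equals docCount, the uninverted SORTED_SET is just a singleton wrapper around a SORTED instance. A short sketch of how calling code might detect that, assuming an AtomicReader named ar and an illustrative "category" field:

SortedSetDocValues ords = FieldCache.DEFAULT.getDocTermOrds(ar, "category", null);
SortedDocValues single = DocValues.unwrapSingleton(ords);
if (single != null) {
  // the field never had more than one value per document; use the cheaper per-doc ordinal directly
  int ord = single.getOrd(0);
}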

View File

@ -1,4 +1,4 @@
package org.apache.lucene.search;
package org.apache.lucene.uninverting;
/**
* Copyright 2004 The Apache Software Foundation
@ -43,20 +43,16 @@ import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache.Doubles;
import org.apache.lucene.search.FieldCache.Floats;
import org.apache.lucene.search.FieldCache.Ints;
import org.apache.lucene.search.FieldCache.Longs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -141,15 +137,17 @@ public class TestFieldCache extends LuceneTestCase {
FieldCache cache = FieldCache.DEFAULT;
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
cache.getDoubles(reader, "theDouble", false);
cache.getFloats(reader, "theDouble", new FieldCache.FloatParser() {
cache.getNumerics(reader, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return NumericUtils.filterPrefixCodedLongs(terms.iterator(null));
}
@Override
public float parseFloat(BytesRef term) {
return NumericUtils.sortableIntToFloat((int) NumericUtils.prefixCodedToLong(term));
public long parseValue(BytesRef term) {
int val = (int) NumericUtils.prefixCodedToLong(term);
if (val<0) val ^= 0x7fffffff;
return val;
}
}, false);
assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
@ -161,32 +159,28 @@ public class TestFieldCache extends LuceneTestCase {
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
FieldCache.Doubles doubles = cache.getDoubles(reader, "theDouble", random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getDoubles(reader, "theDouble", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", doubles, cache.getDoubles(reader, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, random().nextBoolean()));
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertTrue(doubles.get(i) + " does not equal: " + (Double.MAX_VALUE - i), doubles.get(i) == (Double.MAX_VALUE - i));
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
}
FieldCache.Longs longs = cache.getLongs(reader, "theLong", random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getLongs(reader, "theLong", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", longs, cache.getLongs(reader, "theLong", FieldCache.NUMERIC_UTILS_LONG_PARSER, random().nextBoolean()));
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.NUMERIC_UTILS_LONG_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.NUMERIC_UTILS_LONG_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertTrue(longs.get(i) + " does not equal: " + (Long.MAX_VALUE - i) + " i=" + i, longs.get(i) == (Long.MAX_VALUE - i));
assertEquals(Long.MAX_VALUE - i, longs.get(i));
}
FieldCache.Ints ints = cache.getInts(reader, "theInt", random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getInts(reader, "theInt", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", ints, cache.getInts(reader, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, random().nextBoolean()));
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertTrue(ints.get(i) + " does not equal: " + (Integer.MAX_VALUE - i), ints.get(i) == (Integer.MAX_VALUE - i));
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
}
FieldCache.Floats floats = cache.getFloats(reader, "theFloat", random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getFloats(reader, "theFloat", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", floats, cache.getFloats(reader, "theFloat", FieldCache.NUMERIC_UTILS_FLOAT_PARSER, random().nextBoolean()));
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.NUMERIC_UTILS_FLOAT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.NUMERIC_UTILS_FLOAT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertTrue(floats.get(i) + " does not equal: " + (Float.MAX_VALUE - i), floats.get(i) == (Float.MAX_VALUE - i));
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong");
@ -271,10 +265,10 @@ public class TestFieldCache extends LuceneTestCase {
terms = cache.getTerms(reader, "bogusfield", false);
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
int numEntries = cache.getCacheEntries().length;
// ask for it again, and check that we didnt create any additional entries:
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
assertEquals(numEntries, cache.getCacheEntries().length);
for (int i = 0; i < NUM_DOCS; i++) {
@ -296,7 +290,7 @@ public class TestFieldCache extends LuceneTestCase {
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield");
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
assertTrue(termOrds.getValueCount() == 0);
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
@ -335,22 +329,21 @@ public class TestFieldCache extends LuceneTestCase {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
cache.getDoubles(reader, "theDouble", true);
cache.getNumerics(reader, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
// The double[] takes two slots (one w/ null parser, one
// w/ real parser), and docsWithField should also
// The double[] takes one slot, and docsWithField should also
// have been populated:
assertEquals(3, cache.getCacheEntries().length);
assertEquals(2, cache.getCacheEntries().length);
Bits bits = cache.getDocsWithField(reader, "theDouble");
// No new entries should appear:
assertEquals(3, cache.getCacheEntries().length);
assertEquals(2, cache.getCacheEntries().length);
assertTrue(bits instanceof Bits.MatchAllBits);
FieldCache.Ints ints = cache.getInts(reader, "sparse", true);
assertEquals(6, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.NUMERIC_UTILS_INT_PARSER, true);
assertEquals(4, cache.getCacheEntries().length);
Bits docsWithField = cache.getDocsWithField(reader, "sparse");
assertEquals(6, cache.getCacheEntries().length);
assertEquals(4, cache.getCacheEntries().length);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
@ -360,7 +353,7 @@ public class TestFieldCache extends LuceneTestCase {
}
}
FieldCache.Ints numInts = cache.getInts(reader, "numInt", random().nextBoolean());
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.NUMERIC_UTILS_INT_PARSER, random().nextBoolean());
docsWithField = cache.getDocsWithField(reader, "numInt");
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
@ -410,7 +403,7 @@ public class TestFieldCache extends LuceneTestCase {
assertEquals(i%2 == 0, docsWithField.get(i));
}
} else {
FieldCache.Ints ints = cache.getInts(reader, "sparse", true);
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.NUMERIC_UTILS_INT_PARSER, true);
Bits docsWithField = cache.getDocsWithField(reader, "sparse");
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
@ -459,7 +452,7 @@ public class TestFieldCache extends LuceneTestCase {
// Binary type: can be retrieved via getTerms()
try {
FieldCache.DEFAULT.getInts(ar, "binary", false);
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
fail();
} catch (IllegalStateException expected) {}
@ -473,7 +466,7 @@ public class TestFieldCache extends LuceneTestCase {
} catch (IllegalStateException expected) {}
try {
FieldCache.DEFAULT.getDocTermOrds(ar, "binary");
FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
fail();
} catch (IllegalStateException expected) {}
@ -487,7 +480,7 @@ public class TestFieldCache extends LuceneTestCase {
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
try {
FieldCache.DEFAULT.getInts(ar, "sorted", false);
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
fail();
} catch (IllegalStateException expected) {}
@ -506,7 +499,7 @@ public class TestFieldCache extends LuceneTestCase {
sorted.get(0, scratch);
assertEquals("sorted value", scratch.utf8ToString());
SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted");
SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
sortedSet.setDocument(0);
assertEquals(0, sortedSet.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
@ -516,7 +509,7 @@ public class TestFieldCache extends LuceneTestCase {
assertTrue(bits.get(0));
// Numeric type: can be retrieved via getInts() and so on
Ints numeric = FieldCache.DEFAULT.getInts(ar, "numeric", false);
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
assertEquals(42, numeric.get(0));
try {
@ -530,7 +523,7 @@ public class TestFieldCache extends LuceneTestCase {
} catch (IllegalStateException expected) {}
try {
FieldCache.DEFAULT.getDocTermOrds(ar, "numeric");
FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
fail();
} catch (IllegalStateException expected) {}
@ -545,7 +538,7 @@ public class TestFieldCache extends LuceneTestCase {
// SortedSet type: can be retrieved via getDocTermOrds()
if (defaultCodecSupportsSortedSet()) {
try {
FieldCache.DEFAULT.getInts(ar, "sortedset", false);
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
fail();
} catch (IllegalStateException expected) {}
@ -564,7 +557,7 @@ public class TestFieldCache extends LuceneTestCase {
fail();
} catch (IllegalStateException expected) {}
sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
sortedSet.setDocument(0);
assertEquals(0, sortedSet.nextOrd());
assertEquals(1, sortedSet.nextOrd());
@ -593,17 +586,17 @@ public class TestFieldCache extends LuceneTestCase {
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
Ints ints = cache.getInts(ar, "bogusints", true);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.NUMERIC_UTILS_INT_PARSER, true);
assertEquals(0, ints.get(0));
Longs longs = cache.getLongs(ar, "boguslongs", true);
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.NUMERIC_UTILS_LONG_PARSER, true);
assertEquals(0, longs.get(0));
Floats floats = cache.getFloats(ar, "bogusfloats", true);
assertEquals(0, floats.get(0), 0.0f);
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true);
assertEquals(0, floats.get(0));
Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true);
assertEquals(0, doubles.get(0), 0.0D);
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
assertEquals(0, doubles.get(0));
BytesRef scratch = new BytesRef();
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
@ -615,7 +608,7 @@ public class TestFieldCache extends LuceneTestCase {
sorted.get(0, scratch);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
sortedSet.setDocument(0);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
@ -652,17 +645,17 @@ public class TestFieldCache extends LuceneTestCase {
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
Ints ints = cache.getInts(ar, "bogusints", true);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.NUMERIC_UTILS_INT_PARSER, true);
assertEquals(0, ints.get(0));
Longs longs = cache.getLongs(ar, "boguslongs", true);
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.NUMERIC_UTILS_LONG_PARSER, true);
assertEquals(0, longs.get(0));
Floats floats = cache.getFloats(ar, "bogusfloats", true);
assertEquals(0, floats.get(0), 0.0f);
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true);
assertEquals(0, floats.get(0));
Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true);
assertEquals(0, doubles.get(0), 0.0D);
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
assertEquals(0, doubles.get(0));
BytesRef scratch = new BytesRef();
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
@ -674,7 +667,7 @@ public class TestFieldCache extends LuceneTestCase {
sorted.get(0, scratch);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
sortedSet.setDocument(0);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
@ -724,7 +717,7 @@ public class TestFieldCache extends LuceneTestCase {
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final FieldCache.Longs longs = FieldCache.DEFAULT.getLongs(getOnlySegmentReader(reader), "f", false);
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlySegmentReader(reader), "f", FieldCache.NUMERIC_UTILS_LONG_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], longs.get(i));
}
@ -770,7 +763,7 @@ public class TestFieldCache extends LuceneTestCase {
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final FieldCache.Ints ints = FieldCache.DEFAULT.getInts(getOnlySegmentReader(reader), "f", false);
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlySegmentReader(reader), "f", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], ints.get(i));
}

View File

@ -0,0 +1,72 @@
package org.apache.lucene.uninverting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestFieldCacheReopen extends LuceneTestCase {
// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
// LUCENE-1579: Ensure that on a reopened reader, that any
// shared segments reuse the doc values arrays in
// FieldCache
public void testFieldCacheReuseAfterReopen() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMergePolicy(newLogMergePolicy(10))
);
Document doc = new Document();
doc.add(new IntField("number", 17, Field.Store.NO));
writer.addDocument(doc);
writer.commit();
// Open reader1
DirectoryReader r = DirectoryReader.open(dir);
AtomicReader r1 = getOnlySegmentReader(r);
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
assertEquals(17, ints.get(0));
// Add new segment
writer.addDocument(doc);
writer.commit();
// Reopen reader1 --> reader2
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
assertNotNull(r2);
r.close();
AtomicReader sub0 = r2.leaves().get(0).reader();
final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
r2.close();
assertTrue(ints == ints2);
writer.shutdown();
dir.close();
}
}
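A rough sketch of the TODO above, not part of this change: the same reuse is expected to hold through UninvertingReader.wrap(), since the wrapper delegates getCoreCacheKey() to the underlying reader. Variable names reuse those from the test; the mapping and the final assertion are assumptions, not tested code here:

Map<String,UninvertingReader.Type> mapping = Collections.singletonMap("number", UninvertingReader.Type.INTEGER);
DirectoryReader r = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
NumericDocValues ints = r.leaves().get(0).reader().getNumericDocValues("number");

writer.addDocument(doc);
writer.commit();

DirectoryReader r2 = DirectoryReader.openIfChanged(r);   // first segment's core is shared
assertNotNull(r2);
NumericDocValues ints2 = r2.leaves().get(0).reader().getNumericDocValues("number");
assertSame(ints, ints2);                                  // same cached uninverted values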

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util;
package org.apache.lucene.uninverting;
/**
* Copyright 2009 The Apache Software Foundation
@ -30,10 +30,10 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
import org.apache.lucene.util.FieldCacheSanityChecker.InsanityType;
import org.apache.lucene.uninverting.FieldCacheSanityChecker.Insanity;
import org.apache.lucene.uninverting.FieldCacheSanityChecker.InsanityType;
import org.apache.lucene.util.LuceneTestCase;
public class TestFieldCacheSanityChecker extends LuceneTestCase {
@ -94,13 +94,11 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getDoubles(readerA, "theDouble", false);
cache.getDoubles(readerA, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getDoubles(readerAclone, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getDoubles(readerB, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getNumerics(readerA, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getNumerics(readerAclone, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getNumerics(readerB, "theDouble", FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, false);
cache.getInts(readerX, "theInt", false);
cache.getInts(readerX, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
cache.getNumerics(readerX, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
// // //
@ -119,7 +117,7 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getInts(readerX, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
cache.getNumerics(readerX, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
cache.getTerms(readerX, "theInt", false);
// // //

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff