LUCENE-6609: Add getSortField impls to many subclasses of FieldCacheSource which return the most direct SortField implementation

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693979 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2015-08-03 23:13:22 +00:00
parent 4f065ce777
commit 0b309054b6
10 changed files with 236 additions and 50 deletions

View File

@ -370,6 +370,11 @@ Changes in Runtime Behavior
TestEarlyTerminatingSortingCollector.testTerminatedEarly test added.
(Christine Poerschke)
* LUCENE-6609: Add getSortField impls to many subclasses of FieldCacheSource which return
the most direct SortField implementation. In many trivial sort by ValueSource usages, this
will result in less RAM, and more precise sorting of extreme values due to no longer
converting to double. (hossman)
Optimizations
* LUCENE-6548: Some optimizations for BlockTree's intersect with very

View File

@ -26,6 +26,8 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueDouble;
@ -45,6 +47,11 @@ public class DoubleFieldSource extends FieldCacheSource {
return "double(" + field + ')';
}
@Override
public SortField getSortField(boolean reverse) {
return new SortField(field, Type.DOUBLE, reverse);
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);

View File

@ -25,6 +25,8 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueFloat;
@ -44,6 +46,11 @@ public class FloatFieldSource extends FieldCacheSource {
return "float(" + field + ')';
}
@Override
public SortField getSortField(boolean reverse) {
return new SortField(field, Type.FLOAT, reverse);
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);

View File

@ -25,6 +25,8 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt;
@ -44,6 +46,10 @@ public class IntFieldSource extends FieldCacheSource {
return "int(" + field + ')';
}
@Override
public SortField getSortField(boolean reverse) {
return new SortField(field, Type.INT, reverse);
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {

View File

@ -25,6 +25,8 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueLong;
@ -56,6 +58,11 @@ public class LongFieldSource extends FieldCacheSource {
return longToObject(val).toString();
}
@Override
public SortField getSortField(boolean reverse) {
return new SortField(field, Type.LONG, reverse);
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);

View File

@ -26,7 +26,9 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.DocTermsIndexDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
/**
* Retrieves {@link FunctionValues} instances for multi-valued string based fields.
@ -48,6 +50,11 @@ public class SortedSetFieldSource extends FieldCacheSource {
this.selector = selector;
}
@Override
public SortField getSortField(boolean reverse) {
return new SortedSetSortField(this.field, reverse, this.selector);
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field);

View File

@ -26,7 +26,12 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -34,6 +39,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -41,6 +47,55 @@ import org.apache.lucene.util.LuceneTestCase;
/** Test that functionquery's getSortField() actually works */
public class TestFunctionQuerySort extends LuceneTestCase {
public void testOptimizedFieldSourceFunctionSorting() throws IOException {
// index contents don't matter for this test.
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
final boolean reverse = random().nextBoolean();
ValueSource vs;
SortField sf, vssf;
vs = new IntFieldSource("int_field");
sf = new SortField("int_field", Type.INT, reverse);
vssf = vs.getSortField(reverse);
assertEquals(sf, vssf);
sf = sf.rewrite(searcher);
vssf = vssf.rewrite(searcher);
assertEquals(sf, vssf);
vs = new FloatFieldSource("float_field");
sf = new SortField("float_field", Type.FLOAT, reverse);
vssf = vs.getSortField(reverse);
assertEquals(sf, vssf);
sf = sf.rewrite(searcher);
vssf = vssf.rewrite(searcher);
assertEquals(sf, vssf);
vs = new DoubleFieldSource("double_field");
sf = new SortField("double_field", Type.DOUBLE, reverse);
vssf = vs.getSortField(reverse);
assertEquals(sf, vssf);
sf = sf.rewrite(searcher);
vssf = vssf.rewrite(searcher);
assertEquals(sf, vssf);
vs = new LongFieldSource("long_field");
sf = new SortField("long_field", Type.LONG, reverse);
vssf = vs.getSortField(reverse);
assertEquals(sf, vssf);
sf = sf.rewrite(searcher);
vssf = vssf.rewrite(searcher);
assertEquals(sf, vssf);
reader.close();
dir.close();
}
public void testSearchAfterWhenSortingByFunctionValues() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
@ -66,8 +121,9 @@ public class TestFunctionQuerySort extends LuceneTestCase {
writer.close();
IndexSearcher searcher = newSearcher(reader);
// Get ValueSource from FieldCache
IntFieldSource src = new IntFieldSource("value");
// Trivial ValueSource function that bypasses single field ValueSource sort optimization
ValueSource src = new SumFloatFunction(new ValueSource[] { new IntFieldSource("value"),
new DoubleConstValueSource(1.0D) });
// ...and make it a sort criterion
SortField sf = src.getSortField(false).rewrite(searcher);
Sort orderBy = new Sort(sf);

View File

@ -26,6 +26,9 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@ -47,12 +50,24 @@ public class TestSortedSetFieldSource extends LuceneTestCase {
writer.close();
DirectoryReader ir = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(ir);
LeafReader ar = getOnlySegmentReader(ir);
ValueSource vs = new SortedSetFieldSource("value");
FunctionValues values = vs.getValues(Collections.emptyMap(), ar.getContext());
assertEquals("baz", values.strVal(0));
assertEquals("bar", values.strVal(1));
// test SortField optimization
final boolean reverse = random().nextBoolean();
SortField vssf = vs.getSortField(reverse);
SortField sf = new SortedSetSortField("value", reverse);
assertEquals(sf, vssf);
vssf = vssf.rewrite(searcher);
sf = sf.rewrite(searcher);
assertEquals(sf, vssf);
ir.close();
dir.close();
}

View File

@ -376,12 +376,15 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// tri-level sort with more dups of primary then fit on a page
// tri-level sort with more dups of primary then fit on a page.
// also a function based sort using a simple function(s) on same field
// (order should be the same in all cases)
for (String primarysort : new String[] { "float", "field('float')", "sum(float,42)" }) {
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
"rows","2",
"fl", "id",
"sort", "float asc, "+intsort+" desc, id desc");
"sort", primarysort + " asc, "+intsort+" desc, id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
@ -419,6 +422,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
}
// trivial base case: rows bigger then number of matches
cursorMark = CURSOR_MARK_START;

View File

@ -36,7 +36,8 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
assertU(delQ("*:*"));
assertU(commit());
}
public void test() throws Exception {
@ -128,4 +129,75 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
"//result/doc[4]/int[@name='id'][.='1']"
);
}
/**
* Sort by function normally compares the double value, but if a function is specified that identifies
* a single field, we should use the underlying field's SortField to save of a lot of type converstion
* (and RAM), and keep the sort precision as high as possible
*/
public void testFieldSortSpecifiedAsFunction() throws Exception {
final long A = Long.MIN_VALUE;
final long B = A + 1L;
final long C = B + 1L;
final long Z = Long.MAX_VALUE;
final long Y = Z - 1L;
final long X = Y - 1L;
// test is predicated on the idea that if long -> double converstion is happening under the hood
// then we lose precision in sorting; so lets sanity check that our JVM isn't doing something wacky
// in converstion that violates the principle of the test
assertEquals("WTF? small longs cast to double aren't equivilent?",
(double)A, (double)B, 0.0D);
assertEquals("WTF? small longs cast to double aren't equivilent?",
(double)A, (double)C, 0.0D);
assertEquals("WTF? big longs cast to double aren't equivilent?",
(double)Z, (double)Y, 0.0D);
assertEquals("WTF? big longs cast to double aren't equivilent?",
(double)Z, (double)X, 0.0D);
int docId = 0;
for (int i = 0; i < 3; i++) {
assertU(adoc(sdoc("id", ++docId, "primary_tl1", X, "secondary_tl1", i,
"multi_l_dv", X, "multi_l_dv", A)));
assertU(adoc(sdoc("id", ++docId, "primary_tl1", Y, "secondary_tl1", i,
"multi_l_dv", Y, "multi_l_dv", B)));
assertU(adoc(sdoc("id", ++docId, "primary_tl1", Z, "secondary_tl1", i,
"multi_l_dv", Z, "multi_l_dv", C)));
}
assertU(commit());
// all of these sorts should result in the exact same order
for (String primarySort : new String[] { "primary_tl1", "field(primary_tl1)",
"field(multi_l_dv,max)", "field(multi_l_dv,min)" }) {
assertQ(req("q", "*:*",
"sort", primarySort + " asc, secondary_tl1 asc")
, "//*[@numFound='9']"
//
, "//result/doc[1]/long[@name='primary_tl1'][.='"+X+"']"
, "//result/doc[1]/long[@name='secondary_tl1'][.='0']"
, "//result/doc[2]/long[@name='primary_tl1'][.='"+X+"']"
, "//result/doc[2]/long[@name='secondary_tl1'][.='1']"
, "//result/doc[3]/long[@name='primary_tl1'][.='"+X+"']"
, "//result/doc[3]/long[@name='secondary_tl1'][.='2']"
//
, "//result/doc[4]/long[@name='primary_tl1'][.='"+Y+"']"
, "//result/doc[4]/long[@name='secondary_tl1'][.='0']"
, "//result/doc[5]/long[@name='primary_tl1'][.='"+Y+"']"
, "//result/doc[5]/long[@name='secondary_tl1'][.='1']"
, "//result/doc[6]/long[@name='primary_tl1'][.='"+Y+"']"
, "//result/doc[6]/long[@name='secondary_tl1'][.='2']"
//
, "//result/doc[7]/long[@name='primary_tl1'][.='"+Z+"']"
, "//result/doc[7]/long[@name='secondary_tl1'][.='0']"
, "//result/doc[8]/long[@name='primary_tl1'][.='"+Z+"']"
, "//result/doc[8]/long[@name='secondary_tl1'][.='1']"
, "//result/doc[9]/long[@name='primary_tl1'][.='"+Z+"']"
, "//result/doc[9]/long[@name='secondary_tl1'][.='2']"
);
}
}
}