mirror of https://github.com/apache/lucene.git
LUCENE-6609: Add getSortField impls to many subclasses of FieldCacheSource which return the most direct SortField implementation
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693979 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f065ce777
commit
0b309054b6
|
@ -370,6 +370,11 @@ Changes in Runtime Behavior
|
|||
TestEarlyTerminatingSortingCollector.testTerminatedEarly test added.
|
||||
(Christine Poerschke)
|
||||
|
||||
* LUCENE-6609: Add getSortField impls to many subclasses of FieldCacheSource which return
|
||||
the most direct SortField implementation. In many trivial sort by ValueSource usages, this
|
||||
will result in less RAM, and more precise sorting of extreme values due to no longer
|
||||
converting to double. (hossman)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-6548: Some optimizations for BlockTree's intersect with very
|
||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
import org.apache.lucene.util.mutable.MutableValueDouble;
|
||||
|
@ -45,6 +47,11 @@ public class DoubleFieldSource extends FieldCacheSource {
|
|||
return "double(" + field + ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(boolean reverse) {
|
||||
return new SortField(field, Type.DOUBLE, reverse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);
|
||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
import org.apache.lucene.util.mutable.MutableValueFloat;
|
||||
|
@ -44,6 +46,11 @@ public class FloatFieldSource extends FieldCacheSource {
|
|||
return "float(" + field + ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(boolean reverse) {
|
||||
return new SortField(field, Type.FLOAT, reverse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);
|
||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.docvalues.IntDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
import org.apache.lucene.util.mutable.MutableValueInt;
|
||||
|
@ -44,6 +46,10 @@ public class IntFieldSource extends FieldCacheSource {
|
|||
return "int(" + field + ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(boolean reverse) {
|
||||
return new SortField(field, Type.INT, reverse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.docvalues.LongDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
import org.apache.lucene.util.mutable.MutableValueLong;
|
||||
|
@ -56,6 +58,11 @@ public class LongFieldSource extends FieldCacheSource {
|
|||
return longToObject(val).toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(boolean reverse) {
|
||||
return new SortField(field, Type.LONG, reverse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);
|
||||
|
|
|
@ -26,7 +26,9 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.docvalues.DocTermsIndexDocValues;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
|
||||
/**
|
||||
* Retrieves {@link FunctionValues} instances for multi-valued string based fields.
|
||||
|
@ -48,6 +50,11 @@ public class SortedSetFieldSource extends FieldCacheSource {
|
|||
this.selector = selector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(boolean reverse) {
|
||||
return new SortedSetSortField(this.field, reverse, this.selector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field);
|
||||
|
|
|
@ -26,7 +26,12 @@ import org.apache.lucene.document.NumericDocValuesField;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
|
@ -34,6 +39,7 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortField.Type;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -41,6 +47,55 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
/** Test that functionquery's getSortField() actually works */
|
||||
public class TestFunctionQuerySort extends LuceneTestCase {
|
||||
|
||||
public void testOptimizedFieldSourceFunctionSorting() throws IOException {
|
||||
// index contents don't matter for this test.
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||
IndexReader reader = writer.getReader();
|
||||
writer.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
final boolean reverse = random().nextBoolean();
|
||||
ValueSource vs;
|
||||
SortField sf, vssf;
|
||||
|
||||
vs = new IntFieldSource("int_field");
|
||||
sf = new SortField("int_field", Type.INT, reverse);
|
||||
vssf = vs.getSortField(reverse);
|
||||
assertEquals(sf, vssf);
|
||||
sf = sf.rewrite(searcher);
|
||||
vssf = vssf.rewrite(searcher);
|
||||
assertEquals(sf, vssf);
|
||||
|
||||
vs = new FloatFieldSource("float_field");
|
||||
sf = new SortField("float_field", Type.FLOAT, reverse);
|
||||
vssf = vs.getSortField(reverse);
|
||||
assertEquals(sf, vssf);
|
||||
sf = sf.rewrite(searcher);
|
||||
vssf = vssf.rewrite(searcher);
|
||||
assertEquals(sf, vssf);
|
||||
|
||||
vs = new DoubleFieldSource("double_field");
|
||||
sf = new SortField("double_field", Type.DOUBLE, reverse);
|
||||
vssf = vs.getSortField(reverse);
|
||||
assertEquals(sf, vssf);
|
||||
sf = sf.rewrite(searcher);
|
||||
vssf = vssf.rewrite(searcher);
|
||||
assertEquals(sf, vssf);
|
||||
|
||||
vs = new LongFieldSource("long_field");
|
||||
sf = new SortField("long_field", Type.LONG, reverse);
|
||||
vssf = vs.getSortField(reverse);
|
||||
assertEquals(sf, vssf);
|
||||
sf = sf.rewrite(searcher);
|
||||
vssf = vssf.rewrite(searcher);
|
||||
assertEquals(sf, vssf);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSearchAfterWhenSortingByFunctionValues() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(null);
|
||||
|
@ -66,8 +121,9 @@ public class TestFunctionQuerySort extends LuceneTestCase {
|
|||
writer.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
// Get ValueSource from FieldCache
|
||||
IntFieldSource src = new IntFieldSource("value");
|
||||
// Trivial ValueSource function that bypasses single field ValueSource sort optimization
|
||||
ValueSource src = new SumFloatFunction(new ValueSource[] { new IntFieldSource("value"),
|
||||
new DoubleConstValueSource(1.0D) });
|
||||
// ...and make it a sort criterion
|
||||
SortField sf = src.getSortField(false).rewrite(searcher);
|
||||
Sort orderBy = new Sort(sf);
|
||||
|
|
|
@ -26,6 +26,9 @@ import org.apache.lucene.index.LeafReader;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -47,12 +50,24 @@ public class TestSortedSetFieldSource extends LuceneTestCase {
|
|||
writer.close();
|
||||
|
||||
DirectoryReader ir = DirectoryReader.open(dir);
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
LeafReader ar = getOnlySegmentReader(ir);
|
||||
|
||||
ValueSource vs = new SortedSetFieldSource("value");
|
||||
FunctionValues values = vs.getValues(Collections.emptyMap(), ar.getContext());
|
||||
assertEquals("baz", values.strVal(0));
|
||||
assertEquals("bar", values.strVal(1));
|
||||
|
||||
// test SortField optimization
|
||||
final boolean reverse = random().nextBoolean();
|
||||
SortField vssf = vs.getSortField(reverse);
|
||||
SortField sf = new SortedSetSortField("value", reverse);
|
||||
assertEquals(sf, vssf);
|
||||
|
||||
vssf = vssf.rewrite(searcher);
|
||||
sf = sf.rewrite(searcher);
|
||||
assertEquals(sf, vssf);
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
|
|
@ -376,12 +376,15 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
|
|||
assertEquals("no more docs, but cursorMark has changed",
|
||||
cursorMark, assertHashNextCursorMark(rsp));
|
||||
|
||||
// tri-level sort with more dups of primary then fit on a page
|
||||
// tri-level sort with more dups of primary then fit on a page.
|
||||
// also a function based sort using a simple function(s) on same field
|
||||
// (order should be the same in all cases)
|
||||
for (String primarysort : new String[] { "float", "field('float')", "sum(float,42)" }) {
|
||||
cursorMark = CURSOR_MARK_START;
|
||||
params = params("q", "*:*",
|
||||
"rows","2",
|
||||
"fl", "id",
|
||||
"sort", "float asc, "+intsort+" desc, id desc");
|
||||
"sort", primarysort + " asc, "+intsort+" desc, id desc");
|
||||
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
|
||||
assertNumFound(10, rsp);
|
||||
assertStartsAt(0, rsp);
|
||||
|
@ -419,6 +422,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
|
|||
assertDocList(rsp);
|
||||
assertEquals("no more docs, but cursorMark has changed",
|
||||
cursorMark, assertHashNextCursorMark(rsp));
|
||||
}
|
||||
|
||||
// trivial base case: rows bigger then number of matches
|
||||
cursorMark = CURSOR_MARK_START;
|
||||
|
|
|
@ -36,7 +36,8 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
|
|||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
assertU(delQ("*:*"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
public void test() throws Exception {
|
||||
|
@ -128,4 +129,75 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
|
|||
"//result/doc[4]/int[@name='id'][.='1']"
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort by function normally compares the double value, but if a function is specified that identifies
|
||||
* a single field, we should use the underlying field's SortField to save of a lot of type converstion
|
||||
* (and RAM), and keep the sort precision as high as possible
|
||||
*/
|
||||
public void testFieldSortSpecifiedAsFunction() throws Exception {
|
||||
final long A = Long.MIN_VALUE;
|
||||
final long B = A + 1L;
|
||||
final long C = B + 1L;
|
||||
|
||||
final long Z = Long.MAX_VALUE;
|
||||
final long Y = Z - 1L;
|
||||
final long X = Y - 1L;
|
||||
|
||||
// test is predicated on the idea that if long -> double converstion is happening under the hood
|
||||
// then we lose precision in sorting; so lets sanity check that our JVM isn't doing something wacky
|
||||
// in converstion that violates the principle of the test
|
||||
|
||||
assertEquals("WTF? small longs cast to double aren't equivilent?",
|
||||
(double)A, (double)B, 0.0D);
|
||||
assertEquals("WTF? small longs cast to double aren't equivilent?",
|
||||
(double)A, (double)C, 0.0D);
|
||||
|
||||
assertEquals("WTF? big longs cast to double aren't equivilent?",
|
||||
(double)Z, (double)Y, 0.0D);
|
||||
assertEquals("WTF? big longs cast to double aren't equivilent?",
|
||||
(double)Z, (double)X, 0.0D);
|
||||
|
||||
int docId = 0;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
assertU(adoc(sdoc("id", ++docId, "primary_tl1", X, "secondary_tl1", i,
|
||||
"multi_l_dv", X, "multi_l_dv", A)));
|
||||
assertU(adoc(sdoc("id", ++docId, "primary_tl1", Y, "secondary_tl1", i,
|
||||
"multi_l_dv", Y, "multi_l_dv", B)));
|
||||
assertU(adoc(sdoc("id", ++docId, "primary_tl1", Z, "secondary_tl1", i,
|
||||
"multi_l_dv", Z, "multi_l_dv", C)));
|
||||
}
|
||||
assertU(commit());
|
||||
|
||||
// all of these sorts should result in the exact same order
|
||||
for (String primarySort : new String[] { "primary_tl1", "field(primary_tl1)",
|
||||
"field(multi_l_dv,max)", "field(multi_l_dv,min)" }) {
|
||||
assertQ(req("q", "*:*",
|
||||
"sort", primarySort + " asc, secondary_tl1 asc")
|
||||
, "//*[@numFound='9']"
|
||||
//
|
||||
, "//result/doc[1]/long[@name='primary_tl1'][.='"+X+"']"
|
||||
, "//result/doc[1]/long[@name='secondary_tl1'][.='0']"
|
||||
, "//result/doc[2]/long[@name='primary_tl1'][.='"+X+"']"
|
||||
, "//result/doc[2]/long[@name='secondary_tl1'][.='1']"
|
||||
, "//result/doc[3]/long[@name='primary_tl1'][.='"+X+"']"
|
||||
, "//result/doc[3]/long[@name='secondary_tl1'][.='2']"
|
||||
//
|
||||
, "//result/doc[4]/long[@name='primary_tl1'][.='"+Y+"']"
|
||||
, "//result/doc[4]/long[@name='secondary_tl1'][.='0']"
|
||||
, "//result/doc[5]/long[@name='primary_tl1'][.='"+Y+"']"
|
||||
, "//result/doc[5]/long[@name='secondary_tl1'][.='1']"
|
||||
, "//result/doc[6]/long[@name='primary_tl1'][.='"+Y+"']"
|
||||
, "//result/doc[6]/long[@name='secondary_tl1'][.='2']"
|
||||
//
|
||||
, "//result/doc[7]/long[@name='primary_tl1'][.='"+Z+"']"
|
||||
, "//result/doc[7]/long[@name='secondary_tl1'][.='0']"
|
||||
, "//result/doc[8]/long[@name='primary_tl1'][.='"+Z+"']"
|
||||
, "//result/doc[8]/long[@name='secondary_tl1'][.='1']"
|
||||
, "//result/doc[9]/long[@name='primary_tl1'][.='"+Z+"']"
|
||||
, "//result/doc[9]/long[@name='secondary_tl1'][.='2']"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue