mirror of https://github.com/apache/lucene.git
SOLR-1297: fix weighting when sorting by function
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1061065 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1099f8465d
commit
0a30dcd945
|
@ -443,7 +443,7 @@ public class QueryComponent extends SearchComponent
|
|||
// take the documents given and re-derive the sort values.
|
||||
boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false);
|
||||
if(fsv){
|
||||
Sort sort = rb.getSortSpec().getSort();
|
||||
Sort sort = searcher.weightSort(rb.getSortSpec().getSort());
|
||||
SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort();
|
||||
NamedList sortVals = new NamedList(); // order is important for the sort fields
|
||||
Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field
|
||||
|
|
|
@ -162,7 +162,7 @@ public class Grouping {
|
|||
// if we aren't going to return any groups, disregard the offset
|
||||
if (numGroups == 0) maxGroupToFind = 0;
|
||||
|
||||
collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
|
||||
collector = new TopGroupCollector(groupBy, context, searcher.weightSort(normalizeSort(sort)), maxGroupToFind);
|
||||
|
||||
/*** if we need a different algorithm when sort != group.sort
|
||||
if (compareSorts(sort, groupSort)) {
|
||||
|
@ -185,9 +185,9 @@ public class Grouping {
|
|||
int collectorOffset = format==Format.Simple ? 0 : offset;
|
||||
|
||||
if (groupBy instanceof StrFieldSource) {
|
||||
collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset);
|
||||
collector2 = new Phase2StringGroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset);
|
||||
} else {
|
||||
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset);
|
||||
collector2 = new Phase2GroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset);
|
||||
}
|
||||
return collector2;
|
||||
}
|
||||
|
@ -306,11 +306,11 @@ public class Grouping {
|
|||
return v;
|
||||
}
|
||||
|
||||
static TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException {
|
||||
TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException {
|
||||
if (sort==null || sort==byScoreDesc) {
|
||||
return TopScoreDocCollector.create(numHits, true);
|
||||
} else {
|
||||
return TopFieldCollector.create(sort, numHits, false, needScores, needScores, true);
|
||||
return TopFieldCollector.create(searcher.weightSort(sort), numHits, false, needScores, needScores, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -505,12 +505,12 @@ class TopGroupCollector extends GroupCollector {
|
|||
|
||||
int matches;
|
||||
|
||||
public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
|
||||
public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort weightedSort, int nGroups) throws IOException {
|
||||
this.vs = groupByVS;
|
||||
this.context = vsContext;
|
||||
this.nGroups = nGroups = Math.max(1,nGroups); // we need a minimum of 1 for this collector
|
||||
|
||||
SortField[] sortFields = sort.getSort();
|
||||
SortField[] sortFields = weightedSort.getSort();
|
||||
this.comparators = new FieldComparator[sortFields.length];
|
||||
this.reversed = new int[sortFields.length];
|
||||
for (int i = 0; i < sortFields.length; i++) {
|
||||
|
@ -719,7 +719,7 @@ class Phase2GroupCollector extends Collector {
|
|||
int docBase;
|
||||
|
||||
// TODO: may want to decouple from the phase1 collector
|
||||
public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
|
||||
public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException {
|
||||
boolean getSortFields = false;
|
||||
|
||||
if (topGroups.orderedGroups == null)
|
||||
|
@ -733,10 +733,10 @@ class Phase2GroupCollector extends Collector {
|
|||
}
|
||||
SearchGroupDocs groupDocs = new SearchGroupDocs();
|
||||
groupDocs.groupValue = group.groupValue;
|
||||
if (sort==null)
|
||||
if (weightedSort==null)
|
||||
groupDocs.collector = TopScoreDocCollector.create(docsPerGroup, true);
|
||||
else
|
||||
groupDocs.collector = TopFieldCollector.create(sort, docsPerGroup, getSortFields, getScores, getScores, true);
|
||||
groupDocs.collector = TopFieldCollector.create(weightedSort, docsPerGroup, getSortFields, getScores, getScores, true);
|
||||
groupMap.put(groupDocs.groupValue, groupDocs);
|
||||
}
|
||||
|
||||
|
@ -791,8 +791,8 @@ class Phase2StringGroupCollector extends Phase2GroupCollector {
|
|||
final SearchGroupDocs[] groups;
|
||||
final BytesRef spare = new BytesRef();
|
||||
|
||||
public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
|
||||
super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset);
|
||||
public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException {
|
||||
super(topGroups, groupByVS, vsContext,weightedSort,docsPerGroup,getScores,offset);
|
||||
ordSet = new SentinelIntSet(groupMap.size(), -1);
|
||||
groups = new SearchGroupDocs[ordSet.keys.length];
|
||||
}
|
||||
|
|
|
@ -460,6 +460,30 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
return fieldValueCache;
|
||||
}
|
||||
|
||||
/** Returns a weighted sort according to this searcher */
|
||||
public Sort weightSort(Sort sort) throws IOException {
|
||||
if (sort == null) return null;
|
||||
SortField[] sorts = sort.getSort();
|
||||
|
||||
boolean needsWeighting = false;
|
||||
for (SortField sf : sorts) {
|
||||
if (sf instanceof SolrSortField) {
|
||||
needsWeighting = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!needsWeighting) return sort;
|
||||
|
||||
SortField[] newSorts = Arrays.copyOf(sorts, sorts.length);
|
||||
for (int i=0; i<newSorts.length; i++) {
|
||||
if (newSorts[i] instanceof SolrSortField) {
|
||||
newSorts[i] = ((SolrSortField)newSorts[i]).weight(this);
|
||||
}
|
||||
}
|
||||
|
||||
return new Sort(newSorts);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the first document number containing the term <code>t</code>
|
||||
|
@ -1156,7 +1180,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
if (cmd.getSort() == null) {
|
||||
topCollector = TopScoreDocCollector.create(len, true);
|
||||
} else {
|
||||
topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true);
|
||||
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
|
||||
}
|
||||
Collector collector = topCollector;
|
||||
if( timeAllowed > 0 ) {
|
||||
|
@ -1266,7 +1290,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
if (cmd.getSort() == null) {
|
||||
topCollector = TopScoreDocCollector.create(len, true);
|
||||
} else {
|
||||
topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true);
|
||||
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
|
||||
}
|
||||
|
||||
DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector);
|
||||
|
@ -1548,7 +1572,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
// bit of a hack to tell if a set is sorted - do it better in the futute.
|
||||
boolean inOrder = set instanceof BitDocSet || set instanceof SortedIntDocSet;
|
||||
|
||||
TopDocsCollector topCollector = TopFieldCollector.create(sort, nDocs, false, false, false, inOrder);
|
||||
TopDocsCollector topCollector = TopFieldCollector.create(weightSort(sort), nDocs, false, false, false, inOrder);
|
||||
|
||||
DocIterator iter = set.iterator();
|
||||
int base=0;
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.SortField;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**@lucene.internal
|
||||
*
|
||||
*/
|
||||
public interface SolrSortField {
|
||||
public SortField weight(IndexSearcher searcher) throws IOException;
|
||||
}
|
|
@ -26,12 +26,13 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.search.SolrSortField;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Instantiates {@link org.apache.solr.search.function.DocValues} for a particular reader.
|
||||
|
@ -61,24 +62,6 @@ public abstract class ValueSource implements Serializable {
|
|||
return description();
|
||||
}
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL: This method is subject to change.
|
||||
* <br>WARNING: Sorted function queries are not currently weighted.
|
||||
* <p>
|
||||
* Get the SortField for this ValueSource. Uses the {@link #getValues(java.util.Map, AtomicReaderContext)}
|
||||
* to populate the SortField.
|
||||
*
|
||||
* @param reverse true if this is a reverse sort.
|
||||
* @return The {@link org.apache.lucene.search.SortField} for the ValueSource
|
||||
* @throws IOException if there was a problem reading the values.
|
||||
*/
|
||||
public SortField getSortField(boolean reverse) throws IOException {
|
||||
//should we pass in the description for the field name?
|
||||
//Hmm, Lucene is going to intern whatever we pass in, not sure I like that
|
||||
//and we can't pass in null, either, as that throws an illegal arg. exception
|
||||
return new SortField(description(), new ValueSourceComparatorSource(), reverse);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementations should propagate createWeight to sub-ValueSources which can optionally store
|
||||
|
@ -97,16 +80,56 @@ public abstract class ValueSource implements Serializable {
|
|||
return context;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Sorting by function
|
||||
//
|
||||
|
||||
/**
|
||||
* EXPERIMENTAL: This method is subject to change.
|
||||
* <br>WARNING: Sorted function queries are not currently weighted.
|
||||
* <p>
|
||||
* Get the SortField for this ValueSource. Uses the {@link #getValues(java.util.Map, AtomicReaderContext)}
|
||||
* to populate the SortField.
|
||||
*
|
||||
* @param reverse true if this is a reverse sort.
|
||||
* @return The {@link org.apache.lucene.search.SortField} for the ValueSource
|
||||
* @throws IOException if there was a problem reading the values.
|
||||
*/
|
||||
public SortField getSortField(boolean reverse) throws IOException {
|
||||
return new ValueSourceSortField(reverse);
|
||||
}
|
||||
|
||||
private static FieldComparatorSource dummyComparator = new FieldComparatorSource() {
|
||||
@Override
|
||||
public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unweighted use of sort " + fieldname);
|
||||
}
|
||||
};
|
||||
|
||||
class ValueSourceSortField extends SortField implements SolrSortField {
|
||||
public ValueSourceSortField(boolean reverse) {
|
||||
super(description(), dummyComparator, reverse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField weight(IndexSearcher searcher) throws IOException {
|
||||
Map context = newContext(searcher);
|
||||
createWeight(context, searcher);
|
||||
return new SortField(getField(), new ValueSourceComparatorSource(context), getReverse());
|
||||
}
|
||||
}
|
||||
|
||||
class ValueSourceComparatorSource extends FieldComparatorSource {
|
||||
private final Map context;
|
||||
|
||||
|
||||
public ValueSourceComparatorSource() {
|
||||
|
||||
public ValueSourceComparatorSource(Map context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
public FieldComparator newComparator(String fieldname, int numHits,
|
||||
int sortPos, boolean reversed) throws IOException {
|
||||
return new ValueSourceComparator(numHits);
|
||||
return new ValueSourceComparator(context, numHits);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,8 +142,10 @@ public abstract class ValueSource implements Serializable {
|
|||
private final double[] values;
|
||||
private DocValues docVals;
|
||||
private double bottom;
|
||||
private Map fcontext;
|
||||
|
||||
ValueSourceComparator(int numHits) {
|
||||
ValueSourceComparator(Map fcontext, int numHits) {
|
||||
this.fcontext = fcontext;
|
||||
values = new double[numHits];
|
||||
}
|
||||
|
||||
|
@ -153,7 +178,7 @@ public abstract class ValueSource implements Serializable {
|
|||
}
|
||||
|
||||
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
|
||||
docVals = getValues(Collections.emptyMap(), context);
|
||||
docVals = getValues(fcontext, context);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -162,7 +187,7 @@ public abstract class ValueSource implements Serializable {
|
|||
}
|
||||
|
||||
public Comparable value(int slot) {
|
||||
return Double.valueOf(values[slot]);
|
||||
return values[slot];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,6 +95,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
|
||||
// these queries should be exactly ordered and scores should exactly match
|
||||
query("q","*:*", "sort",i1+" desc");
|
||||
query("q","*:*", "sort","{!func}add("+i1+",5)"+" desc");
|
||||
query("q","*:*", "sort",i1+" asc");
|
||||
query("q","*:*", "sort",i1+" desc", "fl","*,score");
|
||||
query("q","*:*", "sort",tlong+" asc", "fl","score"); // test legacy behavior - "score"=="*,score"
|
||||
|
|
|
@ -326,17 +326,18 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
|
|||
assertU(adoc("id",""+i, "text","batman"));
|
||||
}
|
||||
assertU(commit());
|
||||
assertU(adoc("id","120", "text","batman superman")); // in a segment by itself
|
||||
assertU(adoc("id","120", "text","batman superman")); // in a smaller segment
|
||||
assertU(adoc("id","121", "text","superman"));
|
||||
assertU(commit());
|
||||
|
||||
// batman and superman have the same idf in single-doc segment, but very different in the complete index.
|
||||
// superman has a higher df (thus lower idf) in one segment, but reversed in the complete index
|
||||
String q ="{!func}query($qq)";
|
||||
String fq="id:120";
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'");
|
||||
|
||||
// test weighting through a function range query
|
||||
assertQ(req("fl","*,score", "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']");
|
||||
assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']");
|
||||
|
||||
// test weighting through a complex function
|
||||
q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)";
|
||||
|
@ -360,6 +361,14 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
|
|||
// OK
|
||||
}
|
||||
|
||||
// test that sorting by function weights correctly. superman should sort higher than batman due to idf of the whole index
|
||||
|
||||
assertQ(req("q", "*:*", "fq","id:120 OR id:121", "sort","{!func v=$sortfunc} desc", "sortfunc","query($qq)", "qq","text:(batman OR superman)")
|
||||
,"*//doc[1]/float[.='120.0']"
|
||||
,"*//doc[2]/float[.='121.0']"
|
||||
);
|
||||
|
||||
|
||||
purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue