From 0a30dcd94540ecb3ff15686ce45d06171aec6d71 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 19 Jan 2011 23:48:28 +0000 Subject: [PATCH] SOLR-1297: fix weighting when sorting by function git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1061065 13f79535-47bb-0310-9956-ffa450edef68 --- .../handler/component/QueryComponent.java | 2 +- .../java/org/apache/solr/search/Grouping.java | 24 +++--- .../apache/solr/search/SolrIndexSearcher.java | 30 +++++++- .../org/apache/solr/search/SolrSortField.java | 31 ++++++++ .../solr/search/function/ValueSource.java | 77 ++++++++++++------- .../apache/solr/TestDistributedSearch.java | 1 + .../search/function/TestFunctionQuery.java | 15 +++- 7 files changed, 135 insertions(+), 45 deletions(-) create mode 100644 solr/src/java/org/apache/solr/search/SolrSortField.java diff --git a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java index 18bbf49deec..f98d65493ca 100644 --- a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -443,7 +443,7 @@ public class QueryComponent extends SearchComponent // take the documents given and re-derive the sort values. boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false); if(fsv){ - Sort sort = rb.getSortSpec().getSort(); + Sort sort = searcher.weightSort(rb.getSortSpec().getSort()); SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort(); NamedList sortVals = new NamedList(); // order is important for the sort fields Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field diff --git a/solr/src/java/org/apache/solr/search/Grouping.java b/solr/src/java/org/apache/solr/search/Grouping.java index 894b592f3e1..992109130a5 100755 --- a/solr/src/java/org/apache/solr/search/Grouping.java +++ b/solr/src/java/org/apache/solr/search/Grouping.java @@ -162,7 +162,7 @@ public class Grouping { // if we aren't going to return any groups, disregard the offset if (numGroups == 0) maxGroupToFind = 0; - collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind); + collector = new TopGroupCollector(groupBy, context, searcher.weightSort(normalizeSort(sort)), maxGroupToFind); /*** if we need a different algorithm when sort != group.sort if (compareSorts(sort, groupSort)) { @@ -185,9 +185,9 @@ public class Grouping { int collectorOffset = format==Format.Simple ? 0 : offset; if (groupBy instanceof StrFieldSource) { - collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset); + collector2 = new Phase2StringGroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset); } else { - collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset); + collector2 = new Phase2GroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset); } return collector2; } @@ -306,11 +306,11 @@ public class Grouping { return v; } - static TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException { + TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException { if (sort==null || sort==byScoreDesc) { return TopScoreDocCollector.create(numHits, true); } else { - return TopFieldCollector.create(sort, numHits, false, needScores, needScores, true); + return TopFieldCollector.create(searcher.weightSort(sort), numHits, false, needScores, needScores, true); } } @@ -505,12 +505,12 @@ class TopGroupCollector extends GroupCollector { int matches; - public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException { + public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort weightedSort, int nGroups) throws IOException { this.vs = groupByVS; this.context = vsContext; this.nGroups = nGroups = Math.max(1,nGroups); // we need a minimum of 1 for this collector - SortField[] sortFields = sort.getSort(); + SortField[] sortFields = weightedSort.getSort(); this.comparators = new FieldComparator[sortFields.length]; this.reversed = new int[sortFields.length]; for (int i = 0; i < sortFields.length; i++) { @@ -719,7 +719,7 @@ class Phase2GroupCollector extends Collector { int docBase; // TODO: may want to decouple from the phase1 collector - public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException { + public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException { boolean getSortFields = false; if (topGroups.orderedGroups == null) @@ -733,10 +733,10 @@ class Phase2GroupCollector extends Collector { } SearchGroupDocs groupDocs = new SearchGroupDocs(); groupDocs.groupValue = group.groupValue; - if (sort==null) + if (weightedSort==null) groupDocs.collector = TopScoreDocCollector.create(docsPerGroup, true); else - groupDocs.collector = TopFieldCollector.create(sort, docsPerGroup, getSortFields, getScores, getScores, true); + groupDocs.collector = TopFieldCollector.create(weightedSort, docsPerGroup, getSortFields, getScores, getScores, true); groupMap.put(groupDocs.groupValue, groupDocs); } @@ -791,8 +791,8 @@ class Phase2StringGroupCollector extends Phase2GroupCollector { final SearchGroupDocs[] groups; final BytesRef spare = new BytesRef(); - public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException { - super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset); + public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException { + super(topGroups, groupByVS, vsContext,weightedSort,docsPerGroup,getScores,offset); ordSet = new SentinelIntSet(groupMap.size(), -1); groups = new SearchGroupDocs[ordSet.keys.length]; } diff --git a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java index 6dec3f0df6b..d874f13722a 100644 --- a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -460,6 +460,30 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { return fieldValueCache; } + /** Returns a weighted sort according to this searcher */ + public Sort weightSort(Sort sort) throws IOException { + if (sort == null) return null; + SortField[] sorts = sort.getSort(); + + boolean needsWeighting = false; + for (SortField sf : sorts) { + if (sf instanceof SolrSortField) { + needsWeighting = true; + break; + } + } + if (!needsWeighting) return sort; + + SortField[] newSorts = Arrays.copyOf(sorts, sorts.length); + for (int i=0; it @@ -1156,7 +1180,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { if (cmd.getSort() == null) { topCollector = TopScoreDocCollector.create(len, true); } else { - topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true); + topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true); } Collector collector = topCollector; if( timeAllowed > 0 ) { @@ -1266,7 +1290,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { if (cmd.getSort() == null) { topCollector = TopScoreDocCollector.create(len, true); } else { - topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true); + topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true); } DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector); @@ -1548,7 +1572,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { // bit of a hack to tell if a set is sorted - do it better in the futute. boolean inOrder = set instanceof BitDocSet || set instanceof SortedIntDocSet; - TopDocsCollector topCollector = TopFieldCollector.create(sort, nDocs, false, false, false, inOrder); + TopDocsCollector topCollector = TopFieldCollector.create(weightSort(sort), nDocs, false, false, false, inOrder); DocIterator iter = set.iterator(); int base=0; diff --git a/solr/src/java/org/apache/solr/search/SolrSortField.java b/solr/src/java/org/apache/solr/search/SolrSortField.java new file mode 100644 index 00000000000..8b21e4357bd --- /dev/null +++ b/solr/src/java/org/apache/solr/search/SolrSortField.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.solr.search; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SortField; + +import java.io.IOException; + +/**@lucene.internal + * + */ +public interface SolrSortField { + public SortField weight(IndexSearcher searcher) throws IOException; +} diff --git a/solr/src/java/org/apache/solr/search/function/ValueSource.java b/solr/src/java/org/apache/solr/search/function/ValueSource.java index e43ef276f1d..7674b803089 100644 --- a/solr/src/java/org/apache/solr/search/function/ValueSource.java +++ b/solr/src/java/org/apache/solr/search/function/ValueSource.java @@ -26,12 +26,13 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.SortField; import org.apache.lucene.util.Bits; import org.apache.lucene.index.MultiFields; +import org.apache.solr.common.SolrException; +import org.apache.solr.search.SolrSortField; import java.io.IOException; import java.io.Serializable; import java.util.IdentityHashMap; import java.util.Map; -import java.util.Collections; /** * Instantiates {@link org.apache.solr.search.function.DocValues} for a particular reader. @@ -61,24 +62,6 @@ public abstract class ValueSource implements Serializable { return description(); } - /** - * EXPERIMENTAL: This method is subject to change. - *
WARNING: Sorted function queries are not currently weighted. - *

- * Get the SortField for this ValueSource. Uses the {@link #getValues(java.util.Map, AtomicReaderContext)} - * to populate the SortField. - * - * @param reverse true if this is a reverse sort. - * @return The {@link org.apache.lucene.search.SortField} for the ValueSource - * @throws IOException if there was a problem reading the values. - */ - public SortField getSortField(boolean reverse) throws IOException { - //should we pass in the description for the field name? - //Hmm, Lucene is going to intern whatever we pass in, not sure I like that - //and we can't pass in null, either, as that throws an illegal arg. exception - return new SortField(description(), new ValueSourceComparatorSource(), reverse); - } - /** * Implementations should propagate createWeight to sub-ValueSources which can optionally store @@ -97,16 +80,56 @@ public abstract class ValueSource implements Serializable { return context; } + + // + // Sorting by function + // + + /** + * EXPERIMENTAL: This method is subject to change. + *
WARNING: Sorted function queries are not currently weighted. + *

+ * Get the SortField for this ValueSource. Uses the {@link #getValues(java.util.Map, AtomicReaderContext)} + * to populate the SortField. + * + * @param reverse true if this is a reverse sort. + * @return The {@link org.apache.lucene.search.SortField} for the ValueSource + * @throws IOException if there was a problem reading the values. + */ + public SortField getSortField(boolean reverse) throws IOException { + return new ValueSourceSortField(reverse); + } + + private static FieldComparatorSource dummyComparator = new FieldComparatorSource() { + @Override + public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unweighted use of sort " + fieldname); + } + }; + + class ValueSourceSortField extends SortField implements SolrSortField { + public ValueSourceSortField(boolean reverse) { + super(description(), dummyComparator, reverse); + } + + @Override + public SortField weight(IndexSearcher searcher) throws IOException { + Map context = newContext(searcher); + createWeight(context, searcher); + return new SortField(getField(), new ValueSourceComparatorSource(context), getReverse()); + } + } + class ValueSourceComparatorSource extends FieldComparatorSource { + private final Map context; - - public ValueSourceComparatorSource() { - + public ValueSourceComparatorSource(Map context) { + this.context = context; } public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { - return new ValueSourceComparator(numHits); + return new ValueSourceComparator(context, numHits); } } @@ -119,8 +142,10 @@ public abstract class ValueSource implements Serializable { private final double[] values; private DocValues docVals; private double bottom; + private Map fcontext; - ValueSourceComparator(int numHits) { + ValueSourceComparator(Map fcontext, int numHits) { + this.fcontext = fcontext; values = new double[numHits]; } @@ -153,7 +178,7 @@ public abstract class ValueSource implements Serializable { } public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - docVals = getValues(Collections.emptyMap(), context); + docVals = getValues(fcontext, context); return this; } @@ -162,7 +187,7 @@ public abstract class ValueSource implements Serializable { } public Comparable value(int slot) { - return Double.valueOf(values[slot]); + return values[slot]; } } } diff --git a/solr/src/test/org/apache/solr/TestDistributedSearch.java b/solr/src/test/org/apache/solr/TestDistributedSearch.java index b04c51c023d..d1cd535941d 100755 --- a/solr/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/src/test/org/apache/solr/TestDistributedSearch.java @@ -95,6 +95,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { // these queries should be exactly ordered and scores should exactly match query("q","*:*", "sort",i1+" desc"); + query("q","*:*", "sort","{!func}add("+i1+",5)"+" desc"); query("q","*:*", "sort",i1+" asc"); query("q","*:*", "sort",i1+" desc", "fl","*,score"); query("q","*:*", "sort",tlong+" asc", "fl","score"); // test legacy behavior - "score"=="*,score" diff --git a/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java index 7ac1d8e50fa..300acd34b8a 100755 --- a/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -326,17 +326,18 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { assertU(adoc("id",""+i, "text","batman")); } assertU(commit()); - assertU(adoc("id","120", "text","batman superman")); // in a segment by itself + assertU(adoc("id","120", "text","batman superman")); // in a smaller segment + assertU(adoc("id","121", "text","superman")); assertU(commit()); - // batman and superman have the same idf in single-doc segment, but very different in the complete index. + // superman has a higher df (thus lower idf) in one segment, but reversed in the complete index String q ="{!func}query($qq)"; String fq="id:120"; assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'"); assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'"); // test weighting through a function range query - assertQ(req("fl","*,score", "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); + assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); // test weighting through a complex function q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)"; @@ -360,6 +361,14 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { // OK } + // test that sorting by function weights correctly. superman should sort higher than batman due to idf of the whole index + + assertQ(req("q", "*:*", "fq","id:120 OR id:121", "sort","{!func v=$sortfunc} desc", "sortfunc","query($qq)", "qq","text:(batman OR superman)") + ,"*//doc[1]/float[.='120.0']" + ,"*//doc[2]/float[.='121.0']" + ); + + purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity }