SOLR-5973: Pluggable Ranking Collectors and Merge Strategies

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1594698 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Joel Bernstein 2014-05-14 20:12:12 +00:00
parent 9afdfd5037
commit 18c9215bc1
11 changed files with 1866 additions and 4 deletions

View File

@ -113,6 +113,9 @@ New Features
* SOLR-6043: Add ability to set http headers in solr response * SOLR-6043: Add ability to set http headers in solr response
(Tomás Fernández Löbbe via Ryan Ernst) (Tomás Fernández Löbbe via Ryan Ernst)
* SOLR-5973: Pluggable Ranking Collectors and Merge Strategies
(Joel Bernstein)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import org.apache.solr.search.SolrIndexSearcher;
import java.util.Comparator;
import java.io.IOException;
/**
 * Defines custom merge logic for distributed searches.
 *
 * <p>When several strategies are registered they are ordered with
 * {@link #MERGE_COMP}, i.e. by ascending {@link #getCost()}.
 */
public interface MergeStrategy {

  /**
   * Defines the merging behavior of results that are collected from the
   * shards during a distributed search.
   *
   * @param rb the response builder of the request being processed
   * @param sreq the shard request whose responses should be merged
   */
  public void merge(ResponseBuilder rb, ShardRequest sreq);

  /**
   * Must return true if the merge method merges document ids from the shards.
   * If it merges other output from the shards it must return false.
   *
   * @return true if {@link #merge(ResponseBuilder, ShardRequest)} merges document ids
   */
  public boolean mergesIds();

  /**
   * Must return true if the MergeStrategy implements a custom
   * {@link #handleMergeFields(ResponseBuilder, SolrIndexSearcher)}.
   *
   * @return true if this strategy supplies its own merge fields
   */
  public boolean handlesMergeFields();

  /**
   * Implement this method if your merge strategy needs more complex data
   * than the sort fields provide.
   *
   * @param rb the response builder of the request being processed
   * @param searcher the searcher used to service the request
   * @throws IOException if reading the data needed for the merge fields fails
   */
  public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException;

  /**
   * Defines the order in which merge strategies are applied.
   * Lower costs are applied first.
   *
   * @return the cost of this strategy
   */
  public int getCost();

  /**
   * Orders merge strategies by ascending {@link #getCost()}.
   *
   * <p>The costs are compared with {@link Integer#compare(int, int)} instead of
   * being subtracted: a subtraction overflows when the two costs are far apart
   * (for example Integer.MIN_VALUE and a positive cost) and would then report
   * the wrong order.
   */
  public static final Comparator<MergeStrategy> MERGE_COMP = new Comparator<MergeStrategy>() {
    @Override
    public int compare(MergeStrategy m1, MergeStrategy m2) {
      return Integer.compare(m1.getCost(), m2.getCost());
    }
  };
}

View File

@ -77,6 +77,7 @@ import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields; import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SortSpec; import org.apache.solr.search.SortSpec;
import org.apache.solr.search.RankQuery;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.grouping.CommandHandler; import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification; import org.apache.solr.search.grouping.GroupingSpecification;
@ -98,6 +99,8 @@ import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTran
import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer; import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer; import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import java.util.Collections;
import java.util.Comparator;
/** /**
* TODO! * TODO!
@ -147,6 +150,17 @@ public class QueryComponent extends SearchComponent
// normalize a null query to a query that matches nothing // normalize a null query to a query that matches nothing
q = new BooleanQuery(); q = new BooleanQuery();
} }
if(q instanceof RankQuery) {
MergeStrategy mergeStrategy = ((RankQuery)q).getMergeStrategy();
if(mergeStrategy != null) {
rb.addMergeStrategy(mergeStrategy);
if(mergeStrategy.handlesMergeFields()) {
rb.mergeFieldHandler = mergeStrategy;
}
}
}
rb.setQuery( q ); rb.setQuery( q );
rb.setSortSpec( parser.getSort(true) ); rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser); rb.setQparser(parser);
@ -473,7 +487,13 @@ public class QueryComponent extends SearchComponent
rb.getNextCursorMark().getSerializedTotem()); rb.getNextCursorMark().getSerializedTotem());
} }
} }
doFieldSortValues(rb, searcher);
if(rb.mergeFieldHandler != null) {
rb.mergeFieldHandler.handleMergeFields(rb, searcher);
} else {
doFieldSortValues(rb, searcher);
}
doPrefetch(rb); doPrefetch(rb);
} }
@ -821,6 +841,22 @@ public class QueryComponent extends SearchComponent
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) { private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
if(mergeStrategies != null) {
Collections.sort(mergeStrategies, MergeStrategy.MERGE_COMP);
boolean idsMerged = false;
for(MergeStrategy mergeStrategy : mergeStrategies) {
mergeStrategy.merge(rb, sreq);
if(mergeStrategy.mergesIds()) {
idsMerged = true;
}
}
if(idsMerged) {
return; //ids were merged above so return.
}
}
SortSpec ss = rb.getSortSpec(); SortSpec ss = rb.getSortSpec();
Sort sort = ss.getSort(); Sort sort = ss.getSort();

View File

@ -40,6 +40,7 @@ import org.apache.solr.search.grouping.distributed.command.QueryCommandResult;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.ArrayList;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -58,6 +59,7 @@ public class ResponseBuilder
public boolean doExpand; public boolean doExpand;
public boolean doStats; public boolean doStats;
public boolean doTerms; public boolean doTerms;
public MergeStrategy mergeFieldHandler;
private boolean needDocList = false; private boolean needDocList = false;
private boolean needDocSet = false; private boolean needDocSet = false;
@ -74,6 +76,9 @@ public class ResponseBuilder
private CursorMark cursorMark; private CursorMark cursorMark;
private CursorMark nextCursorMark; private CursorMark nextCursorMark;
private List<MergeStrategy> mergeStrategies;
private DocListAndSet results = null; private DocListAndSet results = null;
private NamedList<Object> debugInfo = null; private NamedList<Object> debugInfo = null;
private RTimer timer = null; private RTimer timer = null;
@ -230,7 +235,23 @@ public class ResponseBuilder
debugResults = dbg; debugResults = dbg;
debugTrack = dbg; debugTrack = dbg;
} }
/**
 * Registers a merge strategy for this request.
 *
 * <p>The backing list is created lazily on the first call, so
 * {@link #getMergeStrategies()} keeps returning null until a strategy
 * has been added here.
 *
 * @param mergeStrategy the strategy to append to the list
 */
public void addMergeStrategy(MergeStrategy mergeStrategy) {
  if (mergeStrategies == null) {
    // Typed list instead of a raw ArrayList to avoid an unchecked assignment.
    mergeStrategies = new ArrayList<MergeStrategy>();
  }
  mergeStrategies.add(mergeStrategy);
}
/**
 * Returns the merge strategies registered through
 * {@link #addMergeStrategy(MergeStrategy)}.
 *
 * @return the live (not copied) list of strategies; as far as is visible here
 *         the list is only created by addMergeStrategy, so callers should
 *         expect null when nothing was registered
 */
public List<MergeStrategy> getMergeStrategies() {
  return mergeStrategies;
}
/**
 * Replaces the response document list held by this builder.
 *
 * @param _responseDocs the document list to store; it is stored as-is, without copying
 */
public void setResponseDocs(SolrDocumentList _responseDocs) {
this._responseDocs = _responseDocs;
}
public boolean isDebugTrack() { public boolean isDebugTrack() {
return debugTrack; return debugTrack;
} }

View File

@ -35,7 +35,7 @@ public class ShardDoc extends FieldDoc {
public String shard; public String shard;
public String shardAddress; // TODO public String shardAddress; // TODO
int orderInShard; public int orderInShard;
// the position of this doc within the shard... this can be used // the position of this doc within the shard... this can be used
// to short-circuit comparisons if the shard is equal, and can // to short-circuit comparisons if the shard is equal, and can
// also be used to break ties within the same shard. // also be used to break ties within the same shard.

View File

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.Query;
import org.apache.solr.handler.component.MergeStrategy;
/**
 * Base class for queries that plug in their own ranking collector and,
 * optionally, their own {@link MergeStrategy} for distributed requests.
 */
public abstract class RankQuery extends Query {
/**
 * Supplies the collector used to gather the top documents for this query.
 * SolrIndexSearcher returns this collector instead of building its default
 * one when the command's query is a RankQuery.
 *
 * @param len the collector size requested by the searcher
 * @param cmd the query command being executed
 * @return the collector to use for this query
 */
public abstract TopDocsCollector getTopDocsCollector(int len, SolrIndexSearcher.QueryCommand cmd);
/**
 * Supplies the merge strategy to register for distributed requests.
 *
 * @return the merge strategy, or null if there is none (QueryComponent
 *         checks for null before registering it)
 */
public abstract MergeStrategy getMergeStrategy();
}

View File

@ -1482,7 +1482,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
* TopDocsCollector to use. * TopDocsCollector to use.
*/ */
private TopDocsCollector buildTopDocsCollector(int len, QueryCommand cmd) throws IOException { private TopDocsCollector buildTopDocsCollector(int len, QueryCommand cmd) throws IOException {
Query q = cmd.getQuery();
if(q instanceof RankQuery) {
RankQuery rq = (RankQuery)q;
return rq.getTopDocsCollector(len, cmd);
}
if (null == cmd.getSort()) { if (null == cmd.getSort()) {
assert null == cmd.getCursorMark() : "have cursor but no sort"; assert null == cmd.getCursorMark() : "have cursor but no sort";
return TopScoreDocCollector.create(len, true); return TopScoreDocCollector.create(len, true);

View File

@ -0,0 +1,579 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- This is a "kitchen sink" config file that tests can use.
When writing a new test, feel free to add *new* items (plugins,
config options, etc...) as long as they don't break any existing
tests. if you need to test something esoteric please add a new
"solrconfig-your-esoteric-purpose.xml" config file.
Note in particular that this file is used by MinimalSchemaTest, so
anything added to this file needs to work correctly even if there
is no uniqueKey or defaultSearch field.
-->
<config>
<jmx />
<!-- Used to specify an alternate directory to hold all index data.
It defaults to "index" if not present, and should probably
not be changed if replication is in use. -->
<dataDir>${solr.data.dir:}</dataDir>
<!-- The DirectoryFactory to use for indexes.
solr.StandardDirectoryFactory, the default, is filesystem based.
solr.RAMDirectoryFactory is memory based and not persistent. -->
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}">
<double name="maxWriteMBPerSecDefault">1000000</double>
<double name="maxWriteMBPerSecFlush">2000000</double>
<double name="maxWriteMBPerSecMerge">3000000</double>
<double name="maxWriteMBPerSecRead">4000000</double>
</directoryFactory>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>3600000</maxTime>
</autoCommit>
-->
<!-- represents a lower bound on the frequency that commits may
occur (in seconds). NOTE: not yet implemented
<commitIntervalLowerBound>0</commitIntervalLowerBound>
-->
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">/var/opt/resin3/__PORT__/scripts/solr/snapshooter</str>
<str name="dir">/var/opt/resin3/__PORT__</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
<updateLog enable="${enable.update.log:true}">
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
<commitWithin>
<softCommit>${solr.commitwithin.softcommit:true}</softCommit>
</commitWithin>
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
class="solr.search.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="2"/>
<queryResultCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="2"/>
<documentCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<cache name="perSegFilter"
class="solr.search.LRUCache"
size="10"
initialSize="0"
autowarmCount="10" />
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
<cache name="myUserCache"
class="solr.search.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="MyRegenerator"
/>
-->
<!--
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<queryResultWindowSize>10</queryResultWindowSize>
<!-- set maxSize artificially low to exercise both types of sets -->
<HashDocSet maxSize="3" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain prewarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<queryResponseWriter name="xml" default="true"
class="solr.XMLResponseWriter" />
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the 'qt' param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler">
<bool name="httpCaching">true</bool>
</requestHandler>
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
</lst>
</requestHandler>
<requestHandler name="dismax" class="solr.SearchHandler" >
<lst name="defaults">
<str name="defType">dismax</str>
<str name="q.alt">*:*</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</lst>
</requestHandler>
<requestHandler name="mock" class="org.apache.solr.core.MockQuerySenderListenerReqHandler"/>
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
<!-- test query parameter defaults -->
<requestHandler name="defaults" class="solr.StandardRequestHandler">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<!-- test query parameter defaults -->
<requestHandler name="lazy" class="solr.StandardRequestHandler" startup="lazy">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<requestHandler name="/update" class="solr.UpdateRequestHandler" />
<searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
<!-- This is slightly different from the field value so we can test dealing with token offset changes -->
<str name="queryAnalyzerFieldType">lowerpunctfilt</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">false</str>
</lst>
<lst name="spellchecker">
<str name="name">direct</str>
<str name="classname">DirectSolrSpellChecker</str>
<str name="field">lowerfilt</str>
<int name="minQueryLength">3</int>
</lst>
<lst name="spellchecker">
<str name="name">wordbreak</str>
<str name="classname">solr.WordBreakSolrSpellChecker</str>
<str name="field">lowerfilt</str>
<str name="combineWords">true</str>
<str name="breakWords">true</str>
<int name="maxChanges">10</int>
</lst>
<lst name="spellchecker">
<str name="name">multipleFields</str>
<str name="field">lowerfilt1and2</str>
<str name="spellcheckIndexDir">spellcheckerMultipleFields</str>
<str name="buildOnCommit">false</str>
</lst>
<!-- Example of using different distance measure -->
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">lowerfilt</str>
<!-- Use a different Distance Measure -->
<str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">spellchecker2</str>
</lst>
<lst name="spellchecker">
<str name="classname">solr.FileBasedSpellChecker</str>
<str name="name">external</str>
<str name="sourceLocation">spellings.txt</str>
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
<!-- Comparator -->
<lst name="spellchecker">
<str name="name">freq</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFreq</str>
<!-- comparatorClass can be one of:
1. score (default)
2. freq (Frequency first, then score)
3. A fully qualified class name
-->
<str name="comparatorClass">freq</str>
<str name="buildOnCommit">false</str>
</lst>
<lst name="spellchecker">
<str name="name">fqcn</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFQCN</str>
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
<str name="buildOnCommit">false</str>
</lst>
<lst name="spellchecker">
<str name="name">perDict</str>
<str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str>
<str name="field">lowerfilt</str>
</lst>
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
<arr name="components">
<str>termsComp</str>
</arr>
</requestHandler>
<!--
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
-->
<queryConverter name="queryConverter" class="org.apache.solr.spelling.SpellingQueryConverter"/>
<requestHandler name="spellCheckCompRH" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str>
<!-- exr = Extended Results -->
<str name="spellcheck.extendedResults">false</str>
<!-- The number of suggestions to return -->
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckCompRH_Direct" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">direct</str>
<str name="spellcheck.onlyMorePopular">false</str>
<str name="spellcheck.extendedResults">false</str>
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckWithWordbreak" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">default</str>
<str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck.count">20</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckWithWordbreak_Direct" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="spellcheck.dictionary">direct</str>
<str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck.count">20</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="qf">lowerfilt1^1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="mltrh" class="org.apache.solr.handler.component.SearchHandler">
</requestHandler>
<searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
<requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>
<!-- test elevation -->
<searchComponent name="elevate" class="org.apache.solr.handler.component.QueryElevationComponent" >
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<arr name="last-components">
<str>elevate</str>
</arr>
</requestHandler>
<requestHandler name="/mlt" class="solr.MoreLikeThisHandler">
</requestHandler>
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<!-- Configure the standard fragmenter -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple" class="org.apache.solr.highlight.SimpleFragListBuilder" default="true"/>
<!-- Configure the standard fragmentsBuilder -->
<fragmentsBuilder name="simple" class="org.apache.solr.highlight.SimpleFragmentsBuilder" default="true"/>
<fragmentsBuilder name="scoreOrder" class="org.apache.solr.highlight.ScoreOrderFragmentsBuilder"/>
<boundaryScanner name="simple" class="solr.highlight.SimpleBoundaryScanner" default="true">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator" class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.type">WORD</str>
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>
<!-- enable streaming for testing... -->
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr" never304="false">
<cacheControl>max-age=30, public</cacheControl>
</httpCaching>
</requestDispatcher>
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<admin>
<defaultQuery>solr</defaultQuery>
<gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles>
</admin>
<!-- test getting system property -->
<propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
<queryParser name="rank" class="org.apache.solr.search.TestRankQueryPlugin"/>
<updateRequestProcessorChain name="dedupe">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">false</bool>
<bool name="overwriteDupes">true</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="dedupe-allfields">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">false</bool>
<bool name="overwriteDupes">false</bool>
<str name="signatureField">id</str>
<str name="fields"></str>
<str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="stored_sig">
<!-- this chain is valid even though the signature field is not
indexed, because we are not asking for dups to be overwritten
-->
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">non_indexed_signature_sS</str>
<bool name="overwriteDupes">false</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="uniq-fields">
<processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
<arr name="fieldName">
<str>uniq</str>
<str>uniq2</str>
<str>uniq3</str>
</arr>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="distrib-dup-test-chain-explicit">
<!-- explicit test using processors before and after distrib -->
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_A_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.DistributedUpdateProcessorFactory" />
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_B_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="distrib-dup-test-chain-implicit">
<!-- implicit test w/o distrib declared-->
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_A_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">regex_dup_B_s</str>
<str name="pattern">x</str>
<str name="replacement">x_x</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
</config>

View File

@ -0,0 +1,181 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.junit.BeforeClass;
import java.util.Arrays;
/**
* Test for pluggable MergeStrategy handling in QueryComponent's distributed querying
*
* @see org.apache.solr.handler.component.QueryComponent
*/
public class MergeStrategyTest extends BaseDistributedSearchTestCase {
/**
 * Configures the distributed test harness for this test.
 * NOTE(review): presumably this pins the run to exactly three shards with
 * stress testing switched off; confirm against BaseDistributedSearchTestCase.
 */
public MergeStrategyTest() {
  stress = 0;
  shardCount = 3;
  fixShardCount = true;
}
/**
 * Initializes the core once for the whole test class, using the
 * "solrconfig-plugcollector.xml" config and the "schema15.xml" schema.
 *
 * @throws Exception if the core cannot be initialized
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
initCore("solrconfig-plugcollector.xml", "schema15.xml");
}
@Override
public void doTest() throws Exception {
del("*:*");
index_specific(0,"id","1", "sort_i", "5");
index_specific(0,"id","2", "sort_i", "50");
index_specific(1,"id","5", "sort_i", "4");
index_specific(1,"id","6", "sort_i", "10");
index_specific(0,"id","7", "sort_i", "1");
index_specific(1,"id","8", "sort_i", "2");
index_specific(2,"id","9", "sort_i", "1000");
index_specific(2,"id","10", "sort_i", "1500");
index_specific(2,"id","11", "sort_i", "1300");
index_specific(1,"id","12", "sort_i", "15");
index_specific(1,"id","13", "sort_i", "16");
commit();
handle.put("explain", SKIPVAL);
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("score", SKIPVAL);
handle.put("wt", SKIP);
handle.put("distrib", SKIP);
handle.put("shards.qt", SKIP);
handle.put("shards", SKIP);
handle.put("q", SKIP);
handle.put("maxScore", SKIPVAL);
handle.put("_version_", SKIP);
//Test mergeStrategy that uses score
query("q", "{!rank q=$qq}", "qq", "*:*", "rows","12", "sort", "sort_i asc", "fl","*,score");
//Test without mergeStrategy
query("q", "*:*", "rows","12", "sort", "sort_i asc");
//Test mergeStrategy1 that uses a sort field.
query("q", "{!rank mergeStrategy=1 q=$qq}", "qq", "*:*", "rows","12", "sort", "sort_i asc");
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("qq", "*:*");
params.add("rows", "12");
params.add("q", "{!rank q=$qq}");
params.add("sort", "sort_i asc");
params.add("fl","*,score");
setDistributedParams(params);
QueryResponse rsp = queryServer(params);
assertOrder(rsp,"10","11","9","2","13","12","6","1","5","8","7");
params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("rows", "12");
params.add("sort", "sort_i asc");
params.add("fl","*,score");
setDistributedParams(params);
rsp = queryServer(params);
assertOrder(rsp,"7","8","5","1","6","12","13","2","9","11","10");
MergeStrategy m1 = new MergeStrategy() {
@Override
public void merge(ResponseBuilder rb, ShardRequest sreq) {
}
public boolean mergesIds() {
return true;
}
public boolean handlesMergeFields() { return false;}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
@Override
public int getCost() {
return 1;
}
};
MergeStrategy m2 = new MergeStrategy() {
@Override
public void merge(ResponseBuilder rb, ShardRequest sreq) {
}
public boolean mergesIds() {
return true;
}
public boolean handlesMergeFields() { return false;}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
@Override
public int getCost() {
return 100;
}
};
MergeStrategy m3 = new MergeStrategy() {
@Override
public void merge(ResponseBuilder rb, ShardRequest sreq) {
}
public boolean mergesIds() {
return false;
}
public boolean handlesMergeFields() { return false;}
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {}
@Override
public int getCost() {
return 50;
}
};
MergeStrategy[] merges = {m1,m2,m3};
Arrays.sort(merges, MergeStrategy.MERGE_COMP);
assert(merges[0].getCost() == 1);
assert(merges[1].getCost() == 50);
assert(merges[2].getCost() == 100);
}
private void assertOrder(QueryResponse rsp, String ... docs) throws Exception {
SolrDocumentList list = rsp.getResults();
for(int i=0; i<docs.length; i++) {
SolrDocument doc = list.get(i);
Object o = doc.getFieldValue("id");
if(!docs[i].equals(o)) {
throw new Exception("Order is not correct:"+o+"!="+docs[i]);
}
}
}
}

View File

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Single-core test of the <code>{!rank}</code> query parser and the pluggable
 * collectors it selects.
 */
public class RankQueryTest extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-plugcollector.xml", "schema15.xml");
  }

  /** Starts every test from an empty, committed index. */
  @Override
  @Before
  public void setUp() throws Exception {
    // The superclass set-up has to run before the index is touched.
    super.setUp();
    clearIndex();
    assertU(commit());
  }

  /** Adds one document with the given id and sort_i value (no commit). */
  private void indexDoc(String id, String sortValue) {
    assertU(adoc("id", id, "sort_i", sortValue));
  }

  @Test
  public void testPluggableCollector() throws Exception {
    // The commits are interleaved with the adds on purpose; they are kept
    // exactly where they were so the index layout does not change.
    indexDoc("1", "100");
    assertU(commit());

    indexDoc("2", "50");
    indexDoc("3", "1000");
    assertU(commit());

    indexDoc("4", "2000");
    indexDoc("5", "2");
    assertU(commit());

    indexDoc("6", "11");
    assertU(commit());

    // Rank query with the default collector: highest sort_i first,
    // although "sort_i asc" is requested.
    ModifiableSolrParams defaultCollectorParams = new ModifiableSolrParams();
    defaultCollectorParams.add("qq", "*:*");
    defaultCollectorParams.add("q", "{!rank q=$qq}");
    defaultCollectorParams.add("sort","sort_i asc");
    assertQ(req(defaultCollectorParams), "*[count(//doc)=6]",
        "//result/doc[1]/str[@name='id'][.='4']",
        "//result/doc[2]/str[@name='id'][.='3']",
        "//result/doc[3]/str[@name='id'][.='1']",
        "//result/doc[4]/str[@name='id'][.='2']",
        "//result/doc[5]/str[@name='id'][.='6']",
        "//result/doc[6]/str[@name='id'][.='5']"
    );

    // Rank query with collector=1 over a query boosted by field(sort_i):
    // lowest sort_i first.
    ModifiableSolrParams scoreCollectorParams = new ModifiableSolrParams();
    scoreCollectorParams.add("qq", "{!edismax bf=$bff}*:*");
    scoreCollectorParams.add("bff", "field(sort_i)");
    scoreCollectorParams.add("q", "{!rank q=$qq collector=1}");
    assertQ(req(scoreCollectorParams), "*[count(//doc)=6]",
        "//result/doc[6]/str[@name='id'][.='4']",
        "//result/doc[5]/str[@name='id'][.='3']",
        "//result/doc[4]/str[@name='id'][.='1']",
        "//result/doc[3]/str[@name='id'][.='2']",
        "//result/doc[2]/str[@name='id'][.='6']",
        "//result/doc[1]/str[@name='id'][.='5']"
    );

    // Plain query without the rank parser: the regular ascending sort applies.
    ModifiableSolrParams plainParams = new ModifiableSolrParams();
    plainParams.add("q", "*:*");
    plainParams.add("sort","sort_i asc");
    assertQ(req(plainParams), "*[count(//doc)=6]",
        "//result/doc[6]/str[@name='id'][.='4']",
        "//result/doc[5]/str[@name='id'][.='3']",
        "//result/doc[4]/str[@name='id'][.='1']",
        "//result/doc[3]/str[@name='id'][.='2']",
        "//result/doc[2]/str[@name='id'][.='6']",
        "//result/doc[1]/str[@name='id'][.='5']"
    );
  }
}

View File

@ -0,0 +1,816 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardDoc;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.Ignore;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Ignore
public class TestRankQueryPlugin extends QParserPlugin {
/**
 * Plugin initialization hook. This test plugin reads no configuration, so the
 * supplied arguments are ignored.
 *
 * @param params initialization arguments (unused)
 */
public void init(NamedList params) {
}
/**
 * Creates the parser that turns the local params of a rank query into a
 * {@link TestRankQuery}.
 *
 * @param query the query string
 * @param localParams the local params of the query
 * @param params the request params
 * @param req the current request
 * @return a new {@link TestRankQueryParser} built from the given arguments
 */
public QParser createParser(String query, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  final QParser rankParser = new TestRankQueryParser(query, localParams, params, req);
  return rankParser;
}
/**
 * Parser for the test rank query. It reads three local params: "q" (the
 * wrapped query string), "mergeStrategy" and "collector" (integer selectors
 * that both default to 0).
 */
class TestRankQueryParser extends QParser {

  public TestRankQueryParser(String query, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    super(query, localParams, params, req);
  }

  /**
   * Parses the wrapped query named by the "q" local param and wraps it in a
   * {@link TestRankQuery} carrying the selected collector and merge strategy.
   *
   * @return the rank query around the parsed main query
   * @throws SyntaxError if the wrapped query cannot be parsed
   */
  public Query parse() throws SyntaxError {
    final String wrappedQueryString = localParams.get("q");
    // A null default type is passed to QParser.getParser for the wrapped query.
    final Query wrappedQuery = QParser.getParser(wrappedQueryString, null, req).getQuery();

    final int mergeStrategyId = localParams.getInt("mergeStrategy", 0);
    final int collectorId = localParams.getInt("collector", 0);
    return new TestRankQuery(collectorId, mergeStrategyId, wrappedQuery);
  }
}
/**
 * Rank query used by the tests. Weighting, boost and string representation
 * are delegated to the wrapped query; the two integer selectors decide which
 * test collector and which test merge strategy are handed out.
 */
class TestRankQuery extends RankQuery {

  private int mergeStrategy;
  private int collector;
  private Query q;

  /**
   * @param collector 0 selects {@link TestCollector}, any other value {@link TestCollector1}
   * @param mergeStrategy 0 selects {@link TestMergeStrategy}, any other value {@link TestMergeStrategy1}
   * @param q the wrapped query
   */
  public TestRankQuery(int collector, int mergeStrategy, Query q) {
    this.q = q;
    this.collector = collector;
    this.mergeStrategy = mergeStrategy;
  }

  /** Combines the collector selector with the wrapped query's hash; the merge strategy is not part of it. */
  public int hashCode() {
    return collector+q.hashCode();
  }

  /** Two rank queries are equal when the wrapped queries and the collector selectors match. */
  public boolean equals(Object o) {
    if (!(o instanceof TestRankQuery)) {
      return false;
    }
    final TestRankQuery other = (TestRankQuery) o;
    return other.q.equals(q) && other.collector == collector;
  }

  public Weight createWeight(IndexSearcher indexSearcher ) throws IOException{
    return q.createWeight(indexSearcher);
  }

  public void setBoost(float boost) {
    q.setBoost(boost);
  }

  public float getBoost() {
    return q.getBoost();
  }

  public String toString() {
    return q.toString();
  }

  public String toString(String field) {
    return q.toString(field);
  }

  /** Returns a fresh test collector; {@code len} and {@code cmd} are not consulted. */
  public TopDocsCollector getTopDocsCollector(int len, SolrIndexSearcher.QueryCommand cmd) {
    if (collector != 0) {
      return new TestCollector1(null);
    }
    return new TestCollector(null);
  }

  /** Returns a fresh instance of the merge strategy chosen at construction time. */
  public MergeStrategy getMergeStrategy() {
    if (mergeStrategy != 0) {
      return new TestMergeStrategy1();
    }
    return new TestMergeStrategy();
  }
}
/**
 * Merge strategy that merges the document ids returned by the shards and
 * orders them by descending value of the "score" field of each shard document.
 * It does not contribute any merge fields on the shards.
 */
class TestMergeStrategy implements MergeStrategy {

  public int getCost() {
    return 1;
  }

  public boolean mergesIds() {
    return true;
  }

  public boolean handlesMergeFields() {
    return false;
  }

  public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) {
  }

  /**
   * Merges the shard responses of {@code sreq} into {@code rb}: collects one
   * {@link ShardDoc} per unique document id, sorts them by descending score,
   * records their positions in {@code rb.resultIds} and installs a response
   * document list pre-sized with null placeholders.
   *
   * <p>Shards that reported an exception are skipped and cause the
   * "partialResults" header to be set. When shards.info is requested, a
   * per-shard summary is added to the response.
   *
   * @param rb the response builder receiving the merged ids
   * @param sreq the shard request whose responses are merged
   */
  public void merge(ResponseBuilder rb, ShardRequest sreq) {

    // id to shard mapping, to eliminate any accidental dups
    HashMap<Object,String> uniqueDoc = new HashMap<>();

    NamedList<Object> shardInfo = null;
    if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
      shardInfo = new SimpleOrderedMap<>();
      rb.rsp.getValues().add(ShardParams.SHARDS_INFO,shardInfo);
    }

    IndexSchema schema = rb.req.getSchema();
    SchemaField uniqueKeyField = schema.getUniqueKeyField();

    long numFound = 0;
    Float maxScore=null;
    boolean partialResults = false;
    List<ShardDoc> shardDocs = new ArrayList<>();

    for (ShardResponse srsp : sreq.responses) {
      SolrDocumentList docs = null;

      if(shardInfo!=null) {
        SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();

        if (srsp.getException() != null) {
          Throwable t = srsp.getException();
          // Report the underlying cause when there is one; a SolrServerException
          // without a cause is reported as is instead of failing with an NPE.
          if(t instanceof SolrServerException && t.getCause() != null) {
            t = t.getCause();
          }
          nl.add("error", t.toString() );
          StringWriter trace = new StringWriter();
          t.printStackTrace(new PrintWriter(trace));
          nl.add("trace", trace.toString() );
          if (srsp.getShardAddress() != null) {
            nl.add("shardAddress", srsp.getShardAddress());
          }
        }
        else {
          docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
          nl.add("numFound", docs.getNumFound());
          nl.add("maxScore", docs.getMaxScore());
          nl.add("shardAddress", srsp.getShardAddress());
        }
        if(srsp.getSolrResponse()!=null) {
          nl.add("time", srsp.getSolrResponse().getElapsedTime());
        }

        shardInfo.add(srsp.getShard(), nl);
      }
      // now that we've added the shard info, let's only proceed if we have no error.
      if (srsp.getException() != null) {
        partialResults = true;
        continue;
      }

      if (docs == null) { // could have been initialized in the shards info block above
        docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
      }

      NamedList<?> responseHeader = (NamedList<?>)srsp.getSolrResponse().getResponse().get("responseHeader");
      if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
        partialResults = true;
      }

      // calculate global maxScore and numDocsFound
      if (docs.getMaxScore() != null) {
        maxScore = maxScore==null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
      }
      numFound += docs.getNumFound();

      for (int i=0; i<docs.size(); i++) {
        SolrDocument doc = docs.get(i);
        Object id = doc.getFieldValue(uniqueKeyField.getName());

        String prevShard = uniqueDoc.put(id, srsp.getShard());
        if (prevShard != null) {
          // Duplicate id: keep the document that was encountered first and
          // do not count this one.
          numFound--;
          continue;
        }

        ShardDoc shardDoc = new ShardDoc();
        shardDoc.id = id;
        shardDoc.shard = srsp.getShard();
        shardDoc.orderInShard = i;
        // NOTE(review): the sort below reads shardDoc.score for every document,
        // so the request presumably has to include score in fl - confirm.
        Object scoreObj = doc.getFieldValue("score");
        if (scoreObj != null) {
          if (scoreObj instanceof String) {
            shardDoc.score = Float.parseFloat((String)scoreObj);
          } else {
            shardDoc.score = (Float)scoreObj;
          }
        }
        shardDocs.add(shardDoc);
      } // end for-each-doc-in-response
    } // end for-each-response

    // Highest score first.
    Collections.sort(shardDocs, new Comparator<ShardDoc>() {
      @Override
      public int compare(ShardDoc o1, ShardDoc o2) {
        if(o1.score < o2.score) {
          return 1;
        } else if (o1.score > o2.score) {
          return -1;
        } else {
          return 0;
        }
      }
    });

    int resultSize = shardDocs.size();

    Map<Object,ShardDoc> resultIds = new HashMap<>();
    for (int i=0; i<shardDocs.size(); i++) {
      ShardDoc shardDoc = shardDocs.get(i);
      shardDoc.positionInResponse = i;
      // Need the toString() for correlation with other lists that must
      // be strings (like keys in highlighting, explain, etc)
      resultIds.put(shardDoc.id.toString(), shardDoc);
    }

    // Add hits for distributed requests
    // https://issues.apache.org/jira/browse/SOLR-3518
    rb.rsp.addToLog("hits", numFound);

    SolrDocumentList responseDocs = new SolrDocumentList();
    if (maxScore!=null) responseDocs.setMaxScore(maxScore);
    responseDocs.setNumFound(numFound);
    responseDocs.setStart(0);
    // size appropriately
    for (int i=0; i<resultSize; i++) {
      responseDocs.add(null);
    }

    // save these results in a private area so we can access them
    // again when retrieving stored fields.
    // TODO: use ResponseBuilder (w/ comments) or the request context?
    rb.resultIds = resultIds;
    rb.setResponseDocs(responseDocs);

    if (partialResults) {
      rb.rsp.getResponseHeader().add( "partialResults", Boolean.TRUE );
    }
  }
}
/**
 * Merge strategy that ranks on a sort field instead of the score. On each
 * shard {@link #handleMergeFields} adds the sort values of the returned
 * documents to the response under "merge_values"; on the coordinating node
 * {@link #merge} orders the documents by descending value of the first
 * returned sort field.
 */
class TestMergeStrategy1 implements MergeStrategy {

  public int getCost() {
    return 1;
  }

  public boolean mergesIds() {
    return true;
  }

  public boolean handlesMergeFields() {
    return true;
  }

  /**
   * Adds the sort field values of the documents in the current result to the
   * response under the key "merge_values". Nothing is added unless the
   * {@link ResponseBuilder#FIELD_SORT_VALUES} request param is true.
   *
   * @param rb the response builder holding the request, response and results
   * @param searcher the searcher the results were produced with
   * @throws IOException if reading the sort values fails
   */
  public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;
    // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
    // currently have an option to return sort field values. Because of this, we
    // take the documents given and re-derive the sort values.
    //
    // TODO: See SOLR-5595
    boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false);
    if(fsv){
      NamedList<Object[]> sortVals = new NamedList<>(); // order is important for the sort fields
      IndexReaderContext topReaderContext = searcher.getTopReaderContext();
      List<AtomicReaderContext> leaves = topReaderContext.leaves();
      AtomicReaderContext currentLeaf = null;
      if (leaves.size()==1) {
        // if there is a single segment, use that subReader and avoid looking up each time
        currentLeaf = leaves.get(0);
        leaves=null;
      }

      DocList docList = rb.getResults().docList;

      // sort ids from lowest to highest so we can access them in order
      int nDocs = docList.size();
      // Each entry packs the doc id (upper 32 bits) with the document's
      // position in the result list (lower 32 bits).
      final long[] sortedIds = new long[nDocs];
      final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
      DocIterator it = docList.iterator();
      for (int i=0; i<nDocs; i++) {
        sortedIds[i] = (((long)it.nextDoc()) << 32) | i;
        scores[i] = docList.hasScores() ? it.score() : Float.NaN;
      }

      // sort ids and scores together
      new InPlaceMergeSorter() {
        @Override
        protected void swap(int i, int j) {
          long tmpId = sortedIds[i];
          float tmpScore = scores[i];
          sortedIds[i] = sortedIds[j];
          scores[i] = scores[j];
          sortedIds[j] = tmpId;
          scores[j] = tmpScore;
        }

        @Override
        protected int compare(int i, int j) {
          return Long.compare(sortedIds[i], sortedIds[j]);
        }
      }.sort(0, sortedIds.length);

      SortSpec sortSpec = rb.getSortSpec();
      Sort sort = searcher.weightSort(sortSpec.getSort());
      SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort();
      List<SchemaField> schemaFields = sortSpec.getSchemaFields();

      for (int fld = 0; fld < schemaFields.size(); fld++) {
        SchemaField schemaField = schemaFields.get(fld);
        FieldType ft = null == schemaField? null : schemaField.getType();
        SortField sortField = sortFields[fld];

        SortField.Type type = sortField.getType();
        // :TODO: would be simpler to always serialize every position of SortField[]
        if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;

        FieldComparator<?> comparator = null;
        Object[] vals = new Object[nDocs];

        int lastIdx = -1;
        int idx = 0;

        for (int i = 0; i < sortedIds.length; ++i) {
          long idAndPos = sortedIds[i];
          float score = scores[i];
          int doc = (int)(idAndPos >>> 32);
          int position = (int)idAndPos;

          if (leaves != null) {
            idx = ReaderUtil.subIndex(doc, leaves);
            currentLeaf = leaves.get(idx);
            if (idx != lastIdx) {
              // we switched segments. invalidate comparator.
              comparator = null;
              // Remember the segment so the comparator is reused for the
              // following documents of the same segment.
              lastIdx = idx;
            }
          }

          if (comparator == null) {
            comparator = sortField.getComparator(1,0);
            comparator = comparator.setNextReader(currentLeaf);
          }

          doc -= currentLeaf.docBase; // adjust for what segment this is in
          comparator.setScorer(new FakeScorer(doc, score));
          comparator.copy(0, doc);
          Object val = comparator.value(0);
          if (null != ft) val = ft.marshalSortValue(val);
          vals[position] = val;
        }

        sortVals.add(sortField.getField(), vals);
      }

      rsp.add("merge_values", sortVals);
    }
  }

  /**
   * Minimal scorer exposing a fixed doc id and score. Only {@link #docID()},
   * {@link #score()} and {@link #cost()} are supported; every other method
   * throws {@link UnsupportedOperationException}.
   */
  private class FakeScorer extends Scorer {
    final int docid;
    final float score;

    FakeScorer(int docid, float score) {
      super(null);
      this.docid = docid;
      this.score = score;
    }

    @Override
    public int docID() {
      return docid;
    }

    @Override
    public float score() throws IOException {
      return score;
    }

    @Override
    public int freq() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int nextDoc() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int advance(int target) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long cost() {
      return 1;
    }

    @Override
    public Weight getWeight() {
      throw new UnsupportedOperationException();
    }

    @Override
    public Collection<ChildScorer> getChildren() {
      throw new UnsupportedOperationException();
    }
  }

  /**
   * Merges the shard responses of {@code sreq} into {@code rb}. The value of
   * the first sort field found in each shard's "merge_values" section is used
   * as the document's score, and the merged documents are ordered by that
   * value, highest first.
   *
   * <p>Shards that reported an exception are skipped and cause the
   * "partialResults" header to be set. When shards.info is requested, a
   * per-shard summary is added to the response.
   *
   * @param rb the response builder receiving the merged ids
   * @param sreq the shard request whose responses are merged
   */
  public void merge(ResponseBuilder rb, ShardRequest sreq) {

    // id to shard mapping, to eliminate any accidental dups
    HashMap<Object,String> uniqueDoc = new HashMap<>();

    NamedList<Object> shardInfo = null;
    if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
      shardInfo = new SimpleOrderedMap<>();
      rb.rsp.getValues().add(ShardParams.SHARDS_INFO,shardInfo);
    }

    IndexSchema schema = rb.req.getSchema();
    SchemaField uniqueKeyField = schema.getUniqueKeyField();
    // The sort spec is the same for every shard response.
    SortSpec ss = rb.getSortSpec();

    long numFound = 0;
    Float maxScore=null;
    boolean partialResults = false;
    List<ShardDoc> shardDocs = new ArrayList<>();

    for (ShardResponse srsp : sreq.responses) {
      SolrDocumentList docs = null;

      if(shardInfo!=null) {
        SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();

        if (srsp.getException() != null) {
          Throwable t = srsp.getException();
          // Report the underlying cause when there is one; a SolrServerException
          // without a cause is reported as is instead of failing with an NPE.
          if(t instanceof SolrServerException && t.getCause() != null) {
            t = t.getCause();
          }
          nl.add("error", t.toString() );
          StringWriter trace = new StringWriter();
          t.printStackTrace(new PrintWriter(trace));
          nl.add("trace", trace.toString() );
          if (srsp.getShardAddress() != null) {
            nl.add("shardAddress", srsp.getShardAddress());
          }
        }
        else {
          docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
          nl.add("numFound", docs.getNumFound());
          nl.add("maxScore", docs.getMaxScore());
          nl.add("shardAddress", srsp.getShardAddress());
        }
        if(srsp.getSolrResponse()!=null) {
          nl.add("time", srsp.getSolrResponse().getElapsedTime());
        }

        shardInfo.add(srsp.getShard(), nl);
      }
      // now that we've added the shard info, let's only proceed if we have no error.
      if (srsp.getException() != null) {
        partialResults = true;
        continue;
      }

      if (docs == null) { // could have been initialized in the shards info block above
        docs = (SolrDocumentList)srsp.getSolrResponse().getResponse().get("response");
      }

      NamedList<?> responseHeader = (NamedList<?>)srsp.getSolrResponse().getResponse().get("responseHeader");
      if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
        partialResults = true;
      }

      // calculate global maxScore and numDocsFound
      if (docs.getMaxScore() != null) {
        maxScore = maxScore==null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
      }
      numFound += docs.getNumFound();

      // NOTE(review): assumes the shard response carries a non-empty
      // "merge_values" section, i.e. the shard ran handleMergeFields - confirm.
      NamedList sortFieldValues = (NamedList)(srsp.getSolrResponse().getResponse().get("merge_values"));
      NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema);
      // Only the first sort field takes part in the ranking.
      List lst = (List)unmarshalledSortFieldValues.getVal(0);

      for (int i=0; i<docs.size(); i++) {
        SolrDocument doc = docs.get(i);
        Object id = doc.getFieldValue(uniqueKeyField.getName());

        String prevShard = uniqueDoc.put(id, srsp.getShard());
        if (prevShard != null) {
          // Duplicate id: keep the document that was encountered first and
          // do not count this one.
          numFound--;
          continue;
        }

        ShardDoc shardDoc = new ShardDoc();
        shardDoc.id = id;
        shardDoc.shard = srsp.getShard();
        shardDoc.orderInShard = i;
        Object scoreObj = lst.get(i);
        if (scoreObj != null) {
          // Any numeric sort value is accepted, not only Integer.
          shardDoc.score = ((Number)scoreObj).floatValue();
        }
        shardDocs.add(shardDoc);
      } // end for-each-doc-in-response
    } // end for-each-response

    // Highest sort value first.
    Collections.sort(shardDocs, new Comparator<ShardDoc>() {
      @Override
      public int compare(ShardDoc o1, ShardDoc o2) {
        if(o1.score < o2.score) {
          return 1;
        } else if (o1.score > o2.score) {
          return -1;
        } else {
          return 0;
        }
      }
    });

    int resultSize = shardDocs.size();

    Map<Object,ShardDoc> resultIds = new HashMap<>();
    for (int i=0; i<shardDocs.size(); i++) {
      ShardDoc shardDoc = shardDocs.get(i);
      shardDoc.positionInResponse = i;
      // Need the toString() for correlation with other lists that must
      // be strings (like keys in highlighting, explain, etc)
      resultIds.put(shardDoc.id.toString(), shardDoc);
    }

    // Add hits for distributed requests
    // https://issues.apache.org/jira/browse/SOLR-3518
    rb.rsp.addToLog("hits", numFound);

    SolrDocumentList responseDocs = new SolrDocumentList();
    if (maxScore!=null) responseDocs.setMaxScore(maxScore);
    responseDocs.setNumFound(numFound);
    responseDocs.setStart(0);
    // size appropriately
    for (int i=0; i<resultSize; i++) {
      responseDocs.add(null);
    }

    // save these results in a private area so we can access them
    // again when retrieving stored fields.
    // TODO: use ResponseBuilder (w/ comments) or the request context?
    rb.resultIds = resultIds;
    rb.setResponseDocs(responseDocs);

    if (partialResults) {
      rb.rsp.getResponseHeader().add( "partialResults", Boolean.TRUE );
    }
  }

  /**
   * Converts the marshalled sort values of a shard response back into their
   * unmarshalled form, one list per sort field. Sort fields of type SCORE or
   * DOC are skipped because no values are serialized for them.
   *
   * @param sortSpec the sort spec of the request
   * @param sortFieldValues the marshalled values, keyed by sort field name
   * @param schema the index schema (not consulted by this method)
   * @return the unmarshalled values keyed by sort field name; empty when
   *         {@code sortFieldValues} is empty
   */
  private NamedList unmarshalSortValues(SortSpec sortSpec,
                                        NamedList sortFieldValues,
                                        IndexSchema schema) {
    NamedList unmarshalledSortValsPerField = new NamedList();

    if (0 == sortFieldValues.size()) return unmarshalledSortValsPerField;

    List<SchemaField> schemaFields = sortSpec.getSchemaFields();
    SortField[] sortFields = sortSpec.getSort().getSort();

    // Index into sortFieldValues; it only advances for fields that were serialized.
    int marshalledFieldNum = 0;
    for (int sortFieldNum = 0; sortFieldNum < sortFields.length; sortFieldNum++) {
      final SortField sortField = sortFields[sortFieldNum];
      final SortField.Type type = sortField.getType();

      // :TODO: would be simpler to always serialize every position of SortField[]
      if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;

      final String sortFieldName = sortField.getField();
      final String valueFieldName = sortFieldValues.getName(marshalledFieldNum);
      assert sortFieldName.equals(valueFieldName)
          : "sortFieldValues name key does not match expected SortField.getField";

      List sortVals = (List)sortFieldValues.getVal(marshalledFieldNum);

      final SchemaField schemaField = schemaFields.get(sortFieldNum);
      if (null == schemaField) {
        // No schema field for this sort: the values are passed through unchanged.
        unmarshalledSortValsPerField.add(sortField.getField(), sortVals);
      } else {
        FieldType fieldType = schemaField.getType();
        List<Object> unmarshalledSortVals = new ArrayList<>();
        for (Object sortVal : sortVals) {
          unmarshalledSortVals.add(fieldType.unmarshalSortValue(sortVal));
        }
        unmarshalledSortValsPerField.add(sortField.getField(), unmarshalledSortVals);
      }
      marshalledFieldNum++;
    }
    return unmarshalledSortValsPerField;
  }
}
/**
 * Collector that keeps every collected document and uses the value of the
 * "sort_i" field as the document's score. {@link #topDocs()} returns all
 * collected documents ordered by descending score.
 */
class TestCollector extends TopDocsCollector {

  private List<ScoreDoc> list = new ArrayList<>();
  private FieldCache.Ints values;
  private int base;

  public TestCollector(PriorityQueue pq) {
    super(pq);
  }

  public boolean acceptsDocsOutOfOrder() {
    return false;
  }

  /** Loads the "sort_i" values of the new segment and remembers its doc base. */
  public void doSetNextReader(AtomicReaderContext context) throws IOException {
    values = FieldCache.DEFAULT.getInts(context.reader(), "sort_i", false);
    base = context.docBase;
  }

  /** Records the document under its top-level doc id with its sort_i value as score. */
  public void collect(int doc) {
    list.add(new ScoreDoc(doc+base, (float)values.get(doc)));
  }

  public int topDocsSize() {
    return list.size();
  }

  /**
   * Sorts the collected documents by descending score and returns all of
   * them. The max score of the returned {@link TopDocs} is always 0.0f.
   */
  public TopDocs topDocs() {
    Collections.sort(list, new Comparator<ScoreDoc>() {
      public int compare(ScoreDoc s1, ScoreDoc s2) {
        if (s1.score == s2.score) {
          return 0;
        } else if (s1.score < s2.score) {
          return 1;
        } else {
          return -1;
        }
      }
    });
    ScoreDoc[] scoreDocs = list.toArray(new ScoreDoc[list.size()]);
    return new TopDocs(list.size(), scoreDocs, 0.0f);
  }

  /** Ignores {@code start} and {@code len} and returns the same result as {@link #topDocs()}. */
  public TopDocs topDocs(int start, int len) {
    return topDocs();
  }

  public int getTotalHits() {
    return list.size();
  }
}
/**
 * Collector that keeps every collected document together with the score
 * reported by the scorer. {@link #topDocs()} returns all collected documents
 * ordered by ascending score.
 */
class TestCollector1 extends TopDocsCollector {

  private List<ScoreDoc> list = new ArrayList<>();
  private int base;
  private Scorer scorer;

  public TestCollector1(PriorityQueue pq) {
    super(pq);
  }

  public boolean acceptsDocsOutOfOrder() {
    return false;
  }

  /** Remembers the doc base of the new segment. */
  public void doSetNextReader(AtomicReaderContext context) throws IOException {
    base = context.docBase;
  }

  public void setScorer(Scorer scorer) {
    this.scorer = scorer;
  }

  /** Records the document under its top-level doc id with the scorer's current score. */
  public void collect(int doc) throws IOException {
    list.add(new ScoreDoc(doc+base, scorer.score()));
  }

  public int topDocsSize() {
    return list.size();
  }

  /**
   * Sorts the collected documents by ascending score and returns all of
   * them. The max score of the returned {@link TopDocs} is always 0.0f.
   */
  public TopDocs topDocs() {
    Collections.sort(list, new Comparator<ScoreDoc>() {
      public int compare(ScoreDoc s1, ScoreDoc s2) {
        if (s1.score == s2.score) {
          return 0;
        } else if (s1.score > s2.score) {
          return 1;
        } else {
          return -1;
        }
      }
    });
    ScoreDoc[] scoreDocs = list.toArray(new ScoreDoc[list.size()]);
    return new TopDocs(list.size(), scoreDocs, 0.0f);
  }

  /** Ignores {@code start} and {@code len} and returns the same result as {@link #topDocs()}. */
  public TopDocs topDocs(int start, int len) {
    return topDocs();
  }

  public int getTotalHits() {
    return list.size();
  }
}
}