mirror of https://github.com/apache/lucene.git
SOLR-418: Adding a query elevation component. This is an optional component to elevate some documents to the top positions (or exclude them) for a given query.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@613059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
107eebe1a3
commit
ef545d107f
|
@ -186,6 +186,10 @@ New Features
|
|||
36. SOLR-446: TextResponseWriter can write SolrDocuments and SolrDocumentLists the
|
||||
same way it writes Document and DocList. (yonik, ryan)
|
||||
|
||||
37. SOLR-418: Adding a query elevation component. This is an optional component to
|
||||
elevate some documents to the top positions (or exclude them) for a given query.
|
||||
(ryan)
|
||||
|
||||
|
||||
Changes in runtime behavior
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- If this file is found in the config directory, it will only be
|
||||
loaded once at startup. If it is found in Solr's data
|
||||
directory, it will be re-loaded every commit.
|
||||
-->
|
||||
|
||||
<elevate>
|
||||
<query text="foo bar">
|
||||
<doc id="1" />
|
||||
<doc id="2" />
|
||||
<doc id="3" />
|
||||
</query>
|
||||
|
||||
<query text="ipod">
|
||||
<doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
|
||||
<doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
|
||||
</query>
|
||||
|
||||
</elevate>
|
|
@ -471,6 +471,22 @@
|
|||
-->
|
||||
</requestHandler>
|
||||
|
||||
<searchComponent name="elevator" class="org.apache.solr.handler.component.QueryElevationComponent" >
|
||||
<!-- pick a fieldType to analyze queries -->
|
||||
<str name="queryFieldType">string</str>
|
||||
<str name="config-file">elevate.xml</str>
|
||||
</searchComponent>
|
||||
|
||||
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>elevator</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
|
||||
<!-- Update request handler.
|
||||
|
||||
|
|
|
@ -0,0 +1,494 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringReader;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.WeakHashMap;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.ScoreDocComparator;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortComparatorSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.DOMUtil;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.Config;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryResponse;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.SortSpec;
|
||||
import org.apache.solr.util.VersionedFile;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
/**
|
||||
* A component to elevate some documents to the top of the result set.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware
|
||||
{
|
||||
private static Logger log = Logger.getLogger(QueryElevationComponent.class.getName());
|
||||
|
||||
// Constants used in solrconfig.xml
|
||||
static final String FIELD_TYPE = "queryFieldType";
|
||||
static final String CONFIG_FILE = "config-file";
|
||||
static final String FORCE_ELEVATION = "forceElevation";
|
||||
static final String EXCLUDE = "exclude";
|
||||
|
||||
// Runtime param -- should be in common?
|
||||
static final String ENABLE = "enableElevation";
|
||||
|
||||
private SolrParams initArgs = null;
|
||||
private Analyzer analyzer = null;
|
||||
private String idField = null;
|
||||
boolean forceElevation = false;
|
||||
|
||||
// Cache of query->elevation maps.
// When the configuration is loaded from the data directory, there is one
// entry per IndexReader. When it is loaded from the config directory, there
// is a single entry stored under the null key, and it is never re-loaded.
|
||||
final Map<IndexReader,Map<String, ElevationObj>> elevationCache =
|
||||
new WeakHashMap<IndexReader, Map<String,ElevationObj>>();
|
||||
|
||||
class ElevationObj {
|
||||
final String text;
|
||||
final String analyzed;
|
||||
final BooleanClause[] exclude;
|
||||
final BooleanQuery include;
|
||||
final Map<String,Integer> priority;
|
||||
|
||||
ElevationObj( String qstr, List<String> elevate, List<String> exclude ) throws IOException
|
||||
{
|
||||
this.text = qstr;
|
||||
this.analyzed = getAnalyzedQuery( this.text );
|
||||
|
||||
this.include = new BooleanQuery();
|
||||
this.include.setBoost( 0 );
|
||||
this.priority = new HashMap<String, Integer>();
|
||||
int max = elevate.size()+5;
|
||||
for( String id : elevate ) {
|
||||
TermQuery tq = new TermQuery( new Term( idField, id ) );
|
||||
include.add( tq, BooleanClause.Occur.SHOULD );
|
||||
this.priority.put( id, max-- );
|
||||
}
|
||||
|
||||
if( exclude == null || exclude.isEmpty() ) {
|
||||
this.exclude = null;
|
||||
}
|
||||
else {
|
||||
this.exclude = new BooleanClause[exclude.size()];
|
||||
for( int i=0; i<exclude.size(); i++ ) {
|
||||
TermQuery tq = new TermQuery( new Term( idField, exclude.get(i) ) );
|
||||
this.exclude[i] = new BooleanClause( tq, BooleanClause.Occur.MUST_NOT );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init( NamedList args )
|
||||
{
|
||||
this.initArgs = SolrParams.toSolrParams( args );
|
||||
}
|
||||
|
||||
public void inform(SolrCore core)
|
||||
{
|
||||
String a = initArgs.get( FIELD_TYPE );
|
||||
if( a != null ) {
|
||||
FieldType ft = core.getSchema().getFieldTypes().get( a );
|
||||
if( ft == null ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Unknown FieldType: '"+a+"' used in QueryElevationComponent" );
|
||||
}
|
||||
analyzer = ft.getAnalyzer();
|
||||
}
|
||||
|
||||
SchemaField sf = core.getSchema().getUniqueKeyField();
|
||||
if( sf == null ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"QueryElevationComponent requires the schema to have a uniqueKeyField" );
|
||||
}
|
||||
idField = sf.getName().intern();
|
||||
|
||||
forceElevation = initArgs.getBool( FORCE_ELEVATION, forceElevation );
|
||||
try {
|
||||
synchronized( elevationCache ) {
|
||||
elevationCache.clear();
|
||||
String f = initArgs.get( CONFIG_FILE );
|
||||
if( f == null ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"QueryElevationComponent must specify argument: '"+CONFIG_FILE
|
||||
+"' -- path to elevate.xml" );
|
||||
}
|
||||
File fC = new File( core.getResourceLoader().getConfigDir(), f );
|
||||
File fD = new File( core.getDataDir(), f );
|
||||
if( fC.exists() == fD.exists() ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"QueryElevationComponent missing config file: '"+f + "\n"
|
||||
+"either: "+fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both." );
|
||||
}
|
||||
if( fC.exists() ) {
|
||||
log.info( "Loading QueryElevation from: "+fC.getAbsolutePath() );
|
||||
Config cfg = new Config( core.getResourceLoader(), f );
|
||||
elevationCache.put(null, loadElevationMap( cfg ));
|
||||
}
|
||||
else {
|
||||
// preload the first data
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
getElevationMap( reader, core );
|
||||
}
|
||||
}
|
||||
}
|
||||
catch( Exception ex ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Error initializing QueryElevationComponent.", ex );
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, ElevationObj> getElevationMap( IndexReader reader, SolrCore core ) throws Exception
|
||||
{
|
||||
synchronized( elevationCache ) {
|
||||
Map<String, ElevationObj> map = elevationCache.get( null );
|
||||
if (map != null) return map;
|
||||
|
||||
map = elevationCache.get( reader );
|
||||
if( map == null ) {
|
||||
String f = initArgs.get( CONFIG_FILE );
|
||||
if( f == null ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"QueryElevationComponent must specify argument: "+CONFIG_FILE );
|
||||
}
|
||||
log.info( "Loading QueryElevation from data dir: "+f );
|
||||
|
||||
InputStream is = VersionedFile.getLatestFile( core.getDataDir(), f );
|
||||
Config cfg = new Config( core.getResourceLoader(), f, is, null );
|
||||
map = loadElevationMap( cfg );
|
||||
elevationCache.put( reader, map );
|
||||
}
|
||||
return map;
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, ElevationObj> loadElevationMap( Config cfg ) throws IOException
|
||||
{
|
||||
XPath xpath = XPathFactory.newInstance().newXPath();
|
||||
Map<String, ElevationObj> map = new HashMap<String, ElevationObj>();
|
||||
NodeList nodes = (NodeList)cfg.evaluate( "elevate/query", XPathConstants.NODESET );
|
||||
for (int i=0; i<nodes.getLength(); i++) {
|
||||
Node node = nodes.item( i );
|
||||
String qstr = DOMUtil.getAttr( node, "text", "missing query 'text'" );
|
||||
|
||||
NodeList children = null;
|
||||
try {
|
||||
children = (NodeList)xpath.evaluate("doc", node, XPathConstants.NODESET);
|
||||
}
|
||||
catch (XPathExpressionException e) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"query requires '<doc .../>' child" );
|
||||
}
|
||||
|
||||
ArrayList<String> include = new ArrayList<String>();
|
||||
ArrayList<String> exclude = new ArrayList<String>();
|
||||
for (int j=0; j<children.getLength(); j++) {
|
||||
Node child = children.item(j);
|
||||
String id = DOMUtil.getAttr( child, "id", "missing 'id'" );
|
||||
String e = DOMUtil.getAttr( child, EXCLUDE, null );
|
||||
if( e != null ) {
|
||||
if( Boolean.valueOf( e ) ) {
|
||||
exclude.add( id );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
include.add( id );
|
||||
}
|
||||
|
||||
ElevationObj elev = new ElevationObj( qstr, include, exclude );
|
||||
if( map.containsKey( elev.analyzed ) ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Boosting query defined twice for query: '"+elev.text+"' ("+elev.analyzed+"')" );
|
||||
}
|
||||
map.put( elev.analyzed, elev );
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helpful for testing without loading config.xml
|
||||
* @throws IOException
|
||||
*/
|
||||
void setTopQueryResults( IndexReader reader, String query, String[] ids, String[] ex ) throws IOException
|
||||
{
|
||||
if( ids == null ) {
|
||||
ids = new String[0];
|
||||
}
|
||||
if( ex == null ) {
|
||||
ex = new String[0];
|
||||
}
|
||||
|
||||
Map<String,ElevationObj> elev = elevationCache.get( reader );
|
||||
if( elev == null ) {
|
||||
elev = new HashMap<String, ElevationObj>();
|
||||
elevationCache.put( reader, elev );
|
||||
}
|
||||
ElevationObj obj = new ElevationObj( query, Arrays.asList(ids), Arrays.asList(ex) );
|
||||
elev.put( obj.analyzed, obj );
|
||||
}
|
||||
|
||||
String getAnalyzedQuery( String query ) throws IOException
|
||||
{
|
||||
if( analyzer == null ) {
|
||||
return query;
|
||||
}
|
||||
StringBuilder norm = new StringBuilder();
|
||||
TokenStream tokens = analyzer.tokenStream( null, new StringReader( query ) );
|
||||
Token token = tokens.next();
|
||||
while( token != null ) {
|
||||
norm.append( token.termText() );
|
||||
token = tokens.next();
|
||||
}
|
||||
return norm.toString();
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// SearchComponent
|
||||
//---------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void prepare(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException
|
||||
{
|
||||
SolrParams params = req.getParams();
|
||||
// A runtime param can skip
|
||||
if( !params.getBool( ENABLE, true ) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
ResponseBuilder builder = SearchHandler.getResponseBuilder( req );
|
||||
Query query = builder.getQuery();
|
||||
if( query == null ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"The QueryElevationComponent needs to be registered 'after' the query component" );
|
||||
}
|
||||
|
||||
String qstr = getAnalyzedQuery( builder.getQueryString() );
|
||||
IndexReader reader = req.getSearcher().getReader();
|
||||
ElevationObj booster = null;
|
||||
try {
|
||||
booster = getElevationMap( reader, req.getCore() ).get( qstr );
|
||||
}
|
||||
catch( Exception ex ) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Error loading elevation", ex );
|
||||
}
|
||||
|
||||
if( booster != null ) {
|
||||
// Change the query to insert forced documents
|
||||
BooleanQuery newq = new BooleanQuery( true );
|
||||
newq.add( query, BooleanClause.Occur.SHOULD );
|
||||
newq.add( booster.include, BooleanClause.Occur.SHOULD );
|
||||
if( booster.exclude != null ) {
|
||||
for( BooleanClause bq : booster.exclude ) {
|
||||
newq.add( bq );
|
||||
}
|
||||
}
|
||||
builder.setQuery( newq );
|
||||
|
||||
// if the sort is 'score desc' use a custom sorting method to
|
||||
// insert documents in their proper place
|
||||
SortSpec sortSpec = builder.getSortSpec();
|
||||
if( sortSpec.getSort() == null ) {
|
||||
sortSpec.setSort( new Sort( new SortField[] {
|
||||
new SortField(idField, new ElevationComparatorSource(booster.priority), false ),
|
||||
new SortField(null, SortField.SCORE, false)
|
||||
}));
|
||||
}
|
||||
else {
|
||||
// Check if the sort is based on score
|
||||
boolean modify = false;
|
||||
SortField[] current = sortSpec.getSort().getSort();
|
||||
ArrayList<SortField> sorts = new ArrayList<SortField>( current.length + 1 );
|
||||
// Perhaps force it to always sort by score
|
||||
if( forceElevation && current[0].getType() != SortField.SCORE ) {
|
||||
sorts.add( new SortField(idField,
|
||||
new ElevationComparatorSource(booster.priority), false ) );
|
||||
modify = true;
|
||||
}
|
||||
for( SortField sf : current ) {
|
||||
if( sf.getType() == SortField.SCORE ) {
|
||||
sorts.add( new SortField(idField,
|
||||
new ElevationComparatorSource(booster.priority), sf.getReverse() ) );
|
||||
modify = true;
|
||||
}
|
||||
sorts.add( sf );
|
||||
}
|
||||
if( modify ) {
|
||||
sortSpec.setSort( new Sort( sorts.toArray( new SortField[sorts.size()] ) ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add debugging information
|
||||
if( builder.isDebug() ) {
|
||||
List<String> match = null;
|
||||
if( booster != null ) {
|
||||
// Extract the elevated terms into a list
|
||||
match = new ArrayList<String>(booster.priority.size());
|
||||
for( Object o : booster.include.clauses() ) {
|
||||
TermQuery tq = (TermQuery)((BooleanClause)o).getQuery();
|
||||
match.add( tq.getTerm().text() );
|
||||
}
|
||||
}
|
||||
|
||||
SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
|
||||
dbg.add( "q", qstr );
|
||||
dbg.add( "match", match );
|
||||
builder.addDebugInfo( "queryBoosting", dbg );
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
|
||||
// Do nothing -- the real work is modifying the input query
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------
|
||||
// SolrInfoMBean
|
||||
//---------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Query Boosting -- boost particular documents for a given query";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getVersion() {
|
||||
return "$Revision$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSourceId() {
|
||||
return "$Id$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return "$URL$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL[] getDocs() {
|
||||
try {
|
||||
return new URL[] {
|
||||
new URL("http://wiki.apache.org/solr/QueryElevationComponent")
|
||||
};
|
||||
}
|
||||
catch (MalformedURLException e) {
|
||||
throw new RuntimeException( e );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Comparator source that knows about elevated documents
|
||||
*/
|
||||
class ElevationComparatorSource implements SortComparatorSource
|
||||
{
|
||||
private final Map<String,Integer> priority;
|
||||
|
||||
public ElevationComparatorSource( final Map<String,Integer> boosts) {
|
||||
this.priority = boosts;
|
||||
}
|
||||
|
||||
public ScoreDocComparator newComparator(final IndexReader reader, final String fieldname)
|
||||
throws IOException
|
||||
{
|
||||
|
||||
// A future alternate version could store internal docids (would need to be regenerated per IndexReader)
|
||||
// instead of loading the FieldCache instance into memory.
|
||||
|
||||
final FieldCache.StringIndex index =
|
||||
FieldCache.DEFAULT.getStringIndex(reader, fieldname);
|
||||
|
||||
return new ScoreDocComparator ()
|
||||
{
|
||||
public final int compare (final ScoreDoc d0, final ScoreDoc d1) {
|
||||
final int f0 = index.order[d0.doc];
|
||||
final int f1 = index.order[d1.doc];
|
||||
|
||||
final String id0 = index.lookup[f0];
|
||||
final String id1 = index.lookup[f1];
|
||||
|
||||
final Integer b0 = priority.get( id0 );
|
||||
final Integer b1 = priority.get( id1 );
|
||||
|
||||
final int v0 = (b0 == null) ? -1 : b0.intValue();
|
||||
final int v1 = (b1 == null) ? -1 : b1.intValue();
|
||||
|
||||
return v1 - v0;
|
||||
}
|
||||
|
||||
public Comparable sortValue (final ScoreDoc d0) {
|
||||
final int f0 = index.order[d0.doc];
|
||||
final String id0 = index.lookup[f0];
|
||||
final Integer b0 = priority.get( id0 );
|
||||
final int v0 = (b0 == null) ? -1 : b0.intValue();
|
||||
return new Integer( v0 );
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.CUSTOM;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -24,6 +24,7 @@ import org.apache.solr.core.SolrCore;
|
|||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.util.VersionedFile;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
@ -184,7 +185,7 @@ public class FileFloatSource extends ValueSource {
|
|||
InputStream is;
|
||||
String fname = "external_" + ffs.field.getName();
|
||||
try {
|
||||
is = getLatestFile(ffs.dataDir, fname);
|
||||
is = VersionedFile.getLatestFile(ffs.dataDir, fname);
|
||||
} catch (IOException e) {
|
||||
// log, use defaults
|
||||
SolrCore.log.severe("Error opening external value source file: " +e);
|
||||
|
@ -341,77 +342,4 @@ public class FileFloatSource extends ValueSource {
|
|||
}
|
||||
|
||||
|
||||
// Future: refactor/pull out into VersionedFile class
|
||||
|
||||
/* Open the latest version of a file... fileName if that exists, or
|
||||
* the last fileName.* after being sorted lexicographically.
|
||||
* Older versions of the file are deleted (and queued for deletion if
|
||||
* that fails).
|
||||
*/
|
||||
private static InputStream getLatestFile(String dirName, String fileName) throws FileNotFoundException {
|
||||
Collection<File> oldFiles=null;
|
||||
final String prefix = fileName+'.';
|
||||
File f = new File(dirName, fileName);
|
||||
InputStream is = null;
|
||||
|
||||
// there can be a race between checking for a file and opening it...
|
||||
// the user may have just put a new version in and deleted an old version.
|
||||
// try multiple times in a row.
|
||||
for (int retry=0; retry<10; retry++) {
|
||||
try {
|
||||
if (!f.exists()) {
|
||||
File dir = new File(dirName);
|
||||
String[] names = dir.list(new FilenameFilter() {
|
||||
public boolean accept(File dir, String name) {
|
||||
return name.startsWith(prefix);
|
||||
}
|
||||
});
|
||||
Arrays.sort(names);
|
||||
f = new File(dir, names[names.length-1]);
|
||||
oldFiles = new ArrayList<File>();
|
||||
for (int i=0; i<names.length-1; i++) {
|
||||
oldFiles.add(new File(dir, names[i]));
|
||||
}
|
||||
}
|
||||
|
||||
is = new FileInputStream(f);
|
||||
} catch (Exception e) {
|
||||
// swallow exception for now
|
||||
}
|
||||
}
|
||||
|
||||
// allow exception to be thrown from the final try.
|
||||
is = new FileInputStream(f);
|
||||
|
||||
// delete old files only after we have successfuly opened the newest
|
||||
if (oldFiles != null) {
|
||||
delete(oldFiles);
|
||||
}
|
||||
|
||||
return is;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static final Set<File> deleteList = new HashSet<File>();
|
||||
private static synchronized void delete(Collection<File> files) {
|
||||
synchronized (deleteList) {
|
||||
deleteList.addAll(files);
|
||||
List<File> deleted = new ArrayList<File>();
|
||||
for (File df : deleteList) {
|
||||
try {
|
||||
df.delete();
|
||||
// deleteList.remove(df);
|
||||
deleted.add(df);
|
||||
} catch (SecurityException e) {
|
||||
if (!df.exists()) {
|
||||
deleted.add(df);
|
||||
}
|
||||
}
|
||||
}
|
||||
deleteList.removeAll(deleted);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.util;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FilenameFilter;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
 * Opens the most recent version of a file that may be replaced on disk by
 * dropping in new, suffixed copies ({@code fileName.*}).
 *
 * @since solr 1.3
 */
public class VersionedFile
{
  /**
   * Open the latest version of a file... fileName if that exists, or
   * the last fileName.* after being sorted lexicographically.
   * Older versions of the file are deleted (and queued for deletion if
   * that fails).
   *
   * @param dirName  the directory to look in
   * @param fileName the base file name
   * @return an open stream on the chosen file; the caller must close it
   * @throws FileNotFoundException if no version of the file could be opened
   */
  public static InputStream getLatestFile(String dirName, String fileName) throws FileNotFoundException
  {
    Collection<File> oldFiles=null;
    final String prefix = fileName+'.';
    File f = new File(dirName, fileName);
    InputStream is = null;

    // there can be a race between checking for a file and opening it...
    // the user may have just put a new version in and deleted an old version.
    // try multiple times in a row.
    for (int retry=0; retry<10 && is==null; retry++) {
      try {
        if (!f.exists()) {
          File dir = new File(dirName);
          String[] names = dir.list(new FilenameFilter() {
            public boolean accept(File dir, String name) {
              return name.startsWith(prefix);
            }
          });
          // list() returns null for a missing/unreadable directory; nothing to pick from
          if (names == null || names.length == 0) {
            continue;
          }
          Arrays.sort(names);
          f = new File(dir, names[names.length-1]);
          oldFiles = new ArrayList<File>();
          for (int i=0; i<names.length-1; i++) {
            oldFiles.add(new File(dir, names[i]));
          }
        }

        is = new FileInputStream(f);
      } catch (Exception ignored) {
        // best effort: the file may have been swapped underneath us, so retry
      }
    }

    // allow exception to be thrown from the final try.
    if (is == null) {
      is = new FileInputStream(f);
    }

    // delete old files only after we have successfully opened the newest
    if (oldFiles != null) {
      delete(oldFiles);
    }

    return is;
  }

  /** Files whose deletion failed and should be retried on a later call. */
  private static final Set<File> deleteList = new HashSet<File>();

  /**
   * Attempts to delete the given files together with any still pending from
   * earlier calls. Files that cannot be deleted stay queued for the next call.
   */
  private static void delete(Collection<File> files) {
    synchronized (deleteList) {
      deleteList.addAll(files);
      List<File> deleted = new ArrayList<File>();
      for (File df : deleteList) {
        try {
          // File.delete() reports failure by returning false, not by throwing;
          // only forget the file once it is really gone.
          if (df.delete() || !df.exists()) {
            deleted.add(df);
          }
        } catch (SecurityException e) {
          if (!df.exists()) {
            deleted.add(df);
          }
        }
      }
      deleteList.removeAll(deleted);
    }
  }
}
|
|
@ -0,0 +1,236 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.QueryElevationComponent.ElevationObj;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
||||
|
||||
|
||||
public class QueryElevationComponentTest extends AbstractSolrTestCase {
|
||||
|
||||
/** @return the schema file used by this test */
@Override
public String getSchemaFile() {
  return "schema.xml";
}
|
||||
/** @return the solrconfig file used by this test */
@Override
public String getSolrConfigFile() {
  return "solrconfig.xml";
}
|
||||
|
||||
public void testInterface() throws Exception
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
|
||||
NamedList<String> args = new NamedList<String>();
|
||||
args.add( QueryElevationComponent.FIELD_TYPE, "string" );
|
||||
args.add( QueryElevationComponent.CONFIG_FILE, "elevate.xml" );
|
||||
|
||||
QueryElevationComponent comp = new QueryElevationComponent();
|
||||
comp.init( args );
|
||||
comp.inform( core );
|
||||
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
Map<String, ElevationObj> map = comp.getElevationMap( reader, core );
|
||||
// Make sure the boosts loaded properly
|
||||
assertEquals( 3, map.size() );
|
||||
assertEquals( 1, map.get( "XXXX" ).priority.size() );
|
||||
assertEquals( 2, map.get( "YYYY" ).priority.size() );
|
||||
assertEquals( 3, map.get( "ZZZZ" ).priority.size() );
|
||||
assertEquals( null, map.get( "xxxx" ) );
|
||||
assertEquals( null, map.get( "yyyy" ) );
|
||||
assertEquals( null, map.get( "zzzz" ) );
|
||||
|
||||
// Now test the same thing with a lowercase filter: 'lowerfilt'
|
||||
args = new NamedList<String>();
|
||||
args.add( QueryElevationComponent.FIELD_TYPE, "lowerfilt" );
|
||||
args.add( QueryElevationComponent.CONFIG_FILE, "elevate.xml" );
|
||||
|
||||
comp = new QueryElevationComponent();
|
||||
comp.init( args );
|
||||
comp.inform( core );
|
||||
map = comp.getElevationMap( reader, core );
|
||||
assertEquals( 3, map.size() );
|
||||
assertEquals( null, map.get( "XXXX" ) );
|
||||
assertEquals( null, map.get( "YYYY" ) );
|
||||
assertEquals( null, map.get( "ZZZZ" ) );
|
||||
assertEquals( 1, map.get( "xxxx" ).priority.size() );
|
||||
assertEquals( 2, map.get( "yyyy" ).priority.size() );
|
||||
assertEquals( 3, map.get( "zzzz" ).priority.size() );
|
||||
|
||||
assertEquals( "xxxx", comp.getAnalyzedQuery( "XXXX" ) );
|
||||
assertEquals( "xxxxyyyy", comp.getAnalyzedQuery( "XXXX YYYY" ) );
|
||||
}
|
||||
|
||||
public void testSorting() throws IOException
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
|
||||
assertU(adoc("id", "a", "title", "ipod", "str_s", "a" ));
|
||||
assertU(adoc("id", "b", "title", "ipod ipod", "str_s", "b" ));
|
||||
assertU(adoc("id", "c", "title", "ipod ipod ipod", "str_s", "c" ));
|
||||
|
||||
assertU(adoc("id", "x", "title", "boosted", "str_s", "x" ));
|
||||
assertU(adoc("id", "y", "title", "boosted boosted", "str_s", "y" ));
|
||||
assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s", "z" ));
|
||||
assertU(commit());
|
||||
|
||||
String query = "title:ipod";
|
||||
|
||||
Map<String,String> args = new HashMap<String, String>();
|
||||
args.put( CommonParams.Q, query );
|
||||
args.put( CommonParams.QT, "/elevate" );
|
||||
args.put( CommonParams.FL, "id,score" );
|
||||
args.put( "indent", "true" );
|
||||
//args.put( CommonParams.FL, "id,title,score" );
|
||||
SolrQueryRequest req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
|
||||
|
||||
assertQ("Make sure standard sort works as expected", req
|
||||
,"//*[@numFound='3']"
|
||||
,"//result/doc[1]/int[@name='id'][.='a']"
|
||||
,"//result/doc[2]/int[@name='id'][.='b']"
|
||||
,"//result/doc[3]/int[@name='id'][.='c']"
|
||||
);
|
||||
|
||||
// Explicitly set what gets boosted
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
QueryElevationComponent booster = (QueryElevationComponent)core.getSearchComponent( "elevate" );
|
||||
booster.elevationCache.clear();
|
||||
booster.setTopQueryResults( reader, query, new String[] { "x", "y", "z" }, null );
|
||||
|
||||
assertQ("All six should make it", req
|
||||
,"//*[@numFound='6']"
|
||||
,"//result/doc[1]/int[@name='id'][.='x']"
|
||||
,"//result/doc[2]/int[@name='id'][.='y']"
|
||||
,"//result/doc[3]/int[@name='id'][.='z']"
|
||||
,"//result/doc[4]/int[@name='id'][.='a']"
|
||||
,"//result/doc[5]/int[@name='id'][.='b']"
|
||||
,"//result/doc[6]/int[@name='id'][.='c']"
|
||||
);
|
||||
|
||||
booster.elevationCache.clear();
|
||||
|
||||
// now switch the order:
|
||||
booster.setTopQueryResults( reader, query, new String[] { "a", "x" }, null );
|
||||
assertQ("All six should make it", req
|
||||
,"//*[@numFound='4']"
|
||||
,"//result/doc[1]/int[@name='id'][.='a']"
|
||||
,"//result/doc[2]/int[@name='id'][.='x']"
|
||||
,"//result/doc[3]/int[@name='id'][.='b']"
|
||||
,"//result/doc[4]/int[@name='id'][.='c']"
|
||||
);
|
||||
|
||||
// Test reverse sort
|
||||
args.put( CommonParams.SORT, "score asc" );
|
||||
assertQ("All six should make it", req
|
||||
,"//*[@numFound='4']"
|
||||
,"//result/doc[4]/int[@name='id'][.='a']"
|
||||
,"//result/doc[3]/int[@name='id'][.='x']"
|
||||
,"//result/doc[2]/int[@name='id'][.='b']"
|
||||
,"//result/doc[1]/int[@name='id'][.='c']"
|
||||
);
|
||||
|
||||
// Try normal sort by 'id'
|
||||
// default 'forceBoost' shoudl be false
|
||||
assertEquals( false, booster.forceElevation );
|
||||
args.put( CommonParams.SORT, "str_s asc" );
|
||||
assertQ( null, req
|
||||
,"//*[@numFound='4']"
|
||||
,"//result/doc[1]/int[@name='id'][.='a']"
|
||||
,"//result/doc[2]/int[@name='id'][.='b']"
|
||||
,"//result/doc[3]/int[@name='id'][.='c']"
|
||||
,"//result/doc[4]/int[@name='id'][.='x']"
|
||||
);
|
||||
|
||||
booster.forceElevation = true;
|
||||
assertQ( null, req
|
||||
,"//*[@numFound='4']"
|
||||
,"//result/doc[1]/int[@name='id'][.='a']"
|
||||
,"//result/doc[2]/int[@name='id'][.='x']"
|
||||
,"//result/doc[3]/int[@name='id'][.='b']"
|
||||
,"//result/doc[4]/int[@name='id'][.='c']"
|
||||
);
|
||||
|
||||
// Test exclusion
|
||||
booster.elevationCache.clear();
|
||||
args.remove( CommonParams.SORT );
|
||||
booster.setTopQueryResults( reader, query, new String[] { "x" }, new String[] { "a" } );
|
||||
assertQ( null, req
|
||||
,"//*[@numFound='3']"
|
||||
,"//result/doc[1]/int[@name='id'][.='x']"
|
||||
,"//result/doc[2]/int[@name='id'][.='b']"
|
||||
,"//result/doc[3]/int[@name='id'][.='c']"
|
||||
);
|
||||
}
|
||||
|
||||
// write a test file to boost some docs
|
||||
private void writeFile( File file, String query, String ... ids ) throws Exception
|
||||
{
|
||||
PrintWriter out = new PrintWriter( new FileOutputStream( file ) );
|
||||
out.println( "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" );
|
||||
out.println( "<elevate>" );
|
||||
out.println( "<query text=\""+query+"\">" );
|
||||
for( String id : ids ) {
|
||||
out.println( " <doc id=\""+id+"\"/>" );
|
||||
}
|
||||
out.println( "</query>" );
|
||||
out.println( "</elevate>" );
|
||||
out.flush();
|
||||
out.close();
|
||||
|
||||
System.out.println( "OUT:"+file.getAbsolutePath() );
|
||||
}
|
||||
|
||||
public void testElevationReloading() throws Exception
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
|
||||
String testfile = "data-elevation.xml";
|
||||
File f = new File( core.getDataDir(), testfile );
|
||||
writeFile( f, "aaa", "A" );
|
||||
|
||||
QueryElevationComponent comp = (QueryElevationComponent)core.getSearchComponent("elevate");
|
||||
NamedList<String> args = new NamedList<String>();
|
||||
args.add( QueryElevationComponent.CONFIG_FILE, testfile );
|
||||
comp.init( args );
|
||||
comp.inform( core );
|
||||
|
||||
IndexReader reader = core.getSearcher().get().getReader();
|
||||
Map<String, ElevationObj> map = comp.getElevationMap(reader, core);
|
||||
assertTrue( map.get( "aaa" ).priority.containsKey( "A" ) );
|
||||
assertNull( map.get( "bbb" ) );
|
||||
|
||||
// now change the file
|
||||
writeFile( f, "bbb", "B" );
|
||||
assertU(commit());
|
||||
|
||||
reader = core.getSearcher().get().getReader();
|
||||
map = comp.getElevationMap(reader, core);
|
||||
assertNull( map.get( "aaa" ) );
|
||||
assertTrue( map.get( "bbb" ).priority.containsKey( "B" ) );
|
||||
}
|
||||
}
|
|
@ -19,6 +19,8 @@
|
|||
<schema name="test" version="1.0">
|
||||
<types>
|
||||
|
||||
<fieldType name="string" class="solr.StrField"/>
|
||||
|
||||
<fieldType name="ftAgain" class="solr.IntField"/>
|
||||
<fieldType name="ftAgain" class="solr.IntField"/>
|
||||
|
||||
|
@ -31,6 +33,8 @@
|
|||
<fields>
|
||||
<field name="id" type="text" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
|
||||
<fieldType name="string" class="solr.StrField"/>
|
||||
|
||||
<field name="fAgain" type="text" indexed="true" stored="true"/>
|
||||
<field name="fAgain" type="text" indexed="true" stored="true"/>
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<elevate>
|
||||
|
||||
<query text="XXXX">
|
||||
<doc id="1" />
|
||||
</query>
|
||||
|
||||
<query text="YYYY">
|
||||
<doc id="1" />
|
||||
<doc id="2" />
|
||||
</query>
|
||||
|
||||
<query text="ZZZZ">
|
||||
<doc id="1" />
|
||||
<doc id="2" />
|
||||
<doc id="3" />
|
||||
</query>
|
||||
|
||||
</elevate>
|
|
@ -25,6 +25,7 @@
|
|||
-->
|
||||
<schema name="test" version="1.0">
|
||||
<types>
|
||||
<fieldType name="string" class="solr.StrField"/>
|
||||
<fieldtype name="sint" class="solr.SortableIntField" />
|
||||
<fieldtype name="text" class="solr.TextField">
|
||||
<analyzer>
|
||||
|
|
|
@ -266,6 +266,22 @@
|
|||
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
|
||||
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
|
||||
|
||||
<!-- test elevation -->
|
||||
<searchComponent name="elevate" class="org.apache.solr.handler.component.QueryElevationComponent" >
|
||||
<str name="queryFieldType">string</str>
|
||||
<str name="config-file">elevate.xml</str>
|
||||
</searchComponent>
|
||||
|
||||
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>elevate</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<highlighting>
|
||||
<!-- Configure the standard fragmenter -->
|
||||
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
|
||||
|
|
Loading…
Reference in New Issue