SOLR-2977: add 'fake' excludes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1222234 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2011-12-22 13:59:11 +00:00
parent 8913ecf0aa
commit 6a6d33257e
8 changed files with 245 additions and 54 deletions

View File

@ -20,16 +20,12 @@ package org.apache.solr.handler.component;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.*; import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet; import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.QueryElevationParams; import org.apache.solr.common.params.QueryElevationParams;
@ -40,7 +36,8 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config; import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.transform.EditorialMarkerFactory; import org.apache.solr.response.transform.ElevatedMarkerFactory;
import org.apache.solr.response.transform.ExcludedMarkerFactory;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
@ -78,6 +75,8 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
static final String FIELD_TYPE = "queryFieldType"; static final String FIELD_TYPE = "queryFieldType";
static final String CONFIG_FILE = "config-file"; static final String CONFIG_FILE = "config-file";
static final String EXCLUDE = "exclude"; static final String EXCLUDE = "exclude";
public static final String BOOSTED = "BOOSTED";
public static final String EXCLUDED = "EXCLUDED";
// Runtime param -- should be in common? // Runtime param -- should be in common?
@ -97,15 +96,17 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
class ElevationObj { class ElevationObj {
final String text; final String text;
final String analyzed; final String analyzed;
final BooleanClause[] exclude; final TermQuery [] exclude;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param
final BooleanQuery include; final BooleanQuery include;
final Map<BytesRef, Integer> priority; final Map<BytesRef, Integer> priority;
final Set<String> ids; final Set<String> ids;
final Set<String> excludeIds;
ElevationObj(String qstr, List<String> elevate, List<String> exclude) throws IOException { ElevationObj(String qstr, List<String> elevate, List<String> exclude) throws IOException {
this.text = qstr; this.text = qstr;
this.analyzed = getAnalyzedQuery(this.text); this.analyzed = getAnalyzedQuery(this.text);
this.ids = new HashSet<String>(); this.ids = new HashSet<String>();
this.excludeIds = new HashSet<String>();
this.include = new BooleanQuery(); this.include = new BooleanQuery();
this.include.setBoost(0); this.include.setBoost(0);
@ -122,10 +123,11 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
if (exclude == null || exclude.isEmpty()) { if (exclude == null || exclude.isEmpty()) {
this.exclude = null; this.exclude = null;
} else { } else {
this.exclude = new BooleanClause[exclude.size()]; this.exclude = new TermQuery[exclude.size()];
for (int i = 0; i < exclude.size(); i++) { for (int i = 0; i < exclude.size(); i++) {
TermQuery tq = new TermQuery(new Term(idField, idSchemaFT.readableToIndexed(exclude.get(i)))); String id = idSchemaFT.readableToIndexed(exclude.get(i));
this.exclude[i] = new BooleanClause(tq, BooleanClause.Occur.MUST_NOT); excludeIds.add(id);
this.exclude[i] = new TermQuery(new Term(idField, id));
} }
} }
} }
@ -155,12 +157,18 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
idSchemaFT = sf.getType(); idSchemaFT = sf.getType();
idField = sf.getName(); idField = sf.getName();
//register the EditorialMarkerFactory //register the EditorialMarkerFactory
EditorialMarkerFactory factory = new EditorialMarkerFactory(); String excludeName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, "excluded");
if (excludeName == null || excludeName.equals("") == true){
excludeName = "excluded";
}
ExcludedMarkerFactory excludedMarkerFactory = new ExcludedMarkerFactory();
core.addTransformerFactory(excludeName, excludedMarkerFactory);
ElevatedMarkerFactory elevatedMarkerFactory = new ElevatedMarkerFactory();
String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated"); String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated");
if (markerName == null || markerName.equals("") == true) { if (markerName == null || markerName.equals("") == true) {
markerName = "elevated"; markerName = "elevated";
} }
core.addTransformerFactory(markerName, factory); core.addTransformerFactory(markerName, elevatedMarkerFactory);
forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
try { try {
synchronized (elevationCache) { synchronized (elevationCache) {
@ -333,7 +341,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false); boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false);
// A runtime parameter can alter the config value for forceElevation // A runtime parameter can alter the config value for forceElevation
boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false);
Query query = rb.getQuery(); Query query = rb.getQuery();
String qstr = rb.getQueryString(); String qstr = rb.getQueryString();
if (query == null || qstr == null) { if (query == null || qstr == null) {
@ -351,7 +359,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
} }
if (booster != null) { if (booster != null) {
rb.req.getContext().put("BOOSTED", booster.ids); rb.req.getContext().put(BOOSTED, booster.ids);
// Change the query to insert forced documents // Change the query to insert forced documents
if (exclusive == true) { if (exclusive == true) {
@ -362,8 +370,17 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
newq.add(query, BooleanClause.Occur.SHOULD); newq.add(query, BooleanClause.Occur.SHOULD);
newq.add(booster.include, BooleanClause.Occur.SHOULD); newq.add(booster.include, BooleanClause.Occur.SHOULD);
if (booster.exclude != null) { if (booster.exclude != null) {
for (BooleanClause bq : booster.exclude) { if (markExcludes == false) {
newq.add(bq); for (TermQuery tq : booster.exclude) {
newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT));
}
} else {
//we are only going to mark items as excluded, not actually exclude them. This works
//with the ExcludedMarkerFactory
rb.req.getContext().put(EXCLUDED, booster.excludeIds);
for (TermQuery tq : booster.exclude) {
newq.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
}
} }
} }
rb.setQuery(newq); rb.setQuery(newq);

View File

@ -0,0 +1,79 @@
package org.apache.solr.response.transform;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.handler.component.QueryElevationComponent;
import org.apache.solr.schema.FieldType;
import java.util.Set;
/**
 * Base class for transformers that add a boolean marker field to every returned
 * document. The marker is {@code true} when the document's id is contained in
 * the id set supplied by the subclass via {@link #getIdSet()}, and
 * {@code false} otherwise.
 */
public abstract class BaseEditorialTransformer extends TransformerWithContext {

  final String name;
  final String idFieldName;
  final FieldType ft;

  /**
   * @param name        name of the marker field that is set on each document
   * @param idFieldName name of the document field the id is read from
   * @param ft          field type used to convert a numeric id to its indexed form
   */
  public BaseEditorialTransformer(String name, String idFieldName, FieldType ft) {
    this.name = name;
    this.idFieldName = idFieldName;
    this.ft = ft;
  }

  @Override
  public String getName() {
    return name;
  }

  /**
   * Sets the marker field on {@code doc}.
   *
   * @param doc   the document being written out
   * @param docid unused by this implementation
   */
  @Override
  public void transform(SolrDocument doc, int docid) {
    Set<String> ids = getIdSet();
    if (ids == null || ids.isEmpty()) {
      // If we have no ids, that means we weren't marking, but the user still asked
      // for the field to be added, so just mark everything as false.
      doc.setField(name, Boolean.FALSE);
      return;
    }
    String key = getKey(doc);
    // A document that carries no id value cannot be a member of the id set; mark it
    // false instead of failing while looking it up.
    doc.setField(name, key != null && ids.contains(key));
  }

  /**
   * @return the ids that should be marked {@code true}; {@code null} or an empty
   *         set means every document is marked {@code false}
   */
  protected abstract Set<String> getIdSet();

  /**
   * Extracts the key that is looked up in the id set.
   *
   * @param doc the document being transformed
   * @return the id in the form it is compared against the id set, or {@code null}
   *         if the document has no value for the id field
   */
  protected String getKey(SolrDocument doc) {
    Object field = doc.get(idFieldName);
    if (field == null) {
      // The id field is not present on this document.
      return null;
    }
    String key;
    if (field instanceof NumericField) {
      // Numeric ids are converted to their indexed representation before lookup.
      key = ((Field) field).stringValue();
      key = ft.readableToIndexed(key);
    } else if (field instanceof Field) {
      key = ((Field) field).stringValue();
    } else {
      key = field.toString();
    }
    return key;
  }
}

View File

@ -28,10 +28,10 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
/** /**
* * Used to mark whether a document has been elevated or not
* @since solr 4.0 * @since solr 4.0
*/ */
public class EditorialMarkerFactory extends TransformerFactory public class ElevatedMarkerFactory extends TransformerFactory
{ {
@Override @Override
public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) { public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
@ -41,45 +41,16 @@ public class EditorialMarkerFactory extends TransformerFactory
} }
} }
class MarkTransformer extends TransformerWithContext class MarkTransformer extends BaseEditorialTransformer
{ {
final String name;
final String idFieldName;
final FieldType ft;
public MarkTransformer( String name, String idFieldName, FieldType ft) MarkTransformer(String name, String idFieldName, FieldType ft) {
{ super(name, idFieldName, ft);
this.name = name;
this.idFieldName = idFieldName;
this.ft = ft;
} }
@Override @Override
public String getName() protected Set<String> getIdSet() {
{ return (Set<String>) context.req.getContext().get("BOOSTED");
return name;
}
@Override
public void transform(SolrDocument doc, int docid) {
Set<?> ids = (Set<?>)context.req.getContext().get("BOOSTED");
if(ids!=null) {
String key;
Object field = doc.get(idFieldName);
if (field instanceof NumericField){
key = ((Field)field).stringValue();
key = ft.readableToIndexed(key);
} else if (field instanceof Field){
key = ((Field)field).stringValue();
} else {
key = field.toString();
}
doc.setField(name, ids.contains(key));
} else {
//if we have no ids, that means we weren't boosting, but the user still asked for the field to be added, so just mark everything as false
doc.setField(name, Boolean.FALSE);
}
} }
} }

View File

@ -0,0 +1,58 @@
package org.apache.solr.response.transform;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Set;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.handler.component.QueryElevationComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
/**
 * Factory for the transformer that marks whether a returned document is in the
 * request's set of excluded ids. The transformer is built from the schema's
 * unique key field.
 *
 * @since solr 4.0
 */
public class ExcludedMarkerFactory extends TransformerFactory {

  /**
   * @param field  name of the marker field the transformer writes
   * @param params transformer parameters; not used here
   * @param req    the current request, used to look up the schema's unique key field
   * @return a transformer keyed on the unique key field's name and type
   */
  @Override
  public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
    final SchemaField keyField = req.getSchema().getUniqueKeyField();
    final String keyFieldName = keyField.getName();
    final FieldType keyFieldType = keyField.getType();
    return new ExcludedTransformer(field, keyFieldName, keyFieldType);
  }
}
class ExcludedTransformer extends BaseEditorialTransformer {
public ExcludedTransformer( String name, String idFieldName, FieldType ft)
{
super(name, idFieldName, ft);
}
protected Set<String> getIdSet() {
return (Set<String>)context.req.getContext().get(QueryElevationComponent.EXCLUDED);
}
}

View File

@ -37,6 +37,10 @@
<doc id="7" /> <doc id="7" />
</query> </query>
<query text="XXXX XXXX">
<doc id="5" />
<doc id="6" exclude="true" />
</query>
</elevate> </elevate>

View File

@ -157,7 +157,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
req.close(); req.close();
// Make sure the boosts loaded properly // Make sure the boosts loaded properly
assertEquals(4, map.size()); assertEquals(5, map.size());
assertEquals(1, map.get("XXXX").priority.size()); assertEquals(1, map.get("XXXX").priority.size());
assertEquals(2, map.get("YYYY").priority.size()); assertEquals(2, map.get("YYYY").priority.size());
assertEquals(3, map.get("ZZZZ").priority.size()); assertEquals(3, map.get("ZZZZ").priority.size());
@ -174,7 +174,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
comp.init(args); comp.init(args);
comp.inform(core); comp.inform(core);
map = comp.getElevationMap(reader, core); map = comp.getElevationMap(reader, core);
assertEquals(4, map.size()); assertEquals(5, map.size());
assertEquals(null, map.get("XXXX")); assertEquals(null, map.get("XXXX"));
assertEquals(null, map.get("YYYY")); assertEquals(null, map.get("YYYY"));
assertEquals(null, map.get("ZZZZ")); assertEquals(null, map.get("ZZZZ"));
@ -237,6 +237,52 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
} }
} }
/**
 * Checks the markExcludes query parameter: when it is true the excluded document
 * is still returned and flagged in the [excluded] field; when it is false the
 * excluded document is dropped and every returned document is flagged false.
 */
@Test
public void testMarkExcludes() throws Exception {
  // Request pieces shared by both queries below.
  final String queryText = "XXXX XXXX";
  final String handler = "/elevate";
  final String returnFields = "id, score, [excluded]";
  try {
    init("schema12.xml");

    assertU(adoc("id", "1", "title", "XXXX XXXX", "str_s1", "a"));
    assertU(adoc("id", "2", "title", "YYYY", "str_s1", "b"));
    assertU(adoc("id", "3", "title", "ZZZZ", "str_s1", "c"));
    assertU(adoc("id", "4", "title", "XXXX XXXX", "str_s1", "x"));
    assertU(adoc("id", "5", "title", "YYYY YYYY", "str_s1", "y"));
    assertU(adoc("id", "6", "title", "XXXX XXXX", "str_s1", "z"));
    assertU(adoc("id", "7", "title", "AAAA", "str_s1", "a"));
    assertU(commit());

    // Marking enabled: doc 6 stays in the results and is the only one flagged.
    assertQ("",
        req(CommonParams.Q, queryText,
            CommonParams.QT, handler,
            QueryElevationParams.MARK_EXCLUDES, "true",
            CommonParams.FL, returnFields),
        "//*[@numFound='4']",
        "//result/doc[1]/str[@name='id'][.='5']",
        "//result/doc[2]/str[@name='id'][.='6']",
        "//result/doc[3]/str[@name='id'][.='1']",
        "//result/doc[4]/str[@name='id'][.='4']",
        "//result/doc[1]/bool[@name='[excluded]'][.='false']",
        "//result/doc[2]/bool[@name='[excluded]'][.='true']",
        "//result/doc[3]/bool[@name='[excluded]'][.='false']",
        "//result/doc[4]/bool[@name='[excluded]'][.='false']");

    // The [excluded] field is requested but marking is off, so doc 6 is really
    // excluded and must not come back.
    assertQ("",
        req(CommonParams.Q, queryText,
            CommonParams.QT, handler,
            QueryElevationParams.MARK_EXCLUDES, "false",
            CommonParams.FL, returnFields),
        "//*[@numFound='3']",
        "//result/doc[1]/str[@name='id'][.='5']",
        "//result/doc[2]/str[@name='id'][.='1']",
        "//result/doc[3]/str[@name='id'][.='4']",
        "//result/doc[1]/bool[@name='[excluded]'][.='false']",
        "//result/doc[2]/bool[@name='[excluded]'][.='false']",
        "//result/doc[3]/bool[@name='[excluded]'][.='false']");
  } finally {
    delete();
  }
}
@Test @Test
public void testSorting() throws Exception { public void testSorting() throws Exception {
try { try {

View File

@ -19,8 +19,10 @@
<!-- If this file is found in the config directory, it will only be <!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit. directory, it will be re-loaded every commit.
-->
See http://wiki.apache.org/solr/QueryElevationComponent for more info
-->
<elevate> <elevate>
<query text="foo bar"> <query text="foo bar">
<doc id="1" /> <doc id="1" />

View File

@ -34,4 +34,18 @@ public interface QueryElevationParams {
* See http://wiki.apache.org/solr/DocTransformers * See http://wiki.apache.org/solr/DocTransformers
*/ */
String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName"; String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName";
/**
* The name of the field that excluded editorial results will be written out as when using the QueryElevationComponent, which
* automatically configures the ExcludedMarkerFactory. The default name is "excluded". Documents are only marked as
* excluded when {@link #MARK_EXCLUDES} is set to true at query time.
* <br/>
* See http://wiki.apache.org/solr/DocTransformers
*/
String EXCLUDE_MARKER_FIELD_NAME = "excludeMarkerFieldName";
/**
* Instead of removing excluded items from the results, passing in this parameter allows you to get back the excluded items, but to mark them
* as excluded. This is a query-time boolean parameter; the QueryElevationComponent treats it as false when it is not supplied.
*/
String MARK_EXCLUDES = "markExcludes";
} }