SOLR-2977: add 'fake' excludes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1222234 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2011-12-22 13:59:11 +00:00
parent 8913ecf0aa
commit 6a6d33257e
8 changed files with 245 additions and 54 deletions

View File

@ -20,16 +20,12 @@ package org.apache.solr.handler.component;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.QueryElevationParams;
@ -40,7 +36,8 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.transform.EditorialMarkerFactory;
import org.apache.solr.response.transform.ElevatedMarkerFactory;
import org.apache.solr.response.transform.ExcludedMarkerFactory;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
@ -78,6 +75,8 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
static final String FIELD_TYPE = "queryFieldType";
static final String CONFIG_FILE = "config-file";
static final String EXCLUDE = "exclude";
public static final String BOOSTED = "BOOSTED";
public static final String EXCLUDED = "EXCLUDED";
// Runtime param -- should be in common?
@ -97,15 +96,17 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
class ElevationObj {
final String text;
final String analyzed;
final BooleanClause[] exclude;
final TermQuery [] exclude;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param
final BooleanQuery include;
final Map<BytesRef, Integer> priority;
final Set<String> ids;
final Set<String> excludeIds;
ElevationObj(String qstr, List<String> elevate, List<String> exclude) throws IOException {
this.text = qstr;
this.analyzed = getAnalyzedQuery(this.text);
this.ids = new HashSet<String>();
this.excludeIds = new HashSet<String>();
this.include = new BooleanQuery();
this.include.setBoost(0);
@ -122,10 +123,11 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
if (exclude == null || exclude.isEmpty()) {
this.exclude = null;
} else {
this.exclude = new BooleanClause[exclude.size()];
this.exclude = new TermQuery[exclude.size()];
for (int i = 0; i < exclude.size(); i++) {
TermQuery tq = new TermQuery(new Term(idField, idSchemaFT.readableToIndexed(exclude.get(i))));
this.exclude[i] = new BooleanClause(tq, BooleanClause.Occur.MUST_NOT);
String id = idSchemaFT.readableToIndexed(exclude.get(i));
excludeIds.add(id);
this.exclude[i] = new TermQuery(new Term(idField, id));
}
}
}
@ -155,12 +157,18 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
idSchemaFT = sf.getType();
idField = sf.getName();
//register the EditorialMarkerFactory
EditorialMarkerFactory factory = new EditorialMarkerFactory();
String excludeName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, "excluded");
if (excludeName == null || excludeName.equals("") == true){
excludeName = "excluded";
}
ExcludedMarkerFactory excludedMarkerFactory = new ExcludedMarkerFactory();
core.addTransformerFactory(excludeName, excludedMarkerFactory);
ElevatedMarkerFactory elevatedMarkerFactory = new ElevatedMarkerFactory();
String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated");
if (markerName == null || markerName.equals("") == true) {
markerName = "elevated";
}
core.addTransformerFactory(markerName, factory);
core.addTransformerFactory(markerName, elevatedMarkerFactory);
forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
try {
synchronized (elevationCache) {
@ -333,7 +341,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false);
// A runtime parameter can alter the config value for forceElevation
boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false);
Query query = rb.getQuery();
String qstr = rb.getQueryString();
if (query == null || qstr == null) {
@ -351,7 +359,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
}
if (booster != null) {
rb.req.getContext().put("BOOSTED", booster.ids);
rb.req.getContext().put(BOOSTED, booster.ids);
// Change the query to insert forced documents
if (exclusive == true) {
@ -362,8 +370,17 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
newq.add(query, BooleanClause.Occur.SHOULD);
newq.add(booster.include, BooleanClause.Occur.SHOULD);
if (booster.exclude != null) {
for (BooleanClause bq : booster.exclude) {
newq.add(bq);
if (markExcludes == false) {
for (TermQuery tq : booster.exclude) {
newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT));
}
} else {
//we are only going to mark items as excluded, not actually exclude them. This works
//with the EditorialMarkerFactory
rb.req.getContext().put(EXCLUDED, booster.excludeIds);
for (TermQuery tq : booster.exclude) {
newq.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
}
}
}
rb.setQuery(newq);

View File

@ -0,0 +1,79 @@
package org.apache.solr.response.transform;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.handler.component.QueryElevationComponent;
import org.apache.solr.schema.FieldType;
import java.util.Set;
/**
*
*
**/
public abstract class BaseEditorialTransformer extends TransformerWithContext {
final String name;
final String idFieldName;
final FieldType ft;
public BaseEditorialTransformer(String name, String idFieldName, FieldType ft) {
this.name = name;
this.idFieldName = idFieldName;
this.ft = ft;
}
@Override
public String getName()
{
return name;
}
@Override
public void transform(SolrDocument doc, int docid) {
//this only gets added if QueryElevationParams.MARK_EXCLUDED is true
Set<String> ids = getIdSet();
if (ids != null && ids.isEmpty() == false) {
String key = getKey(doc);
doc.setField(name, ids.contains(key));
} else {
//if we have no ids, that means we weren't marking, but the user still asked for the field to be added, so just mark everything as false
doc.setField(name, Boolean.FALSE);
}
}
protected abstract Set<String> getIdSet();
protected String getKey(SolrDocument doc) {
String key;
Object field = doc.get(idFieldName);
if (field instanceof NumericField){
key = ((Field)field).stringValue();
key = ft.readableToIndexed(key);
} else if (field instanceof Field){
key = ((Field)field).stringValue();
} else {
key = field.toString();
}
return key;
}
}

View File

@ -28,10 +28,10 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
/**
*
* Used to mark whether a document has been elevated or not
* @since solr 4.0
*/
public class EditorialMarkerFactory extends TransformerFactory
public class ElevatedMarkerFactory extends TransformerFactory
{
@Override
public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
@ -41,45 +41,16 @@ public class EditorialMarkerFactory extends TransformerFactory
}
}
class MarkTransformer extends TransformerWithContext
class MarkTransformer extends BaseEditorialTransformer
{
final String name;
final String idFieldName;
final FieldType ft;
public MarkTransformer( String name, String idFieldName, FieldType ft)
{
this.name = name;
this.idFieldName = idFieldName;
this.ft = ft;
MarkTransformer(String name, String idFieldName, FieldType ft) {
super(name, idFieldName, ft);
}
@Override
public String getName()
{
return name;
}
@Override
public void transform(SolrDocument doc, int docid) {
Set<?> ids = (Set<?>)context.req.getContext().get("BOOSTED");
if(ids!=null) {
String key;
Object field = doc.get(idFieldName);
if (field instanceof NumericField){
key = ((Field)field).stringValue();
key = ft.readableToIndexed(key);
} else if (field instanceof Field){
key = ((Field)field).stringValue();
} else {
key = field.toString();
}
doc.setField(name, ids.contains(key));
} else {
//if we have no ids, that means we weren't boosting, but the user still asked for the field to be added, so just mark everything as false
doc.setField(name, Boolean.FALSE);
}
protected Set<String> getIdSet() {
return (Set<String>) context.req.getContext().get("BOOSTED");
}
}

View File

@ -0,0 +1,58 @@
package org.apache.solr.response.transform;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Set;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.handler.component.QueryElevationComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
/**
*
* @since solr 4.0
*/
public class ExcludedMarkerFactory extends TransformerFactory
{
@Override
public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
SchemaField uniqueKeyField = req.getSchema().getUniqueKeyField();
String idfield = uniqueKeyField.getName();
return new ExcludedTransformer(field,idfield, uniqueKeyField.getType());
}
}
class ExcludedTransformer extends BaseEditorialTransformer {
public ExcludedTransformer( String name, String idFieldName, FieldType ft)
{
super(name, idFieldName, ft);
}
protected Set<String> getIdSet() {
return (Set<String>)context.req.getContext().get(QueryElevationComponent.EXCLUDED);
}
}

View File

@ -37,6 +37,10 @@
<doc id="7" />
</query>
<query text="XXXX XXXX">
<doc id="5" />
<doc id="6" exclude="true" />
</query>
</elevate>

View File

@ -157,7 +157,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
req.close();
// Make sure the boosts loaded properly
assertEquals(4, map.size());
assertEquals(5, map.size());
assertEquals(1, map.get("XXXX").priority.size());
assertEquals(2, map.get("YYYY").priority.size());
assertEquals(3, map.get("ZZZZ").priority.size());
@ -174,7 +174,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
comp.init(args);
comp.inform(core);
map = comp.getElevationMap(reader, core);
assertEquals(4, map.size());
assertEquals(5, map.size());
assertEquals(null, map.get("XXXX"));
assertEquals(null, map.get("YYYY"));
assertEquals(null, map.get("ZZZZ"));
@ -237,6 +237,52 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
}
}
@Test
public void testMarkExcludes() throws Exception {
try {
init("schema12.xml");
assertU(adoc("id", "1", "title", "XXXX XXXX", "str_s1", "a"));
assertU(adoc("id", "2", "title", "YYYY", "str_s1", "b"));
assertU(adoc("id", "3", "title", "ZZZZ", "str_s1", "c"));
assertU(adoc("id", "4", "title", "XXXX XXXX", "str_s1", "x"));
assertU(adoc("id", "5", "title", "YYYY YYYY", "str_s1", "y"));
assertU(adoc("id", "6", "title", "XXXX XXXX", "str_s1", "z"));
assertU(adoc("id", "7", "title", "AAAA", "str_s1", "a"));
assertU(commit());
assertQ("", req(CommonParams.Q, "XXXX XXXX", CommonParams.QT, "/elevate",
QueryElevationParams.MARK_EXCLUDES, "true",
CommonParams.FL, "id, score, [excluded]")
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.='5']"
, "//result/doc[2]/str[@name='id'][.='6']"
, "//result/doc[3]/str[@name='id'][.='1']"
, "//result/doc[4]/str[@name='id'][.='4']",
"//result/doc[1]/bool[@name='[excluded]'][.='false']",
"//result/doc[2]/bool[@name='[excluded]'][.='true']",
"//result/doc[3]/bool[@name='[excluded]'][.='false']",
"//result/doc[4]/bool[@name='[excluded]'][.='false']"
);
//ask for excluded as a field, but don't actually request the MARK_EXCLUDES
//thus, number 6 should not be returned, b/c it is excluded
assertQ("", req(CommonParams.Q, "XXXX XXXX", CommonParams.QT, "/elevate",
QueryElevationParams.MARK_EXCLUDES, "false",
CommonParams.FL, "id, score, [excluded]")
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.='5']"
, "//result/doc[2]/str[@name='id'][.='1']"
, "//result/doc[3]/str[@name='id'][.='4']",
"//result/doc[1]/bool[@name='[excluded]'][.='false']",
"//result/doc[2]/bool[@name='[excluded]'][.='false']",
"//result/doc[3]/bool[@name='[excluded]'][.='false']"
);
} finally {
delete();
}
}
@Test
public void testSorting() throws Exception {
try {

View File

@ -19,8 +19,10 @@
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
-->
See http://wiki.apache.org/solr/QueryElevationComponent for more info
-->
<elevate>
<query text="foo bar">
<doc id="1" />

View File

@ -34,4 +34,18 @@ public interface QueryElevationParams {
* See http://wiki.apache.org/solr/DocTransformers
*/
String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName";
/**
* The name of the field that excluded editorial results will be written out as when using the QueryElevationComponent, which
* automatically configures the EditorialMarkerFactory. The default name is "excluded". This is only used
* when {@link #MARK_EXCLUDES} is set to true at query time.
* <br/>
* See http://wiki.apache.org/solr/DocTransformers
*/
String EXCLUDE_MARKER_FIELD_NAME = "excludeMarkerFieldName";
/**
* Instead of removing excluded items from the results, passing in this parameter allows you to get back the excluded items, but to mark them
* as excluded.
*/
String MARK_EXCLUDES = "markExcludes";
}