From 00f5fef16202ebd52f1caa99d79e485d489e3c1d Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Fri, 18 Nov 2011 21:07:13 +0000 Subject: [PATCH] SOLR-2037: hook in DocTransformer for QueryElevationComponent to mark documents that have been boosted git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203839 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/solr/core/SolrCore.java | 4 + .../component/QueryElevationComponent.java | 21 ++++-- .../component/RealTimeGetComponent.java | 7 ++ .../solr/response/BinaryResponseWriter.java | 16 ++-- .../solr/response/TextResponseWriter.java | 11 ++- .../response/transform/DocTransformer.java | 25 +++++++ .../response/transform/DocTransformers.java | 3 +- .../transform/EditorialMarkerFactory.java | 75 +++++++++++++++++++ .../transform/RenameFieldsTransformer.java | 1 + .../response/transform/ScoreAugmenter.java | 1 + .../response/transform/TransformContext.java | 2 + .../transform/ValueSourceAugmenter.java | 1 + .../QueryElevationComponentTest.java | 39 ++++++++++ solr/example/solr/conf/solrconfig.xml | 4 + .../org/apache/solr/common/SolrDocument.java | 2 +- .../common/params/QueryElevationParams.java | 7 ++ 16 files changed, 199 insertions(+), 20 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/response/transform/EditorialMarkerFactory.java diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 26afa4e49f0..f9aa989d59d 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -1705,6 +1705,10 @@ public final class SolrCore implements SolrInfoMBean { public TransformerFactory getTransformerFactory(String name) { return transformerFactories.get(name); } + + public void addTransformerFactory(String name, TransformerFactory factory){ + transformerFactories.put(name, factory); + } /** diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index c2f1bc6b579..0fee7ad6176 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -23,14 +23,10 @@ import java.io.InputStream; import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.WeakHashMap; +import java.util.*; import org.apache.solr.common.params.QueryElevationParams; +import org.apache.solr.response.transform.EditorialMarkerFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -103,6 +99,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore final BooleanClause[] exclude; final BooleanQuery include; final Map priority; + final Set ids; // use singletons so hashCode/equals on Sort will just work final FieldComparatorSource comparatorSource; @@ -111,12 +108,14 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore { this.text = qstr; this.analyzed = getAnalyzedQuery( this.text ); + this.ids = new HashSet(); this.include = new BooleanQuery(); this.include.setBoost( 0 ); this.priority = new HashMap(); int max = elevate.size()+5; for( String id : elevate ) { + ids.add(id); TermQuery tq = new TermQuery( new Term( idField, id ) ); include.add( tq, BooleanClause.Occur.SHOULD ); this.priority.put( new BytesRef(id), max-- ); @@ -161,7 +160,13 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore "QueryElevationComponent requires the schema to have a uniqueKeyField implemented using StrField" ); } idField = sf.getName(); - + //register the EditorialMarkerFactory + EditorialMarkerFactory factory = new EditorialMarkerFactory(); + String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated"); + if (markerName == null || markerName.equals("") == true){ + markerName = "elevated"; + } + core.addTransformerFactory(markerName, factory); forceElevation = initArgs.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation ); try { synchronized( elevationCache ) { @@ -357,6 +362,8 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore } if( booster != null ) { + rb.req.getContext().put("BOOSTED", booster.ids); + // Change the query to insert forced documents if (exclusive == true){ //we only want these results diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java index 75f3f32695e..f2a25ecb422 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java @@ -31,6 +31,7 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.transform.DocTransformer; +import org.apache.solr.response.transform.TransformContext; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -40,6 +41,7 @@ import org.apache.solr.update.DocumentBuilder; import org.apache.solr.update.UpdateLog; import org.apache.solr.util.RefCounted; +import javax.xml.transform.Transformer; import java.io.IOException; import java.net.URL; import java.util.ArrayList; @@ -103,6 +105,11 @@ public class RealTimeGetComponent extends SearchComponent RefCounted searcherHolder = null; DocTransformer transformer = rsp.getReturnFields().getTransformer(); + if (transformer != null) { + TransformContext context = new TransformContext(); + context.req = req; + transformer.setContext(context); + } try { SolrIndexSearcher searcher = null; diff --git a/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java b/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java index f7062726692..2fe6fba0f8a 100755 --- a/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java @@ -119,17 +119,17 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter { protected void writeResultsBody( ResultContext res, JavaBinCodec codec ) throws IOException { DocList ids = res.docs; - TransformContext context = new TransformContext(); - context.query = res.query; - context.wantsScores = returnFields.wantsScore() && ids.hasScores(); - int sz = ids.size(); codec.writeTag(JavaBinCodec.ARR, sz); if(searcher == null) searcher = solrQueryRequest.getSearcher(); - if(schema == null) schema = solrQueryRequest.getSchema(); - - context.searcher = searcher; + if(schema == null) schema = solrQueryRequest.getSchema(); + DocTransformer transformer = returnFields.getTransformer(); + TransformContext context = new TransformContext(); + context.query = res.query; + context.wantsScores = returnFields.wantsScore() && ids.hasScores(); + context.req = solrQueryRequest; + context.searcher = searcher; if( transformer != null ) { transformer.setContext( context ); } @@ -141,7 +141,7 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter { Document doc = searcher.doc(id, fnames); SolrDocument sdoc = getDoc(doc); if( transformer != null ) { - transformer.transform(sdoc, id ); + transformer.transform(sdoc, id); } codec.writeSolrDocument(sdoc); } diff --git a/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java b/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java index 3c7d2939c03..38edc9bdf62 100644 --- a/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java @@ -146,8 +146,12 @@ public abstract class TextResponseWriter { writeDouble(name, ((Double)val).doubleValue()); } else if (val instanceof Document) { SolrDocument doc = toSolrDocument( (Document)val ); - if( returnFields.getTransformer() != null ) { - returnFields.getTransformer().transform( doc, -1 ); + DocTransformer transformer = returnFields.getTransformer(); + if( transformer != null ) { + TransformContext context = new TransformContext(); + context.req = req; + transformer.setContext(context); + transformer.transform(doc, -1); } writeSolrDocument(name, doc, returnFields, 0 ); } else if (val instanceof SolrDocument) { @@ -232,6 +236,7 @@ public abstract class TextResponseWriter { TransformContext context = new TransformContext(); context.query = res.query; context.wantsScores = fields.wantsScore() && ids.hasScores(); + context.req = req; writeStartDocumentList(name, ids.offset(), ids.size(), ids.matches(), context.wantsScores ? new Float(ids.maxScore()) : null ); @@ -248,7 +253,7 @@ public abstract class TextResponseWriter { Document doc = context.searcher.doc(id, fnames); SolrDocument sdoc = toSolrDocument( doc ); if( transformer != null ) { - transformer.transform( sdoc, id ); + transformer.transform( sdoc, id); } writeSolrDocument( null, sdoc, returnFields, i ); } diff --git a/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java index bcc827f6f55..f1cd3c80fb6 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/DocTransformer.java @@ -20,16 +20,41 @@ package org.apache.solr.response.transform; import java.io.IOException; import org.apache.solr.common.SolrDocument; +import org.apache.solr.request.SolrQueryRequest; /** + * A DocTransformer can add, remove or alter a Document before it is written out to the Response. For instance, there are implementations + * that can put explanations inline with a document, add constant values and mark items as being artificially boosted (see {@link org.apache.solr.handler.component.QueryElevationComponent}) + * + *

* New instance for each request * + * @see TransformerFactory * */ public abstract class DocTransformer { + /** + * + * @return The name of the transformer + */ public abstract String getName(); + + /** + * This is called before transform and sets + * @param context The {@link org.apache.solr.response.transform.TransformContext} stores information about the current state of things in Solr that may be + * useful for doing transformations. + */ public void setContext( TransformContext context ) {} + + /** + * This is where implementations do the actual work + * + * + * @param doc The document to alter + * @param docid The Lucene internal doc id + * @throws IOException + */ public abstract void transform(SolrDocument doc, int docid) throws IOException; @Override diff --git a/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java b/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java index 6906f2f3ea9..3be17c4e13b 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java +++ b/solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java @@ -23,6 +23,7 @@ import java.util.Iterator; import java.util.List; import org.apache.solr.common.SolrDocument; +import org.apache.solr.request.SolrQueryRequest; /** * Transform a document before it gets sent out @@ -73,7 +74,7 @@ public class DocTransformers extends DocTransformer @Override public void transform(SolrDocument doc, int docid) throws IOException { for( DocTransformer a : children ) { - a.transform( doc, docid ); + a.transform( doc, docid); } } } diff --git a/solr/core/src/java/org/apache/solr/response/transform/EditorialMarkerFactory.java b/solr/core/src/java/org/apache/solr/response/transform/EditorialMarkerFactory.java new file mode 100644 index 00000000000..1187623b1e5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/response/transform/EditorialMarkerFactory.java @@ -0,0 +1,75 @@ +package org.apache.solr.response.transform; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Set; + +import org.apache.lucene.document.Field; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; + +/** + * + * @since solr 4.0 + */ +public class EditorialMarkerFactory extends TransformerFactory +{ + @Override + public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) { + String idfield = req.getSchema().getUniqueKeyField().getName(); + return new MarkTransformer(field,idfield); + } +} + +class MarkTransformer extends TransformerWithContext +{ + final String name; + final String idFieldName; + + public MarkTransformer( String name, String idFieldName) + { + this.name = name; + this.idFieldName = idFieldName; + } + + @Override + public String getName() + { + return name; + } + + @Override + public void transform(SolrDocument doc, int docid) { + Set ids = (Set)context.req.getContext().get("BOOSTED"); + if(ids!=null) { + String key; + Object field = doc.get(idFieldName); + if (field instanceof Field){ + key = ((Field)field).stringValue(); + } else { + key = field.toString(); + } + doc.setField(name, ids.contains(key)); + } else { + //if we have no ids, that means we weren't boosting, but the user still asked for the field to be added, so just mark everything as false + doc.setField(name, Boolean.FALSE); + } + } +} + diff --git a/solr/core/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java index 1c09b653e6f..f49a8d9a24e 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java @@ -18,6 +18,7 @@ package org.apache.solr.response.transform; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; /** * Return a field with a name that is different that what is indexed diff --git a/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java b/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java index 69cae827fd4..725cafa5d79 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ScoreAugmenter.java @@ -17,6 +17,7 @@ package org.apache.solr.response.transform; import org.apache.solr.common.SolrDocument; +import org.apache.solr.request.SolrQueryRequest; /** * Simple Augmenter that adds the docId diff --git a/solr/core/src/java/org/apache/solr/response/transform/TransformContext.java b/solr/core/src/java/org/apache/solr/response/transform/TransformContext.java index dc71e58b691..66eed658482 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/TransformContext.java +++ b/solr/core/src/java/org/apache/solr/response/transform/TransformContext.java @@ -17,6 +17,7 @@ package org.apache.solr.response.transform; import org.apache.lucene.search.Query; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.DocIterator; import org.apache.solr.search.SolrIndexSearcher; @@ -32,4 +33,5 @@ public class TransformContext public boolean wantsScores = false; public DocIterator iterator; public SolrIndexSearcher searcher; + public SolrQueryRequest req; } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java b/solr/core/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java index 82b6430509e..e12a54a3e15 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java @@ -25,6 +25,7 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.util.ReaderUtil; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.QParser; import org.apache.solr.search.SolrIndexSearcher; diff --git a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index 821c838af7c..d1170c6e8e4 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -117,6 +117,45 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { } + @Test + public void testMarker() throws Exception { + assertU(adoc("id", "1", "title", "XXXX XXXX", "str_s1", "a" )); + assertU(adoc("id", "2", "title", "YYYY", "str_s1", "b" )); + assertU(adoc("id", "3", "title", "ZZZZ", "str_s1", "c" )); + + assertU(adoc("id", "4", "title", "XXXX XXXX", "str_s1", "x" )); + assertU(adoc("id", "5", "title", "YYYY YYYY", "str_s1", "y" )); + assertU(adoc("id", "6", "title", "XXXX XXXX", "str_s1", "z" )); + assertU(adoc("id", "7", "title", "AAAA", "str_s1", "a" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "XXXX", CommonParams.QT, "/elevate", + CommonParams.FL, "id, score, [elevated]") + ,"//*[@numFound='3']" + ,"//result/doc[1]/str[@name='id'][.='1']" + ,"//result/doc[2]/str[@name='id'][.='4']" + ,"//result/doc[3]/str[@name='id'][.='6']", + "//result/doc[1]/bool[@name='[elevated]'][.='true']", + "//result/doc[2]/bool[@name='[elevated]'][.='false']", + "//result/doc[3]/bool[@name='[elevated]'][.='false']" + ); + + assertQ("", req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", + CommonParams.FL, "id, score, [elevated]") + ,"//*[@numFound='1']" + ,"//result/doc[1]/str[@name='id'][.='7']", + "//result/doc[1]/bool[@name='[elevated]'][.='false']" + ); + + assertQ("", req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", + CommonParams.FL, "id, score, [elev]") + ,"//*[@numFound='1']" + ,"//result/doc[1]/str[@name='id'][.='7']", + "not(//result/doc[1]/bool[@name='[elevated]'][.='false'])", + "not(//result/doc[1]/bool[@name='[elev]'][.='false'])" // even though we asked for elev, there is no Transformer registered w/ that, so we shouldn't get a result + ); + } + @Test public void testSorting() throws IOException { diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml index b431ac0237e..cc8a5f304f0 100755 --- a/solr/example/solr/conf/solrconfig.xml +++ b/solr/example/solr/conf/solrconfig.xml @@ -1631,6 +1631,10 @@ 5 + + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The + EditorialMarkerFactory will do exactly that: + --> diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java index f2f46e6d43a..cd2aa5d737e 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java @@ -275,7 +275,7 @@ public class SolrDocument implements Map, Iterable> entrySet() { return _fields.entrySet(); } - + //TODO: Shouldn't the input parameter here be a String? The _fields map requires a String. public Object get(Object key) { return _fields.get(key); } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java index 0c15b7b2acf..cc5f0940da6 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java @@ -27,4 +27,11 @@ public interface QueryElevationParams { String ENABLE = "enableElevation"; String EXCLUSIVE = "exclusive"; String FORCE_ELEVATION = "forceElevation"; + /** + * The name of the field that editorial results will be written out as when using the QueryElevationComponent, which + * automatically configures the EditorialMarkerFactory. The default name is "elevated" + *
+ * See http://wiki.apache.org/solr/DocTransformers + */ + String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName"; }