SOLR-2977: add 'fake' excludes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1222234 13f79535-47bb-0310-9956-ffa450edef68
2011-12-22 13:59:11 +00:00 · 2011-12-22 13:59:11 +00:00 · 6a6d33257e
parent 8913ecf0aa
commit 6a6d33257e
8 changed files with 245 additions and 54 deletions
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@ -20,16 +20,12 @@ package org.apache.solr.handler.component;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DocumentStoredFieldVisitor;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SentinelIntSet;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.QueryElevationParams;
@ -40,7 +36,8 @@ import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.core.Config;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.transform.EditorialMarkerFactory;
+import org.apache.solr.response.transform.ElevatedMarkerFactory;
+import org.apache.solr.response.transform.ExcludedMarkerFactory;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.SolrIndexSearcher;
@ -78,6 +75,8 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
  static final String FIELD_TYPE = "queryFieldType";
  static final String CONFIG_FILE = "config-file";
  static final String EXCLUDE = "exclude";
+  public static final String BOOSTED = "BOOSTED";
+  public static final String EXCLUDED = "EXCLUDED";

  // Runtime param -- should be in common?

@ -97,15 +96,17 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
  class ElevationObj {
    final String text;
    final String analyzed;
-    final BooleanClause[] exclude;
+    final TermQuery [] exclude;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param
    final BooleanQuery include;
    final Map<BytesRef, Integer> priority;
    final Set<String> ids;
+    final Set<String> excludeIds;

    ElevationObj(String qstr, List<String> elevate, List<String> exclude) throws IOException {
      this.text = qstr;
      this.analyzed = getAnalyzedQuery(this.text);
      this.ids = new HashSet<String>();
+      this.excludeIds = new HashSet<String>();

      this.include = new BooleanQuery();
      this.include.setBoost(0);
@ -122,10 +123,11 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
      if (exclude == null || exclude.isEmpty()) {
        this.exclude = null;
      } else {
-        this.exclude = new BooleanClause[exclude.size()];
+        this.exclude = new TermQuery[exclude.size()];
        for (int i = 0; i < exclude.size(); i++) {
-          TermQuery tq = new TermQuery(new Term(idField, idSchemaFT.readableToIndexed(exclude.get(i))));
-          this.exclude[i] = new BooleanClause(tq, BooleanClause.Occur.MUST_NOT);
+          String id = idSchemaFT.readableToIndexed(exclude.get(i));
+          excludeIds.add(id);
+          this.exclude[i] = new TermQuery(new Term(idField, id));
        }
      }
    }
@ -155,12 +157,18 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
    idSchemaFT = sf.getType();
    idField = sf.getName();
    //register the EditorialMarkerFactory
-    EditorialMarkerFactory factory = new EditorialMarkerFactory();
+    String excludeName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, "excluded");
+    if (excludeName == null || excludeName.equals("") == true){
+      excludeName = "excluded";
+    }
+    ExcludedMarkerFactory excludedMarkerFactory = new ExcludedMarkerFactory();
+    core.addTransformerFactory(excludeName, excludedMarkerFactory);
+    ElevatedMarkerFactory elevatedMarkerFactory = new ElevatedMarkerFactory();
    String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated");
    if (markerName == null || markerName.equals("") == true) {
      markerName = "elevated";
    }
-    core.addTransformerFactory(markerName, factory);
+    core.addTransformerFactory(markerName, elevatedMarkerFactory);
    forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
    try {
      synchronized (elevationCache) {
@ -333,7 +341,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
    boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false);
    // A runtime parameter can alter the config value for forceElevation
    boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
-
+    boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false);
    Query query = rb.getQuery();
    String qstr = rb.getQueryString();
    if (query == null || qstr == null) {
@ -351,7 +359,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
    }

    if (booster != null) {
-      rb.req.getContext().put("BOOSTED", booster.ids);
+      rb.req.getContext().put(BOOSTED, booster.ids);

      // Change the query to insert forced documents
      if (exclusive == true) {
@ -362,8 +370,17 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
        newq.add(query, BooleanClause.Occur.SHOULD);
        newq.add(booster.include, BooleanClause.Occur.SHOULD);
        if (booster.exclude != null) {
-          for (BooleanClause bq : booster.exclude) {
-            newq.add(bq);
+          if (markExcludes == false) {
+            for (TermQuery tq : booster.exclude) {
+              newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT));
+            }
+          } else {
+            //we are only going to mark items as excluded, not actually exclude them.  This works
+            //with the EditorialMarkerFactory
+            rb.req.getContext().put(EXCLUDED, booster.excludeIds);
+            for (TermQuery tq : booster.exclude) {
+              newq.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
+            }
          }
        }
        rb.setQuery(newq);
--- a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java
+++ b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java
@ -0,0 +1,79 @@
+package org.apache.solr.response.transform;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.handler.component.QueryElevationComponent;
+import org.apache.solr.schema.FieldType;
+
+import java.util.Set;
+
+/**
+ *
+ *
+ **/
+public abstract class BaseEditorialTransformer extends TransformerWithContext {
+
+  final String name;
+  final String idFieldName;
+  final FieldType ft;
+
+  public BaseEditorialTransformer(String name, String idFieldName, FieldType ft) {
+    this.name = name;
+    this.idFieldName = idFieldName;
+    this.ft = ft;
+  }
+
+  @Override
+  public String getName()
+  {
+    return name;
+  }
+
+  @Override
+  public void transform(SolrDocument doc, int docid) {
+    //this only gets added if QueryElevationParams.MARK_EXCLUDED is true
+    Set<String> ids = getIdSet();
+    if (ids != null && ids.isEmpty() == false) {
+      String key = getKey(doc);
+      doc.setField(name, ids.contains(key));
+    } else {
+      //if we have no ids, that means we weren't marking, but the user still asked for the field to be added, so just mark everything as false
+      doc.setField(name, Boolean.FALSE);
+    }
+  }
+
+  protected abstract Set<String> getIdSet();
+
+  protected String getKey(SolrDocument doc) {
+    String key;
+    Object field = doc.get(idFieldName);
+    if (field instanceof NumericField){
+      key = ((Field)field).stringValue();
+      key = ft.readableToIndexed(key);
+    } else if (field instanceof Field){
+      key = ((Field)field).stringValue();
+    } else {
+      key = field.toString();
+    }
+    return key;
+  }
+}
--- a/solr/core/src/java/org/apache/solr/response/transform/EditorialMarkerFactory.java
+++ b/solr/core/src/java/org/apache/solr/response/transform/EditorialMarkerFactory.java
@ -28,10 +28,10 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;

 /**
- *
+ * Used to mark whether a document has been elevated or not
 * @since solr 4.0
 */
-public class EditorialMarkerFactory extends TransformerFactory
+public class ElevatedMarkerFactory extends TransformerFactory
 {
  @Override
  public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
@ -41,45 +41,16 @@ public class EditorialMarkerFactory extends TransformerFactory
  }
 }

-class MarkTransformer extends TransformerWithContext
+class MarkTransformer extends BaseEditorialTransformer
 {
-  final String name;
-  final String idFieldName;
-  final FieldType ft;

-  public MarkTransformer( String name, String idFieldName, FieldType ft)
-  {
-    this.name = name;
-    this.idFieldName = idFieldName;
-    this.ft = ft;
+  MarkTransformer(String name, String idFieldName, FieldType ft) {
+    super(name, idFieldName, ft);
  }

  @Override
-  public String getName()
-  {
-    return name;
-  }
-
-  @Override
-  public void transform(SolrDocument doc, int docid) {
-    Set<?> ids = (Set<?>)context.req.getContext().get("BOOSTED");
-    if(ids!=null) {
-      String key;
-      Object field = doc.get(idFieldName);
-      if (field instanceof NumericField){
-        key = ((Field)field).stringValue();
-        key = ft.readableToIndexed(key);
-      } else if (field instanceof Field){
-        key = ((Field)field).stringValue();
-      } else {
-        key = field.toString();
-      }
-
-      doc.setField(name, ids.contains(key));
-    } else {
-      //if we have no ids, that means we weren't boosting, but the user still asked for the field to be added, so just mark everything as false
-      doc.setField(name, Boolean.FALSE);
-    }
+  protected Set<String> getIdSet() {
+    return (Set<String>) context.req.getContext().get("BOOSTED");
  }
 }

--- a/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java
+++ b/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java
@ -0,0 +1,58 @@
+package org.apache.solr.response.transform;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Set;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.handler.component.QueryElevationComponent;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+
+/**
+ *
+ * @since solr 4.0
+ */
+public class ExcludedMarkerFactory extends TransformerFactory
+{
+
+  @Override
+  public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
+    SchemaField uniqueKeyField = req.getSchema().getUniqueKeyField();
+    String idfield = uniqueKeyField.getName();
+    return new ExcludedTransformer(field,idfield, uniqueKeyField.getType());
+  }
+}
+
+class ExcludedTransformer extends BaseEditorialTransformer {
+
+  public ExcludedTransformer( String name, String idFieldName, FieldType ft)
+  {
+    super(name, idFieldName, ft);
+  }
+
+  protected Set<String> getIdSet() {
+    return (Set<String>)context.req.getContext().get(QueryElevationComponent.EXCLUDED);
+  }
+
+}
+
--- a/solr/core/src/test-files/solr/conf/elevate.xml
+++ b/solr/core/src/test-files/solr/conf/elevate.xml
@ -37,6 +37,10 @@
  <doc id="7" />
 </query>

+ <query text="XXXX XXXX">
+  <doc id="5" />
+  <doc id="6" exclude="true" />
+ </query>


 </elevate>
--- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java
@ -157,7 +157,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
      req.close();

      // Make sure the boosts loaded properly
-      assertEquals(4, map.size());
+      assertEquals(5, map.size());
      assertEquals(1, map.get("XXXX").priority.size());
      assertEquals(2, map.get("YYYY").priority.size());
      assertEquals(3, map.get("ZZZZ").priority.size());
@ -174,7 +174,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
      comp.init(args);
      comp.inform(core);
      map = comp.getElevationMap(reader, core);
-      assertEquals(4, map.size());
+      assertEquals(5, map.size());
      assertEquals(null, map.get("XXXX"));
      assertEquals(null, map.get("YYYY"));
      assertEquals(null, map.get("ZZZZ"));
@ -237,6 +237,52 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
    }
  }

+  @Test
+  public void testMarkExcludes() throws Exception {
+    try {
+      init("schema12.xml");
+      assertU(adoc("id", "1", "title", "XXXX XXXX", "str_s1", "a"));
+      assertU(adoc("id", "2", "title", "YYYY", "str_s1", "b"));
+      assertU(adoc("id", "3", "title", "ZZZZ", "str_s1", "c"));
+
+      assertU(adoc("id", "4", "title", "XXXX XXXX", "str_s1", "x"));
+      assertU(adoc("id", "5", "title", "YYYY YYYY", "str_s1", "y"));
+      assertU(adoc("id", "6", "title", "XXXX XXXX", "str_s1", "z"));
+      assertU(adoc("id", "7", "title", "AAAA", "str_s1", "a"));
+      assertU(commit());
+
+      assertQ("", req(CommonParams.Q, "XXXX XXXX", CommonParams.QT, "/elevate",
+          QueryElevationParams.MARK_EXCLUDES, "true",
+          CommonParams.FL, "id, score, [excluded]")
+          , "//*[@numFound='4']"
+          , "//result/doc[1]/str[@name='id'][.='5']"
+          , "//result/doc[2]/str[@name='id'][.='6']"
+          , "//result/doc[3]/str[@name='id'][.='1']"
+          , "//result/doc[4]/str[@name='id'][.='4']",
+          "//result/doc[1]/bool[@name='[excluded]'][.='false']",
+          "//result/doc[2]/bool[@name='[excluded]'][.='true']",
+          "//result/doc[3]/bool[@name='[excluded]'][.='false']",
+          "//result/doc[4]/bool[@name='[excluded]'][.='false']"
+      );
+      //ask for excluded as a field, but don't actually request the MARK_EXCLUDES
+      //thus, number 6 should not be returned, b/c it is excluded
+      assertQ("", req(CommonParams.Q, "XXXX XXXX", CommonParams.QT, "/elevate",
+          QueryElevationParams.MARK_EXCLUDES, "false",
+          CommonParams.FL, "id, score, [excluded]")
+          , "//*[@numFound='3']"
+          , "//result/doc[1]/str[@name='id'][.='5']"
+          , "//result/doc[2]/str[@name='id'][.='1']"
+          , "//result/doc[3]/str[@name='id'][.='4']",
+          "//result/doc[1]/bool[@name='[excluded]'][.='false']",
+          "//result/doc[2]/bool[@name='[excluded]'][.='false']",
+          "//result/doc[3]/bool[@name='[excluded]'][.='false']"
+      );
+
+    } finally {
+      delete();
+    }
+  }
+
  @Test
  public void testSorting() throws Exception {
    try {
--- a/solr/example/solr/conf/elevate.xml
+++ b/solr/example/solr/conf/elevate.xml
@ -19,8 +19,10 @@
 <!-- If this file is found in the config directory, it will only be
     loaded once at startup.  If it is found in Solr's data
     directory, it will be re-loaded every commit.
-->

+   See http://wiki.apache.org/solr/QueryElevationComponent for more info
+
+-->
 <elevate>
 <query text="foo bar">
  <doc id="1" />
--- a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java
@ -34,4 +34,18 @@ public interface QueryElevationParams {
   * See http://wiki.apache.org/solr/DocTransformers
   */
  String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName";
+  /**
+   * The name of the field that excluded editorial results will be written out as when using the QueryElevationComponent, which
+   * automatically configures the EditorialMarkerFactory.  The default name is "excluded".  This is only used
+   * when {@link #MARK_EXCLUDES} is set to true at query time.
+   * <br/>
+   * See http://wiki.apache.org/solr/DocTransformers
+   */
+  String EXCLUDE_MARKER_FIELD_NAME = "excludeMarkerFieldName";
+
+  /**
+   * Instead of removing excluded items from the results, passing in this parameter allows you to get back the excluded items, but to mark them
+   * as excluded.
+   */
+  String MARK_EXCLUDES = "markExcludes";
 }