SOLR-3988: Fixed SolrTestCaseJ4.adoc(SolrInputDocument) to respect field and document boosts

SOLR-3981: Fixed bug that resulted in document boosts being compounded in <copyField/> destination fields git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401916 13f79535-47bb-0310-9956-ffa450edef68
2012-10-24 23:22:23 +00:00 · 2012-10-24 23:22:23 +00:00 · d4108c2b80
parent e1b693ddb2
commit d4108c2b80
4 changed files with 174 additions and 22 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -98,6 +98,12 @@ Bug Fixes
  numShards=2 collection after starting up a second core and not specifying 
  numShards. (Mark Miller)

+* SOLR-3988: Fixed SolrTestCaseJ4.adoc(SolrInputDocument) to respect 
+  field and document boosts (hossman)
+
+* SOLR-3981: Fixed bug that resulted in document boosts being compounded in
+  <copyField/> destination fields. (hossman)
+
 Other Changes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
@ -238,8 +238,6 @@ public class DocumentBuilder {
      SchemaField sfield = schema.getFieldOrNull(name);
      boolean used = false;

-      float boost = field.getBoost();
-      boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms();
      
      // Make sure it has the correct number
      if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) {
@ -248,17 +246,18 @@ public class DocumentBuilder {
              sfield.getName() + ": " +field.getValue() );
      }
      
-      if (applyBoost == false && boost != 1.0F) {
+      float fieldBoost = field.getBoost();
+      boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms();
+
+      if (applyBoost == false && fieldBoost != 1.0F) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "ERROR: "+getID(doc, schema)+"cannot set an index-time boost, unindexed or norms are omitted for field " + 
              sfield.getName() + ": " +field.getValue() );
      }

      // Lucene no longer has a native docBoost, so we have to multiply 
-      // it ourselves (do this after the applyBoost error check so we don't 
-      // give an error on fields that don't support boost just because of a 
-      // docBoost)
-      boost *= docBoost;
+      // it ourselves 
+      float compoundBoost = fieldBoost * docBoost;

      // load each field value
      boolean hasField = false;
@ -270,16 +269,20 @@ public class DocumentBuilder {
          hasField = true;
          if (sfield != null) {
            used = true;
-            addField(out, sfield, v, applyBoost ? boost : 1f);
+            addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
          }
  
-          // Check if we should copy this field to any other fields.
+          // Check if we should copy this field value to any other fields.
          // This could happen whether it is explicit or not.
          List<CopyField> copyFields = schema.getCopyFieldsList(name);
          for (CopyField cf : copyFields) {
            SchemaField destinationField = cf.getDestination();
+
+            final boolean destHasValues = 
+              (null != out.getField(destinationField.getName()));
+
            // check if the copy field is a multivalued or not
-            if (!destinationField.multiValued() && out.getField(destinationField.getName()) != null) {
+            if (!destinationField.multiValued() && destHasValues) {
              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                      "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
                              destinationField.getName() + ": " + v);
@ -292,14 +295,23 @@ public class DocumentBuilder {
            if( val instanceof String && cf.getMaxChars() > 0 ) {
              val = cf.getLimitedValue((String)val);
            }
-            addField(out, destinationField, val, destinationField.indexed() && !destinationField.omitNorms() ? boost : 1F);
+
+            // we can't copy any boost unless the dest field is 
+            // indexed & !omitNorms, but which boost we copy depends
+            // on wether the dest field already contains values (we 
+            // don't want to apply the compounded docBoost more then once)
+            final float destBoost = 
+              (destinationField.indexed() && !destinationField.omitNorms()) ?
+              (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
+            
+            addField(out, destinationField, val, destBoost);
          }
          
-          // The boost for a given field is the product of the 
+          // The final boost for a given field named is the product of the 
          // *all* boosts on values of that field. 
          // For multi-valued fields, we only want to set the boost on the
          // first field.
-          boost = 1.0f;
+          fieldBoost = compoundBoost = 1.0f;
        }
      }
      catch( SolrException ex ) {
--- a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java
+++ b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java
@ -19,13 +19,23 @@ package org.apache.solr.update;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.DocList;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.response.ResultContext;
+
 import org.junit.BeforeClass;
 import org.junit.Test;

@ -208,7 +218,7 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
    assertNull(h.validateUpdate(add(xml, new String[0])));
  }
  
-  public void testMultiValuedFielAndDocBoosts() throws Exception {
+  public void testMultiValuedFieldAndDocBoosts() throws Exception {
    SolrCore core = h.getCore();
    IndexSchema schema = core.getSchema();
    SolrInputDocument doc = new SolrInputDocument();
@ -234,11 +244,127 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
    
  }

+  public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
+    SolrCore core = h.getCore();
+    IndexSchema schema = core.getSchema();
+    SolrInputDocument doc = new SolrInputDocument();
+
+    final float DOC_BOOST = 3.0F;
+    doc.setDocumentBoost(DOC_BOOST);
+    doc.addField("id", "42");
+
+    SolrInputField inTitle = new SolrInputField( "title" );
+    inTitle.addValue( "titleA" , 2.0F ); 
+    inTitle.addValue( "titleB" , 7.0F ); 
+    final float TITLE_BOOST = 2.0F * 7.0F;
+    assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F);
+    doc.put( inTitle.getName(), inTitle );
+    
+    SolrInputField inFoo = new SolrInputField( "foo_t" );
+    inFoo.addValue( "summer time" , 1.0F );
+    inFoo.addValue( "in the city" , 5.0F ); 
+    inFoo.addValue( "living is easy" , 11.0F );
+    final float FOO_BOOST = 1.0F * 5.0F * 11.0F;
+    assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F);
+    doc.put( inFoo.getName(), inFoo );
+
+    Document out = DocumentBuilder.toDocument( doc, core.getSchema() );
+
+    IndexableField[] outTitle = out.getFields( inTitle.getName() );
+    assertEquals("wrong number of title values",
+                 2, outTitle.length);
+
+    IndexableField[] outNoNorms = out.getFields( "title_stringNoNorms" );
+    assertEquals("wrong number of nonorms values",
+                 2, outNoNorms.length);
+
+    IndexableField[] outFoo = out.getFields( inFoo.getName() );
+    assertEquals("wrong number of foo values",
+                 3, outFoo.length);
+
+    IndexableField[] outText = out.getFields( "text" );
+    assertEquals("wrong number of text values",
+                 5, outText.length);
+
+    // since Lucene no longer has native document boosts, we should find
+    // the doc boost multiplied into the boost on the first field value
+    // of each field.  All other field values should be 1.0f
+    // (lucene will multiply all of the field value boosts later)
+    assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F);
+    assertEquals(1.0F,                    outTitle[1].boost(), 0.0F);
+    //
+    assertEquals(FOO_BOOST * DOC_BOOST,   outFoo[0].boost(), 0.0F);
+    assertEquals(1.0F,                    outFoo[1].boost(), 0.0F);
+    assertEquals(1.0F,                    outFoo[2].boost(), 0.0F);
+    //
+    assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F);
+    assertEquals(1.0F,                    outText[1].boost(), 0.0F);
+    assertEquals(FOO_BOOST,               outText[2].boost(), 0.0F);
+    assertEquals(1.0F,                    outText[3].boost(), 0.0F);
+    assertEquals(1.0F,                    outText[4].boost(), 0.0F);
+    
+    // copyField dest with no norms should not have recieved any boost
+    assertEquals(1.0F, outNoNorms[0].boost(), 0.0F);
+    assertEquals(1.0F, outNoNorms[1].boost(), 0.0F);
+    
+    // now index that SolrInputDocument to check the computed norms
+
+    assertU(adoc(doc));
+    assertU(commit());
+
+    SolrQueryRequest req = req("q", "id:42");
+    try {
+      // very hack-ish
+
+      SolrQueryResponse rsp = new SolrQueryResponse();
+      core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp);
+
+      DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
+      assertTrue("can't find the doc we just added", 1 == dl.size());
+      int docid = dl.iterator().nextDoc();
+
+      SolrIndexSearcher searcher = req.getSearcher();
+      AtomicReader reader = SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader());
+
+      assertTrue("similarity doesn't extend DefaultSimilarity, " + 
+                 "config or defaults have changed since test was written",
+                 searcher.getSimilarity() instanceof DefaultSimilarity);
+
+      DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity();
+      
+      byte[] titleNorms = (byte[]) reader.normValues("title").getSource().getArray();
+      byte[] fooNorms = (byte[]) reader.normValues("foo_t").getSource().getArray();
+      byte[] textNorms = (byte[]) reader.normValues("text").getSource().getArray();
+
+      assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST),
+                   titleNorms[docid]);
+
+      assertEquals(expectedNorm(sim, 8-3, FOO_BOOST * DOC_BOOST),
+                   fooNorms[docid]);
+
+      assertEquals(expectedNorm(sim, 2 + 8-3, 
+                                TITLE_BOOST * FOO_BOOST * DOC_BOOST),
+                   textNorms[docid]);
+
+    } finally {
+      req.close();
+    }
+  }
+
  /**
-   * Its not ok to boost a field if it omits norms
+   * Given a length, and boost returns the expected encoded norm 
   */
+  private static byte expectedNorm(final DefaultSimilarity sim,
+                                   final int length, final float boost) {
+    
+    return sim.encodeNormValue(boost / ((float) Math.sqrt(length)));
+
+  }
+    
+
  public void testBoostOmitNorms() throws Exception {
    XmlDoc xml = new XmlDoc();
+    // explicitly boosting a field if that omits norms is not ok
    xml.xml = "<doc>"
        + "<field name=\"id\">ignore_exception</field>"
        + "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>"
@ -249,6 +375,12 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
    } catch (SolrException expected) {
      // expected exception
    }
+    // boosting a field that is copied to another field that omits norms is ok
+    xml.xml = "<doc>"
+      + "<field name=\"id\">42</field>"
+      + "<field name=\"title\" boost=\"3.0\">mytitle</field>"
+      + "</doc>";
+    assertNull(h.validateUpdate(add(xml, new String[0])));
  }
  
  /**
--- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
@ -27,6 +27,7 @@ import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.QuickPatchThreadsFilter;
 import org.apache.noggit.*;
+import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.*;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.params.*;
@ -667,14 +668,15 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
   * Generates a simple &lt;add&gt;&lt;doc&gt;... XML String with no options
   */
  public static String adoc(SolrInputDocument sdoc) {
-    List<String> fields = new ArrayList<String>();
-    for (SolrInputField sf : sdoc) {
-      for (Object o : sf.getValues()) {
-        fields.add(sf.getName());
-        fields.add(o.toString());
-      }
+    StringWriter out = new StringWriter(512);
+    try {
+      out.append("<add>");
+      ClientUtils.writeXML(sdoc, out);
+      out.append("</add>");
+    } catch (IOException e) {
+      throw new RuntimeException("Inexplicable IO error from StringWriter", e);
    }
-    return adoc(fields.toArray(new String[fields.size()]));
+    return out.toString();
  }