SOLR-3988: Fixed SolrTestCaseJ4.adoc(SolrInputDocument) to respect field and document boosts

SOLR-3981: Fixed bug that resulted in document boosts being compounded in <copyField/> destination fields




git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401916 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2012-10-24 23:22:23 +00:00
parent e1b693ddb2
commit d4108c2b80
4 changed files with 174 additions and 22 deletions

View File

@ -98,6 +98,12 @@ Bug Fixes
numShards=2 collection after starting up a second core and not specifying numShards=2 collection after starting up a second core and not specifying
numShards. (Mark Miller) numShards. (Mark Miller)
* SOLR-3988: Fixed SolrTestCaseJ4.adoc(SolrInputDocument) to respect
field and document boosts (hossman)
* SOLR-3981: Fixed bug that resulted in document boosts being compounded in
<copyField/> destination fields. (hossman)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -238,8 +238,6 @@ public class DocumentBuilder {
SchemaField sfield = schema.getFieldOrNull(name); SchemaField sfield = schema.getFieldOrNull(name);
boolean used = false; boolean used = false;
float boost = field.getBoost();
boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms();
// Make sure it has the correct number // Make sure it has the correct number
if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) { if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) {
@ -248,17 +246,18 @@ public class DocumentBuilder {
sfield.getName() + ": " +field.getValue() ); sfield.getName() + ": " +field.getValue() );
} }
if (applyBoost == false && boost != 1.0F) { float fieldBoost = field.getBoost();
boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms();
if (applyBoost == false && fieldBoost != 1.0F) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"ERROR: "+getID(doc, schema)+"cannot set an index-time boost, unindexed or norms are omitted for field " + "ERROR: "+getID(doc, schema)+"cannot set an index-time boost, unindexed or norms are omitted for field " +
sfield.getName() + ": " +field.getValue() ); sfield.getName() + ": " +field.getValue() );
} }
// Lucene no longer has a native docBoost, so we have to multiply // Lucene no longer has a native docBoost, so we have to multiply
// it ourselves (do this after the applyBoost error check so we don't // it ourselves
// give an error on fields that don't support boost just because of a float compoundBoost = fieldBoost * docBoost;
// docBoost)
boost *= docBoost;
// load each field value // load each field value
boolean hasField = false; boolean hasField = false;
@ -270,16 +269,20 @@ public class DocumentBuilder {
hasField = true; hasField = true;
if (sfield != null) { if (sfield != null) {
used = true; used = true;
addField(out, sfield, v, applyBoost ? boost : 1f); addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
} }
// Check if we should copy this field to any other fields. // Check if we should copy this field value to any other fields.
// This could happen whether it is explicit or not. // This could happen whether it is explicit or not.
List<CopyField> copyFields = schema.getCopyFieldsList(name); List<CopyField> copyFields = schema.getCopyFieldsList(name);
for (CopyField cf : copyFields) { for (CopyField cf : copyFields) {
SchemaField destinationField = cf.getDestination(); SchemaField destinationField = cf.getDestination();
final boolean destHasValues =
(null != out.getField(destinationField.getName()));
// check if the copy field is a multivalued or not // check if the copy field is a multivalued or not
if (!destinationField.multiValued() && out.getField(destinationField.getName()) != null) { if (!destinationField.multiValued() && destHasValues) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " + "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
destinationField.getName() + ": " + v); destinationField.getName() + ": " + v);
@ -292,14 +295,23 @@ public class DocumentBuilder {
if( val instanceof String && cf.getMaxChars() > 0 ) { if( val instanceof String && cf.getMaxChars() > 0 ) {
val = cf.getLimitedValue((String)val); val = cf.getLimitedValue((String)val);
} }
addField(out, destinationField, val, destinationField.indexed() && !destinationField.omitNorms() ? boost : 1F);
// we can't copy any boost unless the dest field is
// indexed & !omitNorms, but which boost we copy depends
// on whether the dest field already contains values (we
// don't want to apply the compounded docBoost more than once)
final float destBoost =
(destinationField.indexed() && !destinationField.omitNorms()) ?
(destHasValues ? fieldBoost : compoundBoost) : 1.0F;
addField(out, destinationField, val, destBoost);
} }
// The boost for a given field is the product of the // The final boost for a given field named is the product of the
// *all* boosts on values of that field. // *all* boosts on values of that field.
// For multi-valued fields, we only want to set the boost on the // For multi-valued fields, we only want to set the boost on the
// first field. // first field.
boost = 1.0f; fieldBoost = compoundBoost = 1.0f;
} }
} }
catch( SolrException ex ) { catch( SolrException ex ) {

View File

@ -19,13 +19,23 @@ package org.apache.solr.update;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField; import org.apache.solr.common.SolrInputField;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocList;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.ResultContext;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -208,7 +218,7 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
assertNull(h.validateUpdate(add(xml, new String[0]))); assertNull(h.validateUpdate(add(xml, new String[0])));
} }
public void testMultiValuedFielAndDocBoosts() throws Exception { public void testMultiValuedFieldAndDocBoosts() throws Exception {
SolrCore core = h.getCore(); SolrCore core = h.getCore();
IndexSchema schema = core.getSchema(); IndexSchema schema = core.getSchema();
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
@ -234,11 +244,127 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
} }
/**
 * Checks how per-field boosts and the document boost are combined, both for
 * the fields explicitly added to the input document and for the fields the
 * asserts below expect to be populated indirectly ("text" and
 * "title_stringNoNorms"). The boosts are verified first on the Lucene
 * Document returned by DocumentBuilder.toDocument, and then on the norms
 * read back from the index after the same SolrInputDocument has been added.
 */
public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
SolrCore core = h.getCore();
// NOTE(review): this local is not referenced again; toDocument below is
// handed core.getSchema() directly.
IndexSchema schema = core.getSchema();
SolrInputDocument doc = new SolrInputDocument();
final float DOC_BOOST = 3.0F;
doc.setDocumentBoost(DOC_BOOST);
doc.addField("id", "42");
SolrInputField inTitle = new SolrInputField( "title" );
inTitle.addValue( "titleA" , 2.0F );
inTitle.addValue( "titleB" , 7.0F );
// the assert below pins that the per-value boosts are multiplied together
// into a single boost for the input field
final float TITLE_BOOST = 2.0F * 7.0F;
assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F);
doc.put( inTitle.getName(), inTitle );
SolrInputField inFoo = new SolrInputField( "foo_t" );
inFoo.addValue( "summer time" , 1.0F );
inFoo.addValue( "in the city" , 5.0F );
inFoo.addValue( "living is easy" , 11.0F );
final float FOO_BOOST = 1.0F * 5.0F * 11.0F;
assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F);
doc.put( inFoo.getName(), inFoo );
Document out = DocumentBuilder.toDocument( doc, core.getSchema() );
// presumably the test schema copies "title" into both "text" and
// "title_stringNoNorms", and "foo_t" into "text" -- TODO confirm against
// the schema used by this test class; the expected counts below
// (2, 2, 3 and 2 + 3 = 5) rely on it
IndexableField[] outTitle = out.getFields( inTitle.getName() );
assertEquals("wrong number of title values",
2, outTitle.length);
IndexableField[] outNoNorms = out.getFields( "title_stringNoNorms" );
assertEquals("wrong number of nonorms values",
2, outNoNorms.length);
IndexableField[] outFoo = out.getFields( inFoo.getName() );
assertEquals("wrong number of foo values",
3, outFoo.length);
IndexableField[] outText = out.getFields( "text" );
assertEquals("wrong number of text values",
5, outText.length);
// since Lucene no longer has native document boosts, we should find
// the doc boost multiplied into the boost on the first field value
// of each field. All other field values should be 1.0f
// (lucene will multiply all of the field value boosts later)
assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F);
assertEquals(1.0F, outTitle[1].boost(), 0.0F);
//
assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F);
assertEquals(1.0F, outFoo[1].boost(), 0.0F);
assertEquals(1.0F, outFoo[2].boost(), 0.0F);
//
// "text" is expected to hold the two title values followed by the three
// foo_t values; the doc boost must show up exactly once (on the very first
// value), so the first foo_t value carries only FOO_BOOST
assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F);
assertEquals(1.0F, outText[1].boost(), 0.0F);
assertEquals(FOO_BOOST, outText[2].boost(), 0.0F);
assertEquals(1.0F, outText[3].boost(), 0.0F);
assertEquals(1.0F, outText[4].boost(), 0.0F);
// copyField dest with no norms should not have received any boost
assertEquals(1.0F, outNoNorms[0].boost(), 0.0F);
assertEquals(1.0F, outNoNorms[1].boost(), 0.0F);
// now index that SolrInputDocument to check the computed norms
assertU(adoc(doc));
assertU(commit());
SolrQueryRequest req = req("q", "id:42");
try {
// very hack-ish
// run the query by hand so the matching internal docid can be pulled out
// of the response's DocList
SolrQueryResponse rsp = new SolrQueryResponse();
core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp);
DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
assertTrue("can't find the doc we just added", 1 == dl.size());
int docid = dl.iterator().nextDoc();
SolrIndexSearcher searcher = req.getSearcher();
// wrap the searcher's top-level reader as a single AtomicReader so the
// per-field norm arrays below can be indexed directly with docid
AtomicReader reader = SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader());
assertTrue("similarity doesn't extend DefaultSimilarity, " +
"config or defaults have changed since test was written",
searcher.getSimilarity() instanceof DefaultSimilarity);
DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity();
byte[] titleNorms = (byte[]) reader.normValues("title").getSource().getArray();
byte[] fooNorms = (byte[]) reader.normValues("foo_t").getSource().getArray();
byte[] textNorms = (byte[]) reader.normValues("text").getSource().getArray();
// expected lengths: title has 2 terms; for foo_t, "8-3" is presumably the
// 8 input words minus 3 removed by analysis (e.g. stopwords) -- TODO
// confirm against the foo_t field type; text is the sum of both
assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST),
titleNorms[docid]);
assertEquals(expectedNorm(sim, 8-3, FOO_BOOST * DOC_BOOST),
fooNorms[docid]);
assertEquals(expectedNorm(sim, 2 + 8-3,
TITLE_BOOST * FOO_BOOST * DOC_BOOST),
textNorms[docid]);
} finally {
// always release the request
req.close();
}
}
/** /**
* It's not ok to boost a field if it omits norms * Given a length and a boost, returns the expected encoded norm
*/ */
private static byte expectedNorm(final DefaultSimilarity sim,
                                 final int length, final float boost) {
  // Divide the boost by sqrt(length), narrowing the square root to float
  // before the division, then let the similarity encode the value.
  final float rootOfLength = (float) Math.sqrt(length);
  final float unencodedNorm = boost / rootOfLength;
  return sim.encodeNormValue(unencodedNorm);
}
public void testBoostOmitNorms() throws Exception { public void testBoostOmitNorms() throws Exception {
XmlDoc xml = new XmlDoc(); XmlDoc xml = new XmlDoc();
// explicitly boosting a field if that omits norms is not ok
xml.xml = "<doc>" xml.xml = "<doc>"
+ "<field name=\"id\">ignore_exception</field>" + "<field name=\"id\">ignore_exception</field>"
+ "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>" + "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>"
@ -249,6 +375,12 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
} catch (SolrException expected) { } catch (SolrException expected) {
// expected exception // expected exception
} }
// boosting a field that is copied to another field that omits norms is ok
xml.xml = "<doc>"
+ "<field name=\"id\">42</field>"
+ "<field name=\"title\" boost=\"3.0\">mytitle</field>"
+ "</doc>";
assertNull(h.validateUpdate(add(xml, new String[0])));
} }
/** /**

View File

@ -27,6 +27,7 @@ import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.QuickPatchThreadsFilter; import org.apache.lucene.util.QuickPatchThreadsFilter;
import org.apache.noggit.*; import org.apache.noggit.*;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.*; import org.apache.solr.common.*;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.params.*; import org.apache.solr.common.params.*;
@ -667,14 +668,15 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
* Generates a simple &lt;add&gt;&lt;doc&gt;... XML String with no options * Generates a simple &lt;add&gt;&lt;doc&gt;... XML String with no options
*/ */
public static String adoc(SolrInputDocument sdoc) { public static String adoc(SolrInputDocument sdoc) {
List<String> fields = new ArrayList<String>(); StringWriter out = new StringWriter(512);
for (SolrInputField sf : sdoc) { try {
for (Object o : sf.getValues()) { out.append("<add>");
fields.add(sf.getName()); ClientUtils.writeXML(sdoc, out);
fields.add(o.toString()); out.append("</add>");
} } catch (IOException e) {
throw new RuntimeException("Inexplicable IO error from StringWriter", e);
} }
return adoc(fields.toArray(new String[fields.size()])); return out.toString();
} }