mirror of https://github.com/apache/lucene.git
SOLR-3988: Fixed SolrTestCaseJ4.adoc(SolrInputDocument) to respect field and document boosts
SOLR-3981: Fixed bug that resulted in document boosts being compounded in <copyField/> destination fields git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401916 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e1b693ddb2
commit
d4108c2b80
|
@ -98,6 +98,12 @@ Bug Fixes
|
||||||
numShards=2 collection after starting up a second core and not specifying
|
numShards=2 collection after starting up a second core and not specifying
|
||||||
numShards. (Mark Miller)
|
numShards. (Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-3988: Fixed SolrTestCaseJ4.adoc(SolrInputDocument) to respect
|
||||||
|
field and document boosts (hossman)
|
||||||
|
|
||||||
|
* SOLR-3981: Fixed bug that resulted in document boosts being compounded in
|
||||||
|
<copyField/> destination fields. (hossman)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -238,8 +238,6 @@ public class DocumentBuilder {
|
||||||
SchemaField sfield = schema.getFieldOrNull(name);
|
SchemaField sfield = schema.getFieldOrNull(name);
|
||||||
boolean used = false;
|
boolean used = false;
|
||||||
|
|
||||||
float boost = field.getBoost();
|
|
||||||
boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms();
|
|
||||||
|
|
||||||
// Make sure it has the correct number
|
// Make sure it has the correct number
|
||||||
if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) {
|
if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) {
|
||||||
|
@ -248,17 +246,18 @@ public class DocumentBuilder {
|
||||||
sfield.getName() + ": " +field.getValue() );
|
sfield.getName() + ": " +field.getValue() );
|
||||||
}
|
}
|
||||||
|
|
||||||
if (applyBoost == false && boost != 1.0F) {
|
float fieldBoost = field.getBoost();
|
||||||
|
boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms();
|
||||||
|
|
||||||
|
if (applyBoost == false && fieldBoost != 1.0F) {
|
||||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
|
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
|
||||||
"ERROR: "+getID(doc, schema)+"cannot set an index-time boost, unindexed or norms are omitted for field " +
|
"ERROR: "+getID(doc, schema)+"cannot set an index-time boost, unindexed or norms are omitted for field " +
|
||||||
sfield.getName() + ": " +field.getValue() );
|
sfield.getName() + ": " +field.getValue() );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lucene no longer has a native docBoost, so we have to multiply
|
// Lucene no longer has a native docBoost, so we have to multiply
|
||||||
// it ourselves (do this after the applyBoost error check so we don't
|
// it ourselves
|
||||||
// give an error on fields that don't support boost just because of a
|
float compoundBoost = fieldBoost * docBoost;
|
||||||
// docBoost)
|
|
||||||
boost *= docBoost;
|
|
||||||
|
|
||||||
// load each field value
|
// load each field value
|
||||||
boolean hasField = false;
|
boolean hasField = false;
|
||||||
|
@ -270,16 +269,20 @@ public class DocumentBuilder {
|
||||||
hasField = true;
|
hasField = true;
|
||||||
if (sfield != null) {
|
if (sfield != null) {
|
||||||
used = true;
|
used = true;
|
||||||
addField(out, sfield, v, applyBoost ? boost : 1f);
|
addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if we should copy this field to any other fields.
|
// Check if we should copy this field value to any other fields.
|
||||||
// This could happen whether it is explicit or not.
|
// This could happen whether it is explicit or not.
|
||||||
List<CopyField> copyFields = schema.getCopyFieldsList(name);
|
List<CopyField> copyFields = schema.getCopyFieldsList(name);
|
||||||
for (CopyField cf : copyFields) {
|
for (CopyField cf : copyFields) {
|
||||||
SchemaField destinationField = cf.getDestination();
|
SchemaField destinationField = cf.getDestination();
|
||||||
|
|
||||||
|
final boolean destHasValues =
|
||||||
|
(null != out.getField(destinationField.getName()));
|
||||||
|
|
||||||
// check if the copy field is a multivalued or not
|
// check if the copy field is a multivalued or not
|
||||||
if (!destinationField.multiValued() && out.getField(destinationField.getName()) != null) {
|
if (!destinationField.multiValued() && destHasValues) {
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
|
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
|
||||||
destinationField.getName() + ": " + v);
|
destinationField.getName() + ": " + v);
|
||||||
|
@ -292,14 +295,23 @@ public class DocumentBuilder {
|
||||||
if( val instanceof String && cf.getMaxChars() > 0 ) {
|
if( val instanceof String && cf.getMaxChars() > 0 ) {
|
||||||
val = cf.getLimitedValue((String)val);
|
val = cf.getLimitedValue((String)val);
|
||||||
}
|
}
|
||||||
addField(out, destinationField, val, destinationField.indexed() && !destinationField.omitNorms() ? boost : 1F);
|
|
||||||
|
// we can't copy any boost unless the dest field is
|
||||||
|
// indexed & !omitNorms, but which boost we copy depends
|
||||||
|
// on whether the dest field already contains values (we
|
||||||
|
// don't want to apply the compounded docBoost more than once)
|
||||||
|
final float destBoost =
|
||||||
|
(destinationField.indexed() && !destinationField.omitNorms()) ?
|
||||||
|
(destHasValues ? fieldBoost : compoundBoost) : 1.0F;
|
||||||
|
|
||||||
|
addField(out, destinationField, val, destBoost);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The boost for a given field is the product of the
|
// The final boost for a given field name is the product of
|
||||||
// *all* boosts on values of that field.
|
// *all* boosts on values of that field.
|
||||||
// For multi-valued fields, we only want to set the boost on the
|
// For multi-valued fields, we only want to set the boost on the
|
||||||
// first field.
|
// first field.
|
||||||
boost = 1.0f;
|
fieldBoost = compoundBoost = 1.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch( SolrException ex ) {
|
catch( SolrException ex ) {
|
||||||
|
|
|
@ -19,13 +19,23 @@ package org.apache.solr.update;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
import org.apache.lucene.index.AtomicReader;
|
||||||
|
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||||
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.common.SolrInputField;
|
import org.apache.solr.common.SolrInputField;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.search.DocList;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.response.ResultContext;
|
||||||
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -208,7 +218,7 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
|
||||||
assertNull(h.validateUpdate(add(xml, new String[0])));
|
assertNull(h.validateUpdate(add(xml, new String[0])));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMultiValuedFielAndDocBoosts() throws Exception {
|
public void testMultiValuedFieldAndDocBoosts() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
SolrCore core = h.getCore();
|
||||||
IndexSchema schema = core.getSchema();
|
IndexSchema schema = core.getSchema();
|
||||||
SolrInputDocument doc = new SolrInputDocument();
|
SolrInputDocument doc = new SolrInputDocument();
|
||||||
|
@ -234,11 +244,127 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
IndexSchema schema = core.getSchema();
|
||||||
|
SolrInputDocument doc = new SolrInputDocument();
|
||||||
|
|
||||||
|
final float DOC_BOOST = 3.0F;
|
||||||
|
doc.setDocumentBoost(DOC_BOOST);
|
||||||
|
doc.addField("id", "42");
|
||||||
|
|
||||||
|
SolrInputField inTitle = new SolrInputField( "title" );
|
||||||
|
inTitle.addValue( "titleA" , 2.0F );
|
||||||
|
inTitle.addValue( "titleB" , 7.0F );
|
||||||
|
final float TITLE_BOOST = 2.0F * 7.0F;
|
||||||
|
assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F);
|
||||||
|
doc.put( inTitle.getName(), inTitle );
|
||||||
|
|
||||||
|
SolrInputField inFoo = new SolrInputField( "foo_t" );
|
||||||
|
inFoo.addValue( "summer time" , 1.0F );
|
||||||
|
inFoo.addValue( "in the city" , 5.0F );
|
||||||
|
inFoo.addValue( "living is easy" , 11.0F );
|
||||||
|
final float FOO_BOOST = 1.0F * 5.0F * 11.0F;
|
||||||
|
assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F);
|
||||||
|
doc.put( inFoo.getName(), inFoo );
|
||||||
|
|
||||||
|
Document out = DocumentBuilder.toDocument( doc, core.getSchema() );
|
||||||
|
|
||||||
|
IndexableField[] outTitle = out.getFields( inTitle.getName() );
|
||||||
|
assertEquals("wrong number of title values",
|
||||||
|
2, outTitle.length);
|
||||||
|
|
||||||
|
IndexableField[] outNoNorms = out.getFields( "title_stringNoNorms" );
|
||||||
|
assertEquals("wrong number of nonorms values",
|
||||||
|
2, outNoNorms.length);
|
||||||
|
|
||||||
|
IndexableField[] outFoo = out.getFields( inFoo.getName() );
|
||||||
|
assertEquals("wrong number of foo values",
|
||||||
|
3, outFoo.length);
|
||||||
|
|
||||||
|
IndexableField[] outText = out.getFields( "text" );
|
||||||
|
assertEquals("wrong number of text values",
|
||||||
|
5, outText.length);
|
||||||
|
|
||||||
|
// since Lucene no longer has native document boosts, we should find
|
||||||
|
// the doc boost multiplied into the boost on the first field value
|
||||||
|
// of each field. All other field values should be 1.0f
|
||||||
|
// (lucene will multiply all of the field value boosts later)
|
||||||
|
assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outTitle[1].boost(), 0.0F);
|
||||||
|
//
|
||||||
|
assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outFoo[1].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outFoo[2].boost(), 0.0F);
|
||||||
|
//
|
||||||
|
assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outText[1].boost(), 0.0F);
|
||||||
|
assertEquals(FOO_BOOST, outText[2].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outText[3].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outText[4].boost(), 0.0F);
|
||||||
|
|
||||||
|
// copyField dest with no norms should not have received any boost
|
||||||
|
assertEquals(1.0F, outNoNorms[0].boost(), 0.0F);
|
||||||
|
assertEquals(1.0F, outNoNorms[1].boost(), 0.0F);
|
||||||
|
|
||||||
|
// now index that SolrInputDocument to check the computed norms
|
||||||
|
|
||||||
|
assertU(adoc(doc));
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
SolrQueryRequest req = req("q", "id:42");
|
||||||
|
try {
|
||||||
|
// very hack-ish
|
||||||
|
|
||||||
|
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||||
|
core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp);
|
||||||
|
|
||||||
|
DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
|
||||||
|
assertTrue("can't find the doc we just added", 1 == dl.size());
|
||||||
|
int docid = dl.iterator().nextDoc();
|
||||||
|
|
||||||
|
SolrIndexSearcher searcher = req.getSearcher();
|
||||||
|
AtomicReader reader = SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader());
|
||||||
|
|
||||||
|
assertTrue("similarity doesn't extend DefaultSimilarity, " +
|
||||||
|
"config or defaults have changed since test was written",
|
||||||
|
searcher.getSimilarity() instanceof DefaultSimilarity);
|
||||||
|
|
||||||
|
DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity();
|
||||||
|
|
||||||
|
byte[] titleNorms = (byte[]) reader.normValues("title").getSource().getArray();
|
||||||
|
byte[] fooNorms = (byte[]) reader.normValues("foo_t").getSource().getArray();
|
||||||
|
byte[] textNorms = (byte[]) reader.normValues("text").getSource().getArray();
|
||||||
|
|
||||||
|
assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST),
|
||||||
|
titleNorms[docid]);
|
||||||
|
|
||||||
|
assertEquals(expectedNorm(sim, 8-3, FOO_BOOST * DOC_BOOST),
|
||||||
|
fooNorms[docid]);
|
||||||
|
|
||||||
|
assertEquals(expectedNorm(sim, 2 + 8-3,
|
||||||
|
TITLE_BOOST * FOO_BOOST * DOC_BOOST),
|
||||||
|
textNorms[docid]);
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
req.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Its not ok to boost a field if it omits norms
|
* Given a length and a boost, returns the expected encoded norm
|
||||||
*/
|
*/
|
||||||
|
private static byte expectedNorm(final DefaultSimilarity sim,
|
||||||
|
final int length, final float boost) {
|
||||||
|
|
||||||
|
return sim.encodeNormValue(boost / ((float) Math.sqrt(length)));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testBoostOmitNorms() throws Exception {
|
public void testBoostOmitNorms() throws Exception {
|
||||||
XmlDoc xml = new XmlDoc();
|
XmlDoc xml = new XmlDoc();
|
||||||
|
// explicitly boosting a field if that omits norms is not ok
|
||||||
xml.xml = "<doc>"
|
xml.xml = "<doc>"
|
||||||
+ "<field name=\"id\">ignore_exception</field>"
|
+ "<field name=\"id\">ignore_exception</field>"
|
||||||
+ "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>"
|
+ "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>"
|
||||||
|
@ -249,6 +375,12 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
|
||||||
} catch (SolrException expected) {
|
} catch (SolrException expected) {
|
||||||
// expected exception
|
// expected exception
|
||||||
}
|
}
|
||||||
|
// boosting a field that is copied to another field that omits norms is ok
|
||||||
|
xml.xml = "<doc>"
|
||||||
|
+ "<field name=\"id\">42</field>"
|
||||||
|
+ "<field name=\"title\" boost=\"3.0\">mytitle</field>"
|
||||||
|
+ "</doc>";
|
||||||
|
assertNull(h.validateUpdate(add(xml, new String[0])));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.util.Constants;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.QuickPatchThreadsFilter;
|
import org.apache.lucene.util.QuickPatchThreadsFilter;
|
||||||
import org.apache.noggit.*;
|
import org.apache.noggit.*;
|
||||||
|
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||||
import org.apache.solr.common.*;
|
import org.apache.solr.common.*;
|
||||||
import org.apache.solr.common.cloud.SolrZkClient;
|
import org.apache.solr.common.cloud.SolrZkClient;
|
||||||
import org.apache.solr.common.params.*;
|
import org.apache.solr.common.params.*;
|
||||||
|
@ -667,14 +668,15 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
|
||||||
* Generates a simple <add><doc>... XML String with no options
|
* Generates a simple <add><doc>... XML String with no options
|
||||||
*/
|
*/
|
||||||
public static String adoc(SolrInputDocument sdoc) {
|
public static String adoc(SolrInputDocument sdoc) {
|
||||||
List<String> fields = new ArrayList<String>();
|
StringWriter out = new StringWriter(512);
|
||||||
for (SolrInputField sf : sdoc) {
|
try {
|
||||||
for (Object o : sf.getValues()) {
|
out.append("<add>");
|
||||||
fields.add(sf.getName());
|
ClientUtils.writeXML(sdoc, out);
|
||||||
fields.add(o.toString());
|
out.append("</add>");
|
||||||
}
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Inexplicable IO error from StringWriter", e);
|
||||||
}
|
}
|
||||||
return adoc(fields.toArray(new String[fields.size()]));
|
return out.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue