SOLR-10208: Adjust scoring formula for the scoreNodes function

This commit is contained in:
Joel Bernstein 2017-02-27 12:03:03 -05:00
parent a248e6e3c0
commit 0c1fde664f
3 changed files with 7 additions and 8 deletions

View File

@@ -237,7 +237,7 @@ public class ScoreNodesStream extends TupleStream implements Expressible
throw new Exception("termFreq field not present in the Tuple");
}
Number termFreqValue = (Number)tuple.get(termFreq);
float score = termFreqValue.floatValue() * (float) (Math.log((numDocs + 1) / (docFreq.doubleValue() + 1)) + 1.0);
float score = (float)(Math.log(termFreqValue.floatValue())+1.0) * (float) (Math.log((numDocs + 1) / (docFreq.doubleValue() + 1)) + 1.0);
tuple.put("nodeScore", score);
tuple.put("docFreq", docFreq);
tuple.put("numDocs", numDocs);

View File

@@ -358,7 +358,7 @@ public class SignificantTermsStream extends TupleStream implements Expressible{
map.put("background", freqs[0]);
map.put("foreground", freqs[1]);
float score = (float)Math.log(freqs[1]) * (float) (Math.log(((float)(numDocs + 1)) / (freqs[0] + 1)) + 1.0);
float score = (float)(Math.log(freqs[1])+1.0) * (float) (Math.log(((float)(numDocs + 1)) / (freqs[0] + 1)) + 1.0);
map.put("score", score);
maps.add(map);

View File

@@ -521,10 +521,10 @@ public class GraphExpressionTest extends SolrCloudTestCase {
.add(id, "3", "basket_s", "basket2", "product_ss", "product1", "product_ss", "product6", "product_ss", "product7", "price_f", "1")
.add(id, "6", "basket_s", "basket3", "product_ss", "product4", "product_ss","product3", "product_ss","product1", "price_f", "1")
.add(id, "9", "basket_s", "basket4", "product_ss", "product4", "product_ss", "product3", "product_ss", "product1","price_f", "1")
.add(id, "12", "basket_s", "basket5", "product_ss", "product1", "price_f", "1")
.add(id, "13", "basket_s", "basket6", "product_ss", "product1", "price_f", "1")
.add(id, "14", "basket_s", "basket7", "product_ss", "product1", "price_f", "1")
.add(id, "15", "basket_s", "basket4", "product_ss", "product1", "price_f", "1")
//.add(id, "12", "basket_s", "basket5", "product_ss", "product1", "price_f", "1")
//.add(id, "13", "basket_s", "basket6", "product_ss", "product1", "price_f", "1")
//.add(id, "14", "basket_s", "basket7", "product_ss", "product1", "price_f", "1")
//.add(id, "15", "basket_s", "basket4", "product_ss", "product1", "price_f", "1")
.commit(cluster.getSolrClient(), COLLECTION);
List<Tuple> tuples = null;
@@ -557,7 +557,6 @@ public class GraphExpressionTest extends SolrCloudTestCase {
stream.setStreamContext(context);
tuples = getTuples(stream);
//The highest scoring tuple will be the product searched for.
Tuple tuple = tuples.get(0);
assert(tuple.getString("node").equals("product3"));
assert(tuple.getLong("docFreq") == 3);
@@ -570,7 +569,7 @@ public class GraphExpressionTest extends SolrCloudTestCase {
Tuple tuple1 = tuples.get(2);
assert(tuple1.getString("node").equals("product1"));
assert(tuple1.getLong("docFreq") == 8);
assert(tuple1.getLong("docFreq") == 4);
assert(tuple1.getLong("count(*)") == 3);
Tuple tuple2 = tuples.get(3);