mirror of https://github.com/apache/lucene.git
SOLR-8542: change default feature vector format (to 'dense' from 'sparse')
also: increase test coverage w.r.t. 'sparse' vs. 'dense' vs. 'default' feature vector format
This commit is contained in:
parent
ac3f1bb339
commit
f62874e47a
|
@ -324,7 +324,7 @@ produce the features without doing the reranking:
|
|||
`fl=*,score,[features store=yourFeatureStore format=[dense|sparse] ]`
|
||||
|
||||
This will return the values of the features in the given store. The format of the
|
||||
extracted features will be based on the format parameter. The default is sparse.
|
||||
extracted features will be based on the format parameter. The default is dense.
|
||||
|
||||
# Assemble training data
|
||||
In order to train a learning to rank model you need training data. Training data is
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -59,7 +60,7 @@ import org.slf4j.LoggerFactory;
|
|||
* will default to the features used by your reranking model.<br>
|
||||
* <code>efi.*</code> - External feature information variables required by the features
|
||||
* you are extracting.<br>
|
||||
* <code>format</code> - The format you want the features to be returned in. Supports (dense|sparse). Defaults to sparse.<br>
|
||||
* <code>format</code> - The format you want the features to be returned in. Supports (dense|sparse). Defaults to dense.<br>
|
||||
*/
|
||||
|
||||
public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
||||
|
@ -77,7 +78,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
|||
private String fvCacheName;
|
||||
private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME;
|
||||
private String defaultStore;
|
||||
private String defaultFormat;
|
||||
private FeatureLogger.FeatureFormat defaultFormat = FeatureLogger.FeatureFormat.DENSE;
|
||||
private char csvKeyValueDelimiter = CSVFeatureLogger.DEFAULT_KEY_VALUE_SEPARATOR;
|
||||
private char csvFeatureSeparator = CSVFeatureLogger.DEFAULT_FEATURE_SEPARATOR;
|
||||
|
||||
|
@ -96,7 +97,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
|||
}
|
||||
|
||||
public void setDefaultFormat(String defaultFormat) {
|
||||
this.defaultFormat = defaultFormat;
|
||||
this.defaultFormat = FeatureLogger.FeatureFormat.valueOf(defaultFormat.toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
public void setCsvKeyValueDelimiter(String csvKeyValueDelimiter) {
|
||||
|
@ -133,7 +134,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
|||
// Create and supply the feature logger to be used
|
||||
SolrQueryRequestContextUtils.setFeatureLogger(req,
|
||||
createFeatureLogger(
|
||||
localparams.get(FV_FORMAT, defaultFormat)));
|
||||
localparams.get(FV_FORMAT)));
|
||||
|
||||
return new FeatureTransformer(name, localparams, req);
|
||||
}
|
||||
|
@ -147,23 +148,17 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
|||
*
|
||||
* @return a feature logger for the format specified.
|
||||
*/
|
||||
private FeatureLogger createFeatureLogger(String featureFormat) {
|
||||
final FeatureLogger.FeatureFormat f;
|
||||
if (featureFormat == null || featureFormat.isEmpty() ||
|
||||
featureFormat.equals("sparse")) {
|
||||
f = FeatureLogger.FeatureFormat.SPARSE;
|
||||
}
|
||||
else if (featureFormat.equals("dense")) {
|
||||
f = FeatureLogger.FeatureFormat.DENSE;
|
||||
}
|
||||
else {
|
||||
f = FeatureLogger.FeatureFormat.SPARSE;
|
||||
log.warn("unknown feature logger feature format {}", featureFormat);
|
||||
private FeatureLogger createFeatureLogger(String formatStr) {
|
||||
final FeatureLogger.FeatureFormat format;
|
||||
if (formatStr != null) {
|
||||
format = FeatureLogger.FeatureFormat.valueOf(formatStr.toUpperCase(Locale.ROOT));
|
||||
} else {
|
||||
format = this.defaultFormat;
|
||||
}
|
||||
if (fvCacheName == null) {
|
||||
throw new IllegalArgumentException("a fvCacheName must be configured");
|
||||
}
|
||||
return new CSVFeatureLogger(fvCacheName, f, csvKeyValueDelimiter, csvFeatureSeparator);
|
||||
return new CSVFeatureLogger(fvCacheName, format, csvKeyValueDelimiter, csvFeatureSeparator);
|
||||
}
|
||||
|
||||
class FeatureTransformer extends DocTransformer {
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
enclosed between brackets (in this case [fv]). In order to get the feature
|
||||
vector you will have to specify that you want the field (e.g., fl="*,[fv]") -->
|
||||
<transformer name="fv" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
|
||||
<str name="defaultFormat">${solr.ltr.transformer.fv.defaultFormat:dense}</str>
|
||||
<str name="fvCacheName">QUERY_DOC_FV</str>
|
||||
</transformer>
|
||||
|
||||
|
|
|
@ -75,12 +75,55 @@ public class TestRerankBase extends RestTestBase {
|
|||
protected static File fstorefile = null;
|
||||
protected static File mstorefile = null;
|
||||
|
||||
final private static String SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT = "solr.ltr.transformer.fv.defaultFormat";
|
||||
private static String defaultFeatureFormat;
|
||||
|
||||
protected String chooseDefaultFeatureVector(String dense, String sparse) {
|
||||
if (defaultFeatureFormat == null) {
|
||||
// to match <code><str name="defaultFormat">${solr.ltr.transformer.fv.defaultFormat:dense}</str></code> snippet
|
||||
return dense;
|
||||
} else if ("dense".equals(defaultFeatureFormat)) {
|
||||
return dense;
|
||||
} else if ("sparse".equals(defaultFeatureFormat)) {
|
||||
return sparse;
|
||||
} else {
|
||||
fail("unexpected feature format choice: "+defaultFeatureFormat);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
protected static void chooseDefaultFeatureFormat() throws Exception {
|
||||
switch (random().nextInt(3)) {
|
||||
case 0:
|
||||
defaultFeatureFormat = null;
|
||||
break;
|
||||
case 1:
|
||||
defaultFeatureFormat = "dense";
|
||||
break;
|
||||
case 2:
|
||||
defaultFeatureFormat = "sparse";
|
||||
break;
|
||||
default:
|
||||
fail("unexpected feature format choice");
|
||||
break;
|
||||
}
|
||||
if (defaultFeatureFormat != null) {
|
||||
System.setProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT, defaultFeatureFormat);
|
||||
}
|
||||
}
|
||||
|
||||
protected static void unchooseDefaultFeatureFormat() {
|
||||
System.clearProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT);
|
||||
}
|
||||
|
||||
protected static void setuptest(boolean bulkIndex) throws Exception {
|
||||
chooseDefaultFeatureFormat();
|
||||
setuptest("solrconfig-ltr.xml", "schema.xml");
|
||||
if (bulkIndex) bulkIndex();
|
||||
}
|
||||
|
||||
protected static void setupPersistenttest(boolean bulkIndex) throws Exception {
|
||||
chooseDefaultFeatureFormat();
|
||||
setupPersistentTest("solrconfig-ltr.xml", "schema.xml");
|
||||
if (bulkIndex) bulkIndex();
|
||||
}
|
||||
|
@ -178,6 +221,7 @@ public class TestRerankBase extends RestTestBase {
|
|||
FileUtils.deleteDirectory(tmpSolrHome);
|
||||
System.clearProperty("managed.schema.mutable");
|
||||
// System.clearProperty("enable.update.log");
|
||||
unchooseDefaultFeatureFormat();
|
||||
}
|
||||
|
||||
public static void makeRestTestHarnessNull() {
|
||||
|
|
|
@ -94,8 +94,15 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
// Stopword only query passed in
|
||||
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query='a'}");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"matchedTitle","0.0",
|
||||
"titlePhraseMatch","0.0");
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
|
||||
// Features are query title matches, which remove stopwords, leaving blank query, so no matches
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -104,7 +111,7 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
query.setQuery("*:*");
|
||||
query.add("rows", "1");
|
||||
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"confidence","2.3", "originalScore","1.0");
|
||||
|
||||
// Features we're extracting depend on external feature info not passed in
|
||||
|
@ -114,13 +121,13 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
// Adding efi in features section should make it work
|
||||
query.remove("fl");
|
||||
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'");
|
||||
|
||||
// Adding efi in transformer + rq should still use the transformer's params for feature extraction
|
||||
query.remove("fl");
|
||||
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
|
||||
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query=w3}");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -129,10 +136,18 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
query.setQuery("*:*");
|
||||
query.add("rows", "1");
|
||||
|
||||
final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"confidence","0.0",
|
||||
"originalScore","0.0");
|
||||
final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"originalScore","0.0");
|
||||
|
||||
final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv);
|
||||
|
||||
// Efi is explicitly not required, so we do not score the feature
|
||||
query.remove("fl");
|
||||
query.add("fl", "fvalias:[fv store=fstore2]");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -141,10 +156,18 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
query.setQuery("*:*");
|
||||
query.add("rows", "1");
|
||||
|
||||
final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"occurrences","0.0",
|
||||
"originalScore","0.0");
|
||||
final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"originalScore","0.0");
|
||||
|
||||
final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv);
|
||||
|
||||
// Efi is explicitly not required, so we do not score the feature
|
||||
query.remove("fl");
|
||||
query.add("fl", "fvalias:[fv store=fstore3]");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -58,9 +58,17 @@ public class TestExternalValueFeatures extends TestRerankBase {
|
|||
query.add("rows", "3");
|
||||
query.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature efi.user_device_tablet=1}");
|
||||
|
||||
final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"user_device_smartphone","0.0",
|
||||
"user_device_tablet","1.0");
|
||||
final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"user_device_tablet","1.0");
|
||||
|
||||
final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv);
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/features=='"+FeatureLoggerTestUtils.toFeatureVector("user_device_tablet","1.0")+"'");
|
||||
"/response/docs/[0]/features=='"+docs0features_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/score==65.0");
|
||||
}
|
||||
|
@ -76,9 +84,16 @@ public class TestExternalValueFeatures extends TestRerankBase {
|
|||
query
|
||||
.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature}");
|
||||
|
||||
final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"user_device_smartphone","0.0",
|
||||
"user_device_tablet","0.0");
|
||||
final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
|
||||
final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv);
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/features==''");
|
||||
"/response/docs/[0]/features=='"+docs0features_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/score==0.0");
|
||||
}
|
||||
|
|
|
@ -56,6 +56,13 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
"c1", "c2", "c3"}, "test1",
|
||||
"{\"weights\":{\"c1\":1.0,\"c2\":1.0,\"c3\":1.0}}");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"c1","1.0",
|
||||
"c2","2.0",
|
||||
"c3","3.0",
|
||||
"pop","2.0",
|
||||
"nomatch","0.0",
|
||||
"yesmatch","1.0");
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"c1","1.0",
|
||||
"c2","2.0",
|
||||
|
@ -63,6 +70,8 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
"pop","2.0",
|
||||
"yesmatch","1.0");
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
|
||||
final SolrQuery query = new SolrQuery();
|
||||
query.setQuery("title:bloomberg");
|
||||
query.add("fl", "title,description,id,popularity,[fv]");
|
||||
|
@ -73,7 +82,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
restTestHarness.query("/query" + query.toQueryString());
|
||||
assertJQ(
|
||||
"/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_sparse_csv+"'}");
|
||||
"/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_default_csv+"'}");
|
||||
|
||||
query.remove("fl");
|
||||
query.add("fl", "[fv]");
|
||||
|
@ -82,7 +91,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
|
||||
restTestHarness.query("/query" + query.toQueryString());
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/=={'[fv]':'"+docs0fv_sparse_csv+"'}");
|
||||
"/response/docs/[0]/=={'[fv]':'"+docs0fv_default_csv+"'}");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -157,7 +166,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
|
||||
query.add("rq", "{!ltr reRankDocs=3 model=sumgroup}");
|
||||
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"c1","1.0",
|
||||
"c2","2.0",
|
||||
"c3","3.0",
|
||||
|
@ -166,7 +175,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
restTestHarness.query("/query" + query.toQueryString());
|
||||
assertJQ(
|
||||
"/query" + query.toQueryString(),
|
||||
"/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_sparse_csv+"'}");
|
||||
"/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_csv+"'}");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -181,25 +190,28 @@ public class TestFeatureLogging extends TestRerankBase {
|
|||
"{\"weights\":{\"match\":1.0}}");
|
||||
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
|
||||
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("c4", "1.0");
|
||||
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c4", "1.0");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
|
||||
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "0.0", "c4", "1.0");
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
|
||||
|
||||
final SolrQuery query = new SolrQuery();
|
||||
query.setQuery("title:bloomberg");
|
||||
query.add("rows", "10");
|
||||
query.add("rq", "{!ltr reRankDocs=10 model=sum4}");
|
||||
|
||||
//csv - no feature format check (default to sparse)
|
||||
//csv - no feature format specified i.e. use default
|
||||
query.remove("fl");
|
||||
query.add("fl", "*,score,fv:[fv store=test4]");
|
||||
assertJQ(
|
||||
"/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/fv/=='"+docs0fv_sparse_csv+"'");
|
||||
"/response/docs/[0]/fv/=='"+docs0fv_default_csv+"'");
|
||||
assertJQ(
|
||||
"/query" + query.toQueryString(),
|
||||
"/response/docs/[1]/fv/=='"+docs1fv_sparse_csv+"'");
|
||||
"/response/docs/[1]/fv/=='"+docs1fv_default_csv+"'");
|
||||
|
||||
//csv - sparse feature format check
|
||||
query.remove("fl");
|
||||
|
|
|
@ -97,13 +97,13 @@ public class TestFilterSolrFeature extends TestRerankBase {
|
|||
query.add("rq", "{!ltr reRankDocs=4 model=fqmodel efi.user_query=w2}");
|
||||
query.add("fl", "fv:[fv]");
|
||||
|
||||
final String docs0fv_sparse_csv= FeatureLoggerTestUtils.toFeatureVector(
|
||||
final String docs0fv_csv= FeatureLoggerTestUtils.toFeatureVector(
|
||||
"matchedTitle","1.0", "popularity","3.0");
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='2'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='1'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_sparse_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_csv+"'");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -105,20 +105,48 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
|||
final Double doc0Score = (Double) ((Map<String,Object>) ((ArrayList<Object>) ((Map<String,Object>) jsonParse
|
||||
.get("response")).get("docs")).get(0)).get("score");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature",doc0Score.toString(),
|
||||
"nomatchfeature2","0.0");
|
||||
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature","0.0",
|
||||
"nomatchfeature2","0.0");
|
||||
final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature","0.0",
|
||||
"nomatchfeature2","0.0");
|
||||
final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature","0.0",
|
||||
"nomatchfeature2","0.0");
|
||||
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"yesmatchfeature",doc0Score.toString());
|
||||
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
|
||||
final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv);
|
||||
final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv);
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score=="
|
||||
+ (doc0Score * 1.1));
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'");
|
||||
"/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='2'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='4'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -142,15 +170,47 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
|||
final Double doc0Score = (Double) ((Map<String,Object>) ((ArrayList<Object>) ((Map<String,Object>) jsonParse
|
||||
.get("response")).get("docs")).get(0)).get("score");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature",doc0Score.toString(),
|
||||
"nomatchfeature2","0.0",
|
||||
"nomatchfeature3","0.0");
|
||||
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature","0.0",
|
||||
"nomatchfeature2","0.0",
|
||||
"nomatchfeature3","0.0");
|
||||
final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature","0.0",
|
||||
"nomatchfeature2","0.0",
|
||||
"nomatchfeature3","0.0");
|
||||
final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature","0.0",
|
||||
"yesmatchfeature","0.0",
|
||||
"nomatchfeature2","0.0",
|
||||
"nomatchfeature3","0.0");
|
||||
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"yesmatchfeature",doc0Score.toString());
|
||||
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
|
||||
final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv);
|
||||
final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv);
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'");
|
||||
"/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -163,8 +223,14 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
|||
query.add("fv", "true");
|
||||
query.add("rq", "{!ltr model=nomatchmodel3 reRankDocs=4}");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature4","0.0");
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -184,9 +250,15 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
|||
query.add("rows", "4");
|
||||
query.add("rq", "{!ltr model=nomatchmodel4 reRankDocs=4}");
|
||||
|
||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||
"nomatchfeature4","0.0");
|
||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||
|
||||
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||
|
||||
assertJQ("/query" + query.toQueryString(),
|
||||
"/response/docs/[0]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue