mirror of https://github.com/apache/lucene.git
SOLR-8542: change default feature vector format (to 'dense' from 'sparse')
also: increase test coverage w.r.t. 'sparse' vs. 'dense' vs. 'default' feature vector format
This commit is contained in:
parent
01846cbb4c
commit
b7c75a3a1c
|
@ -324,7 +324,7 @@ produce the features without doing the reranking:
|
||||||
`fl=*,score,[features store=yourFeatureStore format=[dense|sparse] ]`
|
`fl=*,score,[features store=yourFeatureStore format=[dense|sparse] ]`
|
||||||
|
|
||||||
This will return the values of the features in the given store. The format of the
|
This will return the values of the features in the given store. The format of the
|
||||||
extracted features will be based on the format parameter. The default is sparse.
|
extracted features will be based on the format parameter. The default is dense.
|
||||||
|
|
||||||
# Assemble training data
|
# Assemble training data
|
||||||
In order to train a learning to rank model you need training data. Training data is
|
In order to train a learning to rank model you need training data. Training data is
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
@ -59,7 +60,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* will default to the features used by your reranking model.<br>
|
* will default to the features used by your reranking model.<br>
|
||||||
* <code>efi.*</code> - External feature information variables required by the features
|
* <code>efi.*</code> - External feature information variables required by the features
|
||||||
* you are extracting.<br>
|
* you are extracting.<br>
|
||||||
* <code>format</code> - The format you want the features to be returned in. Supports (dense|sparse). Defaults to sparse.<br>
|
* <code>format</code> - The format you want the features to be returned in. Supports (dense|sparse). Defaults to dense.<br>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
||||||
|
@ -77,7 +78,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
||||||
private String fvCacheName;
|
private String fvCacheName;
|
||||||
private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME;
|
private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME;
|
||||||
private String defaultStore;
|
private String defaultStore;
|
||||||
private String defaultFormat;
|
private FeatureLogger.FeatureFormat defaultFormat = FeatureLogger.FeatureFormat.DENSE;
|
||||||
private char csvKeyValueDelimiter = CSVFeatureLogger.DEFAULT_KEY_VALUE_SEPARATOR;
|
private char csvKeyValueDelimiter = CSVFeatureLogger.DEFAULT_KEY_VALUE_SEPARATOR;
|
||||||
private char csvFeatureSeparator = CSVFeatureLogger.DEFAULT_FEATURE_SEPARATOR;
|
private char csvFeatureSeparator = CSVFeatureLogger.DEFAULT_FEATURE_SEPARATOR;
|
||||||
|
|
||||||
|
@ -96,7 +97,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDefaultFormat(String defaultFormat) {
|
public void setDefaultFormat(String defaultFormat) {
|
||||||
this.defaultFormat = defaultFormat;
|
this.defaultFormat = FeatureLogger.FeatureFormat.valueOf(defaultFormat.toUpperCase(Locale.ROOT));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCsvKeyValueDelimiter(String csvKeyValueDelimiter) {
|
public void setCsvKeyValueDelimiter(String csvKeyValueDelimiter) {
|
||||||
|
@ -133,7 +134,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
||||||
// Create and supply the feature logger to be used
|
// Create and supply the feature logger to be used
|
||||||
SolrQueryRequestContextUtils.setFeatureLogger(req,
|
SolrQueryRequestContextUtils.setFeatureLogger(req,
|
||||||
createFeatureLogger(
|
createFeatureLogger(
|
||||||
localparams.get(FV_FORMAT, defaultFormat)));
|
localparams.get(FV_FORMAT)));
|
||||||
|
|
||||||
return new FeatureTransformer(name, localparams, req);
|
return new FeatureTransformer(name, localparams, req);
|
||||||
}
|
}
|
||||||
|
@ -147,23 +148,17 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
||||||
*
|
*
|
||||||
* @return a feature logger for the format specified.
|
* @return a feature logger for the format specified.
|
||||||
*/
|
*/
|
||||||
private FeatureLogger createFeatureLogger(String featureFormat) {
|
private FeatureLogger createFeatureLogger(String formatStr) {
|
||||||
final FeatureLogger.FeatureFormat f;
|
final FeatureLogger.FeatureFormat format;
|
||||||
if (featureFormat == null || featureFormat.isEmpty() ||
|
if (formatStr != null) {
|
||||||
featureFormat.equals("sparse")) {
|
format = FeatureLogger.FeatureFormat.valueOf(formatStr.toUpperCase(Locale.ROOT));
|
||||||
f = FeatureLogger.FeatureFormat.SPARSE;
|
} else {
|
||||||
}
|
format = this.defaultFormat;
|
||||||
else if (featureFormat.equals("dense")) {
|
|
||||||
f = FeatureLogger.FeatureFormat.DENSE;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
f = FeatureLogger.FeatureFormat.SPARSE;
|
|
||||||
log.warn("unknown feature logger feature format {}", featureFormat);
|
|
||||||
}
|
}
|
||||||
if (fvCacheName == null) {
|
if (fvCacheName == null) {
|
||||||
throw new IllegalArgumentException("a fvCacheName must be configured");
|
throw new IllegalArgumentException("a fvCacheName must be configured");
|
||||||
}
|
}
|
||||||
return new CSVFeatureLogger(fvCacheName, f, csvKeyValueDelimiter, csvFeatureSeparator);
|
return new CSVFeatureLogger(fvCacheName, format, csvKeyValueDelimiter, csvFeatureSeparator);
|
||||||
}
|
}
|
||||||
|
|
||||||
class FeatureTransformer extends DocTransformer {
|
class FeatureTransformer extends DocTransformer {
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
enclosed between brackets (in this case [fv]). In order to get the feature
|
enclosed between brackets (in this case [fv]). In order to get the feature
|
||||||
vector you will have to specify that you want the field (e.g., fl="*,[fv]) -->
|
vector you will have to specify that you want the field (e.g., fl="*,[fv]) -->
|
||||||
<transformer name="fv" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
|
<transformer name="fv" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
|
||||||
|
<str name="defaultFormat">${solr.ltr.transformer.fv.defaultFormat:dense}</str>
|
||||||
<str name="fvCacheName">QUERY_DOC_FV</str>
|
<str name="fvCacheName">QUERY_DOC_FV</str>
|
||||||
</transformer>
|
</transformer>
|
||||||
|
|
||||||
|
|
|
@ -75,12 +75,55 @@ public class TestRerankBase extends RestTestBase {
|
||||||
protected static File fstorefile = null;
|
protected static File fstorefile = null;
|
||||||
protected static File mstorefile = null;
|
protected static File mstorefile = null;
|
||||||
|
|
||||||
|
final private static String SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT = "solr.ltr.transformer.fv.defaultFormat";
|
||||||
|
private static String defaultFeatureFormat;
|
||||||
|
|
||||||
|
protected String chooseDefaultFeatureVector(String dense, String sparse) {
|
||||||
|
if (defaultFeatureFormat == null) {
|
||||||
|
// to match <code><str name="defaultFormat">${solr.ltr.transformer.fv.defaultFormat:dense}</str></code> snippet
|
||||||
|
return dense;
|
||||||
|
} else if ("dense".equals(defaultFeatureFormat)) {
|
||||||
|
return dense;
|
||||||
|
} else if ("sparse".equals(defaultFeatureFormat)) {
|
||||||
|
return sparse;
|
||||||
|
} else {
|
||||||
|
fail("unexpected feature format choice: "+defaultFeatureFormat);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void chooseDefaultFeatureFormat() throws Exception {
|
||||||
|
switch (random().nextInt(3)) {
|
||||||
|
case 0:
|
||||||
|
defaultFeatureFormat = null;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
defaultFeatureFormat = "dense";
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
defaultFeatureFormat = "sparse";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fail("unexpected feature format choice");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (defaultFeatureFormat != null) {
|
||||||
|
System.setProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT, defaultFeatureFormat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void unchooseDefaultFeatureFormat() {
|
||||||
|
System.clearProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT);
|
||||||
|
}
|
||||||
|
|
||||||
protected static void setuptest(boolean bulkIndex) throws Exception {
|
protected static void setuptest(boolean bulkIndex) throws Exception {
|
||||||
|
chooseDefaultFeatureFormat();
|
||||||
setuptest("solrconfig-ltr.xml", "schema.xml");
|
setuptest("solrconfig-ltr.xml", "schema.xml");
|
||||||
if (bulkIndex) bulkIndex();
|
if (bulkIndex) bulkIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void setupPersistenttest(boolean bulkIndex) throws Exception {
|
protected static void setupPersistenttest(boolean bulkIndex) throws Exception {
|
||||||
|
chooseDefaultFeatureFormat();
|
||||||
setupPersistentTest("solrconfig-ltr.xml", "schema.xml");
|
setupPersistentTest("solrconfig-ltr.xml", "schema.xml");
|
||||||
if (bulkIndex) bulkIndex();
|
if (bulkIndex) bulkIndex();
|
||||||
}
|
}
|
||||||
|
@ -178,6 +221,7 @@ public class TestRerankBase extends RestTestBase {
|
||||||
FileUtils.deleteDirectory(tmpSolrHome);
|
FileUtils.deleteDirectory(tmpSolrHome);
|
||||||
System.clearProperty("managed.schema.mutable");
|
System.clearProperty("managed.schema.mutable");
|
||||||
// System.clearProperty("enable.update.log");
|
// System.clearProperty("enable.update.log");
|
||||||
|
unchooseDefaultFeatureFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void makeRestTestHarnessNull() {
|
public static void makeRestTestHarnessNull() {
|
||||||
|
|
|
@ -94,8 +94,15 @@ public class TestExternalFeatures extends TestRerankBase {
|
||||||
// Stopword only query passed in
|
// Stopword only query passed in
|
||||||
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query='a'}");
|
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query='a'}");
|
||||||
|
|
||||||
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"matchedTitle","0.0",
|
||||||
|
"titlePhraseMatch","0.0");
|
||||||
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
|
||||||
// Features are query title matches, which remove stopwords, leaving blank query, so no matches
|
// Features are query title matches, which remove stopwords, leaving blank query, so no matches
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -104,7 +111,7 @@ public class TestExternalFeatures extends TestRerankBase {
|
||||||
query.setQuery("*:*");
|
query.setQuery("*:*");
|
||||||
query.add("rows", "1");
|
query.add("rows", "1");
|
||||||
|
|
||||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
"confidence","2.3", "originalScore","1.0");
|
"confidence","2.3", "originalScore","1.0");
|
||||||
|
|
||||||
// Features we're extracting depend on external feature info not passed in
|
// Features we're extracting depend on external feature info not passed in
|
||||||
|
@ -114,13 +121,13 @@ public class TestExternalFeatures extends TestRerankBase {
|
||||||
// Adding efi in features section should make it work
|
// Adding efi in features section should make it work
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
|
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'");
|
||||||
|
|
||||||
// Adding efi in transformer + rq should still use the transformer's params for feature extraction
|
// Adding efi in transformer + rq should still use the transformer's params for feature extraction
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
|
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
|
||||||
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query=w3}");
|
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query=w3}");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -129,10 +136,18 @@ public class TestExternalFeatures extends TestRerankBase {
|
||||||
query.setQuery("*:*");
|
query.setQuery("*:*");
|
||||||
query.add("rows", "1");
|
query.add("rows", "1");
|
||||||
|
|
||||||
|
final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"confidence","0.0",
|
||||||
|
"originalScore","0.0");
|
||||||
|
final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"originalScore","0.0");
|
||||||
|
|
||||||
|
final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv);
|
||||||
|
|
||||||
// Efi is explicitly not required, so we do not score the feature
|
// Efi is explicitly not required, so we do not score the feature
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
query.add("fl", "fvalias:[fv store=fstore2]");
|
query.add("fl", "fvalias:[fv store=fstore2]");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -141,10 +156,18 @@ public class TestExternalFeatures extends TestRerankBase {
|
||||||
query.setQuery("*:*");
|
query.setQuery("*:*");
|
||||||
query.add("rows", "1");
|
query.add("rows", "1");
|
||||||
|
|
||||||
|
final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"occurrences","0.0",
|
||||||
|
"originalScore","0.0");
|
||||||
|
final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"originalScore","0.0");
|
||||||
|
|
||||||
|
final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv);
|
||||||
|
|
||||||
// Efi is explicitly not required, so we do not score the feature
|
// Efi is explicitly not required, so we do not score the feature
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
query.add("fl", "fvalias:[fv store=fstore3]");
|
query.add("fl", "fvalias:[fv store=fstore3]");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -58,9 +58,17 @@ public class TestExternalValueFeatures extends TestRerankBase {
|
||||||
query.add("rows", "3");
|
query.add("rows", "3");
|
||||||
query.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature efi.user_device_tablet=1}");
|
query.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature efi.user_device_tablet=1}");
|
||||||
|
|
||||||
|
final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"user_device_smartphone","0.0",
|
||||||
|
"user_device_tablet","1.0");
|
||||||
|
final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"user_device_tablet","1.0");
|
||||||
|
|
||||||
|
final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv);
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/features=='"+FeatureLoggerTestUtils.toFeatureVector("user_device_tablet","1.0")+"'");
|
"/response/docs/[0]/features=='"+docs0features_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/score==65.0");
|
"/response/docs/[0]/score==65.0");
|
||||||
}
|
}
|
||||||
|
@ -76,9 +84,16 @@ public class TestExternalValueFeatures extends TestRerankBase {
|
||||||
query
|
query
|
||||||
.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature}");
|
.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature}");
|
||||||
|
|
||||||
|
final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"user_device_smartphone","0.0",
|
||||||
|
"user_device_tablet","0.0");
|
||||||
|
final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
|
||||||
|
final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv);
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/features==''");
|
"/response/docs/[0]/features=='"+docs0features_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/score==0.0");
|
"/response/docs/[0]/score==0.0");
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,13 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
"c1", "c2", "c3"}, "test1",
|
"c1", "c2", "c3"}, "test1",
|
||||||
"{\"weights\":{\"c1\":1.0,\"c2\":1.0,\"c3\":1.0}}");
|
"{\"weights\":{\"c1\":1.0,\"c2\":1.0,\"c3\":1.0}}");
|
||||||
|
|
||||||
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"c1","1.0",
|
||||||
|
"c2","2.0",
|
||||||
|
"c3","3.0",
|
||||||
|
"pop","2.0",
|
||||||
|
"nomatch","0.0",
|
||||||
|
"yesmatch","1.0");
|
||||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
"c1","1.0",
|
"c1","1.0",
|
||||||
"c2","2.0",
|
"c2","2.0",
|
||||||
|
@ -63,6 +70,8 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
"pop","2.0",
|
"pop","2.0",
|
||||||
"yesmatch","1.0");
|
"yesmatch","1.0");
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
|
||||||
final SolrQuery query = new SolrQuery();
|
final SolrQuery query = new SolrQuery();
|
||||||
query.setQuery("title:bloomberg");
|
query.setQuery("title:bloomberg");
|
||||||
query.add("fl", "title,description,id,popularity,[fv]");
|
query.add("fl", "title,description,id,popularity,[fv]");
|
||||||
|
@ -73,7 +82,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
restTestHarness.query("/query" + query.toQueryString());
|
restTestHarness.query("/query" + query.toQueryString());
|
||||||
assertJQ(
|
assertJQ(
|
||||||
"/query" + query.toQueryString(),
|
"/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_sparse_csv+"'}");
|
"/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_default_csv+"'}");
|
||||||
|
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
query.add("fl", "[fv]");
|
query.add("fl", "[fv]");
|
||||||
|
@ -82,7 +91,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
|
|
||||||
restTestHarness.query("/query" + query.toQueryString());
|
restTestHarness.query("/query" + query.toQueryString());
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/=={'[fv]':'"+docs0fv_sparse_csv+"'}");
|
"/response/docs/[0]/=={'[fv]':'"+docs0fv_default_csv+"'}");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -157,7 +166,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
|
|
||||||
query.add("rq", "{!ltr reRankDocs=3 model=sumgroup}");
|
query.add("rq", "{!ltr reRankDocs=3 model=sumgroup}");
|
||||||
|
|
||||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
"c1","1.0",
|
"c1","1.0",
|
||||||
"c2","2.0",
|
"c2","2.0",
|
||||||
"c3","3.0",
|
"c3","3.0",
|
||||||
|
@ -166,7 +175,7 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
restTestHarness.query("/query" + query.toQueryString());
|
restTestHarness.query("/query" + query.toQueryString());
|
||||||
assertJQ(
|
assertJQ(
|
||||||
"/query" + query.toQueryString(),
|
"/query" + query.toQueryString(),
|
||||||
"/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_sparse_csv+"'}");
|
"/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_csv+"'}");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -181,25 +190,28 @@ public class TestFeatureLogging extends TestRerankBase {
|
||||||
"{\"weights\":{\"match\":1.0}}");
|
"{\"weights\":{\"match\":1.0}}");
|
||||||
|
|
||||||
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
|
||||||
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("c4", "1.0");
|
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c4", "1.0");
|
||||||
|
|
||||||
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
|
||||||
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "0.0", "c4", "1.0");
|
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "0.0", "c4", "1.0");
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
|
||||||
|
|
||||||
final SolrQuery query = new SolrQuery();
|
final SolrQuery query = new SolrQuery();
|
||||||
query.setQuery("title:bloomberg");
|
query.setQuery("title:bloomberg");
|
||||||
query.add("rows", "10");
|
query.add("rows", "10");
|
||||||
query.add("rq", "{!ltr reRankDocs=10 model=sum4}");
|
query.add("rq", "{!ltr reRankDocs=10 model=sum4}");
|
||||||
|
|
||||||
//csv - no feature format check (default to sparse)
|
//csv - no feature format specified i.e. use default
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
query.add("fl", "*,score,fv:[fv store=test4]");
|
query.add("fl", "*,score,fv:[fv store=test4]");
|
||||||
assertJQ(
|
assertJQ(
|
||||||
"/query" + query.toQueryString(),
|
"/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/fv/=='"+docs0fv_sparse_csv+"'");
|
"/response/docs/[0]/fv/=='"+docs0fv_default_csv+"'");
|
||||||
assertJQ(
|
assertJQ(
|
||||||
"/query" + query.toQueryString(),
|
"/query" + query.toQueryString(),
|
||||||
"/response/docs/[1]/fv/=='"+docs1fv_sparse_csv+"'");
|
"/response/docs/[1]/fv/=='"+docs1fv_default_csv+"'");
|
||||||
|
|
||||||
//csv - sparse feature format check
|
//csv - sparse feature format check
|
||||||
query.remove("fl");
|
query.remove("fl");
|
||||||
|
|
|
@ -97,13 +97,13 @@ public class TestFilterSolrFeature extends TestRerankBase {
|
||||||
query.add("rq", "{!ltr reRankDocs=4 model=fqmodel efi.user_query=w2}");
|
query.add("rq", "{!ltr reRankDocs=4 model=fqmodel efi.user_query=w2}");
|
||||||
query.add("fl", "fv:[fv]");
|
query.add("fl", "fv:[fv]");
|
||||||
|
|
||||||
final String docs0fv_sparse_csv= FeatureLoggerTestUtils.toFeatureVector(
|
final String docs0fv_csv= FeatureLoggerTestUtils.toFeatureVector(
|
||||||
"matchedTitle","1.0", "popularity","3.0");
|
"matchedTitle","1.0", "popularity","3.0");
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='2'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='2'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='1'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='1'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_sparse_csv+"'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,20 +105,48 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
||||||
final Double doc0Score = (Double) ((Map<String,Object>) ((ArrayList<Object>) ((Map<String,Object>) jsonParse
|
final Double doc0Score = (Double) ((Map<String,Object>) ((ArrayList<Object>) ((Map<String,Object>) jsonParse
|
||||||
.get("response")).get("docs")).get(0)).get("score");
|
.get("response")).get("docs")).get(0)).get("score");
|
||||||
|
|
||||||
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature",doc0Score.toString(),
|
||||||
|
"nomatchfeature2","0.0");
|
||||||
|
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature","0.0",
|
||||||
|
"nomatchfeature2","0.0");
|
||||||
|
final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature","0.0",
|
||||||
|
"nomatchfeature2","0.0");
|
||||||
|
final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature","0.0",
|
||||||
|
"nomatchfeature2","0.0");
|
||||||
|
|
||||||
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"yesmatchfeature",doc0Score.toString());
|
||||||
|
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
|
||||||
|
final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv);
|
||||||
|
final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv);
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score=="
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score=="
|
||||||
+ (doc0Score * 1.1));
|
+ (doc0Score * 1.1));
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'");
|
"/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='2'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='2'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='4'");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='4'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -142,15 +170,47 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
||||||
final Double doc0Score = (Double) ((Map<String,Object>) ((ArrayList<Object>) ((Map<String,Object>) jsonParse
|
final Double doc0Score = (Double) ((Map<String,Object>) ((ArrayList<Object>) ((Map<String,Object>) jsonParse
|
||||||
.get("response")).get("docs")).get(0)).get("score");
|
.get("response")).get("docs")).get(0)).get("score");
|
||||||
|
|
||||||
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature",doc0Score.toString(),
|
||||||
|
"nomatchfeature2","0.0",
|
||||||
|
"nomatchfeature3","0.0");
|
||||||
|
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature","0.0",
|
||||||
|
"nomatchfeature2","0.0",
|
||||||
|
"nomatchfeature3","0.0");
|
||||||
|
final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature","0.0",
|
||||||
|
"nomatchfeature2","0.0",
|
||||||
|
"nomatchfeature3","0.0");
|
||||||
|
final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature","0.0",
|
||||||
|
"yesmatchfeature","0.0",
|
||||||
|
"nomatchfeature2","0.0",
|
||||||
|
"nomatchfeature3","0.0");
|
||||||
|
|
||||||
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"yesmatchfeature",doc0Score.toString());
|
||||||
|
final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
|
||||||
|
final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv);
|
||||||
|
final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv);
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'");
|
"/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -163,8 +223,14 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
||||||
query.add("fv", "true");
|
query.add("fv", "true");
|
||||||
query.add("rq", "{!ltr model=nomatchmodel3 reRankDocs=4}");
|
query.add("rq", "{!ltr model=nomatchmodel3 reRankDocs=4}");
|
||||||
|
|
||||||
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature4","0.0");
|
||||||
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -184,9 +250,15 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
|
||||||
query.add("rows", "4");
|
query.add("rows", "4");
|
||||||
query.add("rq", "{!ltr model=nomatchmodel4 reRankDocs=4}");
|
query.add("rq", "{!ltr model=nomatchmodel4 reRankDocs=4}");
|
||||||
|
|
||||||
|
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
|
||||||
|
"nomatchfeature4","0.0");
|
||||||
|
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
|
||||||
|
|
||||||
|
final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
|
||||||
|
|
||||||
assertJQ("/query" + query.toQueryString(),
|
assertJQ("/query" + query.toQueryString(),
|
||||||
"/response/docs/[0]/score==0.0");
|
"/response/docs/[0]/score==0.0");
|
||||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
|
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue