diff --git a/solr/contrib/ltr/README.md b/solr/contrib/ltr/README.md
index 5c95056c678..83fb279a6db 100644
--- a/solr/contrib/ltr/README.md
+++ b/solr/contrib/ltr/README.md
@@ -324,7 +324,7 @@ produce the features without doing the reranking:
`fl=*,score,[features store=yourFeatureStore format=[dense|sparse] ]`
This will return the values of the features in the given store. The format of the
-extracted features will be based on the format parameter. The default is sparse.
+extracted features will be based on the format parameter. The default is dense.
# Assemble training data
In order to train a learning to rank model you need training data. Training data is
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java
index 354ecc27044..9585a7f97ab 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
@@ -59,7 +60,7 @@ import org.slf4j.LoggerFactory;
* will default to the features used by your reranking model.
* efi.* - External feature information variables required by the features
* you are extracting.
- * format - The format you want the features to be returned in. Supports (dense|sparse). Defaults to sparse.
+ * format - The format you want the features to be returned in. Supports (dense|sparse). Defaults to dense.
*/
public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
@@ -77,7 +78,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
private String fvCacheName;
private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME;
private String defaultStore;
- private String defaultFormat;
+ private FeatureLogger.FeatureFormat defaultFormat = FeatureLogger.FeatureFormat.DENSE;
private char csvKeyValueDelimiter = CSVFeatureLogger.DEFAULT_KEY_VALUE_SEPARATOR;
private char csvFeatureSeparator = CSVFeatureLogger.DEFAULT_FEATURE_SEPARATOR;
@@ -96,7 +97,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
}
public void setDefaultFormat(String defaultFormat) {
- this.defaultFormat = defaultFormat;
+ this.defaultFormat = FeatureLogger.FeatureFormat.valueOf(defaultFormat.toUpperCase(Locale.ROOT)); // case-insensitive lookup of the configured name; a null argument fails on toUpperCase and a name matching no FeatureFormat constant is rejected by valueOf
}
public void setCsvKeyValueDelimiter(String csvKeyValueDelimiter) {
@@ -133,7 +134,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
// Create and supply the feature logger to be used
SolrQueryRequestContextUtils.setFeatureLogger(req,
createFeatureLogger(
- localparams.get(FV_FORMAT, defaultFormat)));
+ localparams.get(FV_FORMAT)));
return new FeatureTransformer(name, localparams, req);
}
@@ -147,23 +148,17 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
*
* @return a feature logger for the format specified.
*/
- private FeatureLogger createFeatureLogger(String featureFormat) {
- final FeatureLogger.FeatureFormat f;
- if (featureFormat == null || featureFormat.isEmpty() ||
- featureFormat.equals("sparse")) {
- f = FeatureLogger.FeatureFormat.SPARSE;
- }
- else if (featureFormat.equals("dense")) {
- f = FeatureLogger.FeatureFormat.DENSE;
- }
- else {
- f = FeatureLogger.FeatureFormat.SPARSE;
- log.warn("unknown feature logger feature format {}", featureFormat);
+ private FeatureLogger createFeatureLogger(String formatStr) { // formatStr is the per-request format value handed in by the caller; it may be null
+ final FeatureLogger.FeatureFormat format;
+ if (formatStr != null) { // an explicit per-request value takes precedence over the configured default
+ format = FeatureLogger.FeatureFormat.valueOf(formatStr.toUpperCase(Locale.ROOT)); // case-insensitive; unlike the removed code, an empty or unrecognised name is no longer logged and mapped to SPARSE but rejected by valueOf
+ } else {
+ format = this.defaultFormat; // factory-level default: DENSE unless setDefaultFormat replaced it
}
if (fvCacheName == null) {
throw new IllegalArgumentException("a fvCacheName must be configured");
}
- return new CSVFeatureLogger(fvCacheName, f, csvKeyValueDelimiter, csvFeatureSeparator);
+ return new CSVFeatureLogger(fvCacheName, format, csvKeyValueDelimiter, csvFeatureSeparator); // same constructor call as before, now passing the resolved format
}
class FeatureTransformer extends DocTransformer {
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml
index 1e1a6183be7..0e92546723f 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml
@@ -36,6 +36,7 @@
enclosed between brackets (in this case [fv]). In order to get the feature
vector you will have to specify that you want the field (e.g., fl="*,[fv]) -->
+ ${solr.ltr.transformer.fv.defaultFormat:dense}
QUERY_DOC_FV
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java
index 792975a112a..52778219c61 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java
@@ -75,12 +75,55 @@ public class TestRerankBase extends RestTestBase {
protected static File fstorefile = null;
protected static File mstorefile = null;
+ final private static String SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT = "solr.ltr.transformer.fv.defaultFormat";
+ private static String defaultFeatureFormat;
+
+ protected String chooseDefaultFeatureVector(String dense, String sparse) {
+ if (defaultFeatureFormat == null) {
+ // to match ${solr.ltr.transformer.fv.defaultFormat:dense} snippet
+ return dense;
+ } else if ("dense".equals(defaultFeatureFormat)) {
+ return dense;
+ } else if ("sparse".equals(defaultFeatureFormat)) {
+ return sparse;
+ } else {
+ fail("unexpected feature format choice: "+defaultFeatureFormat);
+ return null;
+ }
+ }
+
+ protected static void chooseDefaultFeatureFormat() throws Exception { // randomly picks which default feature format this test setup will run with
+ switch (random().nextInt(3)) {
+ case 0:
+ defaultFeatureFormat = null; // no explicit choice: the system property is not set by this method
+ break;
+ case 1:
+ defaultFeatureFormat = "dense";
+ break;
+ case 2:
+ defaultFeatureFormat = "sparse";
+ break;
+ default:
+ fail("unexpected feature format choice"); // guard for any value other than the three handled cases
+ break;
+ }
+ if (defaultFeatureFormat != null) { // NOTE(review): when null was chosen, a property value that already exists is left untouched - confirm that is intended
+ System.setProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT, defaultFeatureFormat); // publish the choice under the solr.ltr.transformer.fv.defaultFormat system property
+ }
+ }
+
+ protected static void unchooseDefaultFeatureFormat() { // removes the system property that chooseDefaultFeatureFormat may have set
+ System.clearProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT); // the static defaultFeatureFormat field itself is not reset here
+ }
+
protected static void setuptest(boolean bulkIndex) throws Exception {
+ chooseDefaultFeatureFormat(); // pick the default feature format before the solrconfig-ltr.xml based setup runs; presumably the config resolves the system property when it is loaded - TODO confirm
setuptest("solrconfig-ltr.xml", "schema.xml");
if (bulkIndex) bulkIndex();
}
protected static void setupPersistenttest(boolean bulkIndex) throws Exception {
+ chooseDefaultFeatureFormat(); // pick the default feature format before the solrconfig-ltr.xml based setup runs; presumably the config resolves the system property when it is loaded - TODO confirm
setupPersistentTest("solrconfig-ltr.xml", "schema.xml");
if (bulkIndex) bulkIndex();
}
@@ -178,6 +221,7 @@ public class TestRerankBase extends RestTestBase {
FileUtils.deleteDirectory(tmpSolrHome);
System.clearProperty("managed.schema.mutable");
// System.clearProperty("enable.update.log");
+ unchooseDefaultFeatureFormat();
}
public static void makeRestTestHarnessNull() {
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
index e27844bba72..10ababb07f5 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
@@ -94,8 +94,15 @@ public class TestExternalFeatures extends TestRerankBase {
// Stopword only query passed in
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query='a'}");
+ final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "matchedTitle","0.0",
+ "titlePhraseMatch","0.0");
+ final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
+
+ final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
+
// Features are query title matches, which remove stopwords, leaving blank query, so no matches
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'");
}
@Test
@@ -104,7 +111,7 @@ public class TestExternalFeatures extends TestRerankBase {
query.setQuery("*:*");
query.add("rows", "1");
- final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
+ final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector(
"confidence","2.3", "originalScore","1.0");
// Features we're extracting depend on external feature info not passed in
@@ -114,13 +121,13 @@ public class TestExternalFeatures extends TestRerankBase {
// Adding efi in features section should make it work
query.remove("fl");
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'");
// Adding efi in transformer + rq should still use the transformer's params for feature extraction
query.remove("fl");
query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]");
query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query=w3}");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'");
}
@Test
@@ -129,10 +136,18 @@ public class TestExternalFeatures extends TestRerankBase {
query.setQuery("*:*");
query.add("rows", "1");
+ final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "confidence","0.0",
+ "originalScore","0.0");
+ final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "originalScore","0.0");
+
+ final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv);
+
// Efi is explicitly not required, so we do not score the feature
query.remove("fl");
query.add("fl", "fvalias:[fv store=fstore2]");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'");
}
@Test
@@ -141,10 +156,18 @@ public class TestExternalFeatures extends TestRerankBase {
query.setQuery("*:*");
query.add("rows", "1");
+ final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "occurrences","0.0",
+ "originalScore","0.0");
+ final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "originalScore","0.0");
+
+ final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv);
+
// Efi is explicitly not required, so we do not score the feature
query.remove("fl");
query.add("fl", "fvalias:[fv store=fstore3]");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'");
}
@Test
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java
index 8a09bb380c7..2de23a544e2 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java
@@ -58,9 +58,17 @@ public class TestExternalValueFeatures extends TestRerankBase {
query.add("rows", "3");
query.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature efi.user_device_tablet=1}");
+ final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "user_device_smartphone","0.0",
+ "user_device_tablet","1.0");
+ final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "user_device_tablet","1.0");
+
+ final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv);
+
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/features=='"+FeatureLoggerTestUtils.toFeatureVector("user_device_tablet","1.0")+"'");
+ "/response/docs/[0]/features=='"+docs0features_default_csv+"'");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/score==65.0");
}
@@ -76,9 +84,16 @@ public class TestExternalValueFeatures extends TestRerankBase {
query
.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature}");
+ final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "user_device_smartphone","0.0",
+ "user_device_tablet","0.0");
+ final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector();
+
+ final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv);
+
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/features==''");
+ "/response/docs/[0]/features=='"+docs0features_default_csv+"'");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/score==0.0");
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java
index ad431f5a5e7..6f811d92021 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java
@@ -56,6 +56,13 @@ public class TestFeatureLogging extends TestRerankBase {
"c1", "c2", "c3"}, "test1",
"{\"weights\":{\"c1\":1.0,\"c2\":1.0,\"c3\":1.0}}");
+ final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector(
+ "c1","1.0",
+ "c2","2.0",
+ "c3","3.0",
+ "pop","2.0",
+ "nomatch","0.0",
+ "yesmatch","1.0");
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
"c1","1.0",
"c2","2.0",
@@ -63,6 +70,8 @@ public class TestFeatureLogging extends TestRerankBase {
"pop","2.0",
"yesmatch","1.0");
+ final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
+
final SolrQuery query = new SolrQuery();
query.setQuery("title:bloomberg");
query.add("fl", "title,description,id,popularity,[fv]");
@@ -73,7 +82,7 @@ public class TestFeatureLogging extends TestRerankBase {
restTestHarness.query("/query" + query.toQueryString());
assertJQ(
"/query" + query.toQueryString(),
- "/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_sparse_csv+"'}");
+ "/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_default_csv+"'}");
query.remove("fl");
query.add("fl", "[fv]");
@@ -82,7 +91,7 @@ public class TestFeatureLogging extends TestRerankBase {
restTestHarness.query("/query" + query.toQueryString());
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+docs0fv_sparse_csv+"'}");
+ "/response/docs/[0]/=={'[fv]':'"+docs0fv_default_csv+"'}");
}
@Test
@@ -157,7 +166,7 @@ public class TestFeatureLogging extends TestRerankBase {
query.add("rq", "{!ltr reRankDocs=3 model=sumgroup}");
- final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(
+ final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector(
"c1","1.0",
"c2","2.0",
"c3","3.0",
@@ -166,7 +175,7 @@ public class TestFeatureLogging extends TestRerankBase {
restTestHarness.query("/query" + query.toQueryString());
assertJQ(
"/query" + query.toQueryString(),
- "/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_sparse_csv+"'}");
+ "/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_csv+"'}");
}
@Test
@@ -181,25 +190,28 @@ public class TestFeatureLogging extends TestRerankBase {
"{\"weights\":{\"match\":1.0}}");
final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
- final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("c4", "1.0");
+ final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c4", "1.0");
final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0");
final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "0.0", "c4", "1.0");
+ final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv);
+ final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv);
+
final SolrQuery query = new SolrQuery();
query.setQuery("title:bloomberg");
query.add("rows", "10");
query.add("rq", "{!ltr reRankDocs=10 model=sum4}");
- //csv - no feature format check (default to sparse)
+ //csv - no feature format specified i.e. use default
query.remove("fl");
query.add("fl", "*,score,fv:[fv store=test4]");
assertJQ(
"/query" + query.toQueryString(),
- "/response/docs/[0]/fv/=='"+docs0fv_sparse_csv+"'");
+ "/response/docs/[0]/fv/=='"+docs0fv_default_csv+"'");
assertJQ(
"/query" + query.toQueryString(),
- "/response/docs/[1]/fv/=='"+docs1fv_sparse_csv+"'");
+ "/response/docs/[1]/fv/=='"+docs1fv_default_csv+"'");
//csv - sparse feature format check
query.remove("fl");
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java
index 23b7a55ea83..bb52f39c161 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java
@@ -97,13 +97,13 @@ public class TestFilterSolrFeature extends TestRerankBase {
query.add("rq", "{!ltr reRankDocs=4 model=fqmodel efi.user_query=w2}");
query.add("fl", "fv:[fv]");
- final String docs0fv_sparse_csv= FeatureLoggerTestUtils.toFeatureVector(
+ final String docs0fv_csv= FeatureLoggerTestUtils.toFeatureVector(
"matchedTitle","1.0", "popularity","3.0");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='2'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='1'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_sparse_csv+"'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_csv+"'");
}
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java
index e6c287d3a0e..c068be95cc5 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java
@@ -105,20 +105,48 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
final Double doc0Score = (Double) ((Map) ((ArrayList