From b7c75a3a1c7524994cb2413afa82562e30eaadcb Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 22 Dec 2016 15:31:20 +0000 Subject: [PATCH] SOLR-8542: change default feature vector format (to 'dense' from 'sparse') also: increase test coverage w.r.t. 'sparse' vs. 'dense' vs. 'default' feature vector format --- solr/contrib/ltr/README.md | 2 +- .../LTRFeatureLoggerTransformerFactory.java | 29 +++--- .../solr/collection1/conf/solrconfig-ltr.xml | 1 + .../org/apache/solr/ltr/TestRerankBase.java | 44 +++++++++ .../ltr/feature/TestExternalFeatures.java | 35 +++++-- .../feature/TestExternalValueFeatures.java | 19 +++- .../solr/ltr/feature/TestFeatureLogging.java | 28 ++++-- .../ltr/feature/TestFilterSolrFeature.java | 4 +- .../ltr/feature/TestNoMatchSolrFeature.java | 92 +++++++++++++++++-- 9 files changed, 208 insertions(+), 46 deletions(-) diff --git a/solr/contrib/ltr/README.md b/solr/contrib/ltr/README.md index 5c95056c678..83fb279a6db 100644 --- a/solr/contrib/ltr/README.md +++ b/solr/contrib/ltr/README.md @@ -324,7 +324,7 @@ produce the features without doing the reranking: `fl=*,score,[features store=yourFeatureStore format=[dense|sparse] ]` This will return the values of the features in the given store. The format of the -extracted features will be based on the format parameter. The default is sparse. +extracted features will be based on the format parameter. The default is dense. # Assemble training data In order to train a learning to rank model you need training data. Training data is diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java index 608f317edef..8fd8e515bbd 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.lucene.index.LeafReaderContext; @@ -59,7 +60,7 @@ import org.slf4j.LoggerFactory; * will default to the features used by your reranking model.
* efi.* - External feature information variables required by the features * you are extracting.
- * format - The format you want the features to be returned in. Supports (dense|sparse). Defaults to sparse.
+ * format - The format you want the features to be returned in. Supports (dense|sparse). Defaults to dense.
*/ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { @@ -77,7 +78,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { private String fvCacheName; private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME; private String defaultStore; - private String defaultFormat; + private FeatureLogger.FeatureFormat defaultFormat = FeatureLogger.FeatureFormat.DENSE; private char csvKeyValueDelimiter = CSVFeatureLogger.DEFAULT_KEY_VALUE_SEPARATOR; private char csvFeatureSeparator = CSVFeatureLogger.DEFAULT_FEATURE_SEPARATOR; @@ -96,7 +97,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { } public void setDefaultFormat(String defaultFormat) { - this.defaultFormat = defaultFormat; + this.defaultFormat = FeatureLogger.FeatureFormat.valueOf(defaultFormat.toUpperCase(Locale.ROOT)); } public void setCsvKeyValueDelimiter(String csvKeyValueDelimiter) { @@ -133,7 +134,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { // Create and supply the feature logger to be used SolrQueryRequestContextUtils.setFeatureLogger(req, createFeatureLogger( - localparams.get(FV_FORMAT, defaultFormat))); + localparams.get(FV_FORMAT))); return new FeatureTransformer(name, localparams, req); } @@ -147,23 +148,17 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { * * @return a feature logger for the format specified. */ - private FeatureLogger createFeatureLogger(String featureFormat) { - final FeatureLogger.FeatureFormat f; - if (featureFormat == null || featureFormat.isEmpty() || - featureFormat.equals("sparse")) { - f = FeatureLogger.FeatureFormat.SPARSE; - } - else if (featureFormat.equals("dense")) { - f = FeatureLogger.FeatureFormat.DENSE; - } - else { - f = FeatureLogger.FeatureFormat.SPARSE; - log.warn("unknown feature logger feature format {}", featureFormat); + private FeatureLogger createFeatureLogger(String formatStr) { + final FeatureLogger.FeatureFormat format; + if (formatStr != null) { + format = FeatureLogger.FeatureFormat.valueOf(formatStr.toUpperCase(Locale.ROOT)); + } else { + format = this.defaultFormat; } if (fvCacheName == null) { throw new IllegalArgumentException("a fvCacheName must be configured"); } - return new CSVFeatureLogger(fvCacheName, f, csvKeyValueDelimiter, csvFeatureSeparator); + return new CSVFeatureLogger(fvCacheName, format, csvKeyValueDelimiter, csvFeatureSeparator); } class FeatureTransformer extends DocTransformer { diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml index 1e1a6183be7..0e92546723f 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml @@ -36,6 +36,7 @@ enclosed between brackets (in this case [fv]). In order to get the feature vector you will have to specify that you want the field (e.g., fl="*,[fv]) --> + ${solr.ltr.transformer.fv.defaultFormat:dense} QUERY_DOC_FV diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java index 792975a112a..52778219c61 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestRerankBase.java @@ -75,12 +75,55 @@ public class TestRerankBase extends RestTestBase { protected static File fstorefile = null; protected static File mstorefile = null; + final private static String SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT = "solr.ltr.transformer.fv.defaultFormat"; + private static String defaultFeatureFormat; + + protected String chooseDefaultFeatureVector(String dense, String sparse) { + if (defaultFeatureFormat == null) { + // to match ${solr.ltr.transformer.fv.defaultFormat:dense} snippet + return dense; + } else if ("dense".equals(defaultFeatureFormat)) { + return dense; + } else if ("sparse".equals(defaultFeatureFormat)) { + return sparse; + } else { + fail("unexpected feature format choice: "+defaultFeatureFormat); + return null; + } + } + + protected static void chooseDefaultFeatureFormat() throws Exception { + switch (random().nextInt(3)) { + case 0: + defaultFeatureFormat = null; + break; + case 1: + defaultFeatureFormat = "dense"; + break; + case 2: + defaultFeatureFormat = "sparse"; + break; + default: + fail("unexpected feature format choice"); + break; + } + if (defaultFeatureFormat != null) { + System.setProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT, defaultFeatureFormat); + } + } + + protected static void unchooseDefaultFeatureFormat() { + System.clearProperty(SYSTEM_PROPERTY_SOLR_LTR_TRANSFORMER_FV_DEFAULTFORMAT); + } + protected static void setuptest(boolean bulkIndex) throws Exception { + chooseDefaultFeatureFormat(); setuptest("solrconfig-ltr.xml", "schema.xml"); if (bulkIndex) bulkIndex(); } protected static void setupPersistenttest(boolean bulkIndex) throws Exception { + chooseDefaultFeatureFormat(); setupPersistentTest("solrconfig-ltr.xml", "schema.xml"); if (bulkIndex) bulkIndex(); } @@ -178,6 +221,7 @@ public class TestRerankBase extends RestTestBase { FileUtils.deleteDirectory(tmpSolrHome); System.clearProperty("managed.schema.mutable"); // System.clearProperty("enable.update.log"); + unchooseDefaultFeatureFormat(); } public static void makeRestTestHarnessNull() { diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java index e27844bba72..10ababb07f5 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java @@ -94,8 +94,15 @@ public class TestExternalFeatures extends TestRerankBase { // Stopword only query passed in query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query='a'}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "matchedTitle","0.0", + "titlePhraseMatch","0.0"); + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + // Features are query title matches, which remove stopwords, leaving blank query, so no matches - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); } @Test @@ -104,7 +111,7 @@ public class TestExternalFeatures extends TestRerankBase { query.setQuery("*:*"); query.add("rows", "1"); - final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector( "confidence","2.3", "originalScore","1.0"); // Features we're extracting depend on external feature info not passed in @@ -114,13 +121,13 @@ public class TestExternalFeatures extends TestRerankBase { // Adding efi in features section should make it work query.remove("fl"); query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'"); // Adding efi in transformer + rq should still use the transformer's params for feature extraction query.remove("fl"); query.add("fl", "score,fvalias:[fv store=fstore2 efi.myconf=2.3]"); query.add("rq", "{!ltr reRankDocs=3 model=externalmodel efi.user_query=w3}"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_sparse_csv+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fv_csv+"'"); } @Test @@ -129,10 +136,18 @@ public class TestExternalFeatures extends TestRerankBase { query.setQuery("*:*"); query.add("rows", "1"); + final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "confidence","0.0", + "originalScore","0.0"); + final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "originalScore","0.0"); + + final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv); + // Efi is explicitly not required, so we do not score the feature query.remove("fl"); query.add("fl", "fvalias:[fv store=fstore2]"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'"); } @Test @@ -141,10 +156,18 @@ public class TestExternalFeatures extends TestRerankBase { query.setQuery("*:*"); query.add("rows", "1"); + final String docs0fvalias_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "occurrences","0.0", + "originalScore","0.0"); + final String docs0fvalias_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "originalScore","0.0"); + + final String docs0fvalias_default_csv = chooseDefaultFeatureVector(docs0fvalias_dense_csv, docs0fvalias_sparse_csv); + // Efi is explicitly not required, so we do not score the feature query.remove("fl"); query.add("fl", "fvalias:[fv store=fstore3]"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+FeatureLoggerTestUtils.toFeatureVector("originalScore","0.0")+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fvalias=='"+docs0fvalias_default_csv+"'"); } @Test diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java index 8a09bb380c7..2de23a544e2 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalValueFeatures.java @@ -58,9 +58,17 @@ public class TestExternalValueFeatures extends TestRerankBase { query.add("rows", "3"); query.add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature efi.user_device_tablet=1}"); + final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "user_device_smartphone","0.0", + "user_device_tablet","1.0"); + final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "user_device_tablet","1.0"); + + final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/features=='"+FeatureLoggerTestUtils.toFeatureVector("user_device_tablet","1.0")+"'"); + "/response/docs/[0]/features=='"+docs0features_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==65.0"); } @@ -76,9 +84,16 @@ public class TestExternalValueFeatures extends TestRerankBase { query .add("rq", "{!ltr reRankDocs=3 model=external_model_binary_feature}"); + final String docs0features_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "user_device_smartphone","0.0", + "user_device_tablet","0.0"); + final String docs0features_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0features_default_csv = chooseDefaultFeatureVector(docs0features_dense_csv, docs0features_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/features==''"); + "/response/docs/[0]/features=='"+docs0features_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java index ad431f5a5e7..6f811d92021 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java @@ -56,6 +56,13 @@ public class TestFeatureLogging extends TestRerankBase { "c1", "c2", "c3"}, "test1", "{\"weights\":{\"c1\":1.0,\"c2\":1.0,\"c3\":1.0}}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "c1","1.0", + "c2","2.0", + "c3","3.0", + "pop","2.0", + "nomatch","0.0", + "yesmatch","1.0"); final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c1","1.0", "c2","2.0", @@ -63,6 +70,8 @@ public class TestFeatureLogging extends TestRerankBase { "pop","2.0", "yesmatch","1.0"); + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final SolrQuery query = new SolrQuery(); query.setQuery("title:bloomberg"); query.add("fl", "title,description,id,popularity,[fv]"); @@ -73,7 +82,7 @@ public class TestFeatureLogging extends TestRerankBase { restTestHarness.query("/query" + query.toQueryString()); assertJQ( "/query" + query.toQueryString(), - "/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_sparse_csv+"'}"); + "/response/docs/[0]/=={'title':'bloomberg bloomberg ', 'description':'bloomberg','id':'7', 'popularity':2, '[fv]':'"+docs0fv_default_csv+"'}"); query.remove("fl"); query.add("fl", "[fv]"); @@ -82,7 +91,7 @@ public class TestFeatureLogging extends TestRerankBase { restTestHarness.query("/query" + query.toQueryString()); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+docs0fv_sparse_csv+"'}"); + "/response/docs/[0]/=={'[fv]':'"+docs0fv_default_csv+"'}"); } @Test @@ -157,7 +166,7 @@ public class TestFeatureLogging extends TestRerankBase { query.add("rq", "{!ltr reRankDocs=3 model=sumgroup}"); - final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + final String docs0fv_csv = FeatureLoggerTestUtils.toFeatureVector( "c1","1.0", "c2","2.0", "c3","3.0", @@ -166,7 +175,7 @@ public class TestFeatureLogging extends TestRerankBase { restTestHarness.query("/query" + query.toQueryString()); assertJQ( "/query" + query.toQueryString(), - "/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_sparse_csv+"'}"); + "/grouped/title/groups/[0]/doclist/docs/[0]/=={'fv':'"+docs0fv_csv+"'}"); } @Test @@ -181,25 +190,28 @@ public class TestFeatureLogging extends TestRerankBase { "{\"weights\":{\"match\":1.0}}"); final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0"); - final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector("c4", "1.0"); + final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( "c4", "1.0"); final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "1.0", "c4", "1.0"); final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector("match", "0.0", "c4", "1.0"); + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv); + final SolrQuery query = new SolrQuery(); query.setQuery("title:bloomberg"); query.add("rows", "10"); query.add("rq", "{!ltr reRankDocs=10 model=sum4}"); - //csv - no feature format check (default to sparse) + //csv - no feature format specified i.e. use default query.remove("fl"); query.add("fl", "*,score,fv:[fv store=test4]"); assertJQ( "/query" + query.toQueryString(), - "/response/docs/[0]/fv/=='"+docs0fv_sparse_csv+"'"); + "/response/docs/[0]/fv/=='"+docs0fv_default_csv+"'"); assertJQ( "/query" + query.toQueryString(), - "/response/docs/[1]/fv/=='"+docs1fv_sparse_csv+"'"); + "/response/docs/[1]/fv/=='"+docs1fv_default_csv+"'"); //csv - sparse feature format check query.remove("fl"); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java index 23b7a55ea83..bb52f39c161 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFilterSolrFeature.java @@ -97,13 +97,13 @@ public class TestFilterSolrFeature extends TestRerankBase { query.add("rq", "{!ltr reRankDocs=4 model=fqmodel efi.user_query=w2}"); query.add("fl", "fv:[fv]"); - final String docs0fv_sparse_csv= FeatureLoggerTestUtils.toFeatureVector( + final String docs0fv_csv= FeatureLoggerTestUtils.toFeatureVector( "matchedTitle","1.0", "popularity","3.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='2'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='1'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_sparse_csv+"'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_csv+"'"); } } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java index e6c287d3a0e..c068be95cc5 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java @@ -105,20 +105,48 @@ public class TestNoMatchSolrFeature extends TestRerankBase { final Double doc0Score = (Double) ((Map) ((ArrayList) ((Map) jsonParse .get("response")).get("docs")).get(0)).get("score"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature",doc0Score.toString(), + "nomatchfeature2","0.0"); + final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0"); + final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0"); + final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0"); + + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "yesmatchfeature",doc0Score.toString()); + final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv); + final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv); + final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==" + (doc0Score * 1.1)); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'"); + "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='2'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='4'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'"); } @Test @@ -142,15 +170,47 @@ public class TestNoMatchSolrFeature extends TestRerankBase { final Double doc0Score = (Double) ((Map) ((ArrayList) ((Map) jsonParse .get("response")).get("docs")).get(0)).get("score"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature",doc0Score.toString(), + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + final String docs1fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + final String docs2fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + final String docs3fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature","0.0", + "yesmatchfeature","0.0", + "nomatchfeature2","0.0", + "nomatchfeature3","0.0"); + + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector( + "yesmatchfeature",doc0Score.toString()); + final String docs1fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs2fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + final String docs3fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + final String docs1fv_default_csv = chooseDefaultFeatureVector(docs1fv_dense_csv, docs1fv_sparse_csv); + final String docs2fv_default_csv = chooseDefaultFeatureVector(docs2fv_dense_csv, docs2fv_sparse_csv); + final String docs3fv_default_csv = chooseDefaultFeatureVector(docs3fv_dense_csv, docs3fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/fv=='"+FeatureLoggerTestUtils.toFeatureVector("yesmatchfeature", doc0Score.toString())+"'"); + "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/fv=='"+docs1fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/fv=='"+docs2fv_default_csv+"'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/fv=='"+docs3fv_default_csv+"'"); } @Test @@ -163,8 +223,14 @@ public class TestNoMatchSolrFeature extends TestRerankBase { query.add("fv", "true"); query.add("rq", "{!ltr model=nomatchmodel3 reRankDocs=4}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature4","0.0"); + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); } @Test @@ -184,9 +250,15 @@ public class TestNoMatchSolrFeature extends TestRerankBase { query.add("rows", "4"); query.add("rq", "{!ltr model=nomatchmodel4 reRankDocs=4}"); + final String docs0fv_dense_csv = FeatureLoggerTestUtils.toFeatureVector( + "nomatchfeature4","0.0"); + final String docs0fv_sparse_csv = FeatureLoggerTestUtils.toFeatureVector(); + + final String docs0fv_default_csv = chooseDefaultFeatureVector(docs0fv_dense_csv, docs0fv_sparse_csv); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.0"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv==''"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_default_csv+"'"); } }