From 96e8392921792b9ec281c28f28f49b50f998ceed Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Mon, 6 Aug 2018 20:04:59 -0700 Subject: [PATCH] SOLR-11770: NPE in tvrh if no field is specified and document doesn't contain any fields with term vectors --- solr/CHANGES.txt | 2 + .../component/TermVectorComponent.java | 89 ++-- .../response/RetrieveFieldsOptimizer.java | 8 +- .../transform/BaseEditorialTransformer.java | 2 + .../solr/collection1/conf/schema.xml | 2 +- .../QueryElevationComponentTest.java | 30 ++ .../component/TermVectorComponentTest.java | 388 ++++++++++-------- 7 files changed, 304 insertions(+), 217 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5fb1f884282..e21b931040e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -212,6 +212,8 @@ Bug Fixes This would fix the NPE when using the search stream with partitionKeys. (Varun Thacker) +* SOLR-11770: NPE in tvrh if no field is specified and document doesn't contain any fields with term vectors + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java index 4cb95c2055e..784dc814206 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java @@ -17,7 +17,6 @@ package org.apache.solr.handler.component; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -28,15 +27,16 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; @@ -44,11 +44,14 @@ import org.apache.solr.common.params.TermVectorParams; import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; +import org.apache.solr.response.DocsStreamer; +import org.apache.solr.response.RetrieveFieldsOptimizer; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocList; import org.apache.solr.search.DocListAndSet; import org.apache.solr.search.ReturnFields; +import org.apache.solr.search.SolrDocumentFetcher; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrReturnFields; import org.apache.solr.util.SolrPluginUtils; @@ -262,54 +265,49 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar //Only load the id field to get the uniqueKey of that //field - - final String finalUniqFieldName = uniqFieldName; - - final List uniqValues = new ArrayList<>(); - - // TODO: is this required to be single-valued? if so, we should STOP - // once we find it... - final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() { - @Override - public void stringField(FieldInfo fieldInfo, byte[] bytes) { - uniqValues.add(new String(bytes, StandardCharsets.UTF_8)); - } - - @Override - public void intField(FieldInfo fieldInfo, int value) { - uniqValues.add(Integer.toString(value)); - } - - @Override - public void longField(FieldInfo fieldInfo, long value) { - uniqValues.add(Long.toString(value)); - } - - @Override - public Status needsField(FieldInfo fieldInfo) { - return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO; - } - }; + SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); + SolrReturnFields srf = new SolrReturnFields(uniqFieldName, rb.req); + RetrieveFieldsOptimizer retrieveFieldsOptimizer = RetrieveFieldsOptimizer.create(docFetcher, srf); while (iter.hasNext()) { Integer docId = iter.next(); NamedList docNL = new NamedList<>(); if (keyField != null) { - reader.document(docId, getUniqValue); - String uniqVal = null; - if (uniqValues.size() != 0) { - uniqVal = uniqValues.get(0); - uniqValues.clear(); - docNL.add("uniqueKey", uniqVal); - termVectors.add(uniqVal, docNL); + SolrDocument sdoc = null; + try { + if (retrieveFieldsOptimizer.returnStoredFields()) { + Document doc = docFetcher.doc(docId, retrieveFieldsOptimizer.getStoredFields()); + // make sure to use the schema from the searcher and not the request (cross-core) + sdoc = DocsStreamer.convertLuceneDocToSolrDoc(doc, searcher.getSchema(), srf); + } else { + // no need to get stored fields of the document, see SOLR-5968 + sdoc = new SolrDocument(); + } + + // decorate the document with non-stored docValues fields + if (retrieveFieldsOptimizer.returnDVFields()) { + docFetcher.decorateDocValueFields(sdoc, docId, retrieveFieldsOptimizer.getDvFields()); + } + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + docId, e); } + Object val = sdoc.getFieldValue(uniqFieldName); + String uniqVal = ""; + if (val instanceof StoredField) { + uniqVal = ((StoredField) val).stringValue(); + } else { + uniqVal = val.toString(); + } + + docNL.add("uniqueKey", uniqVal); + termVectors.add(uniqVal, docNL); } else { // support for schemas w/o a unique key, termVectors.add("doc-" + docId, docNL); } - if ( null != fields ) { + if (null != fields) { for (Map.Entry entry : fieldOptions.entrySet()) { final String field = entry.getKey(); final Terms vector = reader.getTermVector(docId, field); @@ -321,11 +319,14 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar } else { // extract all fields final Fields vectors = reader.getTermVectors(docId); - for (String field : vectors) { - Terms terms = vectors.terms(field); - if (terms != null) { - TermsEnum termsEnum = terms.iterator(); - mapOneVector(docNL, allFields, reader, docId, termsEnum, field); + // There can be no documents with vectors + if (vectors != null) { + for (String field : vectors) { + Terms terms = vectors.terms(field); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + mapOneVector(docNL, allFields, reader, docId, termsEnum, field); + } } } } diff --git a/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java b/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java index f143b940708..fcd47a732a4 100644 --- a/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java +++ b/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java @@ -53,19 +53,19 @@ public class RetrieveFieldsOptimizer { storedFields.clear(); } - boolean returnStoredFields() { + public boolean returnStoredFields() { return !(storedFields != null && storedFields.isEmpty()); } - boolean returnDVFields() { + public boolean returnDVFields() { return !dvFields.isEmpty(); } - Set getStoredFields() { + public Set getStoredFields() { return storedFields; } - Set getDvFields() { + public Set getDvFields() { return dvFields; } diff --git a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java index 6324d8ad2e1..d23813161d3 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java @@ -73,6 +73,8 @@ public abstract class BaseEditorialTransformer extends DocTransformer { ft.readableToIndexed(f.stringValue(), bytesRefBuilder); } return bytesRefBuilder.get(); + } else if (obj instanceof String) { // Allows the idField to be stored=false, docValues=true + return new BytesRef(((String)obj)); } throw new AssertionError("Expected an IndexableField but got: " + obj.getClass()); } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml index 64d6fddc4c1..b1a261b6c73 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml @@ -502,7 +502,7 @@ - + diff --git a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index 892d51f4b43..0db6b0f7be5 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -23,6 +23,7 @@ import java.io.PrintWriter; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; +import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; @@ -36,14 +37,43 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.FileUtils; import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class QueryElevationComponentTest extends SolrTestCaseJ4 { + @Rule + public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule()); + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + @BeforeClass + public static void beforeClass() throws Exception { + switch (random().nextInt(3)) { + case 0: + System.setProperty("solr.tests.id.stored", "true"); + System.setProperty("solr.tests.id.docValues", "true"); + break; + case 1: + System.setProperty("solr.tests.id.stored", "true"); + System.setProperty("solr.tests.id.docValues", "false"); + break; + case 2: + System.setProperty("solr.tests.id.stored", "false"); + System.setProperty("solr.tests.id.docValues", "true"); + break; + default: + fail("Bad random number generatged not between 0-2 iunclusive"); + break; + } + } + @Before @Override public void setUp() throws Exception { diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java index 91a586993f9..c083a0a616e 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java @@ -20,110 +20,147 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.TermVectorParams; import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + /** * * **/ public class TermVectorComponentTest extends SolrTestCaseJ4 { + + @Rule + public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule()); + + // ensure that we operate correctly with all valid combinations of the uniqueKey being + // stored and/or in docValues. @BeforeClass public static void beforeClass() throws Exception { - initCore("solrconfig.xml","schema.xml"); - - assertU(adoc("id", "0", - "test_posoffpaytv", "This is a title and another title", - "test_posofftv", "This is a title and another title", - "test_basictv", "This is a title and another title", - "test_notv", "This is a title and another title", - "test_postv", "This is a title and another title", - "test_offtv", "This is a title and another title" - )); - assertU(adoc("id", "1", - "test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.", - "test_posofftv", "The quick reb fox jumped over the lazy brown dogs.", - "test_basictv", "The quick reb fox jumped over the lazy brown dogs.", - "test_notv", "The quick reb fox jumped over the lazy brown dogs.", - "test_postv", "The quick reb fox jumped over the lazy brown dogs.", - "test_offtv", "The quick reb fox jumped over the lazy brown dogs." - )); - assertU(adoc("id", "2", - "test_posoffpaytv", "This is a document", - "test_posofftv", "This is a document", - "test_basictv", "This is a document", - "test_notv", "This is a document", - "test_postv", "This is a document", - "test_offtv", "This is a document" - )); - assertU(adoc("id", "3", - "test_posoffpaytv", "another document", - "test_posofftv", "another document", - "test_basictv", "another document", - "test_notv", "another document", - "test_postv", "another document", - "test_offtv", "another document" - )); - //bunch of docs that are variants on blue - assertU(adoc("id", "4", - "test_posoffpaytv", "blue", - "test_posofftv", "blue", - "test_basictv", "blue", - "test_notv", "blue", - "test_postv", "blue", - "test_offtv", "blue" - )); - assertU(adoc("id", "5", - "test_posoffpaytv", "blud", - "test_posofftv", "blud", - "test_basictv", "blud", - "test_notv", "blud", - "test_postv", "blud", - "test_offtv", "blud" - )); - assertU(adoc("id", "6", - "test_posoffpaytv", "boue", - "test_posofftv", "boue", - "test_basictv", "boue", - "test_notv", "boue", - "test_postv", "boue", - "test_offtv", "boue" - )); - assertU(adoc("id", "7", - "test_posoffpaytv", "glue", - "test_posofftv", "glue", - "test_basictv", "glue", - "test_notv", "glue", - "test_postv", "glue", - "test_offtv", "glue" - )); - assertU(adoc("id", "8", - "test_posoffpaytv", "blee", - "test_posofftv", "blee", - "test_basictv", "blee", - "test_notv", "blee", - "test_postv", "blee", - "test_offtv", "blee" - )); - assertU(adoc("id", "9", - "test_posoffpaytv", "blah", - "test_posofftv", "blah", - "test_basictv", "blah", - "test_notv", "blah", - "test_postv", "blah", - "test_offtv", "blah" - )); - - assertNull(h.validateUpdate(commit())); + switch (random().nextInt(3)) { + case 0: + System.setProperty("solr.tests.id.stored", "true"); + System.setProperty("solr.tests.id.docValues", "true"); + break; + case 1: + System.setProperty("solr.tests.id.stored", "true"); + System.setProperty("solr.tests.id.docValues", "false"); + break; + case 2: + System.setProperty("solr.tests.id.stored", "false"); + System.setProperty("solr.tests.id.docValues", "true"); + break; + default: + fail("Bad random number generatged not between 0-2 iunclusive"); + break; + } + initCore("solrconfig.xml", "schema.xml"); } static String tv = "/tvrh"; @Test - public void testBasics() throws Exception { - assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") - ,"/termVectors=={'0':{'uniqueKey':'0'," + + public void testCanned() throws Exception { + + clearIndex(); + assertU(adoc("id", "0", + "test_posoffpaytv", "This is a title and another title", + "test_posofftv", "This is a title and another title", + "test_basictv", "This is a title and another title", + "test_notv", "This is a title and another title", + "test_postv", "This is a title and another title", + "test_offtv", "This is a title and another title" + )); + assertU(adoc("id", "1", + "test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.", + "test_posofftv", "The quick reb fox jumped over the lazy brown dogs.", + "test_basictv", "The quick reb fox jumped over the lazy brown dogs.", + "test_notv", "The quick reb fox jumped over the lazy brown dogs.", + "test_postv", "The quick reb fox jumped over the lazy brown dogs.", + "test_offtv", "The quick reb fox jumped over the lazy brown dogs." + )); + assertU(adoc("id", "2", + "test_posoffpaytv", "This is a document", + "test_posofftv", "This is a document", + "test_basictv", "This is a document", + "test_notv", "This is a document", + "test_postv", "This is a document", + "test_offtv", "This is a document" + )); + assertU(adoc("id", "3", + "test_posoffpaytv", "another document", + "test_posofftv", "another document", + "test_basictv", "another document", + "test_notv", "another document", + "test_postv", "another document", + "test_offtv", "another document" + )); + //bunch of docs that are variants on blue + assertU(adoc("id", "4", + "test_posoffpaytv", "blue", + "test_posofftv", "blue", + "test_basictv", "blue", + "test_notv", "blue", + "test_postv", "blue", + "test_offtv", "blue" + )); + assertU(adoc("id", "5", + "test_posoffpaytv", "blud", + "test_posofftv", "blud", + "test_basictv", "blud", + "test_notv", "blud", + "test_postv", "blud", + "test_offtv", "blud" + )); + assertU(adoc("id", "6", + "test_posoffpaytv", "boue", + "test_posofftv", "boue", + "test_basictv", "boue", + "test_notv", "boue", + "test_postv", "boue", + "test_offtv", "boue" + )); + assertU(adoc("id", "7", + "test_posoffpaytv", "glue", + "test_posofftv", "glue", + "test_basictv", "glue", + "test_notv", "glue", + "test_postv", "glue", + "test_offtv", "glue" + )); + assertU(adoc("id", "8", + "test_posoffpaytv", "blee", + "test_posofftv", "blee", + "test_basictv", "blee", + "test_notv", "blee", + "test_postv", "blee", + "test_offtv", "blee" + )); + assertU(adoc("id", "9", + "test_posoffpaytv", "blah", + "test_posofftv", "blah", + "test_basictv", "blah", + "test_notv", "blah", + "test_postv", "blah", + "test_offtv", "blah" + )); + + assertNull(h.validateUpdate(commit())); + doBasics(); + doOptions(); + doPerField(); + doPayloads(); + } + + + private void doBasics() throws Exception { + assertJQ(req("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," + @@ -131,38 +168,38 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 { " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // tv.fl diff from fl - assertJQ(req("json.nl","map", - "qt",tv, - "q", "id:0", - "fl", "*,score", - "tv.fl", "test_basictv,test_offtv", - TermVectorComponent.COMPONENT_NAME, "true", - TermVectorParams.TF, "true") - ,"/termVectors=={'0':{'uniqueKey':'0'," + + assertJQ(req("json.nl", "map", + "qt", tv, + "q", "id:0", + "fl", "*,score", + "tv.fl", "test_basictv,test_offtv", + TermVectorComponent.COMPONENT_NAME, "true", + TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // multi-valued tv.fl - assertJQ(req("json.nl","map", - "qt",tv, - "q", "id:0", - "fl", "*,score", - "tv.fl", "test_basictv", - "tv.fl","test_offtv", - TermVectorComponent.COMPONENT_NAME, "true", - TermVectorParams.TF, "true") - ,"/termVectors=={'0':{'uniqueKey':'0'," + + assertJQ(req("json.nl", "map", + "qt", tv, + "q", "id:0", + "fl", "*,score", + "tv.fl", "test_basictv", + "tv.fl", "test_offtv", + TermVectorComponent.COMPONENT_NAME, "true", + TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // re-use fl glob - assertJQ(req("json.nl","map", - "qt",tv, - "q", "id:0", - "fl", "*,score", - TermVectorComponent.COMPONENT_NAME, "true", - TermVectorParams.TF, "true") - ,"/termVectors=={'0':{'uniqueKey':'0'," + + assertJQ(req("json.nl", "map", + "qt", tv, + "q", "id:0", + "fl", "*,score", + TermVectorComponent.COMPONENT_NAME, "true", + TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," + @@ -170,51 +207,50 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 { " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // re-use fl, ignore things we can't handle - assertJQ(req("json.nl","map", - "qt",tv, - "q", "id:0", - "fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)", - TermVectorComponent.COMPONENT_NAME, "true", - TermVectorParams.TF, "true") - ,"/termVectors=={'0':{'uniqueKey':'0'," + + assertJQ(req("json.nl", "map", + "qt", tv, + "q", "id:0", + "fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)", + TermVectorComponent.COMPONENT_NAME, "true", + TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // re-use (multi-valued) fl, ignore things we can't handle - assertJQ(req("json.nl","map", - "qt",tv, - "q", "id:0", - "fl", "score,test_basictv", - "fl", "[docid],test_postv,val:sum(3,4)", - TermVectorComponent.COMPONENT_NAME, "true", - TermVectorParams.TF, "true") - ,"/termVectors=={'0':{'uniqueKey':'0'," + + assertJQ(req("json.nl", "map", + "qt", tv, + "q", "id:0", + "fl", "score,test_basictv", + "fl", "[docid],test_postv,val:sum(3,4)", + TermVectorComponent.COMPONENT_NAME, "true", + TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); } - @Test - public void testOptions() throws Exception { - assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" - , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true") - ,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" + private void doOptions() throws Exception { + assertJQ(req("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" + , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true") + , "/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" ); - - assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" + + assertJQ(req("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" , TermVectorParams.ALL, "true") - ,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" - ); - + , "/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" + ); + // test each combination at random final List list = new ArrayList<>(); - list.addAll(Arrays.asList("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true")); - String[][] options = new String[][] { { TermVectorParams.TF, "'tf':1" }, - { TermVectorParams.OFFSETS, "'offsets':{'start':20, 'end':27}" }, - { TermVectorParams.POSITIONS, "'positions':{'position':5}" }, - { TermVectorParams.DF, "'df':2" }, - { TermVectorParams.TF_IDF, "'tf-idf':0.5" } }; + list.addAll(Arrays.asList("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true")); + String[][] options = new String[][]{{TermVectorParams.TF, "'tf':1"}, + {TermVectorParams.OFFSETS, "'offsets':{'start':20, 'end':27}"}, + {TermVectorParams.POSITIONS, "'positions':{'position':5}"}, + {TermVectorParams.DF, "'df':2"}, + {TermVectorParams.TF_IDF, "'tf-idf':0.5"}}; StringBuilder expected = new StringBuilder("/termVectors/0/test_posofftv/anoth=={"); boolean first = true; for (int i = 0; i < options.length; i++) { @@ -225,46 +261,62 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 { } first = false; expected.append(options[i][1]); - + } list.add(options[i][0]); list.add(use ? "true" : "false"); } - + expected.append("}"); assertJQ(req(list.toArray(new String[0])), expected.toString()); } - @Test - public void testPerField() throws Exception { - assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" - ,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true" - ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv" - ,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false" - ,"f.test_posofftv." + TermVectorParams.POSITIONS, "false" - ,"f.test_offtv." + TermVectorParams.OFFSETS, "false" - ,"f.test_basictv." + TermVectorParams.DF, "false" - ,"f.test_basictv." + TermVectorParams.TF, "false" - ,"f.test_basictv." + TermVectorParams.TF_IDF, "false" + private void doPerField() throws Exception { + assertJQ(req("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" + , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true" + , TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv" + , "f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false" + , "f.test_posofftv." + TermVectorParams.POSITIONS, "false" + , "f.test_offtv." + TermVectorParams.OFFSETS, "false" + , "f.test_basictv." + TermVectorParams.DF, "false" + , "f.test_basictv." + TermVectorParams.TF, "false" + , "f.test_basictv." + TermVectorParams.TF_IDF, "false" ) - ,"/termVectors/0/test_basictv=={'anoth':{},'titl':{}}" - ,"/termVectors/0/test_postv/anoth=={'tf':1, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" - ,"/termVectors/0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}" - ,"/termVectors/warnings=={ 'noTermVectors':['test_notv'], 'noPositions':['test_basictv', 'test_offtv'], 'noOffsets':['test_basictv', 'test_postv']}" + , "/termVectors/0/test_basictv=={'anoth':{},'titl':{}}" + , "/termVectors/0/test_postv/anoth=={'tf':1, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" + , "/termVectors/0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}" + , "/termVectors/warnings=={ 'noTermVectors':['test_notv'], 'noPositions':['test_basictv', 'test_offtv'], 'noOffsets':['test_basictv', 'test_postv']}" + ); + } + + private void doPayloads() throws Exception { + // This field uses TokenOffsetPayloadTokenFilter, which + // stuffs start (20) and end offset (27) into the + // payload: + assertJQ(req("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" + , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true", + TermVectorParams.PAYLOADS, "true") + , "/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}" ); } @Test - public void testPayloads() throws Exception { - // This field uses TokenOffsetPayloadTokenFilter, which - // stuffs start (20) and end offset (27) into the - // payload: - assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" - , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true", - TermVectorParams.PAYLOADS, "true") - ,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}" + public void testNoVectors() throws Exception { + clearIndex(); + assertU(adoc("id", "0")); + assertU(adoc("id", "1")); + assertU(adoc("id", "2")); + assertU(adoc("id", "3")); + assertNull(h.validateUpdate(commit())); + + // Kind of an odd test, but we just want to know if we don't generate an NPE when there is nothing to give back in the term vectors. + assertJQ(req("json.nl", "map", "qt", tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") + , "/termVectors=={'0':{'uniqueKey':'0'}}}" ); + + } + }