mirror of https://github.com/apache/lucene.git
SOLR-10304: Refactor new SolrDocumentFetcher out of SolrIndexSearcher
This commit is contained in:
parent
8664f1f38a
commit
f1aef3d12b
|
@ -140,6 +140,9 @@ Other Changes
|
|||
|
||||
* SOLR-10249: Refactor IndexFetcher.doFetch() to return a more detailed result. (Jeff Miller via David Smiley)
|
||||
|
||||
* SOLR-10304: Refactor Document handling out of SolrIndexSearcher into a new class "SolrDocumentFetcher".
|
||||
Deprecated SolrPluginUtils.docListToSolrDocumentList(). (David Smiley)
|
||||
|
||||
================== 6.5.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
|
|
@ -26,6 +26,8 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -39,8 +41,12 @@ import org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine;
|
|||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.ShardRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.DocListAndSet;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -87,6 +93,60 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
*/
|
||||
private NamedList<Object> initParams;
|
||||
|
||||
/**
|
||||
* Convert a DocList to a SolrDocumentList
|
||||
*
|
||||
* The optional param "ids" is populated with the lucene document id
|
||||
* for each SolrDocument.
|
||||
*
|
||||
* @param docs The {@link org.apache.solr.search.DocList} to convert
|
||||
* @param searcher The {@link org.apache.solr.search.SolrIndexSearcher} to use to load the docs from the Lucene index
|
||||
* @param fields The names of the Fields to load
|
||||
* @param ids A map to store the ids of the docs
|
||||
* @return The new {@link SolrDocumentList} containing all the loaded docs
|
||||
* @throws IOException if there was a problem loading the docs
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public static SolrDocumentList docListToSolrDocumentList(
|
||||
DocList docs,
|
||||
SolrIndexSearcher searcher,
|
||||
Set<String> fields,
|
||||
Map<SolrDocument, Integer> ids ) throws IOException
|
||||
{
|
||||
IndexSchema schema = searcher.getSchema();
|
||||
|
||||
SolrDocumentList list = new SolrDocumentList();
|
||||
list.setNumFound(docs.matches());
|
||||
list.setMaxScore(docs.maxScore());
|
||||
list.setStart(docs.offset());
|
||||
|
||||
DocIterator dit = docs.iterator();
|
||||
|
||||
while (dit.hasNext()) {
|
||||
int docid = dit.nextDoc();
|
||||
|
||||
Document luceneDoc = searcher.doc(docid, fields);
|
||||
SolrDocument doc = new SolrDocument();
|
||||
|
||||
for( IndexableField field : luceneDoc) {
|
||||
if (null == fields || fields.contains(field.name())) {
|
||||
SchemaField sf = schema.getField( field.name() );
|
||||
doc.addField( field.name(), sf.getType().toObject( field ) );
|
||||
}
|
||||
}
|
||||
if (docs.hasScores() && (null == fields || fields.contains("score"))) {
|
||||
doc.addField("score", dit.score());
|
||||
}
|
||||
|
||||
list.add( doc );
|
||||
|
||||
if( ids != null ) {
|
||||
ids.put( doc, new Integer(docid) );
|
||||
}
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||
public void init(NamedList args) {
|
||||
|
@ -172,7 +232,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
checkAvailable(name, engine);
|
||||
DocListAndSet results = rb.getResults();
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<>(results.docList.size());
|
||||
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
|
||||
SolrDocumentList solrDocList = docListToSolrDocumentList(
|
||||
results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
|
||||
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
|
||||
rb.rsp.add("clusters", clusters);
|
||||
|
|
|
@ -277,7 +277,7 @@
|
|||
-->
|
||||
|
||||
<field name="id" type="string" indexed="true" stored="true" required="true"/>
|
||||
<field name="url" type="string" indexed="true" stored="true" required="true"/>
|
||||
<field name="url" type="string" indexed="true" stored="true" required="false"/>
|
||||
<field name="lang" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
|
||||
|
||||
<field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
|
@ -305,6 +305,10 @@
|
|||
|
||||
<dynamicField name="random*" type="random"/>
|
||||
|
||||
<dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<!-- uncomment the following to ignore any fields that don't already match an existing
|
||||
field name or dynamic field, rather than reporting them as an error.
|
||||
alternately, change the type="ignored" to some other type e.g. "text" if you want
|
||||
|
@ -331,4 +335,9 @@
|
|||
<copyField source="body" dest="text"/>
|
||||
<copyField source="snippet" dest="text"/>
|
||||
|
||||
<!-- dynamic destination -->
|
||||
<copyField source="*_dynamic" dest="dynamic_*"/>
|
||||
|
||||
<copyField source="id" dest="range_facet_l"/>
|
||||
|
||||
</schema>
|
||||
|
|
|
@ -15,6 +15,13 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.clustering;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -24,8 +31,14 @@ import org.apache.solr.handler.component.QueryComponent;
|
|||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.QueryCommand;
|
||||
import org.apache.solr.search.QueryResult;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
|
@ -34,6 +47,11 @@ import org.junit.Test;
|
|||
**/
|
||||
public class ClusteringComponentTest extends AbstractClusteringTestCase {
|
||||
|
||||
// Runs before every test method (JUnit @Before) and calls clearIndex().
// NOTE(review): presumably this empties the index so that tests such as testDocListConversion,
// which asserts an exact document count, start from a clean state -- confirm in the base test case.
@Before
|
||||
public void doBefore() {
|
||||
clearIndex();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComponent() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
|
@ -79,4 +97,52 @@ public class ClusteringComponentTest extends AbstractClusteringTestCase {
|
|||
req.close();
|
||||
}
|
||||
|
||||
|
||||
// tests ClusteringComponent.docListToSolrDocumentList
|
||||
@Test
|
||||
public void testDocListConversion() throws Exception {
|
||||
assertU("", adoc("id", "3234", "url", "ignoreme", "val_i", "1",
|
||||
"val_dynamic", "quick red fox"));
|
||||
assertU("", adoc("id", "3235", "url", "ignoreme", "val_i", "1",
|
||||
"val_dynamic", "quick green fox"));
|
||||
assertU("", adoc("id", "3236", "url", "ignoreme", "val_i", "1",
|
||||
"val_dynamic", "quick brown fox"));
|
||||
assertU("", commit());
|
||||
|
||||
RefCounted<SolrIndexSearcher> holder = h.getCore().getSearcher();
|
||||
try {
|
||||
SolrIndexSearcher srchr = holder.get();
|
||||
QueryResult qr = new QueryResult();
|
||||
QueryCommand cmd = new QueryCommand();
|
||||
cmd.setQuery(new MatchAllDocsQuery());
|
||||
cmd.setLen(10);
|
||||
qr = srchr.search(qr, cmd);
|
||||
|
||||
DocList docs = qr.getDocList();
|
||||
assertEquals("wrong docs size", 3, docs.size());
|
||||
Set<String> fields = new HashSet<>();
|
||||
fields.add("val_dynamic");
|
||||
fields.add("dynamic_val");
|
||||
fields.add("range_facet_l"); // copied from id
|
||||
|
||||
SolrDocumentList list = ClusteringComponent.docListToSolrDocumentList(docs, srchr, fields, null);
|
||||
assertEquals("wrong list Size", docs.size(), list.size());
|
||||
for (SolrDocument document : list) {
|
||||
|
||||
assertTrue("unexpected field", ! document.containsKey("val_i"));
|
||||
assertTrue("unexpected id field", ! document.containsKey("id"));
|
||||
|
||||
assertTrue("original field", document.containsKey("val_dynamic"));
|
||||
assertTrue("dyn copy field", document.containsKey("dynamic_val"));
|
||||
assertTrue("copy field", document.containsKey("range_facet_l"));
|
||||
|
||||
assertNotNull("original field null", document.get("val_dynamic"));
|
||||
assertNotNull("dyn copy field null", document.get("dynamic_val"));
|
||||
assertNotNull("copy field null", document.get("range_facet_l"));
|
||||
}
|
||||
} finally {
|
||||
if (null != holder) holder.decref();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -42,7 +42,6 @@ import org.apache.solr.request.LocalSolrQueryRequest;
|
|||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.util.attribute.AttributeUtils;
|
||||
|
@ -465,7 +464,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
// Perform clustering
|
||||
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<>(docList.size());
|
||||
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, engine.getFieldsToLoad(req), docIds );
|
||||
SolrDocumentList solrDocList = ClusteringComponent.docListToSolrDocumentList( docList, searcher, engine.getFieldsToLoad(req), docIds );
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<NamedList<Object>> results = (List<NamedList<Object>>) engine.cluster(query, solrDocList, docIds, req);
|
||||
|
|
|
@ -69,6 +69,7 @@ import org.apache.solr.schema.FieldType;
|
|||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrDocumentFetcher;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.ReturnFields;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
@ -290,7 +291,8 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
|
||||
Document luceneDocument = searcherInfo.getSearcher().doc(docid, rsp.getReturnFields().getLuceneFieldNames());
|
||||
SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema());
|
||||
searcherInfo.getSearcher().decorateDocValueFields(doc, docid, searcherInfo.getSearcher().getNonStoredDVs(true));
|
||||
SolrDocumentFetcher docFetcher = searcherInfo.getSearcher().getDocFetcher();
|
||||
docFetcher.decorateDocValueFields(doc, docid, docFetcher.getNonStoredDVs(true));
|
||||
if ( null != transformer) {
|
||||
if (null == resultContext) {
|
||||
// either first pass, or we've re-opened searcher - either way now we setContext
|
||||
|
@ -423,7 +425,8 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
}
|
||||
Document luceneDocument = searcher.doc(docid, returnFields.getLuceneFieldNames());
|
||||
SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema());
|
||||
searcher.decorateDocValueFields(doc, docid, searcher.getNonStoredDVs(false));
|
||||
SolrDocumentFetcher docFetcher = searcher.getDocFetcher();
|
||||
docFetcher.decorateDocValueFields(doc, docid, docFetcher.getNonStoredDVs(false));
|
||||
|
||||
return doc;
|
||||
} finally {
|
||||
|
@ -471,10 +474,10 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
}
|
||||
|
||||
SolrDocument doc;
|
||||
Set<String> decorateFields = onlyTheseFields == null ? searcher.getNonStoredDVs(false): onlyTheseFields;
|
||||
Set<String> decorateFields = onlyTheseFields == null ? searcher.getDocFetcher().getNonStoredDVs(false): onlyTheseFields;
|
||||
Document luceneDocument = searcher.doc(docid, returnFields.getLuceneFieldNames());
|
||||
doc = toSolrDoc(luceneDocument, core.getLatestSchema());
|
||||
searcher.decorateDocValueFields(doc, docid, decorateFields);
|
||||
searcher.getDocFetcher().decorateDocValueFields(doc, docid, decorateFields);
|
||||
|
||||
long docVersion = (long) doc.getFirstValue(VERSION_FIELD);
|
||||
Object partialVersionObj = partialDoc.getFieldValue(VERSION_FIELD);
|
||||
|
@ -483,7 +486,7 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
if (docVersion > partialDocVersion) {
|
||||
return doc;
|
||||
}
|
||||
for (String fieldName: (Iterable<String>) partialDoc.getFieldNames()) {
|
||||
for (String fieldName: partialDoc.getFieldNames()) {
|
||||
doc.setField(fieldName.toString(), partialDoc.getFieldValue(fieldName)); // since partial doc will only contain single valued fields, this is fine
|
||||
}
|
||||
|
||||
|
@ -604,17 +607,18 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
|
||||
int docid = searcher.getFirstMatch(new Term(idField.getName(), idBytes));
|
||||
if (docid < 0) return null;
|
||||
|
||||
|
||||
SolrDocumentFetcher docFetcher = searcher.getDocFetcher();
|
||||
if (avoidRetrievingStoredFields) {
|
||||
sid = new SolrInputDocument();
|
||||
} else {
|
||||
Document luceneDocument = searcher.doc(docid);
|
||||
Document luceneDocument = docFetcher.doc(docid);
|
||||
sid = toSolrInputDocument(luceneDocument, core.getLatestSchema());
|
||||
}
|
||||
if (onlyTheseNonStoredDVs != null) {
|
||||
searcher.decorateDocValueFields(sid, docid, onlyTheseNonStoredDVs);
|
||||
docFetcher.decorateDocValueFields(sid, docid, onlyTheseNonStoredDVs);
|
||||
} else {
|
||||
searcher.decorateDocValueFields(sid, docid, searcher.getNonStoredDVsWithoutCopyTargets());
|
||||
docFetcher.decorateDocValueFields(sid, docid, docFetcher.getNonStoredDVsWithoutCopyTargets());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
|
|
@ -69,7 +69,7 @@ public abstract class SolrHighlighter
|
|||
if (fields[0].contains("*")) {
|
||||
// create a Java regular expression from the wildcard string
|
||||
String fieldRegex = fields[0].replaceAll("\\*", ".*");
|
||||
Collection<String> storedHighlightFieldNames = request.getSearcher().getStoredHighlightFieldNames();
|
||||
Collection<String> storedHighlightFieldNames = request.getSearcher().getDocFetcher().getStoredHighlightFieldNames();
|
||||
List<String> storedFieldsToHighlight = new ArrayList<>();
|
||||
for (String storedFieldName: storedHighlightFieldNames) {
|
||||
if (storedFieldName.matches(fieldRegex)) {
|
||||
|
|
|
@ -49,6 +49,8 @@ import org.apache.solr.schema.TrieIntField;
|
|||
import org.apache.solr.schema.TrieLongField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.ReturnFields;
|
||||
import org.apache.solr.search.SolrDocumentFetcher;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
|
||||
/**
|
||||
|
@ -57,15 +59,17 @@ import org.apache.solr.search.SolrReturnFields;
|
|||
public class DocsStreamer implements Iterator<SolrDocument> {
|
||||
public static final Set<Class> KNOWN_TYPES = new HashSet<>();
|
||||
|
||||
private org.apache.solr.response.ResultContext rctx;
|
||||
private final org.apache.solr.response.ResultContext rctx;
|
||||
private final SolrDocumentFetcher docFetcher; // a collaborator of SolrIndexSearcher
|
||||
private final DocList docs;
|
||||
|
||||
private DocTransformer transformer;
|
||||
private DocIterator docIterator;
|
||||
private final DocTransformer transformer;
|
||||
private final DocIterator docIterator;
|
||||
|
||||
private final Set<String> fnames; // returnFields.getLuceneFieldNames(). Maybe null. Not empty.
|
||||
private final boolean onlyPseudoFields;
|
||||
private final Set<String> dvFieldsToReturn; // maybe null. Not empty.
|
||||
|
||||
private boolean onlyPseudoFields;
|
||||
private Set<String> fnames;
|
||||
private Set<String> dvFieldsToReturn;
|
||||
private int idx = -1;
|
||||
|
||||
public DocsStreamer(ResultContext rctx) {
|
||||
|
@ -74,46 +78,61 @@ public class DocsStreamer implements Iterator<SolrDocument> {
|
|||
transformer = rctx.getReturnFields().getTransformer();
|
||||
docIterator = this.docs.iterator();
|
||||
fnames = rctx.getReturnFields().getLuceneFieldNames();
|
||||
//TODO move onlyPseudoFields calc to ReturnFields
|
||||
onlyPseudoFields = (fnames == null && !rctx.getReturnFields().wantsAllFields() && !rctx.getReturnFields().hasPatternMatching())
|
||||
|| (fnames != null && fnames.size() == 1 && SolrReturnFields.SCORE.equals(fnames.iterator().next()));
|
||||
|
||||
// add non-stored DV fields that may have been requested
|
||||
if (rctx.getReturnFields().wantsAllFields()) {
|
||||
// check whether there are no additional fields
|
||||
Set<String> fieldNames = rctx.getReturnFields().getLuceneFieldNames(true);
|
||||
if (fieldNames == null) {
|
||||
dvFieldsToReturn = rctx.getSearcher().getNonStoredDVs(true);
|
||||
} else {
|
||||
dvFieldsToReturn = new HashSet<>(rctx.getSearcher().getNonStoredDVs(true)); // copy
|
||||
// add all requested fields that may be useDocValuesAsStored=false
|
||||
for (String fl : fieldNames) {
|
||||
if (rctx.getSearcher().getNonStoredDVs(false).contains(fl)) {
|
||||
dvFieldsToReturn.add(fl);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (rctx.getReturnFields().hasPatternMatching()) {
|
||||
for (String s : rctx.getSearcher().getNonStoredDVs(true)) {
|
||||
if (rctx.getReturnFields().wantsField(s)) {
|
||||
if (null == dvFieldsToReturn) {
|
||||
dvFieldsToReturn = new HashSet<>();
|
||||
}
|
||||
dvFieldsToReturn.add(s);
|
||||
}
|
||||
}
|
||||
} else if (fnames != null) {
|
||||
dvFieldsToReturn = new HashSet<>(fnames); // copy
|
||||
// here we get all non-stored dv fields because even if a user has set
|
||||
// useDocValuesAsStored=false in schema, he may have requested a field
|
||||
// explicitly using the fl parameter
|
||||
dvFieldsToReturn.retainAll(rctx.getSearcher().getNonStoredDVs(false));
|
||||
}
|
||||
}
|
||||
docFetcher = rctx.getSearcher().getDocFetcher();
|
||||
dvFieldsToReturn = calcDocValueFieldsForReturn(docFetcher, rctx.getReturnFields());
|
||||
|
||||
if (transformer != null) transformer.setContext(rctx);
|
||||
}
|
||||
|
||||
// TODO move to ReturnFields ? Or SolrDocumentFetcher ?
|
||||
public static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
|
||||
Set<String> result = null;
|
||||
if (returnFields.wantsAllFields()) {
|
||||
// check whether there are no additional fields
|
||||
Set<String> fieldNames = returnFields.getLuceneFieldNames(true);
|
||||
if (fieldNames == null) {
|
||||
result = docFetcher.getNonStoredDVs(true);
|
||||
} else {
|
||||
result = new HashSet<>(docFetcher.getNonStoredDVs(true)); // copy
|
||||
// add all requested fields that may be useDocValuesAsStored=false
|
||||
for (String fl : fieldNames) {
|
||||
if (docFetcher.getNonStoredDVs(false).contains(fl)) {
|
||||
result.add(fl);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (returnFields.hasPatternMatching()) {
|
||||
for (String s : docFetcher.getNonStoredDVs(true)) {
|
||||
if (returnFields.wantsField(s)) {
|
||||
if (null == result) {
|
||||
result = new HashSet<>();
|
||||
}
|
||||
result.add(s);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Set<String> fnames = returnFields.getLuceneFieldNames();
|
||||
if (fnames == null) {
|
||||
return null;
|
||||
}
|
||||
result = new HashSet<>(fnames); // copy
|
||||
// here we get all non-stored dv fields because even if a user has set
|
||||
// useDocValuesAsStored=false in schema, he may have requested a field
|
||||
// explicitly using the fl parameter
|
||||
result.retainAll(docFetcher.getNonStoredDVs(false));
|
||||
}
|
||||
}
|
||||
if (result != null && result.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public int currentIndex() {
|
||||
return idx;
|
||||
|
@ -133,12 +152,12 @@ public class DocsStreamer implements Iterator<SolrDocument> {
|
|||
sdoc = new SolrDocument();
|
||||
} else {
|
||||
try {
|
||||
Document doc = rctx.getSearcher().doc(id, fnames);
|
||||
sdoc = getDoc(doc, rctx.getSearcher().getSchema()); // make sure to use the schema from the searcher and not the request (cross-core)
|
||||
Document doc = docFetcher.doc(id, fnames);
|
||||
sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema()); // make sure to use the schema from the searcher and not the request (cross-core)
|
||||
|
||||
// decorate the document with non-stored docValues fields
|
||||
if (dvFieldsToReturn != null) {
|
||||
rctx.getSearcher().decorateDocValueFields(sdoc, id, dvFieldsToReturn);
|
||||
docFetcher.decorateDocValueFields(sdoc, id, dvFieldsToReturn);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
|
||||
|
@ -157,7 +176,8 @@ public class DocsStreamer implements Iterator<SolrDocument> {
|
|||
|
||||
}
|
||||
|
||||
public static SolrDocument getDoc(Document doc, final IndexSchema schema) {
|
||||
// TODO move to SolrDocumentFetcher ? Refactor to also call docFetcher.decorateDocValueFields(...) ?
|
||||
public static SolrDocument convertLuceneDocToSolrDoc(Document doc, final IndexSchema schema) {
|
||||
SolrDocument out = new SolrDocument();
|
||||
for (IndexableField f : doc.getFields()) {
|
||||
// Make sure multivalued fields are represented as lists
|
||||
|
|
|
@ -147,7 +147,7 @@ public abstract class TextResponseWriter implements PushWriter {
|
|||
} else if (val instanceof Date) {
|
||||
writeDate(name, (Date) val);
|
||||
} else if (val instanceof Document) {
|
||||
SolrDocument doc = DocsStreamer.getDoc((Document) val, schema);
|
||||
SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc((Document) val, schema);
|
||||
writeSolrDocument(name, doc, returnFields, 0);
|
||||
} else if (val instanceof SolrDocument) {
|
||||
writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
|
||||
|
|
|
@ -139,7 +139,7 @@ class ChildDocTransformer extends DocTransformer {
|
|||
while(i.hasNext()) {
|
||||
Integer childDocNum = i.next();
|
||||
Document childDoc = context.getSearcher().doc(childDocNum);
|
||||
SolrDocument solrChildDoc = DocsStreamer.getDoc(childDoc, schema);
|
||||
SolrDocument solrChildDoc = DocsStreamer.convertLuceneDocToSolrDoc(childDoc, schema);
|
||||
|
||||
// TODO: future enhancement...
|
||||
// support an fl local param in the transformer, which is used to build
|
||||
|
|
|
@ -0,0 +1,571 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
||||
import org.apache.lucene.document.LazyDocument;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.IndexableFieldType;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.solr.common.SolrDocumentBase;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.schema.BoolField;
|
||||
import org.apache.solr.schema.EnumField;
|
||||
import org.apache.solr.schema.NumberType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
import org.apache.solr.schema.TrieDoubleField;
|
||||
import org.apache.solr.schema.TrieFloatField;
|
||||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* A helper class of {@link org.apache.solr.search.SolrIndexSearcher} for stored Document related matters
|
||||
* including DocValue substitutions.
|
||||
*/
|
||||
public class SolrDocumentFetcher {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private final SolrIndexSearcher searcher;
|
||||
|
||||
private final boolean enableLazyFieldLoading;
|
||||
|
||||
private final SolrCache<Integer,Document> documentCache;
|
||||
|
||||
/** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
|
||||
private final Set<String> allNonStoredDVs;
|
||||
|
||||
/** Contains the names/patterns of all docValues=true,stored=false,useDocValuesAsStored=true fields in the schema. */
|
||||
private final Set<String> nonStoredDVsUsedAsStored;
|
||||
|
||||
/** Contains the names/patterns of all docValues=true,stored=false fields, excluding those that are copyField targets in the schema. */
|
||||
private final Set<String> nonStoredDVsWithoutCopyTargets;
|
||||
|
||||
private static int largeValueLengthCacheThreshold = Integer.getInteger("solr.largeField.cacheThreshold", 512 * 1024); // internal setting
|
||||
|
||||
private final Set<String> largeFields;
|
||||
|
||||
private Collection<String> storedHighlightFieldNames; // lazy populated; use getter
|
||||
|
||||
/**
 * Creates the fetcher for the given searcher.
 *
 * <p>A document cache is instantiated only when {@code cachingEnabled} is true and the config
 * declares a document cache. The constructor then walks the searcher's field infos and classifies
 * every field that is also known to the schema into the immutable name sets used later
 * (non-stored docValues fields, those used as stored, those that are not copyField targets, and
 * stored "large" fields).
 *
 * @param searcher the owning searcher; supplies field infos and the schema
 * @param solrConfig supplies the lazy-field-loading flag and the document cache config
 * @param cachingEnabled whether a document cache may be created
 */
SolrDocumentFetcher(SolrIndexSearcher searcher, SolrConfig solrConfig, boolean cachingEnabled) {
  this.searcher = searcher;
  this.enableLazyFieldLoading = solrConfig.enableLazyFieldLoading;
  this.documentCache = (cachingEnabled && solrConfig.documentCacheConfig != null)
      ? solrConfig.documentCacheConfig.newInstance()
      : null;

  final Set<String> dvAll = new HashSet<>();
  final Set<String> dvAsStored = new HashSet<>();
  final Set<String> dvNotCopyTarget = new HashSet<>();
  final Set<String> large = new HashSet<>();

  // Iterating the index's field infos can find materialized dynamic fields, unlike using the Solr IndexSchema.
  for (FieldInfo info : searcher.getFieldInfos()) {
    final SchemaField sf = searcher.getSchema().getFieldOrNull(info.name);
    if (sf == null) {
      continue; // in the index but not in the schema
    }
    if (sf.stored()) {
      if (sf.isLarge()) {
        large.add(sf.getName());
      }
    } else if (sf.hasDocValues()) {
      if (sf.useDocValuesAsStored()) {
        dvAsStored.add(info.name);
      }
      dvAll.add(info.name);
      if (!searcher.getSchema().isCopyFieldTarget(sf)) {
        dvNotCopyTarget.add(info.name);
      }
    }
  }

  this.nonStoredDVsUsedAsStored = Collections.unmodifiableSet(dvAsStored);
  this.allNonStoredDVs = Collections.unmodifiableSet(dvAll);
  this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(dvNotCopyTarget);
  this.largeFields = Collections.unmodifiableSet(large);
}
|
||||
|
||||
public boolean isLazyFieldLoadingEnabled() {
|
||||
return enableLazyFieldLoading;
|
||||
}
|
||||
|
||||
public SolrCache<Integer, Document> getDocumentCache() {
|
||||
return documentCache;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a collection of the names of all stored fields which can be highlighted the index reader knows about.
|
||||
*/
|
||||
public Collection<String> getStoredHighlightFieldNames() {
|
||||
synchronized (this) {
|
||||
if (storedHighlightFieldNames == null) {
|
||||
storedHighlightFieldNames = new LinkedList<>();
|
||||
for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
|
||||
final String fieldName = fieldInfo.name;
|
||||
try {
|
||||
SchemaField field = searcher.getSchema().getField(fieldName);
|
||||
if (field.stored() && ((field.getType() instanceof org.apache.solr.schema.TextField)
|
||||
|| (field.getType() instanceof org.apache.solr.schema.StrField))) {
|
||||
storedHighlightFieldNames.add(fieldName);
|
||||
}
|
||||
} catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
|
||||
log.warn("Field [{}] found in index, but not defined in schema.", fieldName);
|
||||
}
|
||||
}
|
||||
}
|
||||
return storedHighlightFieldNames;
|
||||
}
|
||||
}
|
||||
|
||||
/** @see SolrIndexSearcher#doc(int) */
|
||||
public Document doc(int docId) throws IOException {
|
||||
return doc(docId, (Set<String>) null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the {@link Document} instance corresponding to the document id.
|
||||
* <p>
|
||||
* <b>NOTE</b>: the document will have all fields accessible, but if a field filter is provided, only the provided
|
||||
* fields will be loaded (the remainder will be available lazily).
|
||||
*
|
||||
* @see SolrIndexSearcher#doc(int, Set)
|
||||
*/
|
||||
public Document doc(int i, Set<String> fields) throws IOException {
|
||||
Document d;
|
||||
if (documentCache != null) {
|
||||
d = documentCache.get(i);
|
||||
if (d != null) return d;
|
||||
}
|
||||
|
||||
final DirectoryReader reader = searcher.getIndexReader();
|
||||
if (documentCache != null && !enableLazyFieldLoading) {
|
||||
// we do not filter the fields in this case because that would return an incomplete document which would
|
||||
// be eventually cached. The alternative would be to read the stored fields twice; once with the fields
|
||||
// and then without for caching leading to a performance hit
|
||||
// see SOLR-8858 for related discussion
|
||||
fields = null;
|
||||
}
|
||||
final SolrDocumentStoredFieldVisitor visitor = new SolrDocumentStoredFieldVisitor(fields, reader, i);
|
||||
reader.document(i, visitor);
|
||||
d = visitor.getDocument();
|
||||
|
||||
if (documentCache != null) {
|
||||
documentCache.put(i, d);
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
/** {@link StoredFieldVisitor} which loads the specified fields eagerly (or all if null).
|
||||
* If {@link #enableLazyFieldLoading} then the rest get special lazy field entries. Designated "large"
|
||||
* fields will always get a special field entry. */
|
||||
private class SolrDocumentStoredFieldVisitor extends DocumentStoredFieldVisitor {
|
||||
private final Document doc;
|
||||
private final LazyDocument lazyFieldProducer; // arguably a better name than LazyDocument; at least how we use it here
|
||||
private final int docId;
|
||||
private final boolean addLargeFieldsLazily;
|
||||
|
||||
SolrDocumentStoredFieldVisitor(Set<String> toLoad, IndexReader reader, int docId) {
|
||||
super(toLoad);
|
||||
this.docId = docId;
|
||||
this.doc = getDocument();
|
||||
this.lazyFieldProducer = toLoad != null && enableLazyFieldLoading ? new LazyDocument(reader, docId) : null;
|
||||
this.addLargeFieldsLazily = (documentCache != null && !largeFields.isEmpty());
|
||||
//TODO can we return Status.STOP after a val is loaded and we know there are no other fields of interest?
|
||||
// When: toLoad is one single-valued field, no lazyFieldProducer
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status needsField(FieldInfo fieldInfo) throws IOException {
|
||||
Status status = super.needsField(fieldInfo);
|
||||
assert status != Status.STOP : "Status.STOP not supported or expected";
|
||||
if (addLargeFieldsLazily && largeFields.contains(fieldInfo.name)) { // load "large" fields using this lazy mechanism
|
||||
if (lazyFieldProducer != null || status == Status.YES) {
|
||||
doc.add(new LargeLazyField(fieldInfo.name, docId));
|
||||
}
|
||||
return Status.NO;
|
||||
}
|
||||
if (status == Status.NO && lazyFieldProducer != null) { // lazy
|
||||
doc.add(lazyFieldProducer.getField(fieldInfo));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
/** @see SolrIndexSearcher#doc(int, StoredFieldVisitor) */
|
||||
public void doc(int docId, StoredFieldVisitor visitor) throws IOException {
|
||||
if (documentCache != null) {
|
||||
Document cached = documentCache.get(docId);
|
||||
if (cached != null) {
|
||||
visitFromCached(cached, visitor);
|
||||
return;
|
||||
}
|
||||
}
|
||||
searcher.getIndexReader().document(docId, visitor);
|
||||
}
|
||||
|
||||
/** Executes a stored field visitor against a hit from the document cache */
|
||||
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
|
||||
for (IndexableField f : document) {
|
||||
final FieldInfo info = searcher.getFieldInfos().fieldInfo(f.name());
|
||||
final StoredFieldVisitor.Status needsField = visitor.needsField(info);
|
||||
if (needsField == StoredFieldVisitor.Status.STOP) return;
|
||||
if (needsField == StoredFieldVisitor.Status.NO) continue;
|
||||
BytesRef binaryValue = f.binaryValue();
|
||||
if (binaryValue != null) {
|
||||
visitor.binaryField(info, toByteArrayUnwrapIfPossible(binaryValue));
|
||||
continue;
|
||||
}
|
||||
Number numericValue = f.numericValue();
|
||||
if (numericValue != null) {
|
||||
if (numericValue instanceof Double) {
|
||||
visitor.doubleField(info, numericValue.doubleValue());
|
||||
} else if (numericValue instanceof Integer) {
|
||||
visitor.intField(info, numericValue.intValue());
|
||||
} else if (numericValue instanceof Float) {
|
||||
visitor.floatField(info, numericValue.floatValue());
|
||||
} else if (numericValue instanceof Long) {
|
||||
visitor.longField(info, numericValue.longValue());
|
||||
} else {
|
||||
throw new AssertionError();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// must be String
|
||||
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
|
||||
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
|
||||
} else {
|
||||
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] toByteArrayUnwrapIfPossible(BytesRef bytesRef) {
|
||||
if (bytesRef.offset == 0 && bytesRef.bytes.length == bytesRef.length) {
|
||||
return bytesRef.bytes;
|
||||
} else {
|
||||
return Arrays.copyOfRange(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length);
|
||||
}
|
||||
}
|
||||
|
||||
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
|
||||
class LargeLazyField implements IndexableField {
|
||||
|
||||
final String name;
|
||||
final int docId;
|
||||
// synchronize on 'this' to access:
|
||||
BytesRef cachedBytes; // we only conditionally populate this if it's big enough
|
||||
|
||||
private LargeLazyField(String name, int docId) {
|
||||
this.name = name;
|
||||
this.docId = docId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return fieldType().toString() + "<" + name() + ">"; // mimic Field.java
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexableFieldType fieldType() {
|
||||
return searcher.getSchema().getField(name());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
|
||||
return analyzer.tokenStream(name(), stringValue()); // or we could throw unsupported exception?
|
||||
}
|
||||
/** (for tests) */
|
||||
synchronized boolean hasBeenLoaded() {
|
||||
return cachedBytes != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String stringValue() {
|
||||
try {
|
||||
return readBytes().utf8ToString();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
synchronized BytesRef readBytes() throws IOException {
|
||||
if (cachedBytes != null) {
|
||||
return cachedBytes;
|
||||
} else {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
searcher.getIndexReader().document(docId, new StoredFieldVisitor() {
|
||||
boolean done = false;
|
||||
@Override
|
||||
public Status needsField(FieldInfo fieldInfo) throws IOException {
|
||||
if (done) {
|
||||
return Status.STOP;
|
||||
}
|
||||
return fieldInfo.name.equals(name()) ? Status.YES : Status.NO;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
bytesRef.bytes = value;
|
||||
bytesRef.length = value.length;
|
||||
done = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
throw new UnsupportedOperationException("'large' binary fields are not (yet) supported");
|
||||
}
|
||||
});
|
||||
if (bytesRef.length < largeValueLengthCacheThreshold) {
|
||||
return cachedBytes = bytesRef;
|
||||
} else {
|
||||
return bytesRef;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef binaryValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader readerValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number numericValue() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
|
||||
*
|
||||
* @param doc
|
||||
* A SolrDocument or SolrInputDocument instance where docValues will be added
|
||||
* @param docid
|
||||
* The lucene docid of the document to be populated
|
||||
* @param fields
|
||||
* The list of docValues fields to be decorated
|
||||
*/
|
||||
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields)
|
||||
throws IOException {
|
||||
final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
|
||||
final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
|
||||
final int localId = docid - leafContexts.get(subIndex).docBase;
|
||||
final LeafReader leafReader = leafContexts.get(subIndex).reader();
|
||||
for (String fieldName : fields) {
|
||||
final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
|
||||
if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
|
||||
log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
|
||||
continue;
|
||||
}
|
||||
FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
|
||||
if (fi == null) {
|
||||
continue; // Searcher doesn't have info about this field, hence ignore it.
|
||||
}
|
||||
final DocValuesType dvType = fi.getDocValuesType();
|
||||
switch (dvType) {
|
||||
case NUMERIC:
|
||||
final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
|
||||
if (ndv == null) {
|
||||
continue;
|
||||
}
|
||||
Long val;
|
||||
if (ndv.advanceExact(localId)) {
|
||||
val = ndv.longValue();
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
Object newVal = val;
|
||||
if (schemaField.getType().isPointField()) {
|
||||
// TODO: Maybe merge PointField with TrieFields here
|
||||
NumberType type = schemaField.getType().getNumberType();
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
newVal = val.intValue();
|
||||
break;
|
||||
case LONG:
|
||||
newVal = val.longValue();
|
||||
break;
|
||||
case FLOAT:
|
||||
newVal = Float.intBitsToFloat(val.intValue());
|
||||
break;
|
||||
case DOUBLE:
|
||||
newVal = Double.longBitsToDouble(val);
|
||||
break;
|
||||
case DATE:
|
||||
newVal = new Date(val);
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Unexpected PointType: " + type);
|
||||
}
|
||||
} else {
|
||||
if (schemaField.getType() instanceof TrieIntField) {
|
||||
newVal = val.intValue();
|
||||
} else if (schemaField.getType() instanceof TrieFloatField) {
|
||||
newVal = Float.intBitsToFloat(val.intValue());
|
||||
} else if (schemaField.getType() instanceof TrieDoubleField) {
|
||||
newVal = Double.longBitsToDouble(val);
|
||||
} else if (schemaField.getType() instanceof TrieDateField) {
|
||||
newVal = new Date(val);
|
||||
} else if (schemaField.getType() instanceof EnumField) {
|
||||
newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
|
||||
}
|
||||
}
|
||||
doc.addField(fieldName, newVal);
|
||||
break;
|
||||
case BINARY:
|
||||
BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
|
||||
if (bdv == null) {
|
||||
continue;
|
||||
}
|
||||
BytesRef value;
|
||||
if (bdv.advanceExact(localId)) {
|
||||
value = BytesRef.deepCopyOf(bdv.binaryValue());
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
doc.addField(fieldName, value);
|
||||
break;
|
||||
case SORTED:
|
||||
SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
|
||||
if (sdv == null) {
|
||||
continue;
|
||||
}
|
||||
if (sdv.advanceExact(localId)) {
|
||||
final BytesRef bRef = sdv.binaryValue();
|
||||
// Special handling for Boolean fields since they're stored as 'T' and 'F'.
|
||||
if (schemaField.getType() instanceof BoolField) {
|
||||
doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
|
||||
} else {
|
||||
doc.addField(fieldName, bRef.utf8ToString());
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SORTED_NUMERIC:
|
||||
final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
|
||||
NumberType type = schemaField.getType().getNumberType();
|
||||
if (numericDv != null) {
|
||||
if (numericDv.advance(localId) == localId) {
|
||||
final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
|
||||
for (int i = 0; i < numericDv.docValueCount(); i++) {
|
||||
long number = numericDv.nextValue();
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
outValues.add((int)number);
|
||||
break;
|
||||
case LONG:
|
||||
outValues.add(number);
|
||||
break;
|
||||
case FLOAT:
|
||||
outValues.add(NumericUtils.sortableIntToFloat((int)number));
|
||||
break;
|
||||
case DOUBLE:
|
||||
outValues.add(NumericUtils.sortableLongToDouble(number));
|
||||
break;
|
||||
case DATE:
|
||||
outValues.add(new Date(number));
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Unexpected PointType: " + type);
|
||||
}
|
||||
}
|
||||
assert outValues.size() > 0;
|
||||
doc.addField(fieldName, outValues);
|
||||
}
|
||||
}
|
||||
case SORTED_SET:
|
||||
final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
|
||||
if (values != null && values.getValueCount() > 0) {
|
||||
if (values.advance(localId) == localId) {
|
||||
final List<Object> outValues = new LinkedList<>();
|
||||
for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
|
||||
value = values.lookupOrd(ord);
|
||||
outValues.add(schemaField.getType().toObject(schemaField, value));
|
||||
}
|
||||
assert outValues.size() > 0;
|
||||
doc.addField(fieldName, outValues);
|
||||
}
|
||||
}
|
||||
case NONE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable set of non-stored docValues field names.
|
||||
*
|
||||
* @param onlyUseDocValuesAsStored
|
||||
* If false, returns all non-stored docValues. If true, returns only those non-stored docValues which have
|
||||
* the {@link SchemaField#useDocValuesAsStored()} flag true.
|
||||
*/
|
||||
public Set<String> getNonStoredDVs(boolean onlyUseDocValuesAsStored) {
|
||||
return onlyUseDocValuesAsStored ? nonStoredDVsUsedAsStored : allNonStoredDVs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable set of names of non-stored docValues fields, except those that are targets of a copy field.
|
||||
*/
|
||||
public Set<String> getNonStoredDVsWithoutCopyTargets() {
|
||||
return nonStoredDVsWithoutCopyTargets;
|
||||
}
|
||||
|
||||
}
|
|
@ -18,19 +18,14 @@ package org.apache.solr.search;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
@ -40,21 +35,26 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
||||
import org.apache.lucene.document.LazyDocument;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.StoredFieldVisitor.Status;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.ExitableDirectoryReader;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiPostingsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.solr.common.SolrDocumentBase;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
@ -71,15 +71,8 @@ import org.apache.solr.request.LocalSolrQueryRequest;
|
|||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.BoolField;
|
||||
import org.apache.solr.schema.EnumField;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.NumberType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
import org.apache.solr.schema.TrieDoubleField;
|
||||
import org.apache.solr.schema.TrieFloatField;
|
||||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.apache.solr.search.facet.UnInvertedField;
|
||||
import org.apache.solr.search.stats.StatsSource;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
|
@ -107,6 +100,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
|
||||
private final SolrCore core;
|
||||
private final IndexSchema schema;
|
||||
private final SolrDocumentFetcher docFetcher;
|
||||
|
||||
private final String name;
|
||||
private final Date openTime = new Date();
|
||||
|
@ -119,12 +113,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
private final int queryResultWindowSize;
|
||||
private final int queryResultMaxDocsCached;
|
||||
private final boolean useFilterForSortedQuery;
|
||||
public final boolean enableLazyFieldLoading;
|
||||
|
||||
private final boolean cachingEnabled;
|
||||
private final SolrCache<Query,DocSet> filterCache;
|
||||
private final SolrCache<QueryResultKey,DocList> queryResultCache;
|
||||
private final SolrCache<Integer,Document> documentCache;
|
||||
private final SolrCache<String,UnInvertedField> fieldValueCache;
|
||||
|
||||
// map of generic caches - not synchronized since it's read-only after the constructor.
|
||||
|
@ -135,21 +127,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
/** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
|
||||
private final Set<String> allNonStoredDVs;
|
||||
|
||||
/** Contains the names/patterns of all docValues=true,stored=false,useDocValuesAsStored=true fields in the schema. */
|
||||
private final Set<String> nonStoredDVsUsedAsStored;
|
||||
|
||||
/** Contains the names/patterns of all docValues=true,stored=false fields, excluding those that are copyField targets in the schema. */
|
||||
private final Set<String> nonStoredDVsWithoutCopyTargets;
|
||||
|
||||
private static int largeValueLengthCacheThreshold = Integer.getInteger("solr.largeField.cacheThreshold", 512 * 1024); // internal setting
|
||||
|
||||
private final Set<String> largeFields;
|
||||
|
||||
private Collection<String> storedHighlightFieldNames; // lazy populated; use getter
|
||||
|
||||
private DirectoryFactory directoryFactory;
|
||||
|
||||
private final LeafReader leafReader;
|
||||
|
@ -161,9 +138,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
|
||||
private final NamedList<Object> readerStats;
|
||||
|
||||
|
||||
private static DirectoryReader getReader(SolrCore core, SolrIndexConfig config, DirectoryFactory directoryFactory,
|
||||
String path) throws IOException {
|
||||
String path) throws IOException {
|
||||
final Directory dir = directoryFactory.get(path, DirContext.DEFAULT, config.lockType);
|
||||
try {
|
||||
return core.getIndexReaderFactory().newReader(dir, core);
|
||||
|
@ -283,7 +259,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
this.queryResultWindowSize = solrConfig.queryResultWindowSize;
|
||||
this.queryResultMaxDocsCached = solrConfig.queryResultMaxDocsCached;
|
||||
this.useFilterForSortedQuery = solrConfig.useFilterForSortedQuery;
|
||||
this.enableLazyFieldLoading = solrConfig.enableLazyFieldLoading;
|
||||
|
||||
this.fieldInfos = leafReader.getFieldInfos();
|
||||
this.docFetcher = new SolrDocumentFetcher(this, solrConfig, enableCache);
|
||||
|
||||
this.cachingEnabled = enableCache;
|
||||
if (cachingEnabled) {
|
||||
|
@ -296,7 +274,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
queryResultCache = solrConfig.queryResultCacheConfig == null ? null
|
||||
: solrConfig.queryResultCacheConfig.newInstance();
|
||||
if (queryResultCache != null) clist.add(queryResultCache);
|
||||
documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
|
||||
SolrCache<Integer, Document> documentCache = docFetcher.getDocumentCache();
|
||||
if (documentCache != null) clist.add(documentCache);
|
||||
|
||||
if (solrConfig.userCacheConfigs.isEmpty()) {
|
||||
|
@ -316,42 +294,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
} else {
|
||||
this.filterCache = null;
|
||||
this.queryResultCache = null;
|
||||
this.documentCache = null;
|
||||
this.fieldValueCache = null;
|
||||
this.cacheMap = NO_GENERIC_CACHES;
|
||||
this.cacheList = NO_CACHES;
|
||||
}
|
||||
|
||||
final Set<String> nonStoredDVsUsedAsStored = new HashSet<>();
|
||||
final Set<String> allNonStoredDVs = new HashSet<>();
|
||||
final Set<String> nonStoredDVsWithoutCopyTargets = new HashSet<>();
|
||||
final Set<String> storedLargeFields = new HashSet<>();
|
||||
|
||||
this.fieldInfos = leafReader.getFieldInfos();
|
||||
for (FieldInfo fieldInfo : fieldInfos) { // can find materialized dynamic fields, unlike using the Solr IndexSchema.
|
||||
final SchemaField schemaField = schema.getFieldOrNull(fieldInfo.name);
|
||||
if (schemaField == null) {
|
||||
continue;
|
||||
}
|
||||
if (!schemaField.stored() && schemaField.hasDocValues()) {
|
||||
if (schemaField.useDocValuesAsStored()) {
|
||||
nonStoredDVsUsedAsStored.add(fieldInfo.name);
|
||||
}
|
||||
allNonStoredDVs.add(fieldInfo.name);
|
||||
if (!schema.isCopyFieldTarget(schemaField)) {
|
||||
nonStoredDVsWithoutCopyTargets.add(fieldInfo.name);
|
||||
}
|
||||
}
|
||||
if (schemaField.stored() && schemaField.isLarge()) {
|
||||
storedLargeFields.add(schemaField.getName());
|
||||
}
|
||||
}
|
||||
|
||||
this.nonStoredDVsUsedAsStored = Collections.unmodifiableSet(nonStoredDVsUsedAsStored);
|
||||
this.allNonStoredDVs = Collections.unmodifiableSet(allNonStoredDVs);
|
||||
this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(nonStoredDVsWithoutCopyTargets);
|
||||
this.largeFields = Collections.unmodifiableSet(storedLargeFields);
|
||||
|
||||
// We already have our own filter cache
|
||||
setQueryCache(null);
|
||||
|
||||
|
@ -361,9 +308,21 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
assert ObjectReleaseTracker.track(this);
|
||||
}
|
||||
|
||||
public SolrDocumentFetcher getDocFetcher() {
|
||||
return docFetcher;
|
||||
}
|
||||
|
||||
List<LeafReaderContext> getLeafContexts() {
|
||||
return super.leafContexts;
|
||||
}
|
||||
|
||||
public FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
/*
|
||||
* Override these two methods to provide a way to use global collection stats.
|
||||
*/
|
||||
* Override these two methods to provide a way to use global collection stats.
|
||||
*/
|
||||
@Override
|
||||
public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
|
||||
final SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo();
|
||||
|
@ -526,30 +485,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
return filterCache;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a collection of the names of all stored fields which can be highlighted the index reader knows about.
|
||||
*/
|
||||
public Collection<String> getStoredHighlightFieldNames() {
|
||||
synchronized (this) {
|
||||
if (storedHighlightFieldNames == null) {
|
||||
storedHighlightFieldNames = new LinkedList<>();
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
final String fieldName = fieldInfo.name;
|
||||
try {
|
||||
SchemaField field = schema.getField(fieldName);
|
||||
if (field.stored() && ((field.getType() instanceof org.apache.solr.schema.TextField)
|
||||
|| (field.getType() instanceof org.apache.solr.schema.StrField))) {
|
||||
storedHighlightFieldNames.add(fieldName);
|
||||
}
|
||||
} catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
|
||||
log.warn("Field [{}] found in index, but not defined in schema.", fieldName);
|
||||
}
|
||||
}
|
||||
}
|
||||
return storedHighlightFieldNames;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Set default regenerators on filter and query caches if they don't have any
|
||||
//
|
||||
|
@ -638,119 +573,26 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
// }
|
||||
// }
|
||||
|
||||
/* ********************** Document retrieval *************************/
|
||||
|
||||
/*
|
||||
* Future optimizations (yonik)
|
||||
*
|
||||
* If no cache is present: - use NO_LOAD instead of LAZY_LOAD - use LOAD_AND_BREAK if a single field is being
|
||||
* retrieved
|
||||
*/
|
||||
|
||||
/** {@link StoredFieldVisitor} which loads the specified fields eagerly (or all if null).
|
||||
* If {@link #enableLazyFieldLoading} then the rest get special lazy field entries. Designated "large"
|
||||
* fields will always get a special field entry. */
|
||||
private class SolrDocumentStoredFieldVisitor extends DocumentStoredFieldVisitor {
|
||||
private final Document doc;
|
||||
private final LazyDocument lazyFieldProducer; // arguably a better name than LazyDocument; at least how we use it here
|
||||
private final int docId;
|
||||
private final boolean addLargeFieldsLazily;
|
||||
|
||||
SolrDocumentStoredFieldVisitor(Set<String> toLoad, IndexReader reader, int docId) {
|
||||
super(toLoad);
|
||||
this.docId = docId;
|
||||
this.doc = getDocument();
|
||||
this.lazyFieldProducer = toLoad != null && enableLazyFieldLoading ? new LazyDocument(reader, docId) : null;
|
||||
this.addLargeFieldsLazily = (documentCache != null && !largeFields.isEmpty());
|
||||
//TODO can we return Status.STOP after a val is loaded and we know there are no other fields of interest?
|
||||
// When: toLoad is one single-valued field, no lazyFieldProducer
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status needsField(FieldInfo fieldInfo) throws IOException {
|
||||
Status status = super.needsField(fieldInfo);
|
||||
assert status != Status.STOP : "Status.STOP not supported or expected";
|
||||
if (addLargeFieldsLazily && largeFields.contains(fieldInfo.name)) { // load "large" fields using this lazy mechanism
|
||||
if (lazyFieldProducer != null || status == Status.YES) {
|
||||
doc.add(new LargeLazyField(fieldInfo.name, docId));
|
||||
}
|
||||
return Status.NO;
|
||||
}
|
||||
if (status == Status.NO && lazyFieldProducer != null) { // lazy
|
||||
doc.add(lazyFieldProducer.getField(fieldInfo));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the {@link Document} instance corresponding to the document id.
|
||||
*
|
||||
* @see SolrDocumentFetcher
|
||||
*/
|
||||
@Override
|
||||
public Document doc(int i) throws IOException {
|
||||
return doc(i, (Set<String>) null);
|
||||
public Document doc(int docId) throws IOException {
|
||||
return doc(docId, (Set<String>) null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Visit a document's fields using a {@link StoredFieldVisitor} This method does not currently add to the Solr
|
||||
* document cache.
|
||||
* Visit a document's fields using a {@link StoredFieldVisitor}.
|
||||
* This method does not currently add to the Solr document cache.
|
||||
*
|
||||
* @see IndexReader#document(int, StoredFieldVisitor)
|
||||
* @see SolrDocumentFetcher
|
||||
*/
|
||||
@Override
|
||||
public void doc(int docId, StoredFieldVisitor visitor) throws IOException {
|
||||
if (documentCache != null) {
|
||||
Document cached = documentCache.get(docId);
|
||||
if (cached != null) {
|
||||
visitFromCached(cached, visitor);
|
||||
return;
|
||||
}
|
||||
}
|
||||
getIndexReader().document(docId, visitor);
|
||||
}
|
||||
|
||||
/** Executes a stored field visitor against a hit from the document cache */
|
||||
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
|
||||
for (IndexableField f : document) {
|
||||
final FieldInfo info = fieldInfos.fieldInfo(f.name());
|
||||
final Status needsField = visitor.needsField(info);
|
||||
if (needsField == Status.STOP) return;
|
||||
if (needsField == Status.NO) continue;
|
||||
BytesRef binaryValue = f.binaryValue();
|
||||
if (binaryValue != null) {
|
||||
visitor.binaryField(info, toByteArrayUnwrapIfPossible(binaryValue));
|
||||
continue;
|
||||
}
|
||||
Number numericValue = f.numericValue();
|
||||
if (numericValue != null) {
|
||||
if (numericValue instanceof Double) {
|
||||
visitor.doubleField(info, numericValue.doubleValue());
|
||||
} else if (numericValue instanceof Integer) {
|
||||
visitor.intField(info, numericValue.intValue());
|
||||
} else if (numericValue instanceof Float) {
|
||||
visitor.floatField(info, numericValue.floatValue());
|
||||
} else if (numericValue instanceof Long) {
|
||||
visitor.longField(info, numericValue.longValue());
|
||||
} else {
|
||||
throw new AssertionError();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// must be String
|
||||
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
|
||||
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
|
||||
} else {
|
||||
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] toByteArrayUnwrapIfPossible(BytesRef bytesRef) {
|
||||
if (bytesRef.offset == 0 && bytesRef.bytes.length == bytesRef.length) {
|
||||
return bytesRef.bytes;
|
||||
} else {
|
||||
return Arrays.copyOfRange(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length);
|
||||
}
|
||||
public final void doc(int docId, StoredFieldVisitor visitor) throws IOException {
|
||||
getDocFetcher().doc(docId, visitor);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -758,328 +600,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
* <p>
|
||||
* <b>NOTE</b>: the document will have all fields accessible, but if a field filter is provided, only the provided
|
||||
* fields will be loaded (the remainder will be available lazily).
|
||||
*
|
||||
* @see SolrDocumentFetcher
|
||||
*/
|
||||
@Override
|
||||
public Document doc(int i, Set<String> fields) throws IOException {
|
||||
|
||||
Document d;
|
||||
if (documentCache != null) {
|
||||
d = documentCache.get(i);
|
||||
if (d != null) return d;
|
||||
}
|
||||
|
||||
final DirectoryReader reader = getIndexReader();
|
||||
if (documentCache != null && !enableLazyFieldLoading) {
|
||||
// we do not filter the fields in this case because that would return an incomplete document which would
|
||||
// be eventually cached. The alternative would be to read the stored fields twice; once with the fields
|
||||
// and then without for caching leading to a performance hit
|
||||
// see SOLR-8858 for related discussion
|
||||
fields = null;
|
||||
}
|
||||
final SolrDocumentStoredFieldVisitor visitor = new SolrDocumentStoredFieldVisitor(fields, reader, i);
|
||||
reader.document(i, visitor);
|
||||
d = visitor.getDocument();
|
||||
|
||||
if (documentCache != null) {
|
||||
documentCache.put(i, d);
|
||||
}
|
||||
|
||||
return d;
|
||||
public final Document doc(int i, Set<String> fields) throws IOException {
|
||||
return getDocFetcher().doc(i, fields);
|
||||
}
|
||||
|
||||
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
|
||||
class LargeLazyField implements IndexableField {
|
||||
|
||||
final String name;
|
||||
final int docId;
|
||||
// synchronize on 'this' to access:
|
||||
BytesRef cachedBytes; // we only conditionally populate this if it's big enough
|
||||
|
||||
private LargeLazyField(String name, int docId) {
|
||||
this.name = name;
|
||||
this.docId = docId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return fieldType().toString() + "<" + name() + ">"; // mimic Field.java
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexableFieldType fieldType() {
|
||||
return schema.getField(name());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
|
||||
return analyzer.tokenStream(name(), stringValue()); // or we could throw unsupported exception?
|
||||
}
|
||||
/** (for tests) */
|
||||
synchronized boolean hasBeenLoaded() {
|
||||
return cachedBytes != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String stringValue() {
|
||||
try {
|
||||
return readBytes().utf8ToString();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
synchronized BytesRef readBytes() throws IOException {
|
||||
if (cachedBytes != null) {
|
||||
return cachedBytes;
|
||||
} else {
|
||||
BytesRef bytesRef = new BytesRef();
|
||||
getIndexReader().document(docId, new StoredFieldVisitor() {
|
||||
boolean done = false;
|
||||
@Override
|
||||
public Status needsField(FieldInfo fieldInfo) throws IOException {
|
||||
if (done) {
|
||||
return Status.STOP;
|
||||
}
|
||||
return fieldInfo.name.equals(name()) ? Status.YES : Status.NO;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
bytesRef.bytes = value;
|
||||
bytesRef.length = value.length;
|
||||
done = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
throw new UnsupportedOperationException("'large' binary fields are not (yet) supported");
|
||||
}
|
||||
});
|
||||
if (bytesRef.length < largeValueLengthCacheThreshold) {
|
||||
return cachedBytes = bytesRef;
|
||||
} else {
|
||||
return bytesRef;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef binaryValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader readerValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number numericValue() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
|
||||
*
|
||||
* @param doc
|
||||
* A SolrDocument or SolrInputDocument instance where docValues will be added
|
||||
* @param docid
|
||||
* The lucene docid of the document to be populated
|
||||
* @param fields
|
||||
* The list of docValues fields to be decorated
|
||||
*/
|
||||
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields)
|
||||
throws IOException {
|
||||
final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
|
||||
final int localId = docid - leafContexts.get(subIndex).docBase;
|
||||
final LeafReader leafReader = leafContexts.get(subIndex).reader();
|
||||
for (String fieldName : fields) {
|
||||
final SchemaField schemaField = schema.getFieldOrNull(fieldName);
|
||||
if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
|
||||
log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
|
||||
continue;
|
||||
}
|
||||
FieldInfo fi = fieldInfos.fieldInfo(fieldName);
|
||||
if (fi == null) {
|
||||
continue; // Searcher doesn't have info about this field, hence ignore it.
|
||||
}
|
||||
final DocValuesType dvType = fi.getDocValuesType();
|
||||
switch (dvType) {
|
||||
case NUMERIC:
|
||||
final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
|
||||
if (ndv == null) {
|
||||
continue;
|
||||
}
|
||||
Long val;
|
||||
if (ndv.advanceExact(localId)) {
|
||||
val = ndv.longValue();
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
Object newVal = val;
|
||||
if (schemaField.getType().isPointField()) {
|
||||
// TODO: Maybe merge PointField with TrieFields here
|
||||
NumberType type = schemaField.getType().getNumberType();
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
newVal = val.intValue();
|
||||
break;
|
||||
case LONG:
|
||||
newVal = val;
|
||||
break;
|
||||
case FLOAT:
|
||||
newVal = Float.intBitsToFloat(val.intValue());
|
||||
break;
|
||||
case DOUBLE:
|
||||
newVal = Double.longBitsToDouble(val);
|
||||
break;
|
||||
case DATE:
|
||||
newVal = new Date(val);
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Unexpected PointType: " + type);
|
||||
}
|
||||
} else {
|
||||
if (schemaField.getType() instanceof TrieIntField) {
|
||||
newVal = val.intValue();
|
||||
} else if (schemaField.getType() instanceof TrieFloatField) {
|
||||
newVal = Float.intBitsToFloat(val.intValue());
|
||||
} else if (schemaField.getType() instanceof TrieDoubleField) {
|
||||
newVal = Double.longBitsToDouble(val);
|
||||
} else if (schemaField.getType() instanceof TrieDateField) {
|
||||
newVal = new Date(val);
|
||||
} else if (schemaField.getType() instanceof EnumField) {
|
||||
newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
|
||||
}
|
||||
}
|
||||
doc.addField(fieldName, newVal);
|
||||
break;
|
||||
case BINARY:
|
||||
BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
|
||||
if (bdv == null) {
|
||||
continue;
|
||||
}
|
||||
BytesRef value;
|
||||
if (bdv.advanceExact(localId)) {
|
||||
value = BytesRef.deepCopyOf(bdv.binaryValue());
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
doc.addField(fieldName, value);
|
||||
break;
|
||||
case SORTED:
|
||||
SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
|
||||
if (sdv == null) {
|
||||
continue;
|
||||
}
|
||||
if (sdv.advanceExact(localId)) {
|
||||
final BytesRef bRef = sdv.binaryValue();
|
||||
// Special handling for Boolean fields since they're stored as 'T' and 'F'.
|
||||
if (schemaField.getType() instanceof BoolField) {
|
||||
doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
|
||||
} else {
|
||||
doc.addField(fieldName, bRef.utf8ToString());
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SORTED_NUMERIC:
|
||||
final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
|
||||
NumberType type = schemaField.getType().getNumberType();
|
||||
if (numericDv != null) {
|
||||
if (numericDv.advance(localId) == localId) {
|
||||
final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
|
||||
for (int i = 0; i < numericDv.docValueCount(); i++) {
|
||||
long number = numericDv.nextValue();
|
||||
switch (type) {
|
||||
case INTEGER:
|
||||
outValues.add((int)number);
|
||||
break;
|
||||
case LONG:
|
||||
outValues.add(number);
|
||||
break;
|
||||
case FLOAT:
|
||||
outValues.add(NumericUtils.sortableIntToFloat((int)number));
|
||||
break;
|
||||
case DOUBLE:
|
||||
outValues.add(NumericUtils.sortableLongToDouble(number));
|
||||
break;
|
||||
case DATE:
|
||||
outValues.add(new Date(number));
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Unexpected PointType: " + type);
|
||||
}
|
||||
}
|
||||
assert outValues.size() > 0;
|
||||
doc.addField(fieldName, outValues);
|
||||
}
|
||||
}
|
||||
case SORTED_SET:
|
||||
final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
|
||||
if (values != null && values.getValueCount() > 0) {
|
||||
if (values.advance(localId) == localId) {
|
||||
final List<Object> outValues = new LinkedList<Object>();
|
||||
for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
|
||||
value = values.lookupOrd(ord);
|
||||
outValues.add(schemaField.getType().toObject(schemaField, value));
|
||||
}
|
||||
assert outValues.size() > 0;
|
||||
doc.addField(fieldName, outValues);
|
||||
}
|
||||
}
|
||||
case NONE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a list of docs (the doc ids actually), and reads them into an array of Documents.
|
||||
*/
|
||||
public void readDocs(Document[] docs, DocList ids) throws IOException {
|
||||
readDocs(docs, ids, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a list of docs (the doc ids actually) and a set of fields to load, and reads them into an array of Documents.
|
||||
*/
|
||||
public void readDocs(Document[] docs, DocList ids, Set<String> fields) throws IOException {
|
||||
final DocIterator iter = ids.iterator();
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
docs[i] = doc(iter.nextDoc(), fields);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable set of non-stored docValues field names.
|
||||
*
|
||||
* @param onlyUseDocValuesAsStored
|
||||
* If false, returns all non-stored docValues. If true, returns only those non-stored docValues which have
|
||||
* the {@link SchemaField#useDocValuesAsStored()} flag true.
|
||||
*/
|
||||
public Set<String> getNonStoredDVs(boolean onlyUseDocValuesAsStored) {
|
||||
return onlyUseDocValuesAsStored ? nonStoredDVsUsedAsStored : allNonStoredDVs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable set of names of non-stored docValues fields, except those that are targets of a copy field.
|
||||
*/
|
||||
public Set<String> getNonStoredDVsWithoutCopyTargets() {
|
||||
return nonStoredDVsWithoutCopyTargets;
|
||||
}
|
||||
|
||||
/* ********************** end document retrieval *************************/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/** expert: internal API, subject to change */
|
||||
public SolrCache<String,UnInvertedField> getFieldValueCache() {
|
||||
return fieldValueCache;
|
||||
|
@ -2555,15 +2083,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
return a.intersects(getDocSet(deState));
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a list of document IDs, and returns an array of Documents containing all of the stored fields.
|
||||
*/
|
||||
public Document[] readDocs(DocList ids) throws IOException {
|
||||
final Document[] docs = new Document[ids.size()];
|
||||
readDocs(docs, ids);
|
||||
return docs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Warm this searcher based on an old one (primarily for auto-cache warming).
|
||||
*/
|
||||
|
|
|
@ -19,12 +19,12 @@ package org.apache.solr.search.grouping.distributed.shardresultserializer;
|
|||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
|
@ -285,9 +285,7 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
|
|||
}
|
||||
|
||||
private Document retrieveDocument(final SchemaField uniqueField, int doc) throws IOException {
|
||||
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(uniqueField.getName());
|
||||
rb.req.getSearcher().doc(doc, visitor);
|
||||
return visitor.getDocument();
|
||||
return rb.req.getSearcher().doc(doc, Collections.singleton(uniqueField.getName()));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -248,7 +248,7 @@ public class SolrPluginUtils {
|
|||
SolrQueryRequest req,
|
||||
SolrQueryResponse res) throws IOException {
|
||||
SolrIndexSearcher searcher = req.getSearcher();
|
||||
if(!searcher.enableLazyFieldLoading) {
|
||||
if(!searcher.getDocFetcher().isLazyFieldLoadingEnabled()) {
|
||||
// nothing to do
|
||||
return;
|
||||
}
|
||||
|
@ -1022,6 +1022,7 @@ public class SolrPluginUtils {
|
|||
* @return The new {@link org.apache.solr.common.SolrDocumentList} containing all the loaded docs
|
||||
* @throws java.io.IOException if there was a problem loading the docs
|
||||
* @since solr 1.4
|
||||
* @deprecated TODO in 7.0 remove this. It was inlined into ClusteringComponent. DWS: 'ids' is ugly.
|
||||
*/
|
||||
public static SolrDocumentList docListToSolrDocumentList(
|
||||
DocList docs,
|
||||
|
@ -1029,6 +1030,10 @@ public class SolrPluginUtils {
|
|||
Set<String> fields,
|
||||
Map<SolrDocument, Integer> ids ) throws IOException
|
||||
{
|
||||
/* DWS deprecation note:
|
||||
It's only called by ClusteringComponent, and I think the "ids" param aspect is a bit messy and not worth supporting.
|
||||
If someone wants a similar method they can speak up and we can add a method to SolrDocumentFetcher.
|
||||
*/
|
||||
IndexSchema schema = searcher.getSchema();
|
||||
|
||||
SolrDocumentList list = new SolrDocumentList();
|
||||
|
|
|
@ -112,8 +112,8 @@ public class LargeFieldTest extends SolrTestCaseJ4 {
|
|||
private void assertLazyNotLoaded(Document d, String fieldName) {
|
||||
IndexableField field = d.getField(fieldName);
|
||||
if (fieldName == BIG_FIELD) {
|
||||
assertTrue(field instanceof SolrIndexSearcher.LargeLazyField);
|
||||
assertFalse(((SolrIndexSearcher.LargeLazyField)field).hasBeenLoaded());
|
||||
assertTrue(field instanceof SolrDocumentFetcher.LargeLazyField);
|
||||
assertFalse(((SolrDocumentFetcher.LargeLazyField)field).hasBeenLoaded());
|
||||
} else {
|
||||
assertTrue(field instanceof LazyDocument.LazyField);
|
||||
assertFalse(((LazyDocument.LazyField)field).hasBeenLoaded());
|
||||
|
@ -123,8 +123,8 @@ public class LargeFieldTest extends SolrTestCaseJ4 {
|
|||
private void assertLazyLoaded(Document d, String fieldName) {
|
||||
IndexableField field = d.getField(fieldName);
|
||||
if (fieldName == BIG_FIELD) {
|
||||
assertTrue(field instanceof SolrIndexSearcher.LargeLazyField);
|
||||
assertTrue(((SolrIndexSearcher.LargeLazyField)field).hasBeenLoaded());
|
||||
assertTrue(field instanceof SolrDocumentFetcher.LargeLazyField);
|
||||
assertTrue(((SolrDocumentFetcher.LargeLazyField)field).hasBeenLoaded());
|
||||
} else {
|
||||
assertTrue(field instanceof LazyDocument.LazyField);
|
||||
assertTrue(((LazyDocument.LazyField)field).hasBeenLoaded());
|
||||
|
|
|
@ -16,37 +16,28 @@
|
|||
*/
|
||||
package org.apache.solr.util;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QueryCommand;
|
||||
import org.apache.solr.search.QueryResult;
|
||||
import org.apache.solr.util.SolrPluginUtils.DisjunctionMaxQueryParser;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.util.SolrPluginUtils.DisjunctionMaxQueryParser;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests that the functions in SolrPluginUtils work as advertised.
|
||||
|
@ -58,52 +49,6 @@ public class SolrPluginUtilsTest extends SolrTestCaseJ4 {
|
|||
initCore("solrconfig.xml","schema.xml");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDocListConversion() throws Exception {
|
||||
assertU("", adoc("id", "3234", "val_i", "1",
|
||||
"val_dynamic", "quick red fox"));
|
||||
assertU("", adoc("id", "3235", "val_i", "1",
|
||||
"val_dynamic", "quick green fox"));
|
||||
assertU("", adoc("id", "3236", "val_i", "1",
|
||||
"val_dynamic", "quick brown fox"));
|
||||
assertU("", commit());
|
||||
|
||||
RefCounted<SolrIndexSearcher> holder = h.getCore().getSearcher();
|
||||
try {
|
||||
SolrIndexSearcher srchr = holder.get();
|
||||
QueryResult qr = new QueryResult();
|
||||
QueryCommand cmd = new QueryCommand();
|
||||
cmd.setQuery(new MatchAllDocsQuery());
|
||||
cmd.setLen(10);
|
||||
qr = srchr.search(qr, cmd);
|
||||
|
||||
DocList docs = qr.getDocList();
|
||||
assertEquals("wrong docs size", 3, docs.size());
|
||||
Set<String> fields = new HashSet<>();
|
||||
fields.add("val_dynamic");
|
||||
fields.add("dynamic_val");
|
||||
fields.add("range_facet_l"); // copied from id
|
||||
|
||||
SolrDocumentList list = SolrPluginUtils.docListToSolrDocumentList(docs, srchr, fields, null);
|
||||
assertEquals("wrong list Size", docs.size(), list.size());
|
||||
for (SolrDocument document : list) {
|
||||
|
||||
assertTrue("unexpected field", ! document.containsKey("val_i"));
|
||||
assertTrue("unexpected id field", ! document.containsKey("id"));
|
||||
|
||||
assertTrue("original field", document.containsKey("val_dynamic"));
|
||||
assertTrue("dyn copy field", document.containsKey("dynamic_val"));
|
||||
assertTrue("copy field", document.containsKey("range_facet_l"));
|
||||
|
||||
assertNotNull("original field null", document.get("val_dynamic"));
|
||||
assertNotNull("dyn copy field null", document.get("dynamic_val"));
|
||||
assertNotNull("copy field null", document.get("range_facet_l"));
|
||||
}
|
||||
} finally {
|
||||
if (null != holder) holder.decref();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPartialEscape() {
|
||||
|
||||
|
|
Loading…
Reference in New Issue