SOLR-14194: Highlighters now support docValues for the uniqueKey

and the original highlighter can highlight docValues.
This commit is contained in:
David Smiley 2020-02-11 02:18:08 -05:00
parent 71b869381e
commit 9a4f7661e9
No known key found for this signature in database
GPG Key ID: 6FDFF3BF6796FD4A
10 changed files with 143 additions and 47 deletions

View File

@ -91,6 +91,9 @@ Improvements
* SOLR-14245: Validate Replica / ReplicaInfo on creation. (ab)
* SOLR-14194: Highlighting now works when the uniqueKey field is not stored but has docValues. And the original
highlighter can now highlight text fields from docValues. (Andrzej Wislowski, David Smiley)
Optimizations
---------------------

View File

@ -27,7 +27,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
@ -272,16 +271,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
if (keyField != null) {
// guaranteed to be one and only one since this is uniqueKey!
SolrDocument solrDoc = docFetcher.solrDoc(docId, srf);
String uKey = null;
Object val = solrDoc.getFieldValue(uniqFieldName);
if (val != null) {
if (val instanceof StoredField) {
uKey = ((StoredField) val).stringValue();
} else {
uKey = val.toString();
}
}
String uKey = schema.printableUniqueKey(solrDoc);
assert null != uKey;
docNL.add("uniqueKey", uKey);
termVectors.add(uKey, docNL);

View File

@ -34,7 +34,6 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
@ -63,6 +62,7 @@ import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.MapSolrParams;
@ -73,11 +73,13 @@ import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -445,10 +447,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
String[] fieldNames = getHighlightFields(query, req, defaultFields);
Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req);
SolrReturnFields returnFields;
if (preFetchFieldNames != null) {
preFetchFieldNames.add(keyField.getName());
returnFields = new SolrReturnFields(preFetchFieldNames.toArray(new String[0]), req);
} else {
returnFields = new SolrReturnFields(new String[0], req);
}
FvhContainer fvhContainer = new FvhContainer(null, null); // Lazy container for fvh and fieldQuery
IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855
@ -458,7 +463,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
DocIterator iterator = docs.iterator();
for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc();
Document doc = searcher.doc(docId, preFetchFieldNames);
SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, returnFields);
@SuppressWarnings("rawtypes")
NamedList docHighlights = new SimpleOrderedMap();
@ -482,9 +487,9 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return fragments;
}
protected Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
SolrParams params) throws IOException {
protected Object doHighlightingOfField(SolrDocument doc, int docId, SchemaField schemaField,
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
SolrParams params) throws IOException {
Object fieldHighlights;
if (schemaField == null) {
fieldHighlights = null;
@ -527,12 +532,20 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return fieldHighlights;
}
/** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
/**
* Returns the field names to be passed to {@link org.apache.solr.search.SolrDocumentFetcher#solrDoc(int, SolrReturnFields)}.
* Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null.
*/
protected Set<String> getDocPrefetchFieldNames(String[] hlFieldNames, SolrQueryRequest req) {
  // Sized for the highlight fields plus one slot for the uniqueKey, which the caller adds afterwards.
  final Set<String> prefetch = new HashSet<>(hlFieldNames.length + 1);

  // First pass: every field that was asked to be highlighted.
  for (String hlField : hlFieldNames) {
    prefetch.add(hlField);
  }

  // Second pass: any per-field alternate field, so its value is fetched along with the rest.
  final SolrParams params = req.getParams();
  for (String hlField : hlFieldNames) {
    final String alternate = params.getFieldParam(hlField, HighlightParams.ALTERNATE_FIELD);
    if (alternate == null) {
      continue;
    }
    prefetch.add(alternate);
  }
  return prefetch;
}
@ -555,7 +568,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
protected Object doHighlightingByFastVectorHighlighter(SolrDocument doc, int docId,
SchemaField schemaField, FvhContainer fvhContainer,
IndexReader reader, SolrQueryRequest req) throws IOException {
SolrParams params = req.getParams();
@ -577,7 +590,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query,
protected Object doHighlightingByHighlighter(SolrDocument doc, int docId, SchemaField schemaField, Query query,
IndexReader reader, SolrQueryRequest req) throws IOException {
final SolrParams params = req.getParams();
final String fieldName = schemaField.getName();
@ -709,18 +722,25 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Fetches field values to highlight. If the field value should come from an atypical place (or another aliased
* field name), then a subclass could override to implement that.
*/
protected List<String> getFieldValues(Document doc, String fieldName, int maxValues, int maxCharsToAnalyze,
protected List<String> getFieldValues(SolrDocument doc, String fieldName, int maxValues, int maxCharsToAnalyze,
SolrQueryRequest req) {
// Collect the Fields we will examine (could be more than one if multi-valued)
Collection<Object> fieldValues = doc.getFieldValues(fieldName);
if (fieldValues == null) {
return Collections.emptyList();
}
FieldType fieldType = req.getSchema().getFieldType(fieldName);
List<String> result = new ArrayList<>();
for (IndexableField thisField : doc.getFields()) {
if (! thisField.name().equals(fieldName)) {
continue;
for (Object value : fieldValues) {
String strValue;
if (value instanceof IndexableField) {
strValue = fieldType.toExternal((IndexableField)value);
} else {
strValue = value.toString(); // TODO FieldType needs an API for this, e.g. toExternalFromDv()
}
String value = thisField.stringValue();
result.add(value);
result.add(strValue);
maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
maxCharsToAnalyze -= strValue.length();//we exit early if we'll never get to analyze the value
maxValues--;
if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
break;
@ -743,7 +763,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
protected Object alternateField(SolrDocument doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
IndexReader reader, SolrQueryRequest req) throws IOException {
IndexSchema schema = req.getSearcher().getSchema();
SolrParams params = req.getParams();
@ -775,20 +795,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
// Fallback to static non-highlighted
IndexableField[] docFields = doc.getFields(alternateField);
if (docFields.length == 0) {
List<String> listFields = getFieldValues(doc, alternateField, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
if (listFields.isEmpty()) {
// The alternate field did not exist, treat the original field as fallback instead
docFields = doc.getFields(fieldName);
}
List<String> listFields = new ArrayList<>();
for (IndexableField field : docFields) {
if (field.binaryValue() == null)
listFields.add(field.stringValue());
listFields = getFieldValues(doc, fieldName, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
if (listFields.isEmpty()) {
return null;
}
}
if (listFields.isEmpty()) {
return null;
}
String[] altTexts = listFields.toArray(new String[listFields.size()]);
Encoder encoder = getEncoder(fieldName, params);

View File

@ -18,7 +18,6 @@ package org.apache.solr.highlight;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;
@ -26,7 +25,6 @@ import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
@ -37,6 +35,7 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.lucene.search.uhighlight.WholeBreakIterator;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
@ -49,6 +48,7 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.util.RTimerTree;
import org.apache.solr.util.plugin.PluginInfoInitialized;
@ -210,13 +210,12 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
IndexSchema schema = searcher.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
if (keyField != null) {
Set<String> selector = Collections.singleton(keyField.getName());
SolrReturnFields returnFields = new SolrReturnFields(keyField.getName(), null);
String[] uniqueKeys = new String[docIDs.length];
for (int i = 0; i < docIDs.length; i++) {
int docid = docIDs[i];
Document doc = searcher.doc(docid, selector);
String id = schema.printableUniqueKey(doc);
uniqueKeys[i] = id;
SolrDocument solrDoc = searcher.getDocFetcher().solrDoc(docid, returnFields);
uniqueKeys[i] = schema.printableUniqueKey(solrDoc);
}
return uniqueKeys;
} else {

View File

@ -50,6 +50,7 @@ import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
@ -339,6 +340,18 @@ public class IndexSchema {
return f==null ? null : uniqueKeyFieldType.toExternal(f);
}
/**
 * Like {@link #printableUniqueKey(org.apache.lucene.document.Document)}, but reads the key from a
 * {@link SolrDocument}.
 *
 * @param solrDoc the document to read the uniqueKey field value from
 * @return the external form of the key when the value is an {@link IndexableField}, otherwise the value's
 *         {@code toString()}; {@code null} when the document has no value for the uniqueKey field
 */
public String printableUniqueKey(SolrDocument solrDoc) {
  final Object keyValue = solrDoc.getFieldValue(uniqueKeyFieldName);
  if (keyValue == null) {
    return null;
  }
  if (keyValue instanceof IndexableField) {
    return uniqueKeyFieldType.toExternal((IndexableField) keyValue);
  }
  // Not an IndexableField -- presumably a value materialized from docValues; use its string form as-is.
  return keyValue.toString();
}
private SchemaField getIndexedField(String fname) {
SchemaField f = getFields().get(fname);
if (f==null) {

View File

@ -36,7 +36,7 @@
</analyzer>
</fieldType>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="id" type="string" indexed="true" stored="${solr.tests.id.stored:true}" multiValued="false" docValues="${solr.tests.id.docValues:false}" required="false"/>
<field name="text" type="text_offsets" indexed="true" stored="true"/>
<field name="text2" type="text" indexed="true" stored="true"/>
<field name="text3" type="text_offsets" indexed="true" stored="true" large="true"/>

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/**
 * Re-runs the inherited {@link HighlighterTest} cases with the {@code solr.tests.id.stored} system property
 * set to {@code "false"} and {@code solr.tests.id.docValues} set to {@code "true"}.
 */
public class HighlighterWithoutStoredIdTest extends HighlighterTest {

  /** System property name set to "false" for this test class. */
  private static final String ID_STORED_PROP = "solr.tests.id.stored";

  /** System property name set to "true" for this test class. */
  private static final String ID_DOC_VALUES_PROP = "solr.tests.id.docValues";

  // NOTE(review): JUnit 4 runs superclass @BeforeClass methods before subclass ones unless shadowed --
  // confirm these properties are in place before the parent class loads its schema.
  @BeforeClass
  public static void beforeClassProps() {
    System.setProperty(ID_STORED_PROP, "false");
    System.setProperty(ID_DOC_VALUES_PROP, "true");
  }

  /** Removes the properties again so they do not leak into other test classes in the same JVM. */
  @AfterClass
  public static void afterClassProps() {
    System.clearProperty(ID_STORED_PROP);
    System.clearProperty(ID_DOC_VALUES_PROP);
  }
}

View File

@ -45,6 +45,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
System.clearProperty("filterCache.enabled");
System.clearProperty("queryResultCache.enabled");
System.clearProperty("documentCache.enabled");
System.clearProperty("solr.tests.id.stored");
System.clearProperty("solr.tests.id.docValues");
}
@Override

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/**
 * Re-runs the inherited {@link TestUnifiedSolrHighlighter} cases with the {@code solr.tests.id.stored} system
 * property set to {@code "false"} and {@code solr.tests.id.docValues} set to {@code "true"}.
 */
public class TestUnifiedSolrHighlighterWithoutStoredId extends TestUnifiedSolrHighlighter {

  /** System property name set to "false" for this test class. */
  private static final String ID_STORED_PROP = "solr.tests.id.stored";

  /** System property name set to "true" for this test class. */
  private static final String ID_DOC_VALUES_PROP = "solr.tests.id.docValues";

  // NOTE(review): JUnit 4 runs superclass @BeforeClass methods before subclass ones unless shadowed --
  // confirm these properties are in place before the parent class loads its schema.
  @BeforeClass
  public static void beforeClassProps() {
    System.setProperty(ID_STORED_PROP, "false");
    System.setProperty(ID_DOC_VALUES_PROP, "true");
  }

  /** Removes the properties again so they do not leak into other test classes in the same JVM. */
  @AfterClass
  public static void afterClassProps() {
    System.clearProperty(ID_STORED_PROP);
    System.clearProperty(ID_DOC_VALUES_PROP);
  }
}

View File

@ -173,6 +173,7 @@ The Original Highlighter, sometimes called the "Standard Highlighter" or "Defaul
Its query accuracy is good enough for most needs, although it's not quite as good/perfect as the Unified Highlighter.
+
The Original Highlighter will normally analyze stored text on the fly in order to highlight. It will use full term vectors if available.
If the text isn't "stored" but is in doc values (`docValues="true"`), this highlighter can work with it.
+
Where this highlighter falls short is performance; it's often twice as slow as the Unified Highlighter. And despite being the most customizable, it doesn't have a BreakIterator based fragmenter (all the others do), which could pose a challenge for some languages.