mirror of https://github.com/apache/lucene.git
SOLR-14194: Highlighters now supports docValues for the uniqueKey
and the original highlighter can highlight docValues.
This commit is contained in:
parent
71b869381e
commit
9a4f7661e9
|
@ -91,6 +91,9 @@ Improvements
|
|||
|
||||
* SOLR-14245: Validate Replica / ReplicaInfo on creation. (ab)
|
||||
|
||||
* SOLR-14194: Highlighting now works when the uniqueKey field is not stored but has docValues. And the original
|
||||
highlighter can now highlight text fields from docValues. (Andrzej Wislowski, David Smiley)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -27,7 +27,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
|
@ -272,16 +271,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
if (keyField != null) {
|
||||
// guaranteed to be one and only one since this is uniqueKey!
|
||||
SolrDocument solrDoc = docFetcher.solrDoc(docId, srf);
|
||||
|
||||
String uKey = null;
|
||||
Object val = solrDoc.getFieldValue(uniqFieldName);
|
||||
if (val != null) {
|
||||
if (val instanceof StoredField) {
|
||||
uKey = ((StoredField) val).stringValue();
|
||||
} else {
|
||||
uKey = val.toString();
|
||||
}
|
||||
}
|
||||
String uKey = schema.printableUniqueKey(solrDoc);
|
||||
assert null != uKey;
|
||||
docNL.add("uniqueKey", uKey);
|
||||
termVectors.add(uKey, docNL);
|
||||
|
|
|
@ -34,7 +34,6 @@ import org.apache.lucene.analysis.TokenFilter;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -63,6 +62,7 @@ import org.apache.lucene.search.vectorhighlight.FieldQuery;
|
|||
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
|
||||
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
|
||||
import org.apache.lucene.util.AttributeSource.State;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
|
@ -73,11 +73,13 @@ import org.apache.solr.core.PluginInfo;
|
|||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.HighlightComponent;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -445,10 +447,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
String[] fieldNames = getHighlightFields(query, req, defaultFields);
|
||||
|
||||
Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req);
|
||||
SolrReturnFields returnFields;
|
||||
if (preFetchFieldNames != null) {
|
||||
preFetchFieldNames.add(keyField.getName());
|
||||
returnFields = new SolrReturnFields(preFetchFieldNames.toArray(new String[0]), req);
|
||||
} else {
|
||||
returnFields = new SolrReturnFields(new String[0], req);
|
||||
}
|
||||
|
||||
FvhContainer fvhContainer = new FvhContainer(null, null); // Lazy container for fvh and fieldQuery
|
||||
|
||||
IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855
|
||||
|
@ -458,7 +463,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
DocIterator iterator = docs.iterator();
|
||||
for (int i = 0; i < docs.size(); i++) {
|
||||
int docId = iterator.nextDoc();
|
||||
Document doc = searcher.doc(docId, preFetchFieldNames);
|
||||
SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, returnFields);
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
NamedList docHighlights = new SimpleOrderedMap();
|
||||
|
@ -482,9 +487,9 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
return fragments;
|
||||
}
|
||||
|
||||
protected Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
|
||||
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
|
||||
SolrParams params) throws IOException {
|
||||
protected Object doHighlightingOfField(SolrDocument doc, int docId, SchemaField schemaField,
|
||||
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
|
||||
SolrParams params) throws IOException {
|
||||
Object fieldHighlights;
|
||||
if (schemaField == null) {
|
||||
fieldHighlights = null;
|
||||
|
@ -527,12 +532,20 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
return fieldHighlights;
|
||||
}
|
||||
|
||||
/** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
|
||||
/**
|
||||
* Returns the field names to be passed to {@link org.apache.solr.search.SolrDocumentFetcher#solrDoc(int, SolrReturnFields)}.
|
||||
* Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
|
||||
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */
|
||||
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null.
|
||||
*/
|
||||
protected Set<String> getDocPrefetchFieldNames(String[] hlFieldNames, SolrQueryRequest req) {
|
||||
Set<String> preFetchFieldNames = new HashSet<>(hlFieldNames.length + 1);//+1 for uniqueyKey added after
|
||||
Collections.addAll(preFetchFieldNames, hlFieldNames);
|
||||
for (String hlFieldName : hlFieldNames) {
|
||||
String alternateField = req.getParams().getFieldParam(hlFieldName, HighlightParams.ALTERNATE_FIELD);
|
||||
if (alternateField != null) {
|
||||
preFetchFieldNames.add(alternateField);
|
||||
}
|
||||
}
|
||||
return preFetchFieldNames;
|
||||
}
|
||||
|
||||
|
@ -555,7 +568,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
|
||||
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
|
||||
protected Object doHighlightingByFastVectorHighlighter(SolrDocument doc, int docId,
|
||||
SchemaField schemaField, FvhContainer fvhContainer,
|
||||
IndexReader reader, SolrQueryRequest req) throws IOException {
|
||||
SolrParams params = req.getParams();
|
||||
|
@ -577,7 +590,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
|
||||
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query,
|
||||
protected Object doHighlightingByHighlighter(SolrDocument doc, int docId, SchemaField schemaField, Query query,
|
||||
IndexReader reader, SolrQueryRequest req) throws IOException {
|
||||
final SolrParams params = req.getParams();
|
||||
final String fieldName = schemaField.getName();
|
||||
|
@ -709,18 +722,25 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
/** Fetches field values to highlight. If the field value should come from an atypical place (or another aliased
|
||||
* field name, then a subclass could override to implement that.
|
||||
*/
|
||||
protected List<String> getFieldValues(Document doc, String fieldName, int maxValues, int maxCharsToAnalyze,
|
||||
protected List<String> getFieldValues(SolrDocument doc, String fieldName, int maxValues, int maxCharsToAnalyze,
|
||||
SolrQueryRequest req) {
|
||||
// Collect the Fields we will examine (could be more than one if multi-valued)
|
||||
Collection<Object> fieldValues = doc.getFieldValues(fieldName);
|
||||
if (fieldValues == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
FieldType fieldType = req.getSchema().getFieldType(fieldName);
|
||||
List<String> result = new ArrayList<>();
|
||||
for (IndexableField thisField : doc.getFields()) {
|
||||
if (! thisField.name().equals(fieldName)) {
|
||||
continue;
|
||||
for (Object value : fieldValues) {
|
||||
String strValue;
|
||||
if (value instanceof IndexableField) {
|
||||
strValue = fieldType.toExternal((IndexableField)value);
|
||||
} else {
|
||||
strValue = value.toString(); // TODO FieldType needs an API for this, e.g. toExternalFromDv()
|
||||
}
|
||||
String value = thisField.stringValue();
|
||||
result.add(value);
|
||||
result.add(strValue);
|
||||
|
||||
maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
|
||||
maxCharsToAnalyze -= strValue.length();//we exit early if we'll never get to analyze the value
|
||||
maxValues--;
|
||||
if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
|
||||
break;
|
||||
|
@ -743,7 +763,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
|
||||
/** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
|
||||
protected Object alternateField(SolrDocument doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
|
||||
IndexReader reader, SolrQueryRequest req) throws IOException {
|
||||
IndexSchema schema = req.getSearcher().getSchema();
|
||||
SolrParams params = req.getParams();
|
||||
|
@ -775,20 +795,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
|
||||
|
||||
// Fallback to static non-highlighted
|
||||
IndexableField[] docFields = doc.getFields(alternateField);
|
||||
if (docFields.length == 0) {
|
||||
List<String> listFields = getFieldValues(doc, alternateField, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
|
||||
if (listFields.isEmpty()) {
|
||||
// The alternate field did not exist, treat the original field as fallback instead
|
||||
docFields = doc.getFields(fieldName);
|
||||
}
|
||||
List<String> listFields = new ArrayList<>();
|
||||
for (IndexableField field : docFields) {
|
||||
if (field.binaryValue() == null)
|
||||
listFields.add(field.stringValue());
|
||||
listFields = getFieldValues(doc, fieldName, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
|
||||
if (listFields.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
if (listFields.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
String[] altTexts = listFields.toArray(new String[listFields.size()]);
|
||||
|
||||
Encoder encoder = getEncoder(fieldName, params);
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.solr.highlight;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
@ -26,7 +25,6 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -37,6 +35,7 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
|
|||
import org.apache.lucene.search.uhighlight.PassageScorer;
|
||||
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
|
||||
import org.apache.lucene.search.uhighlight.WholeBreakIterator;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
@ -49,6 +48,7 @@ import org.apache.solr.schema.SchemaField;
|
|||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
import org.apache.solr.util.RTimerTree;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
|
||||
|
@ -210,13 +210,12 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
IndexSchema schema = searcher.getSchema();
|
||||
SchemaField keyField = schema.getUniqueKeyField();
|
||||
if (keyField != null) {
|
||||
Set<String> selector = Collections.singleton(keyField.getName());
|
||||
SolrReturnFields returnFields = new SolrReturnFields(keyField.getName(), null);
|
||||
String[] uniqueKeys = new String[docIDs.length];
|
||||
for (int i = 0; i < docIDs.length; i++) {
|
||||
int docid = docIDs[i];
|
||||
Document doc = searcher.doc(docid, selector);
|
||||
String id = schema.printableUniqueKey(doc);
|
||||
uniqueKeys[i] = id;
|
||||
SolrDocument solrDoc = searcher.getDocFetcher().solrDoc(docid, returnFields);
|
||||
uniqueKeys[i] = schema.printableUniqueKey(solrDoc);
|
||||
}
|
||||
return uniqueKeys;
|
||||
} else {
|
||||
|
|
|
@ -50,6 +50,7 @@ import org.apache.lucene.queries.payloads.PayloadDecoder;
|
|||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.common.MapSerializable;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -339,6 +340,18 @@ public class IndexSchema {
|
|||
return f==null ? null : uniqueKeyFieldType.toExternal(f);
|
||||
}
|
||||
|
||||
/** Like {@link #printableUniqueKey(org.apache.lucene.document.Document)} */
|
||||
public String printableUniqueKey(SolrDocument solrDoc) {
|
||||
Object val = solrDoc.getFieldValue(uniqueKeyFieldName);
|
||||
if (val == null) {
|
||||
return null;
|
||||
} else if (val instanceof IndexableField) {
|
||||
return uniqueKeyFieldType.toExternal((IndexableField) val);
|
||||
} else {
|
||||
return val.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private SchemaField getIndexedField(String fname) {
|
||||
SchemaField f = getFields().get(fname);
|
||||
if (f==null) {
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="id" type="string" indexed="true" stored="${solr.tests.id.stored:true}" multiValued="false" docValues="${solr.tests.id.docValues:false}" required="false"/>
|
||||
<field name="text" type="text_offsets" indexed="true" stored="true"/>
|
||||
<field name="text2" type="text" indexed="true" stored="true"/>
|
||||
<field name="text3" type="text_offsets" indexed="true" stored="true" large="true"/>
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.highlight;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class HighlighterWithoutStoredIdTest extends HighlighterTest {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClassProps() {
|
||||
System.setProperty("solr.tests.id.stored", "false");
|
||||
System.setProperty("solr.tests.id.docValues", "true");
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClassProps() {
|
||||
System.clearProperty("solr.tests.id.stored");
|
||||
System.clearProperty("solr.tests.id.docValues");
|
||||
}
|
||||
|
||||
}
|
|
@ -45,6 +45,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
|||
System.clearProperty("filterCache.enabled");
|
||||
System.clearProperty("queryResultCache.enabled");
|
||||
System.clearProperty("documentCache.enabled");
|
||||
System.clearProperty("solr.tests.id.stored");
|
||||
System.clearProperty("solr.tests.id.docValues");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.highlight;
|
||||
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/** Tests for the UnifiedHighlighter Solr plugin **/
|
||||
public class TestUnifiedSolrHighlighterWithoutStoredId extends TestUnifiedSolrHighlighter {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClassProps() {
|
||||
System.setProperty("solr.tests.id.stored", "false");
|
||||
System.setProperty("solr.tests.id.docValues", "true");
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClassProps() {
|
||||
System.clearProperty("solr.tests.id.stored");
|
||||
System.clearProperty("solr.tests.id.docValues");
|
||||
}
|
||||
}
|
|
@ -173,6 +173,7 @@ The Original Highlighter, sometimes called the "Standard Highlighter" or "Defaul
|
|||
Its query accuracy is good enough for most needs, although it's not quite as good/perfect as the Unified Highlighter.
|
||||
+
|
||||
The Original Highlighter will normally analyze stored text on the fly in order to highlight. It will use full term vectors if available.
|
||||
If the text isn't "stored" but is in doc values (`docValues="true"`), this highlighter can work with it.
|
||||
+
|
||||
Where this highlighter falls short is performance; it's often twice as slow as the Unified Highlighter. And despite being the most customizable, it doesn't have a BreakIterator based fragmenter (all the others do), which could pose a challenge for some languages.
|
||||
|
||||
|
|
Loading…
Reference in New Issue