From 9a4f7661e96416e3aac3e48f1108422ab4184473 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Tue, 11 Feb 2020 02:18:08 -0500 Subject: [PATCH] SOLR-14194: Highlighters now supports docValues for the uniqueKey and the original highlighter can highlight docValues. --- solr/CHANGES.txt | 3 + .../component/TermVectorComponent.java | 12 +-- .../highlight/DefaultSolrHighlighter.java | 73 +++++++++++-------- .../highlight/UnifiedSolrHighlighter.java | 11 ++- .../org/apache/solr/schema/IndexSchema.java | 13 ++++ .../conf/schema-unifiedhighlight.xml | 2 +- .../HighlighterWithoutStoredIdTest.java | 36 +++++++++ .../highlight/TestUnifiedSolrHighlighter.java | 2 + ...UnifiedSolrHighlighterWithoutStoredId.java | 37 ++++++++++ solr/solr-ref-guide/src/highlighting.adoc | 1 + 10 files changed, 143 insertions(+), 47 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java create mode 100644 solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 50d686b6394..86c0e20c749 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -91,6 +91,9 @@ Improvements * SOLR-14245: Validate Replica / ReplicaInfo on creation. (ab) +* SOLR-14194: Highlighting now works when the uniqueKey field is not stored but has docValues. And the original + highlighter can now highlight text fields from docValues. (Andrzej Wislowski, David Smiley) + Optimizations --------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java index 8d7617eb282..5a18ee4cc0f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java @@ -27,7 +27,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.lucene.document.StoredField; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PostingsEnum; @@ -272,16 +271,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar if (keyField != null) { // guaranteed to be one and only one since this is uniqueKey! SolrDocument solrDoc = docFetcher.solrDoc(docId, srf); - - String uKey = null; - Object val = solrDoc.getFieldValue(uniqFieldName); - if (val != null) { - if (val instanceof StoredField) { - uKey = ((StoredField) val).stringValue(); - } else { - uKey = val.toString(); - } - } + String uKey = schema.printableUniqueKey(solrDoc); assert null != uKey; docNL.add("uniqueKey", uKey); termVectors.add(uKey, docNL); diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 088170dff5e..8a60026b896 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -34,7 +34,6 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.document.Document; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.IndexReader; @@ -63,6 +62,7 @@ import org.apache.lucene.search.vectorhighlight.FieldQuery; import org.apache.lucene.search.vectorhighlight.FragListBuilder; import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; import org.apache.lucene.util.AttributeSource.State; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.MapSolrParams; @@ -73,11 +73,13 @@ import org.apache.solr.core.PluginInfo; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.HighlightComponent; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; import org.apache.solr.util.plugin.PluginInfoInitialized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -445,10 +447,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf String[] fieldNames = getHighlightFields(query, req, defaultFields); Set preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req); + SolrReturnFields returnFields; if (preFetchFieldNames != null) { preFetchFieldNames.add(keyField.getName()); + returnFields = new SolrReturnFields(preFetchFieldNames.toArray(new String[0]), req); + } else { + returnFields = new SolrReturnFields(new String[0], req); } - FvhContainer fvhContainer = new FvhContainer(null, null); // Lazy container for fvh and fieldQuery IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855 @@ -458,7 +463,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); - Document doc = searcher.doc(docId, preFetchFieldNames); + SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, returnFields); @SuppressWarnings("rawtypes") NamedList docHighlights = new SimpleOrderedMap(); @@ -482,9 +487,9 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf return fragments; } - protected Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField, - FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req, - SolrParams params) throws IOException { + protected Object doHighlightingOfField(SolrDocument doc, int docId, SchemaField schemaField, + FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req, + SolrParams params) throws IOException { Object fieldHighlights; if (schemaField == null) { fieldHighlights = null; @@ -527,12 +532,20 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf return fieldHighlights; } - /** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}. + /** + * Returns the field names to be passed to {@link org.apache.solr.search.SolrDocumentFetcher#solrDoc(int, SolrReturnFields)}. * Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid - * the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */ + * the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. + */ protected Set getDocPrefetchFieldNames(String[] hlFieldNames, SolrQueryRequest req) { Set preFetchFieldNames = new HashSet<>(hlFieldNames.length + 1);//+1 for uniqueyKey added after Collections.addAll(preFetchFieldNames, hlFieldNames); + for (String hlFieldName : hlFieldNames) { + String alternateField = req.getParams().getFieldParam(hlFieldName, HighlightParams.ALTERNATE_FIELD); + if (alternateField != null) { + preFetchFieldNames.add(alternateField); + } + } return preFetchFieldNames; } @@ -555,7 +568,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") - protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId, + protected Object doHighlightingByFastVectorHighlighter(SolrDocument doc, int docId, SchemaField schemaField, FvhContainer fvhContainer, IndexReader reader, SolrQueryRequest req) throws IOException { SolrParams params = req.getParams(); @@ -577,7 +590,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") - protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query, + protected Object doHighlightingByHighlighter(SolrDocument doc, int docId, SchemaField schemaField, Query query, IndexReader reader, SolrQueryRequest req) throws IOException { final SolrParams params = req.getParams(); final String fieldName = schemaField.getName(); @@ -709,18 +722,25 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Fetches field values to highlight. If the field value should come from an atypical place (or another aliased * field name, then a subclass could override to implement that. */ - protected List getFieldValues(Document doc, String fieldName, int maxValues, int maxCharsToAnalyze, + protected List getFieldValues(SolrDocument doc, String fieldName, int maxValues, int maxCharsToAnalyze, SolrQueryRequest req) { // Collect the Fields we will examine (could be more than one if multi-valued) + Collection fieldValues = doc.getFieldValues(fieldName); + if (fieldValues == null) { + return Collections.emptyList(); + } + FieldType fieldType = req.getSchema().getFieldType(fieldName); List result = new ArrayList<>(); - for (IndexableField thisField : doc.getFields()) { - if (! thisField.name().equals(fieldName)) { - continue; + for (Object value : fieldValues) { + String strValue; + if (value instanceof IndexableField) { + strValue = fieldType.toExternal((IndexableField)value); + } else { + strValue = value.toString(); // TODO FieldType needs an API for this, e.g. toExternalFromDv() } - String value = thisField.stringValue(); - result.add(value); + result.add(strValue); - maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value + maxCharsToAnalyze -= strValue.length();//we exit early if we'll never get to analyze the value maxValues--; if (maxValues <= 0 || maxCharsToAnalyze <= 0) { break; @@ -743,7 +763,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") - protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query, + protected Object alternateField(SolrDocument doc, int docId, String fieldName, FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req) throws IOException { IndexSchema schema = req.getSearcher().getSchema(); SolrParams params = req.getParams(); @@ -775,20 +795,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf // Fallback to static non-highlighted - IndexableField[] docFields = doc.getFields(alternateField); - if (docFields.length == 0) { + List listFields = getFieldValues(doc, alternateField, Integer.MAX_VALUE, Integer.MAX_VALUE, req); + if (listFields.isEmpty()) { // The alternate field did not exist, treat the original field as fallback instead - docFields = doc.getFields(fieldName); - } - List listFields = new ArrayList<>(); - for (IndexableField field : docFields) { - if (field.binaryValue() == null) - listFields.add(field.stringValue()); + listFields = getFieldValues(doc, fieldName, Integer.MAX_VALUE, Integer.MAX_VALUE, req); + if (listFields.isEmpty()) { + return null; + } } - if (listFields.isEmpty()) { - return null; - } String[] altTexts = listFields.toArray(new String[listFields.size()]); Encoder encoder = getEncoder(fieldName, params); diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java index 90632146ecf..b226663f9a0 100644 --- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java @@ -18,7 +18,6 @@ package org.apache.solr.highlight; import java.io.IOException; import java.text.BreakIterator; -import java.util.Collections; import java.util.EnumSet; import java.util.List; import java.util.Locale; @@ -26,7 +25,6 @@ import java.util.Map; import java.util.Set; import java.util.function.Predicate; -import org.apache.lucene.document.Document; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Query; @@ -37,6 +35,7 @@ import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.PassageScorer; import org.apache.lucene.search.uhighlight.UnifiedHighlighter; import org.apache.lucene.search.uhighlight.WholeBreakIterator; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.SolrParams; @@ -49,6 +48,7 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; import org.apache.solr.util.RTimerTree; import org.apache.solr.util.plugin.PluginInfoInitialized; @@ -210,13 +210,12 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf IndexSchema schema = searcher.getSchema(); SchemaField keyField = schema.getUniqueKeyField(); if (keyField != null) { - Set selector = Collections.singleton(keyField.getName()); + SolrReturnFields returnFields = new SolrReturnFields(keyField.getName(), null); String[] uniqueKeys = new String[docIDs.length]; for (int i = 0; i < docIDs.length; i++) { int docid = docIDs[i]; - Document doc = searcher.doc(docid, selector); - String id = schema.printableUniqueKey(doc); - uniqueKeys[i] = id; + SolrDocument solrDoc = searcher.getDocFetcher().solrDoc(docid, returnFields); + uniqueKeys[i] = schema.printableUniqueKey(solrDoc); } return uniqueKeys; } else { diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index 5f213d3d953..6bd2e79f031 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -50,6 +50,7 @@ import org.apache.lucene.queries.payloads.PayloadDecoder; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Version; import org.apache.solr.common.MapSerializable; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.CommonParams; @@ -339,6 +340,18 @@ public class IndexSchema { return f==null ? null : uniqueKeyFieldType.toExternal(f); } + /** Like {@link #printableUniqueKey(org.apache.lucene.document.Document)} */ + public String printableUniqueKey(SolrDocument solrDoc) { + Object val = solrDoc.getFieldValue(uniqueKeyFieldName); + if (val == null) { + return null; + } else if (val instanceof IndexableField) { + return uniqueKeyFieldType.toExternal((IndexableField) val); + } else { + return val.toString(); + } + } + private SchemaField getIndexedField(String fname) { SchemaField f = getFields().get(fname); if (f==null) { diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml index 0c9a08357ad..90b7c524710 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml @@ -36,7 +36,7 @@ - + diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java new file mode 100644 index 00000000000..97df83bf8b8 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.highlight; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class HighlighterWithoutStoredIdTest extends HighlighterTest { + + @BeforeClass + public static void beforeClassProps() { + System.setProperty("solr.tests.id.stored", "false"); + System.setProperty("solr.tests.id.docValues", "true"); + } + + @AfterClass + public static void afterClassProps() { + System.clearProperty("solr.tests.id.stored"); + System.clearProperty("solr.tests.id.docValues"); + } + +} diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java index f6418cb8595..9d890b16384 100644 --- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java +++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java @@ -45,6 +45,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 { System.clearProperty("filterCache.enabled"); System.clearProperty("queryResultCache.enabled"); System.clearProperty("documentCache.enabled"); + System.clearProperty("solr.tests.id.stored"); + System.clearProperty("solr.tests.id.docValues"); } @Override diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java new file mode 100644 index 00000000000..e7fb6cd320d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.highlight; + + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** Tests for the UnifiedHighlighter Solr plugin **/ +public class TestUnifiedSolrHighlighterWithoutStoredId extends TestUnifiedSolrHighlighter { + + @BeforeClass + public static void beforeClassProps() { + System.setProperty("solr.tests.id.stored", "false"); + System.setProperty("solr.tests.id.docValues", "true"); + } + + @AfterClass + public static void afterClassProps() { + System.clearProperty("solr.tests.id.stored"); + System.clearProperty("solr.tests.id.docValues"); + } +} diff --git a/solr/solr-ref-guide/src/highlighting.adoc b/solr/solr-ref-guide/src/highlighting.adoc index ab6b5a6da0c..cdbc873beed 100644 --- a/solr/solr-ref-guide/src/highlighting.adoc +++ b/solr/solr-ref-guide/src/highlighting.adoc @@ -173,6 +173,7 @@ The Original Highlighter, sometimes called the "Standard Highlighter" or "Defaul Its query accuracy is good enough for most needs, although it's not quite as good/perfect as the Unified Highlighter. + The Original Highlighter will normally analyze stored text on the fly in order to highlight. It will use full term vectors if available. +If the text isn't "stored" but is in doc values (`docValues="true"`), this highlighter can work with it. + Where this highlighter falls short is performance; it's often twice as slow as the Unified Highlighter. And despite being the most customizable, it doesn't have a BreakIterator based fragmenter (all the others do), which could pose a challenge for some languages.