mirror of https://github.com/apache/lucene.git
SOLR-4271: add support for PostingsHighlighter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1429413 13f79535-47bb-0310-9956-ffa450edef68
parent 8e49b54fde
commit 64e18dc0f6
@@ -162,7 +162,7 @@ New Features
   extractWikipedia.alg was changed to use this task, so now it creates two
   files. (Doron Cohen)

-* LUCENE-4290: Added PostingsHighlighter to the sandbox module. It uses
+* LUCENE-4290: Added PostingsHighlighter to the highlighter module. It uses
   offsets from the postings lists to highlight documents. (Robert Muir)

 * LUCENE-4628: Added CommonTermsQuery that executes high-frequency terms
@@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,6 +25,32 @@ package org.apache.lucene.sandbox.postingshighlight;
  * @lucene.experimental
  */
 public class PassageFormatter {
+  private final String preTag;
+  private final String postTag;
+  private final String ellipsis;
+
+  /**
+   * Creates a new PassageFormatter with the default tags.
+   */
+  public PassageFormatter() {
+    this("<b>", "</b>", "... ");
+  }
+
+  /**
+   * Creates a new PassageFormatter with custom tags.
+   * @param preTag text which should appear before a highlighted term.
+   * @param postTag text which should appear after a highlighted term.
+   * @param ellipsis text which should be used to connect two unconnected passages.
+   */
+  public PassageFormatter(String preTag, String postTag, String ellipsis) {
+    if (preTag == null || postTag == null || ellipsis == null) {
+      throw new NullPointerException();
+    }
+    this.preTag = preTag;
+    this.postTag = postTag;
+    this.ellipsis = ellipsis;
+  }
+
   /**
    * Formats the top <code>passages</code> from <code>content</code>
    * into a human-readable text snippet.

@@ -40,7 +66,7 @@ public class PassageFormatter {
     for (Passage passage : passages) {
       // don't add ellipsis if its the first one, or if its connected.
       if (passage.startOffset > pos && pos > 0) {
-        sb.append("... ");
+        sb.append(ellipsis);
       }
       pos = passage.startOffset;
       for (int i = 0; i < passage.numMatches; i++) {

@@ -51,9 +77,9 @@ public class PassageFormatter {
         sb.append(content.substring(pos, start));
       }
       if (end > pos) {
-        sb.append("<b>");
+        sb.append(preTag);
         sb.append(content.substring(Math.max(pos, start), end));
-        sb.append("</b>");
+        sb.append(postTag);
         pos = end;
       }
     }
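For orientation (not part of the patch): a small, self-contained sketch of what the two constructors added above accept; the tag values are arbitrary examples.

import org.apache.lucene.search.postingshighlight.PassageFormatter;

public class FormatterTagsExample {
  public static void main(String[] args) {
    // default tags: <b> ... </b>, passages joined with "... "
    PassageFormatter defaults = new PassageFormatter();
    // custom tags, e.g. for an <em>-based stylesheet
    PassageFormatter custom = new PassageFormatter("<em>", "</em>", " ... ");
    // any null argument trips the NullPointerException guard added above
    try {
      new PassageFormatter(null, "</em>", " ... ");
    } catch (NullPointerException expected) {
      System.out.println("null tags are rejected");
    }
  }
}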
@@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -62,8 +62,7 @@ import org.apache.lucene.util.UnicodeUtil;
  * into a {@link Passage}, and then scores each Passage using a separate {@link PassageScorer}.
  * Passages are finally formatted into highlighted snippets with a {@link PassageFormatter}.
  * <p>
- * <b>WARNING</b>: The code is very new and may still have some exciting bugs! This is why
- * it's located under Lucene's sandbox module.
+ * <b>WARNING</b>: The code is very new and may still have some exciting bugs!
  * <p>
  * Example usage:
  * <pre class="prettyprint">

@@ -256,7 +255,7 @@ public final class PostingsHighlighter {
     LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
     String contents[][] = new String[fields.length][docids.length];
     for (int i = 0; i < docids.length; i++) {
-      reader.document(docids[i], visitor);
+      searcher.doc(docids[i], visitor);
       for (int j = 0; j < fields.length; j++) {
         contents[j][i] = visitor.getValue(j).toString();
       }
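For orientation (not part of the patch): a rough plain-Lucene usage sketch assembled only from calls that appear in this commit (the four-argument constructor and the highlightFields method used by PostingsSolrHighlighter below). It assumes a "body" field indexed with offsets in the postings (in Lucene terms, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); the javadoc's own example block lies outside this hunk.

import java.text.BreakIterator;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PassageScorer;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
import org.apache.lucene.store.Directory;

public class HighlightSketch {
  // Returns one snippet per hit for the "body" field; entries may be null where nothing matched.
  public static String[] firstSnippets(Directory dir) throws Exception {
    IndexReader reader = DirectoryReader.open(dir);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      TermQuery query = new TermQuery(new Term("body", "document"));
      TopDocs topDocs = searcher.search(query, 10);
      PostingsHighlighter highlighter = new PostingsHighlighter(
          PostingsHighlighter.DEFAULT_MAX_LENGTH,
          BreakIterator.getSentenceInstance(Locale.ROOT),
          new PassageScorer(),
          new PassageFormatter()); // default <b>/</b> tags from the change above
      Map<String,String[]> snippets =
          highlighter.highlightFields(new String[] {"body"}, query, searcher, topDocs, 1);
      return snippets.get("body");
    } finally {
      reader.close();
    }
  }
}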
@@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -189,6 +189,8 @@ New Features
   rolling averages; median, 75th, 95th, 99th, 99.9th percentile request times
   (Alan Woodward, Shawn Heisey, Adrien Grand, Uwe Schindler)

+* SOLR-4271: Add support for PostingsHighlighter. (Robert Muir)
+
 Optimizations
 ----------------------

@@ -24,6 +24,7 @@ import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.highlight.PostingsSolrHighlighter;
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.solr.highlight.DefaultSolrHighlighter;
 import org.apache.solr.request.SolrQueryRequest;

@@ -128,7 +129,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
       }

       if(highlightQuery != null) {
-        boolean rewrite = !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
+        boolean rewrite = (highlighter instanceof PostingsSolrHighlighter == false) && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
           Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
         highlightQuery = rewrite ? highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
       }
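Unpacked, the changed condition reads as follows; this restatement is not part of the patch and only mirrors the logic above, assuming the same locals as HighlightComponent.

import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.highlight.PostingsSolrHighlighter;
import org.apache.solr.highlight.SolrHighlighter;

class RewriteDecision {
  // Equivalent to the new boolean above: the postings highlighter works from the
  // original query terms and the offsets stored in the postings, so its query is
  // never rewritten; other highlighters keep the pre-existing behavior.
  static boolean shouldRewrite(SolrHighlighter highlighter, SolrParams params) {
    boolean phraseHighlighter = Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"));
    boolean multiTermHighlight = Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true"));
    return !(highlighter instanceof PostingsSolrHighlighter)
        && !(phraseHighlighter && multiTermHighlight);
  }
}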
@@ -0,0 +1,189 @@
package org.apache.solr.highlight;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PassageScorer;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.plugin.PluginInfoInitialized;

/**
 * Highlighter impl that uses {@link PostingsHighlighter}
 * <p>
 * Example configuration:
 * <pre class="prettyprint">
 *   <searchComponent class="solr.HighlightComponent" name="highlight">
 *     <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"
 *                   preTag="&lt;em&gt;"
 *                   postTag="&lt;/em&gt;"
 *                   ellipsis="... "
 *                   maxLength=10000/>
 *   </searchComponent>
 * </pre>
 * <p>
 * Notes:
 * <ul>
 *   <li>fields to highlight must be configured with storeOffsetsWithPositions="true"
 *   <li>hl.fl specifies the field list.
 *   <li>hl.snippets specifies how many underlying sentence fragments form the resulting snippet.
 * </ul>
 *
 * @lucene.experimental
 */
public class PostingsSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
  protected PostingsHighlighter highlighter;

  @Override
  public void initalize(SolrConfig config) {}

  @Override
  public void init(PluginInfo info) {
    Map<String,String> attributes = info.attributes;
    BreakIterator breakIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
    PassageScorer scorer = new PassageScorer();

    // formatter parameters: preTag/postTag/ellipsis
    String preTag = attributes.get("preTag");
    if (preTag == null) {
      preTag = "<em>";
    }
    String postTag = attributes.get("postTag");
    if (postTag == null) {
      postTag = "</em>";
    }
    String ellipsis = attributes.get("ellipsis");
    if (ellipsis == null) {
      ellipsis = "... ";
    }
    PassageFormatter formatter = new PassageFormatter(preTag, postTag, ellipsis);

    // maximum content size to process
    int maxLength = PostingsHighlighter.DEFAULT_MAX_LENGTH;
    if (attributes.containsKey("maxLength")) {
      maxLength = Integer.parseInt(attributes.get("maxLength"));
    }
    highlighter = new PostingsHighlighter(maxLength, breakIterator, scorer, formatter);
  }

  @Override
  public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
    SolrParams params = req.getParams();

    // if highlighting isnt enabled, then why call doHighlighting?
    if (isHighlightingEnabled(params)) {
      SolrIndexSearcher searcher = req.getSearcher();
      TopDocs topDocs = toTopDocs(docs);

      // fetch the unique keys
      String[] keys = getUniqueKeys(searcher, topDocs);

      // query-time parameters
      String[] fieldNames = getHighlightFields(query, req, defaultFields);
      int numSnippets = params.getInt(HighlightParams.SNIPPETS, 1);

      Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, topDocs, numSnippets);
      return encodeSnippets(keys, fieldNames, snippets);
    } else {
      return null;
    }
  }

  /**
   * Encodes the resulting snippets into a namedlist
   * @param keys the document unique keys
   * @param fieldNames field names to highlight in the order
   * @param snippets map from field name to snippet array for the docs
   * @return encoded namedlist of summaries
   */
  protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String,String[]> snippets) {
    NamedList<Object> list = new SimpleOrderedMap<Object>();
    for (int i = 0; i < keys.length; i++) {
      NamedList<Object> summary = new SimpleOrderedMap<Object>();
      for (String field : fieldNames) {
        String snippet = snippets.get(field)[i];
        // box in an array to match the format of existing highlighters,
        // even though its always one element.
        if (snippet == null) {
          summary.add(field, new String[0]);
        } else {
          summary.add(field, new String[] { snippet });
        }
      }
      list.add(keys[i], summary);
    }
    return list;
  }

  /** Converts solr's DocList to a lucene TopDocs */
  protected TopDocs toTopDocs(DocList docs) {
    ScoreDoc[] scoreDocs = new ScoreDoc[docs.size()];
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < scoreDocs.length; i++) {
      if (!iterator.hasNext()) {
        throw new AssertionError();
      }
      scoreDocs[i] = new ScoreDoc(iterator.nextDoc(), Float.NaN);
    }
    if (iterator.hasNext()) {
      throw new AssertionError();
    }
    return new TopDocs(docs.matches(), scoreDocs, Float.NaN);
  }

  /** Retrieves the unique keys for the topdocs to key the results */
  protected String[] getUniqueKeys(SolrIndexSearcher searcher, TopDocs topDocs) throws IOException {
    IndexSchema schema = searcher.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    if (keyField != null) {
      Set<String> selector = Collections.singleton(keyField.getName());
      String uniqueKeys[] = new String[topDocs.scoreDocs.length];
      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        int docid = topDocs.scoreDocs[i].doc;
        StoredDocument doc = searcher.doc(docid, selector);
        String id = schema.printableUniqueKey(doc);
        uniqueKeys[i] = id;
      }
      return uniqueKeys;
    } else {
      return new String[topDocs.scoreDocs.length];
    }
  }
}
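Because the highlighter field is protected and init(PluginInfo) is overridable, the new plugin can be specialized without copying it; a hypothetical sketch (class name, tags, and BreakIterator choice are invented) that only reuses constructors visible in this commit.

import java.text.BreakIterator;
import java.util.Locale;

import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PassageScorer;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.highlight.PostingsSolrHighlighter;

// Hypothetical subclass, not part of the patch: discard the highlighter built by
// super.init() and rebuild it with <strong> tags and a word-level BreakIterator.
public class StrongTagPostingsSolrHighlighter extends PostingsSolrHighlighter {
  @Override
  public void init(PluginInfo info) {
    super.init(info);
    highlighter = new PostingsHighlighter(
        PostingsHighlighter.DEFAULT_MAX_LENGTH,
        BreakIterator.getWordInstance(Locale.ROOT),
        new PassageScorer(),
        new PassageFormatter("<strong>", "</strong>", "... "));
  }
}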
@@ -109,6 +109,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
   private final SolrCache[] cacheList;
   private static final SolrCache[] noCaches = new SolrCache[0];

+  private final FieldInfos fieldInfos;
+  // TODO: do we need this separate set of field names? we can just use the fieldinfos?
   private final Collection<String> fieldNames;
   private Collection<String> storedHighlightFieldNames;
   private DirectoryFactory directoryFactory;

@@ -199,7 +201,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     optimizer = null;

     fieldNames = new HashSet<String>();
-    for(FieldInfo fieldInfo : atomicReader.getFieldInfos()) {
+    fieldInfos = atomicReader.getFieldInfos();
+    for(FieldInfo fieldInfo : fieldInfos) {
       fieldNames.add(fieldInfo.name);
     }

@@ -509,13 +512,56 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
   }

   /** Visit a document's fields using a {@link StoredFieldVisitor}
-   *  This method does not currently use the Solr document cache.
+   *  This method does not currently add to the Solr document cache.
    *
    * @see IndexReader#document(int, StoredFieldVisitor) */
   @Override
   public void doc(int n, StoredFieldVisitor visitor) throws IOException {
+    if (documentCache != null) {
+      StoredDocument cached = documentCache.get(n);
+      if (cached != null) {
+        visitFromCached(cached, visitor);
+        return;
+      }
+    }
     getIndexReader().document(n, visitor);
   }
+
+  /** Executes a stored field visitor against a hit from the document cache */
+  private void visitFromCached(StoredDocument document, StoredFieldVisitor visitor) throws IOException {
+    for (StorableField f : document) {
+      FieldInfo info = fieldInfos.fieldInfo(f.name());
+      switch(visitor.needsField(info)) {
+        case YES:
+          if (f.binaryValue() != null) {
+            BytesRef binaryValue = f.binaryValue();
+            byte copy[] = new byte[binaryValue.length];
+            System.arraycopy(binaryValue.bytes, binaryValue.offset, copy, 0, copy.length);
+            visitor.binaryField(info, copy);
+          } else if (f.numericValue() != null) {
+            Number numericValue = f.numericValue();
+            if (numericValue instanceof Double) {
+              visitor.doubleField(info, numericValue.doubleValue());
+            } else if (numericValue instanceof Integer) {
+              visitor.intField(info, numericValue.intValue());
+            } else if (numericValue instanceof Float) {
+              visitor.floatField(info, numericValue.floatValue());
+            } else if (numericValue instanceof Long) {
+              visitor.longField(info, numericValue.longValue());
+            } else {
+              throw new AssertionError();
+            }
+          } else {
+            visitor.stringField(info, f.stringValue());
+          }
+          break;
+        case NO:
+          break;
+        case STOP:
+          return;
+      }
+    }
+  }

   /**
    * Retrieve the {@link Document} instance corresponding to the document id.
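The doc(int, StoredFieldVisitor) override above lets callers stream selected stored fields without materializing a full document; a minimal visitor sketch (name invented, not part of the patch) that uses only the callbacks visitFromCached exercises.

import java.io.IOException;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;

// Hypothetical example: capture the first string value of one stored field, then stop.
public class SingleFieldValueVisitor extends StoredFieldVisitor {
  private final String fieldName;
  private String value;

  public SingleFieldValueVisitor(String fieldName) {
    this.fieldName = fieldName;
  }

  @Override
  public Status needsField(FieldInfo fieldInfo) throws IOException {
    if (value != null) {
      return Status.STOP; // already found it, skip the remaining fields
    }
    return fieldName.equals(fieldInfo.name) ? Status.YES : Status.NO;
  }

  @Override
  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
    this.value = value;
  }

  public String getValue() {
    return value;
  }
}

A caller would drive it with searcher.doc(docid, visitor) (the new method above) and then read visitor.getValue().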
@@ -0,0 +1,49 @@
<?xml version="1.0" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- Test schema file for PostingsHighlighter -->

<schema name="postingshighlight" version="1.0">
  <types>
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

    <!-- basic text field: no offsets! -->
    <fieldtype name="text" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.MockTokenizerFactory"/>
      </analyzer>
    </fieldtype>

    <!-- text field with offsets -->
    <fieldtype name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
      <analyzer>
        <tokenizer class="solr.MockTokenizerFactory"/>
      </analyzer>
    </fieldtype>
  </types>

  <fields>
    <field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
    <field name="text" type="text_offsets" indexed="true" stored="true"/>
    <field name="text2" type="text" indexed="true" stored="true"/>
    <field name="text3" type="text_offsets" indexed="true" stored="true"/>
  </fields>

  <defaultSearchField>text</defaultSearchField>
  <uniqueKey>id</uniqueKey>
</schema>
@@ -0,0 +1,30 @@
<?xml version="1.0" ?>

<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- a basic solrconfig for postings highlighter -->
<config>
  <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
  <dataDir>${solr.data.dir:}</dataDir>
  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
  <requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>

  <searchComponent class="solr.HighlightComponent" name="highlight">
    <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
  </searchComponent>
</config>
@@ -0,0 +1,103 @@
package org.apache.solr.highlight;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.schema.IndexSchema;
import org.junit.BeforeClass;

/** simple tests for PostingsSolrHighlighter */
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-postingshighlight.xml", "schema-postingshighlight.xml");

    // test our config is sane, just to be sure:

    // postingshighlighter should be used
    SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore());
    assertTrue("wrong highlighter: " + highlighter.getClass(), highlighter instanceof PostingsSolrHighlighter);

    // 'text' and 'text3' should have offsets, 'text2' should not
    IndexSchema schema = h.getCore().getSchema();
    assertTrue(schema.getField("text").storeOffsetsWithPositions());
    assertTrue(schema.getField("text3").storeOffsetsWithPositions());
    assertFalse(schema.getField("text2").storeOffsetsWithPositions());

    assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
    assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
    assertU(commit());
  }

  public void testSimple() {
    assertQ("simplest test",
        req("q", "text:document", "sort", "id asc", "hl", "true"),
        "count(//lst[@name='highlighting']/*)=2",
        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
  }

  public void testPagination() {
    assertQ("pagination test",
        req("q", "text:document", "sort", "id asc", "hl", "true", "rows", "1", "start", "1"),
        "count(//lst[@name='highlighting']/*)=1",
        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
  }

  public void testEmptySnippet() {
    assertQ("null snippet test",
        req("q", "text:one OR *:*", "sort", "id asc", "hl", "true"),
        "count(//lst[@name='highlighting']/*)=2",
        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
        "count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
  }

  public void testDifferentField() {
    assertQ("highlighting text3",
        req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
        "count(//lst[@name='highlighting']/*)=2",
        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
  }

  public void testTwoFields() {
    assertQ("highlighting text and text3",
        req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3"),
        "count(//lst[@name='highlighting']/*)=2",
        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
  }

  public void testMisconfiguredField() {
    ignoreException("was indexed without offsets");
    try {
      assertQ("should fail, has no offsets",
          req("q", "text2:document", "sort", "id asc", "hl", "true", "hl.fl", "text2"));
      fail();
    } catch (Exception expected) {
      // expected
    }
    resetExceptionIgnores();
  }
}