SOLR-4271: add support for PostingsHighlighter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1429413 13f79535-47bb-0310-9956-ffa450edef68
2025-02-28 21:39:25 +00:00 · 2013-01-05 22:51:30 +00:00 · 2013-01-05 22:51:30 +00:00 · 64e18dc0f6
commit 64e18dc0f6
parent 8e49b54fde
15 changed files with 461 additions and 16 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -162,7 +162,7 @@ New Features
  extractWikipedia.alg was changed to use this task, so now it creates two
  files. (Doron Cohen)

-* LUCENE-4290: Added PostingsHighlighter to the sandbox module. It uses
+* LUCENE-4290: Added PostingsHighlighter to the highlighter module. It uses
  offsets from the postings lists to highlight documents. (Robert Muir)

 * LUCENE-4628: Added CommonTermsQuery that executes high-frequency terms
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,32 @@ package org.apache.lucene.sandbox.postingshighlight;
 * @lucene.experimental
 */
 public class PassageFormatter {
+  private final String preTag;
+  private final String postTag;
+  private final String ellipsis;
+  
+  /**
+   * Creates a new PassageFormatter with the default tags:
+   * <code>&lt;b&gt;</code> before each highlighted term, <code>&lt;/b&gt;</code>
+   * after it, and <code>"... "</code> to connect two unconnected passages.
+   */
+  public PassageFormatter() {
+    this("<b>", "</b>", "... ");
+  }
+  
+  /**
+   * Creates a new PassageFormatter with custom tags.
+   * @param preTag text which should appear before a highlighted term.
+   * @param postTag text which should appear after a highlighted term.
+   * @param ellipsis text which should be used to connect two unconnected passages.
+   * @throws NullPointerException if any argument is null.
+   */
+  public PassageFormatter(String preTag, String postTag, String ellipsis) {
+    // check each argument on its own so the exception names the offending one
+    if (preTag == null) {
+      throw new NullPointerException("preTag must not be null");
+    }
+    if (postTag == null) {
+      throw new NullPointerException("postTag must not be null");
+    }
+    if (ellipsis == null) {
+      throw new NullPointerException("ellipsis must not be null");
+    }
+    this.preTag = preTag;
+    this.postTag = postTag;
+    this.ellipsis = ellipsis;
+  }
+  
  /**
   * Formats the top <code>passages</code> from <code>content</code>
   * into a human-readable text snippet.
@ -40,7 +66,7 @@ public class PassageFormatter {
    for (Passage passage : passages) {
      // don't add ellipsis if its the first one, or if its connected.
      if (passage.startOffset > pos && pos > 0) {
-        sb.append("... ");
+        sb.append(ellipsis);
      }
      pos = passage.startOffset;
      for (int i = 0; i < passage.numMatches; i++) {
@ -51,9 +77,9 @@ public class PassageFormatter {
          sb.append(content.substring(pos, start));
        }
        if (end > pos) {
-          sb.append("<b>");
+          sb.append(preTag);
          sb.append(content.substring(Math.max(pos, start), end));
-          sb.append("</b>");
+          sb.append(postTag);
          pos = end;
        }
      }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageScorer.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageScorer.java
@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
@ -62,8 +62,7 @@ import org.apache.lucene.util.UnicodeUtil;
 * into a {@link Passage}, and then scores each Passage using a separate {@link PassageScorer}. 
 * Passages are finally formatted into highlighted snippets with a {@link PassageFormatter}.
 * <p>
- * <b>WARNING</b>: The code is very new and may still have some exciting bugs! This is why 
- * it's located under Lucene's sandbox module. 
+ * <b>WARNING</b>: The code is very new and may still have some exciting bugs!
 * <p>
 * Example usage:
 * <pre class="prettyprint">
@ -256,7 +255,7 @@ public final class PostingsHighlighter {
    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
    String contents[][] = new String[fields.length][docids.length];
    for (int i = 0; i < docids.length; i++) {
-      reader.document(docids[i], visitor);
+      searcher.doc(docids[i], visitor);
      for (int j = 0; j < fields.length; j++) {
        contents[j][i] = visitor.getValue(j).toString();
      }
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/package.html
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/package.html
--- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
--- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
@ -1,4 +1,4 @@
-package org.apache.lucene.sandbox.postingshighlight;
+package org.apache.lucene.search.postingshighlight;

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -189,6 +189,8 @@ New Features
  rolling averages; median, 75th, 95th, 99th, 99.9th percentile request times
  (Alan Woodward, Shawn Heisey, Adrien Grand, Uwe Schindler)

+* SOLR-4271: Add support for PostingsHighlighter.  (Robert Muir)
+
 Optimizations
 ----------------------

--- a/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
@ -24,6 +24,7 @@ import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.highlight.PostingsSolrHighlighter;
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.solr.highlight.DefaultSolrHighlighter;
 import org.apache.solr.request.SolrQueryRequest;
@ -128,7 +129,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
      }
      
      if(highlightQuery != null) {
-        boolean rewrite = !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
+        boolean rewrite = (highlighter instanceof PostingsSolrHighlighter == false) && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
            Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
        highlightQuery = rewrite ?  highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
      }
--- a/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
@ -0,0 +1,189 @@
+package org.apache.solr.highlight;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.BreakIterator;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.postingshighlight.PassageFormatter;
+import org.apache.lucene.search.postingshighlight.PassageScorer;
+import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.plugin.PluginInfoInitialized;
+
+/** 
+ * Highlighter impl that uses {@link PostingsHighlighter}
+ * <p>
+ * Example configuration:
+ * <pre class="prettyprint">
+ *   &lt;searchComponent class="solr.HighlightComponent" name="highlight"&gt;
+ *     &lt;highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"
+ *                      preTag="&amp;lt;em&amp;gt;"
+ *                      postTag="&amp;lt;/em&amp;gt;"
+ *                      ellipsis="... "
+ *                      maxLength="10000"/&gt;
+ *   &lt;/searchComponent&gt;
+ * </pre>
+ * <p>
+ * Notes:
+ *  <ul>
+ *    <li>fields to highlight must be configured with storeOffsetsWithPositions="true"
+ *    <li>hl.fl specifies the field list.
+ *    <li>hl.snippets specifies how many underlying sentence fragments form the resulting snippet.
+ *  </ul>
+ *  
+ * @lucene.experimental 
+ */
+public class PostingsSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
+  protected PostingsHighlighter highlighter;
+
+  // Intentionally empty: this class builds its PostingsHighlighter in
+  // init(PluginInfo) and reads nothing from the SolrConfig here.
+  // NOTE(review): the spelling "initalize" is dictated by the overridden
+  // supertype method, so it cannot be corrected in this class alone.
+  @Override
+  public void initalize(SolrConfig config) {}
+  
+  /**
+   * Builds the underlying {@link PostingsHighlighter} from the plugin attributes
+   * <code>preTag</code>, <code>postTag</code>, <code>ellipsis</code> and
+   * <code>maxLength</code>; any attribute that is absent falls back to its default.
+   */
+  @Override
+  public void init(PluginInfo info) {
+    final Map<String,String> attrs = info.attributes;
+
+    // formatter parameters: preTag/postTag/ellipsis
+    final String before = attrs.get("preTag");
+    final String after = attrs.get("postTag");
+    final String gap = attrs.get("ellipsis");
+    final PassageFormatter passageFormatter = new PassageFormatter(
+        before == null ? "<em>" : before,
+        after == null ? "</em>" : after,
+        gap == null ? "... " : gap);
+
+    // maximum content size to process
+    final int limit = attrs.containsKey("maxLength")
+        ? Integer.parseInt(attrs.get("maxLength"))
+        : PostingsHighlighter.DEFAULT_MAX_LENGTH;
+
+    // passages are split on sentence boundaries, independent of the default locale
+    highlighter = new PostingsHighlighter(
+        limit,
+        BreakIterator.getSentenceInstance(Locale.ROOT),
+        new PassageScorer(),
+        passageFormatter);
+  }
+
+  /**
+   * Highlights the requested fields for every document in <code>docs</code>.
+   * @return the snippets encoded per document unique key, or <code>null</code>
+   *         when highlighting is not enabled for the request
+   */
+  @Override
+  public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
+    final SolrParams requestParams = req.getParams();
+
+    // if highlighting isn't enabled, there is nothing to compute
+    if (!isHighlightingEnabled(requestParams)) {
+      return null;
+    }
+
+    final SolrIndexSearcher indexSearcher = req.getSearcher();
+    final TopDocs hits = toTopDocs(docs);
+
+    // the unique keys label each document's entry in the result
+    final String[] uniqueKeys = getUniqueKeys(indexSearcher, hits);
+
+    // query-time parameters
+    final String[] fields = getHighlightFields(query, req, defaultFields);
+    final int snippetCount = requestParams.getInt(HighlightParams.SNIPPETS, 1);
+
+    final Map<String,String[]> byField =
+        highlighter.highlightFields(fields, query, indexSearcher, hits, snippetCount);
+    return encodeSnippets(uniqueKeys, fields, byField);
+  }
+  
+  /** 
+   * Packs the per-field snippets into a namedlist: one entry per document key,
+   * each holding one entry per highlighted field.
+   * @param keys the document unique keys
+   * @param fieldNames field names to highlight, in the order their entries are added
+   * @param snippets map from field name to snippet array for the docs
+   * @return encoded namedlist of summaries
+   */
+  protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String,String[]> snippets) {
+    final NamedList<Object> result = new SimpleOrderedMap<Object>();
+    int docIndex = 0;
+    for (String key : keys) {
+      final NamedList<Object> perDoc = new SimpleOrderedMap<Object>();
+      for (String field : fieldNames) {
+        final String text = snippets.get(field)[docIndex];
+        // always box in an array to match the format of existing highlighters:
+        // empty when there is no snippet, otherwise exactly one element.
+        final String[] boxed = (text == null) ? new String[0] : new String[] { text };
+        perDoc.add(field, boxed);
+      }
+      result.add(key, perDoc);
+      docIndex++;
+    }
+    return result;
+  }
+  
+  /**
+   * Converts solr's DocList to a lucene TopDocs. Only the doc ids are carried
+   * over; every score, and the max score, is set to NaN.
+   */
+  protected TopDocs toTopDocs(DocList docs) {
+    final int size = docs.size();
+    final ScoreDoc[] hits = new ScoreDoc[size];
+    final DocIterator it = docs.iterator();
+    int filled = 0;
+    while (filled < size && it.hasNext()) {
+      hits[filled++] = new ScoreDoc(it.nextDoc(), Float.NaN);
+    }
+    // the iterator must yield exactly size() documents: no fewer, no more
+    if (filled < size || it.hasNext()) {
+      throw new AssertionError();
+    }
+    return new TopDocs(docs.matches(), hits, Float.NaN);
+  }
+  
+  /**
+   * Retrieves the unique keys for the topdocs to key the results. When the schema
+   * has no unique key field, the returned array has one null entry per hit.
+   */
+  protected String[] getUniqueKeys(SolrIndexSearcher searcher, TopDocs topDocs) throws IOException {
+    final IndexSchema schema = searcher.getSchema();
+    final SchemaField keyField = schema.getUniqueKeyField();
+    final ScoreDoc[] hits = topDocs.scoreDocs;
+    final String[] ids = new String[hits.length];
+    if (keyField == null) {
+      // no unique key configured: leave every slot null
+      return ids;
+    }
+    // load only the key field of each document
+    final Set<String> keyOnly = Collections.singleton(keyField.getName());
+    for (int i = 0; i < hits.length; i++) {
+      final StoredDocument stored = searcher.doc(hits[i].doc, keyOnly);
+      ids[i] = schema.printableUniqueKey(stored);
+    }
+    return ids;
+  }
+}
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@ -109,6 +109,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
  private final SolrCache[] cacheList;
  private static final SolrCache[] noCaches = new SolrCache[0];
  
+  private final FieldInfos fieldInfos;
+  // TODO: do we need this separate set of field names? we can just use the fieldinfos?
  private final Collection<String> fieldNames;
  private Collection<String> storedHighlightFieldNames;
  private DirectoryFactory directoryFactory;
@ -199,7 +201,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
    optimizer = null;
    
    fieldNames = new HashSet<String>();
-    for(FieldInfo fieldInfo : atomicReader.getFieldInfos()) {
+    fieldInfos = atomicReader.getFieldInfos();
+    for(FieldInfo fieldInfo : fieldInfos) {
      fieldNames.add(fieldInfo.name);
    }

@ -509,13 +512,56 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
  }

  /** Visit a document's fields using a {@link StoredFieldVisitor}
-   *  This method does not currently use the Solr document cache.
+   *  This method does not currently add to the Solr document cache.
   * 
   * @see IndexReader#document(int, StoredFieldVisitor) */
  @Override
  public void doc(int n, StoredFieldVisitor visitor) throws IOException {
+    // Serve the visitor from the document cache when this doc is already cached.
+    if (documentCache != null) {
+      StoredDocument cached = documentCache.get(n);
+      if (cached != null) {
+        visitFromCached(cached, visitor);
+        return;
+      }
+    }
+    // Cache miss (or no cache): read from the index. Nothing is put into the cache here.
    getIndexReader().document(n, visitor);
  }
+  
+  /**
+   * Executes a stored field visitor against a hit from the document cache:
+   * each field the visitor asks for is passed to the callback matching its
+   * value type (binary, numeric or string).
+   */
+  private void visitFromCached(StoredDocument document, StoredFieldVisitor visitor) throws IOException {
+    for (StorableField field : document) {
+      final FieldInfo fieldInfo = fieldInfos.fieldInfo(field.name());
+      switch (visitor.needsField(fieldInfo)) {
+        case STOP:
+          // the visitor wants nothing further from this document
+          return;
+        case NO:
+          break;
+        case YES: {
+          final BytesRef bytes = field.binaryValue();
+          if (bytes != null) {
+            // hand the visitor its own copy of the value's bytes
+            final byte[] copy = new byte[bytes.length];
+            System.arraycopy(bytes.bytes, bytes.offset, copy, 0, copy.length);
+            visitor.binaryField(fieldInfo, copy);
+            break;
+          }
+          final Number number = field.numericValue();
+          if (number == null) {
+            // neither binary nor numeric: treat as a string value
+            visitor.stringField(fieldInfo, field.stringValue());
+          } else if (number instanceof Double) {
+            visitor.doubleField(fieldInfo, number.doubleValue());
+          } else if (number instanceof Integer) {
+            visitor.intField(fieldInfo, number.intValue());
+          } else if (number instanceof Float) {
+            visitor.floatField(fieldInfo, number.floatValue());
+          } else if (number instanceof Long) {
+            visitor.longField(fieldInfo, number.longValue());
+          } else {
+            throw new AssertionError();
+          }
+          break;
+        }
+      }
+    }
+  }

  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
--- a/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml
@ -0,0 +1,49 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for PostingsHighlighter -->
+
+<schema name="postingshighlight" version="1.0">
+  <types>
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+
+    <!-- basic text field: no offsets! -->
+    <fieldtype name="text" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory"/>
+      </analyzer>
+    </fieldtype>
+    
+    <!-- text field with offsets -->
+    <fieldtype name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
+      <analyzer>
+        <tokenizer class="solr.MockTokenizerFactory"/>
+      </analyzer>
+    </fieldtype>
+   </types>
+
+  <fields>
+    <field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
+    <field name="text" type="text_offsets" indexed="true" stored="true"/>
+    <field name="text2" type="text" indexed="true" stored="true"/>
+    <field name="text3" type="text_offsets" indexed="true" stored="true"/>
+  </fields>
+
+  <defaultSearchField>text</defaultSearchField>
+  <uniqueKey>id</uniqueKey>
+</schema>
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml
@ -0,0 +1,30 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- a basic solrconfig for postings highlighter -->
+<config>
+  <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
+  <dataDir>${solr.data.dir:}</dataDir>
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+  <requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
+
+  <searchComponent class="solr.HighlightComponent" name="highlight">
+    <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
+  </searchComponent>
+</config>
--- a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
@ -0,0 +1,103 @@
+package org.apache.solr.highlight;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.handler.component.HighlightComponent;
+import org.apache.solr.schema.IndexSchema;
+import org.junit.BeforeClass;
+
+/**
+ * Simple tests for PostingsSolrHighlighter, run against a two-document index
+ * whose <code>text</code> and <code>text3</code> fields store offsets and whose
+ * <code>text2</code> field does not.
+ */
+@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
+public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
+  
+  /** Starts the core, sanity-checks the configuration and indexes the two test documents. */
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-postingshighlight.xml", "schema-postingshighlight.xml");
+    
+    // test our config is sane, just to be sure:
+    
+    // postingshighlighter should be used
+    SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore());
+    assertTrue("wrong highlighter: " + highlighter.getClass(), highlighter instanceof PostingsSolrHighlighter);
+    
+    // 'text' and 'text3' should have offsets, 'text2' should not
+    IndexSchema schema = h.getCore().getSchema();
+    assertTrue(schema.getField("text").storeOffsetsWithPositions());
+    assertTrue(schema.getField("text3").storeOffsetsWithPositions());
+    assertFalse(schema.getField("text2").storeOffsetsWithPositions());
+    
+    assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
+    assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
+    assertU(commit());
+  }
+  
+  /** Both documents match and each gets its matching term wrapped in the em tags. */
+  public void testSimple() {
+    assertQ("simplest test", 
+        req("q", "text:document", "sort", "id asc", "hl", "true"),
+        "count(//lst[@name='highlighting']/*)=2",
+        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
+        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
+  }
+  
+  /** With rows=1 and start=1 only the second document is highlighted. */
+  public void testPagination() {
+    assertQ("pagination test", 
+        req("q", "text:document", "sort", "id asc", "hl", "true", "rows", "1", "start", "1"),
+        "count(//lst[@name='highlighting']/*)=1",
+        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
+  }
+  
+  /** A document that matches the query but has no snippet still gets an (empty) entry. */
+  public void testEmptySnippet() {
+    assertQ("null snippet test", 
+      req("q", "text:one OR *:*", "sort", "id asc", "hl", "true"),
+        "count(//lst[@name='highlighting']/*)=2",
+        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
+        "count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
+  }
+  
+  /** hl.fl selects a field other than the default one. */
+  public void testDifferentField() {
+    assertQ("highlighting text3", 
+        req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
+        "count(//lst[@name='highlighting']/*)=2",
+        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
+        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
+  }
+  
+  /** hl.fl with two fields produces one entry per field for every document. */
+  public void testTwoFields() {
+    assertQ("highlighting text and text3", 
+        req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3"),
+        "count(//lst[@name='highlighting']/*)=2",
+        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
+        "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
+        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
+        "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
+  }
+  
+  /** Highlighting a field that was indexed without offsets must fail. */
+  public void testMisconfiguredField() {
+    ignoreException("was indexed without offsets");
+    try {
+      assertQ("should fail, has no offsets",
+        req("q", "text2:document", "sort", "id asc", "hl", "true", "hl.fl", "text2"));
+      fail("highlighting a field indexed without offsets should have thrown");
+    } catch (Exception expected) {
+      // expected
+    } finally {
+      // fail() throws an AssertionError, which the catch above does not handle;
+      // restore the ignore list here so it never leaks into other tests.
+      resetExceptionIgnores();
+    }
+  }
+}