LUCENE-4853: fix sort order bug with returned snippets

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1458944 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-03-20 16:56:25 +00:00
parent dd3a8dff69
commit f7e106bd71
2 changed files with 58 additions and 3 deletions

View File

@ -273,7 +273,7 @@ public class PostingsHighlighter {
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, int[] docids, int maxPassages) throws IOException {
public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassages) throws IOException {
final IndexReader reader = searcher.getIndexReader();
query = rewrite(query);
SortedSet<Term> queryTerms = new TreeSet<Term>();
@ -284,6 +284,10 @@ public class PostingsHighlighter {
BreakIterator bi = (BreakIterator)breakIterator.clone();
// Make our own copy because we sort in-place:
int[] docids = new int[docidsIn.length];
System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
// sort for sequential io
Arrays.sort(docids);
Arrays.sort(fields);
@ -302,8 +306,8 @@ public class PostingsHighlighter {
Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
String[] result = new String[docids.length];
for (int j = 0; j < docids.length; j++) {
result[j] = fieldHighlights.get(docids[j]);
for (int j = 0; j < docidsIn.length; j++) {
result[j] = fieldHighlights.get(docidsIn[j]);
}
highlights.put(field, result);
}

View File

@ -35,6 +35,7 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -748,4 +749,54 @@ public class TestPostingsHighlighter extends LuceneTestCase {
ir.close();
dir.close();
}
/**
 * Indexes at least 100 small documents, highlights every hit of a single-term
 * query in one call, and checks that the snippet at each position belongs to
 * the hit at the same position in the search results.
 */
public void testMultipleDocs() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  // The body field is stored and indexed with offsets in the postings.
  FieldType bodyType = new FieldType(TextField.TYPE_STORED);
  bodyType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

  final int docCount = atLeast(100);
  for (int docNum = 0; docNum < docCount; docNum++) {
    // Even-numbered documents carry a longer body than odd-numbered ones.
    String bodyText = "the answer is " + docNum + (docNum % 2 == 0 ? " some more terms" : "");

    Document document = new Document();
    document.add(new Field("body", bodyText, bodyType));
    // The stored id lets the verification loop rebuild the expected snippet.
    document.add(newStringField("id", Integer.toString(docNum), Field.Store.YES));
    writer.addDocument(document);

    // Occasional commits; presumably to spread the documents over several segments.
    if (random().nextInt(10) == 2) {
      writer.commit();
    }
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  PostingsHighlighter highlighter = new PostingsHighlighter();
  Query query = new TermQuery(new Term("body", "answer"));

  // Every document contains the query term, so all of them must match.
  TopDocs topDocs = searcher.search(query, docCount);
  assertEquals(docCount, topDocs.totalHits);

  String[] snippets = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(docCount, snippets.length);

  // snippets[rank] must describe the document at scoreDocs[rank].
  for (int rank = 0; rank < docCount; rank++) {
    StoredDocument stored = searcher.doc(topDocs.scoreDocs[rank].doc);
    int storedId = Integer.parseInt(stored.get("id"));
    String expectedSnippet =
        "the <b>answer</b> is " + storedId + (storedId % 2 == 0 ? " some more terms" : "");
    assertEquals(expectedSnippet, snippets[rank]);
  }

  reader.close();
  directory.close();
}
}