LUCENE-5453: some trivial refactoring to postingshighlighter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1569851 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-02-19 17:44:02 +00:00
parent 3ecc6d8d01
commit dddbcdffb1
1 changed file with 32 additions and 21 deletions


@@ -456,31 +456,31 @@ public class PostingsHighlighter {
   private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages, Query query) throws IOException {
     Map<Integer,Object> highlights = new HashMap<Integer,Object>();
-    
-    // reuse in the real sense... for docs in same segment we just advance our old enum
-    DocsAndPositionsEnum postings[] = null;
-    TermsEnum termsEnum = null;
-    int lastLeaf = -1;
-    
+    
     PassageFormatter fieldFormatter = getFormatter(field);
     if (fieldFormatter == null) {
       throw new NullPointerException("PassageFormatter cannot be null");
     }
     
-    // check if we should do any multitermprocessing
+    // check if we should do any multiterm processing
     Analyzer analyzer = getIndexAnalyzer(field);
     CharacterRunAutomaton automata[] = new CharacterRunAutomaton[0];
     if (analyzer != null) {
       automata = MultiTermHighlighting.extractAutomata(query, field);
     }
     
-    final BytesRef allTerms[];
+    // resize 'terms', where the last term is the multiterm matcher
     if (automata.length > 0) {
-      allTerms = new BytesRef[terms.length + 1];
-      System.arraycopy(terms, 0, allTerms, 0, terms.length);
-    } else {
-      allTerms = terms;
+      BytesRef newTerms[] = new BytesRef[terms.length + 1];
+      System.arraycopy(terms, 0, newTerms, 0, terms.length);
+      terms = newTerms;
     }
+    
+    // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
+    // otherwise, we will just advance() existing enums to the new document in the same segment.
+    DocsAndPositionsEnum postings[] = null;
+    TermsEnum termsEnum = null;
+    int lastLeaf = -1;
     
     for (int i = 0; i < docids.length; i++) {
       String content = contents[i];
       if (content.length() == 0) {
@@ -491,28 +491,39 @@ public class PostingsHighlighter {
       int leaf = ReaderUtil.subIndex(doc, leaves);
       AtomicReaderContext subContext = leaves.get(leaf);
       AtomicReader r = subContext.reader();
-      Terms t = r.terms(field);
-      if (t == null) {
-        continue; // nothing to do
-      }
+      
+      assert leaf >= lastLeaf; // increasing order
+      
+      // if the segment has changed, we must initialize new enums.
       if (leaf != lastLeaf) {
-        termsEnum = t.iterator(null);
-        postings = new DocsAndPositionsEnum[allTerms.length];
+        Terms t = r.terms(field);
+        if (t != null) {
+          termsEnum = t.iterator(null);
+          postings = new DocsAndPositionsEnum[terms.length];
+        }
       }
+      if (termsEnum == null) {
+        continue; // no terms for this field, nothing to do
+      }
+      
+      // if there are multi-term matches, we have to initialize the "fake" enum for each document
       if (automata.length > 0) {
         DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata);
         dp.advance(doc - subContext.docBase);
-        postings[terms.length] = dp;
+        postings[terms.length-1] = dp; // last term is the multiterm matcher
       }
-      Passage passages[] = highlightDoc(field, allTerms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
+      
+      Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
       if (passages.length == 0) {
+        // no passages were returned, so ask for a default summary
        passages = getEmptyHighlight(field, bi, maxPassages);
       }
-      
+      
       if (passages.length > 0) {
         // otherwise a null snippet (eg if field is missing
         // entirely from the doc)
         highlights.put(doc, fieldFormatter.format(passages, content));
       }
+      
       lastLeaf = leaf;
     }
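
The invariant the refactored loop leans on is that docids arrive in increasing order, so the TermsEnum and DocsAndPositionsEnum[] only have to be rebuilt when the loop crosses a segment boundary; within a segment the cached enums are just advance()d. Below is a minimal sketch of that pattern against the Lucene 4.x API. The class and method names are hypothetical (not from this commit), and unlike the committed code the sketch explicitly nulls the old enums at each boundary and updates lastLeaf inside the branch, to keep it self-contained:

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical illustration of the per-segment reuse pattern; not part of the commit.
class SegmentReuseSketch {

  // 'docids' must be sorted in increasing order, as in PostingsHighlighter;
  // that ordering is what makes keeping enums across iterations safe.
  void visit(String field, BytesRef terms[], int[] docids,
             List<AtomicReaderContext> leaves) throws IOException {
    TermsEnum termsEnum = null;
    DocsAndPositionsEnum postings[] = null;
    int lastLeaf = -1;

    for (int doc : docids) {
      int leaf = ReaderUtil.subIndex(doc, leaves);
      assert leaf >= lastLeaf; // increasing docids imply non-decreasing leaves

      if (leaf != lastLeaf) {
        // segment boundary crossed: drop the old enums and rebuild them
        termsEnum = null;
        postings = null;
        Terms t = leaves.get(leaf).reader().terms(field);
        if (t != null) {
          termsEnum = t.iterator(null);
          postings = new DocsAndPositionsEnum[terms.length];
        }
        lastLeaf = leaf;
      }
      if (termsEnum == null) {
        continue; // field has no terms in this segment
      }
      // within the same segment, the cached enums would simply be
      // advance()d to (doc - leaves.get(leaf).docBase) rather than recreated
    }
  }
}

The other half of the change is visible in the second hunk: instead of a parallel allTerms array, the multiterm matcher now occupies the last slot of a resized terms array (postings[terms.length-1]), so highlightDoc sees one uniform term list.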