mirror of https://github.com/apache/lucene.git
LUCENE-5453: some trivial refactoring to postingshighlighter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1569851 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3ecc6d8d01
commit
dddbcdffb1
|
@ -456,31 +456,31 @@ public class PostingsHighlighter {
|
|||
private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages, Query query) throws IOException {
|
||||
Map<Integer,Object> highlights = new HashMap<Integer,Object>();
|
||||
|
||||
// reuse in the real sense... for docs in same segment we just advance our old enum
|
||||
DocsAndPositionsEnum postings[] = null;
|
||||
TermsEnum termsEnum = null;
|
||||
int lastLeaf = -1;
|
||||
|
||||
PassageFormatter fieldFormatter = getFormatter(field);
|
||||
if (fieldFormatter == null) {
|
||||
throw new NullPointerException("PassageFormatter cannot be null");
|
||||
}
|
||||
|
||||
// check if we should do any multitermprocessing
|
||||
// check if we should do any multiterm processing
|
||||
Analyzer analyzer = getIndexAnalyzer(field);
|
||||
CharacterRunAutomaton automata[] = new CharacterRunAutomaton[0];
|
||||
if (analyzer != null) {
|
||||
automata = MultiTermHighlighting.extractAutomata(query, field);
|
||||
}
|
||||
|
||||
final BytesRef allTerms[];
|
||||
// resize 'terms', where the last term is the multiterm matcher
|
||||
if (automata.length > 0) {
|
||||
allTerms = new BytesRef[terms.length + 1];
|
||||
System.arraycopy(terms, 0, allTerms, 0, terms.length);
|
||||
} else {
|
||||
allTerms = terms;
|
||||
BytesRef newTerms[] = new BytesRef[terms.length + 1];
|
||||
System.arraycopy(terms, 0, newTerms, 0, terms.length);
|
||||
terms = newTerms;
|
||||
}
|
||||
|
||||
// we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
|
||||
// otherwise, we will just advance() existing enums to the new document in the same segment.
|
||||
DocsAndPositionsEnum postings[] = null;
|
||||
TermsEnum termsEnum = null;
|
||||
int lastLeaf = -1;
|
||||
|
||||
for (int i = 0; i < docids.length; i++) {
|
||||
String content = contents[i];
|
||||
if (content.length() == 0) {
|
||||
|
@ -491,28 +491,39 @@ public class PostingsHighlighter {
|
|||
int leaf = ReaderUtil.subIndex(doc, leaves);
|
||||
AtomicReaderContext subContext = leaves.get(leaf);
|
||||
AtomicReader r = subContext.reader();
|
||||
Terms t = r.terms(field);
|
||||
if (t == null) {
|
||||
continue; // nothing to do
|
||||
}
|
||||
|
||||
assert leaf >= lastLeaf; // increasing order
|
||||
|
||||
// if the segment has changed, we must initialize new enums.
|
||||
if (leaf != lastLeaf) {
|
||||
termsEnum = t.iterator(null);
|
||||
postings = new DocsAndPositionsEnum[allTerms.length];
|
||||
Terms t = r.terms(field);
|
||||
if (t != null) {
|
||||
termsEnum = t.iterator(null);
|
||||
postings = new DocsAndPositionsEnum[terms.length];
|
||||
}
|
||||
}
|
||||
if (termsEnum == null) {
|
||||
continue; // no terms for this field, nothing to do
|
||||
}
|
||||
|
||||
// if there are multi-term matches, we have to initialize the "fake" enum for each document
|
||||
if (automata.length > 0) {
|
||||
DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata);
|
||||
dp.advance(doc - subContext.docBase);
|
||||
postings[terms.length] = dp;
|
||||
postings[terms.length-1] = dp; // last term is the multiterm matcher
|
||||
}
|
||||
Passage passages[] = highlightDoc(field, allTerms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
|
||||
|
||||
Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
|
||||
|
||||
if (passages.length == 0) {
|
||||
// no passages were returned, so ask for a default summary
|
||||
passages = getEmptyHighlight(field, bi, maxPassages);
|
||||
}
|
||||
|
||||
if (passages.length > 0) {
|
||||
// otherwise a null snippet (eg if field is missing
|
||||
// entirely from the doc)
|
||||
highlights.put(doc, fieldFormatter.format(passages, content));
|
||||
}
|
||||
|
||||
lastLeaf = leaf;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue