mirror of https://github.com/apache/lucene.git
LUCENE-8145: FieldOffsetStrategy.getOffsetsEnum() now returns a single MultiOffsetsEnum
Closes #317
parent 98a0b83714
commit 9422609a5d
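The heart of this commit is an API narrowing in the UnifiedHighlighter internals: FieldOffsetStrategy used to hand FieldHighlighter a List<OffsetsEnum> that the caller had to iterate, weight, and close individually; it now returns one merged, closeable OffsetsEnum. A minimal before/after sketch of the contract (signatures taken from the hunks below; the comments are editorial):

    // Before: callers received many enums and had to close each one.
    public abstract List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException;

    // After: callers receive a single OffsetsEnum (possibly a MultiOffsetsEnum
    // merging several underlying enums) and close just that one.
    public abstract OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException;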
CHANGES.txt
@@ -98,6 +98,10 @@ API Changes
 * LUCENE-8104: Remove facets module compile-time dependency on queries
   (Alan Woodward)
 
+* LUCENE-8145: UnifiedHighlighter now uses a unitary OffsetsEnum rather
+  than a list of enums (Alan Woodward, David Smiley, Jim Ferenczi, Timothy
+  Rodriguez)
+
 New Features
 
 * LUCENE-2899: Add new module analysis/opennlp, with analysis components
CompositeOffsetsPostingsEnum.java (file deleted)
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.uhighlight;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.PriorityQueue;
-
-/**
- * Provides a view over several underlying PostingsEnums for the iteration of offsets on the current document only.
- * It's not general purpose; the position returned is always -1 and it doesn't iterate the documents.
- */
-final class CompositeOffsetsPostingsEnum extends PostingsEnum {
-
-  private final int docId;
-  private final int freq;
-  private final PriorityQueue<BoundsCheckingPostingsEnum> queue;
-  private boolean firstPositionConsumed = false;
-
-  /**
-   * This class is used to ensure we don't over iterate the underlying
-   * postings enum by keeping track of the position relative to the
-   * frequency.
-   * Ideally this would've been an implementation of a PostingsEnum
-   * but it would have to delegate most methods and it seemed easier
-   * to just wrap the tweaked method.
-   */
-  private static final class BoundsCheckingPostingsEnum {
-
-    private final PostingsEnum postingsEnum;
-    private int remainingPositions;
-
-    BoundsCheckingPostingsEnum(PostingsEnum postingsEnum) throws IOException {
-      this.postingsEnum = postingsEnum;
-      this.remainingPositions = postingsEnum.freq();
-      nextPosition();
-    }
-
-    /** Advances to the next position and returns true, or returns false if it can't. */
-    private boolean nextPosition() throws IOException {
-      if (remainingPositions-- > 0) {
-        postingsEnum.nextPosition(); // ignore the actual position; we don't care.
-        return true;
-      } else {
-        return false;
-      }
-    }
-
-  }
-
-  /** The provided {@link PostingsEnum}s must all be positioned to the same document, and must have offsets. */
-  CompositeOffsetsPostingsEnum(List<PostingsEnum> postingsEnums) throws IOException {
-    queue = new PriorityQueue<BoundsCheckingPostingsEnum>(postingsEnums.size()) {
-      @Override
-      protected boolean lessThan(BoundsCheckingPostingsEnum a, BoundsCheckingPostingsEnum b) {
-        try {
-          return a.postingsEnum.startOffset() < b.postingsEnum.startOffset();
-        } catch (IOException e) {
-          throw new RuntimeException(e);
-        }
-      }
-    };
-
-    int freqAdd = 0;
-    for (PostingsEnum postingsEnum : postingsEnums) {
-      queue.add(new BoundsCheckingPostingsEnum(postingsEnum));
-      freqAdd += postingsEnum.freq();
-    }
-    freq = freqAdd;
-    this.docId = queue.top().postingsEnum.docID();
-  }
-
-  @Override
-  public int freq() throws IOException {
-    return freq;
-  }
-
-  /** Advances to the next position. Always returns -1; the caller is assumed not to care for the highlighter. */
-  @Override
-  public int nextPosition() throws IOException {
-    if (!firstPositionConsumed) {
-      firstPositionConsumed = true;
-    } else if (queue.size() == 0) {
-      throw new IllegalStateException("nextPosition called too many times");
-    } else if (queue.top().nextPosition()) { // advance head
-      queue.updateTop(); //the new position may be behind another postingsEnum in the queue
-    } else {
-      queue.pop(); //this postingsEnum is consumed; get rid of it. Another will take it's place.
-    }
-    assert queue.size() > 0;
-    return -1;
-  }
-
-  @Override
-  public int startOffset() throws IOException {
-    return queue.top().postingsEnum.startOffset();
-  }
-
-  @Override
-  public int endOffset() throws IOException {
-    return queue.top().postingsEnum.endOffset();
-  }
-
-  @Override
-  public BytesRef getPayload() throws IOException {
-    return queue.top().postingsEnum.getPayload();
-  }
-
-  @Override
-  public int docID() {
-    return docId;
-  }
-
-  @Override
-  public int nextDoc() throws IOException {
-    return NO_MORE_DOCS;
-  }
-
-  @Override
-  public int advance(int target) throws IOException {
-    return NO_MORE_DOCS;
-  }
-
-  @Override
-  public long cost() {
-    return 1L; //at most 1 doc is returned
-  }
-}
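This class is deleted because its job, merging several postings streams sorted by start offset, moves up a level: the OffsetsEnum.MultiOffsetsEnum added later in this commit performs the same k-way merge over whole OffsetsEnum instances. A stripped-down sketch of the shared idiom using java.util.PriorityQueue (illustrative only, not code from this commit; OffsetsEnum is Comparable by start offset, and the start/end variables stand for whatever the caller does with each hit):

    static void mergeByStartOffset(List<OffsetsEnum> enums) throws IOException {
      // k-way merge: pull the stream with the smallest current offset,
      // advance it, and re-insert it while it still has positions.
      PriorityQueue<OffsetsEnum> queue = new PriorityQueue<>();
      for (OffsetsEnum oe : enums) {
        if (oe.nextPosition()) { // position each enum on its first hit
          queue.add(oe);
        }
      }
      while (!queue.isEmpty()) {
        OffsetsEnum top = queue.poll(); // smallest startOffset()
        int start = top.startOffset();  // consume the hit...
        int end = top.endOffset();      // ...however the caller wishes
        if (top.nextPosition()) {
          queue.add(top);               // re-sort under its new offset
        } else {
          top.close();                  // exhausted; release it
        }
      }
    }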
FieldHighlighter.java
@@ -25,9 +25,7 @@ import java.util.List;
 import java.util.PriorityQueue;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
 
 /**
  * Internal highlighter abstraction that operates on a per field basis.
@@ -76,16 +74,10 @@ public class FieldHighlighter {
 
     breakIterator.setText(content);
 
-    List<OffsetsEnum> offsetsEnums = fieldOffsetStrategy.getOffsetsEnums(reader, docId, content);
-    Passage[] passages;
-    try {
-      // Highlight the offsetsEnum list against the content to produce Passages.
-      passages = highlightOffsetsEnums(offsetsEnums);// and breakIterator & scorer
-    } finally {
-      // Ensure closeable resources get closed
-      IOUtils.close(offsetsEnums);
-    }
+    try (OffsetsEnum offsetsEnums = fieldOffsetStrategy.getOffsetsEnum(reader, docId, content)) {
 
-    // Format the resulting Passages.
-    if (passages.length == 0) {
+      // Highlight the offsetsEnum list against the content to produce Passages.
+      Passage[] passages = highlightOffsetsEnums(offsetsEnums);// and breakIterator & scorer
+
+      // Format the resulting Passages.
+      if (passages.length == 0) {
@@ -99,6 +91,7 @@ public class FieldHighlighter {
         return null;
       }
     }
+  }
 
   /**
    * Called to summarize a document when no highlights were found.
@@ -118,7 +111,6 @@ public class FieldHighlighter {
         break;
       }
       Passage passage = new Passage();
-      passage.setScore(Float.NaN);
      passage.setStartOffset(pos);
      passage.setEndOffset(next);
      passages.add(passage);
@@ -131,21 +123,14 @@ public class FieldHighlighter {
   // algorithm: treat sentence snippets as miniature documents
   // we can intersect these with the postings lists via BreakIterator.preceding(offset),s
   // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
-  protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums)
+  protected Passage[] highlightOffsetsEnums(OffsetsEnum off)
       throws IOException {
-    PassageScorer scorer = passageScorer;
-    BreakIterator breakIterator = this.breakIterator;
-    final int contentLength = breakIterator.getText().getEndIndex();
-
-    //TODO consider moving this part to an aggregate OffsetsEnum subclass so we have one enum that already has its weight
-    PriorityQueue<OffsetsEnum> offsetsEnumQueue = new PriorityQueue<>(offsetsEnums.size() + 1);
-    for (OffsetsEnum off : offsetsEnums) {
-      off.setWeight(scorer.weight(contentLength, off.freq()));
-      if (off.nextPosition()) {// go to first position
-        offsetsEnumQueue.add(off);
-      }
+    final int contentLength = this.breakIterator.getText().getEndIndex();
+    if (off.nextPosition() == false) {
+      return new Passage[0];
     }
-    offsetsEnumQueue.add(new OffsetsEnum.OfPostings(new BytesRef(), EMPTY)); // a sentinel for termination
 
     PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
       if (left.getScore() < right.getScore()) {
@@ -158,25 +143,45 @@ public class FieldHighlighter {
     });
     Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
 
-    OffsetsEnum off;
-    while ((off = offsetsEnumQueue.poll()) != null) {
+    do {
       int start = off.startOffset();
       if (start == -1) {
         throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
       }
       int end = off.endOffset();
-      // LUCENE-5166: this hit would span the content limit... however more valid
-      // hits may exist (they are sorted by start). so we pretend like we never
-      // saw this term, it won't cause a passage to be added to passageQueue or anything.
-      assert EMPTY.startOffset() == Integer.MAX_VALUE;
       if (start < contentLength && end > contentLength) {
         continue;
       }
       // See if this term should be part of a new passage.
       if (start >= passage.getEndOffset()) {
-        if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
-          // finalize passage
-          passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
+        passage = maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
+        // if we exceed limit, we are done
+        if (start >= contentLength) {
+          break;
+        }
+        // advance breakIterator
+        passage.setStartOffset(Math.max(this.breakIterator.preceding(start + 1), 0));
+        passage.setEndOffset(Math.min(this.breakIterator.following(start), contentLength));
+      }
+      // Add this term to the passage.
+      BytesRef term = off.getTerm();// a reference; safe to refer to
+      assert term != null;
+      passage.addMatch(start, end, term, off.freq());
+    } while (off.nextPosition());
+    maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
+
+    Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
+    // sort in ascending order
+    Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
+    return passages;
+  }
+
+  private Passage maybeAddPassage(PriorityQueue<Passage> passageQueue, PassageScorer scorer, Passage passage, int contentLength) {
+    if (passage.getStartOffset() == -1) {
+      // empty passage, we can ignore it
+      return passage;
+    }
+    passage.setScore(scorer.score(passage, contentLength));
     // new sentence: first add 'passage' to queue
     if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
       passage.reset(); // can't compete, just reset it
@@ -189,90 +194,7 @@ public class FieldHighlighter {
       passage = new Passage();
       }
     }
-    }
-      // if we exceed limit, we are done
-      if (start >= contentLength) {
-        break;
-      }
-      // advance breakIterator
-      passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
-      passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
-      }
-      // Add this term to the passage.
-      int tf = 0;
-      while (true) {
-        tf++;
-        BytesRef term = off.getTerm();// a reference; safe to refer to
-        assert term != null;
-        passage.addMatch(start, end, term);
-        // see if there are multiple occurrences of this term in this passage. If so, add them.
-        if (!off.nextPosition()) {
-          break; // No more in the entire text. Already removed from pq; move on
-        }
-        start = off.startOffset();
-        end = off.endOffset();
-        if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
-          offsetsEnumQueue.offer(off);
-          break;
-        }
-      }
-      passage.setScore(passage.getScore() + off.getWeight() * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
-    }
+    return passage;
   }
 
-    Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
-    for (Passage p : passages) {
-      p.sort();
-    }
-    // sort in ascending order
-    Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
-    return passages;
-  }
-
-  protected static final PostingsEnum EMPTY = new PostingsEnum() {
-
-    @Override
-    public int nextPosition() throws IOException {
-      return 0;
-    }
-
-    @Override
-    public int startOffset() throws IOException {
-      return Integer.MAX_VALUE;
-    }
-
-    @Override
-    public int endOffset() throws IOException {
-      return Integer.MAX_VALUE;
-    }
-
-    @Override
-    public BytesRef getPayload() throws IOException {
-      return null;
-    }
-
-    @Override
-    public int freq() throws IOException {
-      return 0;
-    }
-
-    @Override
-    public int docID() {
-      return NO_MORE_DOCS;
-    }
-
-    @Override
-    public int nextDoc() throws IOException {
-      return NO_MORE_DOCS;
-    }
-
-    @Override
-    public int advance(int target) throws IOException {
-      return NO_MORE_DOCS;
-    }
-
-    @Override
-    public long cost() {
-      return 0;
-    }
-  };
 }
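The highlight loop above now assumes the strategy hands back exactly one OffsetsEnum that is already merged and sorted by start offset, so the sentinel EMPTY postings and the per-enum weight bookkeeping disappear. A condensed view of the new consumption pattern (paraphrased from the hunks above, not verbatim; error handling elided):

    try (OffsetsEnum off = fieldOffsetStrategy.getOffsetsEnum(reader, docId, content)) {
      if (off.nextPosition()) {          // false means no hits at all
        do {
          int start = off.startOffset(); // hits arrive in ascending start order
          int end = off.endOffset();
          // ... grow the current Passage, or finalize it via maybeAddPassage ...
        } while (off.nextPosition());
      }
    } // try-with-resources replaces the old explicit IOUtils.close(offsetsEnums)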
FieldOffsetStrategy.java
@@ -18,7 +18,6 @@ package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 
 import org.apache.lucene.index.IndexReader;
@@ -58,14 +57,15 @@ public abstract class FieldOffsetStrategy {
 
   /**
    * The primary method -- return offsets for highlightable words in the specified document.
-   * IMPORTANT: remember to close them all.
+   *
+   * Callers are expected to close the returned OffsetsEnum when it has been finished with
    */
-  public abstract List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException;
+  public abstract OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException;
 
-  protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader leafReader, int doc) throws IOException {
+  protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc) throws IOException {
     final Terms termsIndex = leafReader.terms(field);
     if (termsIndex == null) {
-      return Collections.emptyList();
+      return OffsetsEnum.EMPTY;
     }
 
     final List<OffsetsEnum> offsetsEnums = new ArrayList<>(terms.length + automata.length);
@@ -92,7 +92,7 @@ public abstract class FieldOffsetStrategy {
       createOffsetsEnumsForAutomata(termsIndex, doc, offsetsEnums);
     }
 
-    return offsetsEnums;
+    return new OffsetsEnum.MultiOffsetsEnum(offsetsEnums);
   }
 
   protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
@@ -137,14 +137,17 @@ public abstract class FieldOffsetStrategy {
     for (int i = 0; i < automata.length; i++) {
       CharacterRunAutomaton automaton = automata[i];
       List<PostingsEnum> postingsEnums = automataPostings.get(i);
-      int size = postingsEnums.size();
-      if (size > 0) { //only add if we have offsets
+      if (postingsEnums.isEmpty()) {
+        continue;
+      }
+      // Build one OffsetsEnum exposing the automata.toString as the term, and the sum of freq
       BytesRef wildcardTerm = new BytesRef(automaton.toString());
-        if (size == 1) { //don't wrap in a composite if there's only one OffsetsEnum
-          results.add(new OffsetsEnum.OfPostings(wildcardTerm, postingsEnums.get(0)));
-        } else {
-          results.add(new OffsetsEnum.OfPostings(wildcardTerm, new CompositeOffsetsPostingsEnum(postingsEnums)));
-        }
+      int sumFreq = 0;
+      for (PostingsEnum postingsEnum : postingsEnums) {
+        sumFreq += postingsEnum.freq();
       }
+      for (PostingsEnum postingsEnum : postingsEnums) {
+        results.add(new OffsetsEnum.OfPostings(wildcardTerm, sumFreq, postingsEnum));
+      }
     }
   }
MemoryIndexOffsetStrategy.java
@@ -100,7 +100,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
     // note: don't need LimitTokenOffsetFilter since content is already truncated to maxLength
     TokenStream tokenStream = tokenStream(content);
 
@@ -110,7 +110,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
     memoryIndex.addField(field, tokenStream);//note: calls tokenStream.reset() & close()
     docId = 0;
 
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
   }
NoOpOffsetStrategy.java
@@ -17,8 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.BytesRef;
@@ -43,8 +41,8 @@ public class NoOpOffsetStrategy extends FieldOffsetStrategy {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
-    return Collections.emptyList();
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+    return OffsetsEnum.EMPTY;
   }
 
 }
OffsetsEnum.java
@@ -21,21 +21,21 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
 import java.util.Objects;
+import java.util.PriorityQueue;
 
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * An enumeration/iterator of a term and its offsets for use by {@link FieldHighlighter}.
  * It is advanced and is placed in a priority queue by
- * {@link FieldHighlighter#highlightOffsetsEnums(List)} based on the start offset.
+ * {@link FieldHighlighter#highlightOffsetsEnums(OffsetsEnum)} based on the start offset.
  *
  * @lucene.internal
  */
 public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
 
-  private float weight; // set once in highlightOffsetsEnums
-
   // note: the ordering clearly changes as the postings enum advances
   // note: would be neat to use some Comparator utilities with method
   // references but our methods throw IOException
@@ -82,14 +82,6 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
 
   public abstract int endOffset() throws IOException;
 
-  public float getWeight() {
-    return weight;
-  }
-
-  public void setWeight(float weight) {
-    this.weight = weight;
-  }
-
   @Override
   public void close() throws IOException {
   }
@@ -110,12 +102,19 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
   public static class OfPostings extends OffsetsEnum {
     private final BytesRef term;
     private final PostingsEnum postingsEnum; // with offsets
+    private final int freq;
 
-    private int posCounter = 0; // the occurrence counter of this term within the text being highlighted.
+    private int posCounter = -1;
 
-    public OfPostings(BytesRef term, PostingsEnum postingsEnum) throws IOException {
+    public OfPostings(BytesRef term, int freq, PostingsEnum postingsEnum) throws IOException {
       this.term = Objects.requireNonNull(term);
       this.postingsEnum = Objects.requireNonNull(postingsEnum);
+      this.freq = freq;
+      this.posCounter = this.postingsEnum.freq();
+    }
+
+    public OfPostings(BytesRef term, PostingsEnum postingsEnum) throws IOException {
+      this(term, postingsEnum.freq(), postingsEnum);
     }
 
     public PostingsEnum getPostingsEnum() {
@@ -124,8 +123,8 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
 
     @Override
     public boolean nextPosition() throws IOException {
-      if (posCounter < postingsEnum.freq()) {
-        posCounter++;
+      if (posCounter > 0) {
+        posCounter--;
         postingsEnum.nextPosition(); // note: we don't need to save the position
         return true;
       } else {
@@ -133,11 +132,6 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
       }
     }
 
-    @Override
-    public int freq() throws IOException {
-      return postingsEnum.freq();
-    }
-
     @Override
     public BytesRef getTerm() throws IOException {
       return term;
@@ -153,5 +147,104 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
       return postingsEnum.endOffset();
     }
 
+    @Override
+    public int freq() throws IOException {
+      return freq;
+    }
+  }
+
+  /**
+   * Empty enumeration
+   */
+  public static final OffsetsEnum EMPTY = new OffsetsEnum() {
+    @Override
+    public boolean nextPosition() throws IOException {
+      return false;
+    }
+
+    @Override
+    public BytesRef getTerm() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return 0;
+    }
+  };
+
+  /**
+   * A view over several OffsetsEnum instances, merging them in-place
+   */
+  public static class MultiOffsetsEnum extends OffsetsEnum {
+
+    private final PriorityQueue<OffsetsEnum> queue;
+    private boolean started = false;
+
+    public MultiOffsetsEnum(List<OffsetsEnum> inner) throws IOException {
+      this.queue = new PriorityQueue<>();
+      for (OffsetsEnum oe : inner) {
+        if (oe.nextPosition())
+          this.queue.add(oe);
+      }
+    }
+
+    @Override
+    public boolean nextPosition() throws IOException {
+      if (started == false) {
+        started = true;
+        return this.queue.size() > 0;
+      }
+      if (this.queue.size() > 0) {
+        OffsetsEnum top = this.queue.poll();
+        if (top.nextPosition()) {
+          this.queue.add(top);
+          return true;
+        }
+        else {
+          top.close();
+        }
+        return this.queue.size() > 0;
+      }
+      return false;
+    }
+
+    @Override
+    public BytesRef getTerm() throws IOException {
+      return this.queue.peek().getTerm();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      return this.queue.peek().startOffset();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return this.queue.peek().endOffset();
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return this.queue.peek().freq();
+    }
+
+    @Override
+    public void close() throws IOException {
+      // most child enums will have been closed in .nextPosition()
+      // here all remaining non-exhausted enums are closed
+      IOUtils.close(queue);
+    }
+  }
 }
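Taken together, a strategy now builds per-term OfPostings enums and wraps them once; FieldHighlighter never sees the list. A hedged construction sketch (assume a method that declares IOException; postingsA and postingsB are hypothetical PostingsEnums already positioned on the target document with offsets available):

    List<OffsetsEnum> perTerm = new ArrayList<>();
    perTerm.add(new OffsetsEnum.OfPostings(new BytesRef("apache"), postingsA));
    perTerm.add(new OffsetsEnum.OfPostings(new BytesRef("lucene"), postingsB));
    // One merged, closeable view, ordered by start offset across both terms:
    OffsetsEnum merged = new OffsetsEnum.MultiOffsetsEnum(perTerm);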
Passage.java
@@ -19,7 +19,6 @@ package org.apache.lucene.search.uhighlight;
 
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.RamUsageEstimator;
 
 /**
@@ -38,59 +37,35 @@ public class Passage {
   private int[] matchStarts = new int[8];
   private int[] matchEnds = new int[8];
   private BytesRef[] matchTerms = new BytesRef[8];
+  private int[] matchTermFreqInDoc = new int[8];
   private int numMatches = 0;
 
   /** @lucene.internal */
-  public void addMatch(int startOffset, int endOffset, BytesRef term) {
+  public void addMatch(int startOffset, int endOffset, BytesRef term, int termFreqInDoc) {
     assert startOffset >= this.startOffset && startOffset <= this.endOffset;
     if (numMatches == matchStarts.length) {
       int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
       int newMatchStarts[] = new int[newLength];
       int newMatchEnds[] = new int[newLength];
+      int newMatchTermFreqInDoc[] = new int[newLength];
       BytesRef newMatchTerms[] = new BytesRef[newLength];
       System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
       System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
       System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
+      System.arraycopy(matchTermFreqInDoc, 0, newMatchTermFreqInDoc, 0, numMatches);
       matchStarts = newMatchStarts;
       matchEnds = newMatchEnds;
       matchTerms = newMatchTerms;
+      matchTermFreqInDoc = newMatchTermFreqInDoc;
     }
     assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
     matchStarts[numMatches] = startOffset;
     matchEnds[numMatches] = endOffset;
     matchTerms[numMatches] = term;
+    matchTermFreqInDoc[numMatches] = termFreqInDoc;
     numMatches++;
   }
 
-  /** @lucene.internal */
-  public void sort() {
-    final int starts[] = matchStarts;
-    final int ends[] = matchEnds;
-    final BytesRef terms[] = matchTerms;
-    new InPlaceMergeSorter() {
-      @Override
-      protected void swap(int i, int j) {
-        int temp = starts[i];
-        starts[i] = starts[j];
-        starts[j] = temp;
-
-        temp = ends[i];
-        ends[i] = ends[j];
-        ends[j] = temp;
-
-        BytesRef tempTerm = terms[i];
-        terms[i] = terms[j];
-        terms[j] = tempTerm;
-      }
-
-      @Override
-      protected int compare(int i, int j) {
-        return Integer.compare(starts[i], starts[j]);
-      }
-    }.sort(0, numMatches);
-  }
-
   /** @lucene.internal */
   public void reset() {
     startOffset = endOffset = -1;
@@ -136,6 +111,10 @@ public class Passage {
     return endOffset;
   }
 
+  public int getLength() {
+    return endOffset - startOffset;
+  }
+
   /**
    * Passage's score.
    */
@@ -143,6 +122,10 @@ public class Passage {
     return score;
   }
 
+  public void setScore(float score) {
+    this.score = score;
+  }
+
   /**
    * Number of term matches available in
    * {@link #getMatchStarts}, {@link #getMatchEnds},
@@ -182,6 +165,10 @@ public class Passage {
     return matchTerms;
   }
 
+  public int[] getMatchTermFreqsInDoc() {
+    return matchTermFreqInDoc;
+  }
+
   /** @lucene.internal */
   public void setStartOffset(int startOffset) {
     this.startOffset = startOffset;
@@ -193,8 +180,4 @@ public class Passage {
     this.endOffset = endOffset;
   }
 
-  /** @lucene.internal */
-  public void setScore(float score) {
-    this.score = score;
-  }
 }
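The Passage changes are mechanical but tie the rest together: addMatch() now records each match's document-level term frequency alongside start/end/term, sort() is gone (FieldHighlighter sorts passages itself via Arrays.sort), and getLength()/setScore() are exposed for PassageScorer.score(). A migration sketch (the offsets and the freq value are illustrative):

    Passage p = new Passage();
    p.setStartOffset(0);
    p.setEndOffset(42);
    // old: p.addMatch(3, 9, term);
    p.addMatch(3, 9, new BytesRef("term"), /* termFreqInDoc */ 5);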
PassageScorer.java
@@ -16,6 +16,10 @@
  */
 package org.apache.lucene.search.uhighlight;
 
+import java.util.Arrays;
+
+import org.apache.lucene.util.BytesRefHash;
+
 /**
  * Ranks passages found by {@link UnifiedHighlighter}.
  * <p>
@@ -110,4 +114,30 @@ public class PassageScorer {
   public float norm(int passageStart) {
     return 1 + 1 / (float) Math.log(pivot + passageStart);
   }
+
+  public float score(Passage passage, int contentLength) {
+    float score = 0;
+    BytesRefHash termsHash = new BytesRefHash();
+    int hitCount = passage.getNumMatches();
+    int[] termFreqsInPassage = new int[hitCount]; // maximum size
+    int[] termFreqsInDoc = new int[hitCount];
+    Arrays.fill(termFreqsInPassage, 0);
+
+    for (int i = 0; i < passage.getNumMatches(); i++) {
+      int termIndex = termsHash.add(passage.getMatchTerms()[i]);
+      if (termIndex < 0) {
+        termIndex = -(termIndex + 1);
+      }
+      else {
+        termFreqsInDoc[termIndex] = passage.getMatchTermFreqsInDoc()[i];
+      }
+      termFreqsInPassage[termIndex]++;
+    }
+
+    for (int i = 0; i < termsHash.size(); i++) {
+      score += tf(termFreqsInPassage[i], passage.getLength()) * weight(contentLength, termFreqsInDoc[i]);
+    }
+    score *= norm(passage.getStartOffset());
+    return score;
+  }
 }
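The new score(Passage, int) method folds what FieldHighlighter used to compute incrementally (setWeight/getWeight plus a running tf term) into one place. In effect, for each distinct term t occurring in the passage it computes

    score(passage) = norm(passageStart)
                     * sum over distinct t of: tf(freqInPassage(t), passageLength) * weight(contentLength, freqInDoc(t))

using BytesRefHash to deduplicate matched terms. Note the branch logic: BytesRefHash.add returns a non-negative id for a newly added key and -(id+1) for an existing one, so termFreqsInDoc[termIndex] is assigned only on a term's first sighting, while termFreqsInPassage counts every occurrence.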
PostingsOffsetStrategy.java
@@ -40,7 +40,7 @@ public class PostingsOffsetStrategy extends FieldOffsetStrategy {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
     final LeafReader leafReader;
     if (reader instanceof LeafReader) {
       leafReader = (LeafReader) reader;
@@ -51,7 +51,7 @@ public class PostingsOffsetStrategy extends FieldOffsetStrategy {
       docId -= leafReaderContext.docBase; // adjust 'doc' to be within this leaf reader
     }
 
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
   }
PostingsWithTermVectorsOffsetStrategy.java
@@ -17,7 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.Collections;
 import java.util.List;
 
 import org.apache.lucene.index.IndexReader;
@@ -40,7 +39,7 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
     LeafReader leafReader;
     if (reader instanceof LeafReader) {
       leafReader = (LeafReader) reader;
@@ -53,11 +52,11 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
 
     Terms docTerms = leafReader.getTermVector(docId, field);
     if (docTerms == null) {
-      return Collections.emptyList();
+      return OffsetsEnum.EMPTY;
     }
     leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms);
 
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
   }
 
   @Override
TermVectorOffsetStrategy.java
@@ -17,8 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
@@ -44,16 +42,16 @@ public class TermVectorOffsetStrategy extends FieldOffsetStrategy {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
     Terms tvTerms = reader.getTermVector(docId, field);
     if (tvTerms == null) {
-      return Collections.emptyList();
+      return OffsetsEnum.EMPTY;
     }
 
     LeafReader leafReader = new TermVectorLeafReader(field, tvTerms);
     docId = 0;
 
-    return createOffsetsEnumsFromReader(leafReader, docId);
+    return createOffsetsEnumFromReader(leafReader, docId);
   }
 
 }
TokenStreamOffsetStrategy.java
@@ -17,8 +17,6 @@
 package org.apache.lucene.search.uhighlight;
 
 import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -60,8 +58,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
-    return Collections.singletonList(new TokenStreamOffsetsEnum(tokenStream(content), automata));
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+    return new TokenStreamOffsetsEnum(tokenStream(content), automata);
   }
 
   private static class TokenStreamOffsetsEnum extends OffsetsEnum {
@@ -106,6 +104,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
       return Integer.MAX_VALUE; // lie
     }
 
+
     @Override
     public int startOffset() throws IOException {
       return offsetAtt.startOffset();
TestUnifiedHighlighterRanking.java
@@ -16,6 +16,10 @@
  */
 package org.apache.lucene.search.uhighlight;
 
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Random;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -39,10 +43,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Random;
-
 public class TestUnifiedHighlighterRanking extends LuceneTestCase {
 
   Analyzer indexAnalyzer;
TestUnifiedHighlighterExtensibility.java
@@ -19,7 +19,6 @@ package org.apache.lucene.search.uhighlight.visibility;
 
 import java.io.IOException;
 import java.text.BreakIterator;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -69,13 +68,13 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
   }
 
   @Override
-  public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
-    return Collections.emptyList();
+  public OffsetsEnum getOffsetsEnum(IndexReader reader, int docId, String content) throws IOException {
+    return OffsetsEnum.EMPTY;
   }
 
   @Override
-  protected List<OffsetsEnum> createOffsetsEnumsFromReader(LeafReader leafReader, int doc) throws IOException {
-    return super.createOffsetsEnumsFromReader(leafReader, doc);
+  protected OffsetsEnum createOffsetsEnumFromReader(LeafReader leafReader, int doc) throws IOException {
+    return super.createOffsetsEnumFromReader(leafReader, doc);
   }
 
 };
@@ -193,7 +192,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
     final String fieldName = "fieldName";
     FieldHighlighter fieldHighlighter = new FieldHighlighter(fieldName, null, null, null, 1, 1, null) {
       @Override
-      protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
+      protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
         return super.highlightOffsetsEnums(offsetsEnums);
       }
     };
@@ -213,31 +212,29 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
     }
 
     @Override
-    protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
+    protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
       // TEST OffsetsEnums & Passage visibility
 
       // this code never runs; just for compilation
       Passage p;
-      try (OffsetsEnum oe = new OffsetsEnum.OfPostings(null, EMPTY)) {
+      try (OffsetsEnum oe = new OffsetsEnum.OfPostings(null, null)) {
         oe.getTerm();
-        oe.freq();
         oe.nextPosition();
         oe.startOffset();
         oe.endOffset();
-        oe.getWeight();
-        oe.setWeight(2f);
+        oe.freq();
       }
 
       p = new Passage();
       p.setStartOffset(0);
       p.setEndOffset(9);
-      p.setScore(1f);
-      p.addMatch(1, 2, new BytesRef());
+      p.addMatch(1, 2, new BytesRef(), 1);
       p.reset();
-      p.sort();
+      p.setScore(1);
       //... getters are all exposed; custom PassageFormatter impls uses them
 
       return super.highlightOffsetsEnums(offsetsEnums);
     }
   }
 
 }