LUCENE-6421: revert

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1673577 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-04-14 23:02:53 +00:00
parent d185168be9
commit 0aa98d464c
3 changed files with 203 additions and 173 deletions

View File

@ -77,9 +77,6 @@ Optimizations
* LUCENE-6388: Optimize SpanNearQuery when payloads are not present.
(Robert Muir)
* LUCENE-6421: Defer reading of positions in MultiPhraseQuery until
they are needed. (Robert Muir)
Bug Fixes
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.

View File

@ -200,30 +200,45 @@ public class MultiPhraseQuery extends Query {
for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos);
List<PostingsEnum> postings = new ArrayList<>();
for (Term term : terms) {
final PostingsEnum postingsEnum;
int docFreq;
if (terms.length > 1) {
postingsEnum = new UnionPostingsEnum(liveDocs, context, terms, termContexts, termsEnum);
// coarse -- this overcounts since a given doc can
// have more than one term:
docFreq = 0;
for(int termIdx=0;termIdx<terms.length;termIdx++) {
final Term term = terms[termIdx];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
docFreq += termsEnum.docFreq();
}
if (docFreq == 0) {
// None of the terms are in this reader
return null;
}
} else {
final Term term = terms[0];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
return null;
}
termsEnum.seekExact(term.bytes(), termState);
postings.add(termsEnum.postings(liveDocs, null, PostingsEnum.POSITIONS));
postingsEnum = termsEnum.postings(liveDocs, null, PostingsEnum.POSITIONS);
docFreq = termsEnum.docFreq();
}
if (postings.isEmpty()) {
return null;
}
final PostingsEnum postingsEnum;
if (postings.size() == 1) {
postingsEnum = postings.get(0);
} else {
postingsEnum = new UnionPostingsEnum(postings);
}
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, positions.get(pos).intValue(), terms);
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
}
// sort by increasing docFreq order
@ -383,116 +398,26 @@ public class MultiPhraseQuery extends Query {
}
return true;
}
/**
* Takes the logical union of multiple PostingsEnum iterators.
* <p>
* Note: positions are merged during freq()
*/
static class UnionPostingsEnum extends PostingsEnum {
/** queue ordered by docid */
final DocsQueue docsQueue;
/** cost of this enum: sum of its subs */
final long cost;
/** queue ordered by position for current doc */
final PositionsQueue posQueue = new PositionsQueue();
/** current doc posQueue is working */
int posQueueDoc = -2;
/** list of subs (unordered) */
final PostingsEnum[] subs;
UnionPostingsEnum(Collection<PostingsEnum> subs) {
docsQueue = new DocsQueue(subs.size());
long cost = 0;
for (PostingsEnum sub : subs) {
docsQueue.add(sub);
cost += sub.cost();
}
this.cost = cost;
this.subs = subs.toArray(new PostingsEnum[subs.size()]);
}
@Override
public int freq() throws IOException {
int doc = docID();
if (doc != posQueueDoc) {
posQueue.clear();
for (PostingsEnum sub : subs) {
if (sub.docID() == doc) {
int freq = sub.freq();
for (int i = 0; i < freq; i++) {
posQueue.add(sub.nextPosition());
}
}
}
posQueue.sort();
posQueueDoc = doc;
}
return posQueue.size();
}
@Override
public int nextPosition() throws IOException {
return posQueue.next();
}
@Override
public int docID() {
return docsQueue.top().docID();
}
@Override
public int nextDoc() throws IOException {
PostingsEnum top = docsQueue.top();
int doc = top.docID();
do {
top.nextDoc();
top = docsQueue.updateTop();
} while (top.docID() == doc);
return top.docID();
}
@Override
public int advance(int target) throws IOException {
PostingsEnum top = docsQueue.top();
do {
top.advance(target);
top = docsQueue.updateTop();
} while (top.docID() < target);
return top.docID();
}
@Override
public long cost() {
return cost;
}
@Override
public int startOffset() throws IOException {
return -1; // offsets are unsupported
}
@Override
public int endOffset() throws IOException {
return -1; // offsets are unsupported
}
@Override
public BytesRef getPayload() throws IOException {
return null; // payloads are unsupported
}
/**
* disjunction of postings ordered by docid.
* Takes the logical union of multiple DocsEnum iterators.
*/
static class DocsQueue extends PriorityQueue<PostingsEnum> {
DocsQueue(int size) {
super(size);
// TODO: if ever we allow subclassing of the *PhraseScorer
class UnionPostingsEnum extends PostingsEnum {
private static final class DocsQueue extends PriorityQueue<PostingsEnum> {
DocsQueue(List<PostingsEnum> postingsEnums) throws IOException {
super(postingsEnums.size());
Iterator<PostingsEnum> i = postingsEnums.iterator();
while (i.hasNext()) {
PostingsEnum postings = i.next();
if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
add(postings);
}
}
}
@Override
@ -501,46 +426,147 @@ public class MultiPhraseQuery extends Query {
}
}
/**
* queue of terms for a single document. its a sorted array of
* all the positions from all the postings
*/
static class PositionsQueue {
private int arraySize = 16;
private int index = 0;
private int size = 0;
private int[] array = new int[arraySize];
private static final class IntQueue {
private int _arraySize = 16;
private int _index = 0;
private int _lastIndex = 0;
private int[] _array = new int[_arraySize];
void add(int i) {
if (size == arraySize)
final void add(int i) {
if (_lastIndex == _arraySize)
growArray();
array[size++] = i;
_array[_lastIndex++] = i;
}
int next() {
return array[index++];
final int next() {
return _array[_index++];
}
void sort() {
Arrays.sort(array, index, size);
final void sort() {
Arrays.sort(_array, _index, _lastIndex);
}
void clear() {
index = 0;
size = 0;
final void clear() {
_index = 0;
_lastIndex = 0;
}
int size() {
return size;
final int size() {
return (_lastIndex - _index);
}
private void growArray() {
int[] newArray = new int[arraySize * 2];
System.arraycopy(array, 0, newArray, 0, arraySize);
array = newArray;
arraySize *= 2;
int[] newArray = new int[_arraySize * 2];
System.arraycopy(_array, 0, newArray, 0, _arraySize);
_array = newArray;
_arraySize *= 2;
}
}
private int _doc = -1;
private int _freq;
private DocsQueue _queue;
private IntQueue _posList;
private long cost;
public UnionPostingsEnum(Bits liveDocs, LeafReaderContext context, Term[] terms, Map<Term, TermContext> termContexts, TermsEnum termsEnum) throws IOException {
List<PostingsEnum> postingsEnums = new LinkedList<>();
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term doesn't exist in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
PostingsEnum postings = termsEnum.postings(liveDocs, null, PostingsEnum.POSITIONS);
cost += postings.cost();
postingsEnums.add(postings);
}
_queue = new DocsQueue(postingsEnums);
_posList = new IntQueue();
}
@Override
public final int nextDoc() throws IOException {
if (_queue.size() == 0) {
return _doc = NO_MORE_DOCS;
}
// TODO: move this init into positions(): if the search
// doesn't need the positions for this doc then don't
// waste CPU merging them:
_posList.clear();
_doc = _queue.top().docID();
// merge sort all positions together
PostingsEnum postings;
do {
postings = _queue.top();
final int freq = postings.freq();
for (int i = 0; i < freq; i++) {
_posList.add(postings.nextPosition());
}
if (postings.nextDoc() != NO_MORE_DOCS) {
_queue.updateTop();
} else {
_queue.pop();
}
} while (_queue.size() > 0 && _queue.top().docID() == _doc);
_posList.sort();
_freq = _posList.size();
return _doc;
}
@Override
public int nextPosition() {
return _posList.next();
}
@Override
public int startOffset() {
return -1;
}
@Override
public int endOffset() {
return -1;
}
@Override
public BytesRef getPayload() {
return null;
}
@Override
public final int advance(int target) throws IOException {
while (_queue.top() != null && target > _queue.top().docID()) {
PostingsEnum postings = _queue.pop();
if (postings.advance(target) != NO_MORE_DOCS) {
_queue.add(postings);
}
}
return nextDoc();
}
@Override
public final int freq() {
return _freq;
}
@Override
public final int docID() {
return _doc;
}
@Override
public long cost() {
return cost;
}
}

View File

@ -174,12 +174,14 @@ public class PhraseQuery extends Query {
static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
final PostingsEnum postings;
final int docFreq;
final int position;
final Term[] terms;
final int nTerms; // for faster comparisons
public PostingsAndFreq(PostingsEnum postings, int position, Term... terms) {
public PostingsAndFreq(PostingsEnum postings, int docFreq, int position, Term... terms) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
nTerms = terms==null ? 0 : terms.length;
if (nTerms>0) {
@ -198,6 +200,9 @@ public class PhraseQuery extends Query {
@Override
public int compareTo(PostingsAndFreq other) {
if (docFreq != other.docFreq) {
return docFreq - other.docFreq;
}
if (position != other.position) {
return position - other.position;
}
@ -218,6 +223,7 @@ public class PhraseQuery extends Query {
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + docFreq;
result = prime * result + position;
for (int i=0; i<nTerms; i++) {
result = prime * result + terms[i].hashCode();
@ -231,6 +237,7 @@ public class PhraseQuery extends Query {
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
PostingsAndFreq other = (PostingsAndFreq) obj;
if (docFreq != other.docFreq) return false;
if (position != other.position) return false;
if (terms == null) return other.terms == null;
return Arrays.equals(terms, other.terms);
@ -306,7 +313,7 @@ public class PhraseQuery extends Query {
}
te.seekExact(t.bytes(), state);
PostingsEnum postingsEnum = te.postings(liveDocs, null, PostingsEnum.POSITIONS);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions.get(i), t);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i), t);
}
// sort by increasing docFreq order