add and/not/or docid sets, not just docsets, and improve caching behavior
commit ca67c12de5
parent 8fef3df16f
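For context, a minimal usage sketch of the new iterator-only conjunction added below (this example is not part of the commit; it assumes Lucene's OpenBitSet, which extends DocIdSet, for the inputs). Intersecting {1, 3, 5, 7} with {2, 3, 7, 8} should yield 3 and 7:

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import org.elasticsearch.common.lucene.docset.AndDocIdSet;

import java.io.IOException;
import java.util.Arrays;

public class AndDocIdSetExample {
    public static void main(String[] args) throws IOException {
        OpenBitSet a = new OpenBitSet(16);
        a.set(1); a.set(3); a.set(5); a.set(7);
        OpenBitSet b = new OpenBitSet(16);
        b.set(2); b.set(3); b.set(7); b.set(8);

        // AndDocIdSet leapfrogs its sub-iterators, returning only doc ids present in every set.
        DocIdSetIterator it = new AndDocIdSet(Arrays.<DocIdSet>asList(a, b)).iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            System.out.println(doc); // prints 3, then 7
        }
    }
}

The same consumption pattern applies to the OrDocIdSet and NotDocIdSet classes introduced further down.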
@@ -0,0 +1,135 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.lucene.docset;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;
import java.util.List;

/**
 * @author kimchy (shay.banon)
 */
public class AndDocIdSet extends DocIdSet {

    private final List<DocIdSet> sets;

    public AndDocIdSet(List<DocIdSet> sets) {
        this.sets = sets;
    }

    @Override public boolean isCacheable() {
        // not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
        // so if someone wants it to be cacheable, we might as well construct a cached version of the result
        return false;
//        for (DocIdSet set : sets) {
//            if (!set.isCacheable()) {
//                return false;
//            }
//        }
//        return true;
    }

    @Override public DocIdSetIterator iterator() throws IOException {
        return new AndDocIdSetIterator();
    }

    class AndDocIdSetIterator extends DocIdSetIterator {
        int lastReturn = -1;
        private DocIdSetIterator[] iterators = null;

        AndDocIdSetIterator() throws IOException {
            iterators = new DocIdSetIterator[sets.size()];
            int j = 0;
            for (DocIdSet set : sets) {
                if (set != null) {
                    DocIdSetIterator dcit = set.iterator();
                    iterators[j++] = dcit;
                }
            }
            lastReturn = (iterators.length > 0 ? -1 : DocIdSetIterator.NO_MORE_DOCS);
        }

        @Override
        public final int docID() {
            return lastReturn;
        }

        @Override
        public final int nextDoc() throws IOException {

            if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;

            DocIdSetIterator dcit = iterators[0];
            int target = dcit.nextDoc();
            int size = iterators.length;
            int skip = 0;
            int i = 1;
            while (i < size) {
                if (i != skip) {
                    dcit = iterators[i];
                    int docid = dcit.advance(target);
                    if (docid > target) {
                        target = docid;
                        if (i != 0) {
                            skip = i;
                            i = 0;
                            continue;
                        } else
                            skip = 0;
                    }
                }
                i++;
            }
            return (lastReturn = target);
        }

        @Override
        public final int advance(int target) throws IOException {

            if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;

            DocIdSetIterator dcit = iterators[0];
            target = dcit.advance(target);
            int size = iterators.length;
            int skip = 0;
            int i = 1;
            while (i < size) {
                if (i != skip) {
                    dcit = iterators[i];
                    int docid = dcit.advance(target);
                    if (docid > target) {
                        target = docid;
                        if (i != 0) {
                            skip = i;
                            i = 0;
                            continue;
                        } else {
                            skip = 0;
                        }
                    }
                }
                i++;
            }
            return (lastReturn = target);
        }
    }
}
@@ -44,12 +44,15 @@ public class AndDocSet extends DocSet {
     }
 
     @Override public boolean isCacheable() {
-        for (DocSet set : sets) {
-            if (!set.isCacheable()) {
-                return false;
-            }
-        }
-        return true;
+        // not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
+        // so if someone wants it to be cacheable, we might as well construct a cached version of the result
+        return false;
+//        for (DocSet set : sets) {
+//            if (!set.isCacheable()) {
+//                return false;
+//            }
+//        }
+//        return true;
     }
 
     @Override public DocIdSetIterator iterator() throws IOException {
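The isCacheable() comment refers to building a reusable copy of a filter result rather than caching the lazy composite set. A minimal sketch of that idea, assuming Lucene's OpenBitSet as the materialized representation; this helper is hypothetical and not part of the commit:

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;

import java.io.IOException;

public class CachedDocIdSets {
    // Materialize a (possibly lazy, non-cacheable) DocIdSet into a bit set that is cheap to reuse.
    public static DocIdSet cacheable(DocIdSet set, int maxDoc) throws IOException {
        OpenBitSet bits = new OpenBitSet(maxDoc);
        DocIdSetIterator it = set.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            bits.set(doc); // record every matching doc id once
        }
        return bits; // a plain bit set can safely be reused across requests
    }
}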
@@ -0,0 +1,110 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.lucene.docset;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;

/**
 * @author kimchy (shay.banon)
 */
public class NotDocIdSet extends DocIdSet {

    private final DocIdSet set;

    private final int max;

    public NotDocIdSet(DocIdSet set, int max) {
        this.max = max;
        this.set = set;
    }

    @Override public boolean isCacheable() {
        // not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
        // so if someone wants it to be cacheable, we might as well construct a cached version of the result
        return false;
//        return set.isCacheable();
    }

    @Override public DocIdSetIterator iterator() throws IOException {
        return new NotDocIdSetIterator();
    }

    class NotDocIdSetIterator extends DocIdSetIterator {
        int lastReturn = -1;
        private DocIdSetIterator it1 = null;
        private int innerDocid = -1;

        NotDocIdSetIterator() throws IOException {
            initialize();
        }

        private void initialize() throws IOException {
            it1 = set.iterator();

            if ((innerDocid = it1.nextDoc()) == DocIdSetIterator.NO_MORE_DOCS) it1 = null;
        }

        @Override
        public int docID() {
            return lastReturn;
        }

        @Override
        public int nextDoc() throws IOException {
            return advance(0);
        }

        @Override
        public int advance(int target) throws IOException {

            if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) {
                return DocIdSetIterator.NO_MORE_DOCS;
            }

            if (target <= lastReturn) target = lastReturn + 1;

            if (it1 != null && innerDocid < target) {
                if ((innerDocid = it1.advance(target)) == DocIdSetIterator.NO_MORE_DOCS) {
                    it1 = null;
                }
            }

            while (it1 != null && innerDocid == target) {
                target++;
                if (target >= max) {
                    return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
                }
                if ((innerDocid = it1.advance(target)) == DocIdSetIterator.NO_MORE_DOCS) {
                    it1 = null;
                }
            }

            // ADDED THIS, bug in original code
            if (target >= max) {
                return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
            }

            return (lastReturn = target);
        }
    }
}
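A small worked example for the complement iterator above (not part of the commit, and again assuming Lucene's OpenBitSet for the wrapped set): with max = 4 and an underlying set of {0, 2}, the iterator should produce 1 and 3.

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import org.elasticsearch.common.lucene.docset.NotDocIdSet;

import java.io.IOException;

public class NotDocIdSetExample {
    public static void main(String[] args) throws IOException {
        OpenBitSet excluded = new OpenBitSet(4);
        excluded.set(0);
        excluded.set(2);

        // Iterates every doc id in [0, max) that is NOT present in the wrapped set.
        DocIdSetIterator it = new NotDocIdSet(excluded, 4).iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            System.out.println(doc); // prints 1, then 3
        }
    }
}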
@@ -34,7 +34,10 @@ public class NotDocSet extends GetDocSet {
     }
 
     @Override public boolean isCacheable() {
-        return set.isCacheable();
+        // not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
+        // so if someone wants it to be cacheable, we might as well construct a cached version of the result
+        return false;
+//        return set.isCacheable();
     }
 
     @Override public boolean get(int doc) throws IOException {
@@ -0,0 +1,195 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.lucene.docset;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;
import java.util.List;

/**
 * @author kimchy (shay.banon)
 */
public class OrDocIdSet extends DocIdSet {

    private final List<DocIdSet> sets;

    public OrDocIdSet(List<DocIdSet> sets) {
        this.sets = sets;
    }

    @Override public boolean isCacheable() {
        // not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
        // so if someone wants it to be cacheable, we might as well construct a cached version of the result
        return false;
//        for (DocIdSet set : sets) {
//            if (!set.isCacheable()) {
//                return false;
//            }
//        }
//        return true;
    }

    @Override public DocIdSetIterator iterator() throws IOException {
        return new OrDocIdSetIterator();
    }

    public class OrDocIdSetIterator extends DocIdSetIterator {

        private final class Item {
            public final DocIdSetIterator iter;
            public int doc;

            public Item(DocIdSetIterator iter) {
                this.iter = iter;
                this.doc = -1;
            }
        }

        private int _curDoc;
        private final Item[] _heap;
        private int _size;

        OrDocIdSetIterator() throws IOException {
            _curDoc = -1;
            _heap = new Item[sets.size()];
            _size = 0;
            for (DocIdSet set : sets) {
                _heap[_size++] = new Item(set.iterator());
            }
            if (_size == 0) _curDoc = DocIdSetIterator.NO_MORE_DOCS;
        }

        @Override
        public final int docID() {
            return _curDoc;
        }

        @Override
        public final int nextDoc() throws IOException {
            if (_curDoc == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;

            Item top = _heap[0];
            while (true) {
                DocIdSetIterator topIter = top.iter;
                int docid;
                if ((docid = topIter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    top.doc = docid;
                    heapAdjust();
                } else {
                    heapRemoveRoot();
                    if (_size == 0) return (_curDoc = DocIdSetIterator.NO_MORE_DOCS);
                }
                top = _heap[0];
                int topDoc = top.doc;
                if (topDoc > _curDoc) {
                    return (_curDoc = topDoc);
                }
            }
        }

        @Override
        public final int advance(int target) throws IOException {
            if (_curDoc == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;

            if (target <= _curDoc) target = _curDoc + 1;

            Item top = _heap[0];
            while (true) {
                DocIdSetIterator topIter = top.iter;
                int docid;
                if ((docid = topIter.advance(target)) != DocIdSetIterator.NO_MORE_DOCS) {
                    top.doc = docid;
                    heapAdjust();
                } else {
                    heapRemoveRoot();
                    if (_size == 0) return (_curDoc = DocIdSetIterator.NO_MORE_DOCS);
                }
                top = _heap[0];
                int topDoc = top.doc;
                if (topDoc >= target) {
                    return (_curDoc = topDoc);
                }
            }
        }

        // Organize subScorers into a min heap with scorers generating the earliest document on top.
        /*
        private final void heapify() {
            int size = _size;
            for (int i = (size >> 1) - 1; i >= 0; i--)
                heapAdjust(i);
        }
        */
        /* The subtree of subScorers at root is a min heap except possibly for its root element.
         * Bubble the root down as required to make the subtree a heap.
         */

        private final void heapAdjust() {
            final Item[] heap = _heap;
            final Item top = heap[0];
            final int doc = top.doc;
            final int size = _size;
            int i = 0;

            while (true) {
                int lchild = (i << 1) + 1;
                if (lchild >= size) break;

                Item left = heap[lchild];
                int ldoc = left.doc;

                int rchild = lchild + 1;
                if (rchild < size) {
                    Item right = heap[rchild];
                    int rdoc = right.doc;

                    if (rdoc <= ldoc) {
                        if (doc <= rdoc) break;

                        heap[i] = right;
                        i = rchild;
                        continue;
                    }
                }

                if (doc <= ldoc) break;

                heap[i] = left;
                i = lchild;
            }
            heap[i] = top;
        }

        // Remove the root Scorer from subScorers and re-establish it as a heap

        private void heapRemoveRoot() {
            _size--;
            if (_size > 0) {
                Item tmp = _heap[0];
                _heap[0] = _heap[_size];
                _heap[_size] = tmp; // keep the finished iterator at the end for debugging
                heapAdjust();
            }
        }

    }
}
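A small worked example for the heap-based union above (not part of the commit, assuming OpenBitSet inputs): {1, 5, 9} OR {2, 5} iterates 1, 2, 5, 9, emitting the shared doc id 5 only once.

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import org.elasticsearch.common.lucene.docset.OrDocIdSet;

import java.io.IOException;
import java.util.Arrays;

public class OrDocIdSetExample {
    public static void main(String[] args) throws IOException {
        OpenBitSet a = new OpenBitSet(16);
        a.set(1); a.set(5); a.set(9);
        OpenBitSet b = new OpenBitSet(16);
        b.set(2); b.set(5);

        // The iterator keeps a min-heap of sub-iterators keyed by their current doc id,
        // always emitting the smallest not-yet-returned doc id across all sets.
        DocIdSetIterator it = new OrDocIdSet(Arrays.<DocIdSet>asList(a, b)).iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            System.out.println(doc); // prints 1, 2, 5, 9
        }
    }
}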
@@ -37,10 +37,13 @@ public class OrDocSet extends DocSet {
     }
 
     @Override public boolean get(int doc) throws IOException {
-        for (DocSet s : sets) {
-            if (s.get(doc)) return true;
-        }
+        // not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
+        // so if someone wants it to be cacheable, we might as well construct a cached version of the result
         return false;
+//        for (DocSet s : sets) {
+//            if (s.get(doc)) return true;
+//        }
+//        return false;
     }
 
     @Override public boolean isCacheable() {
@@ -23,9 +23,9 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.elasticsearch.common.collect.Lists;
+import org.elasticsearch.common.lucene.docset.AndDocIdSet;
 import org.elasticsearch.common.lucene.docset.AndDocSet;
 import org.elasticsearch.common.lucene.docset.DocSet;
-import org.elasticsearch.common.lucene.docset.DocSets;
 
 import java.io.IOException;
 import java.util.List;
@@ -47,13 +47,21 @@ public class AndFilter extends Filter {
 
     @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
         if (filters.size() == 1) {
-            return DocSets.convert(reader, filters.get(0).getDocIdSet(reader));
+            return filters.get(0).getDocIdSet(reader);
         }
-        List<DocSet> sets = Lists.newArrayListWithExpectedSize(filters.size());
+        List sets = Lists.newArrayListWithExpectedSize(filters.size());
+        boolean allAreDocSet = true;
         for (Filter filter : filters) {
-            sets.add(DocSets.convert(reader, filter.getDocIdSet(reader)));
+            DocIdSet set = filter.getDocIdSet(reader);
+            if (!(set instanceof DocSet)) {
+                allAreDocSet = false;
+            }
+            sets.add(set);
         }
-        return new AndDocSet(sets);
+        if (allAreDocSet) {
+            return new AndDocSet(sets);
+        }
+        return new AndDocIdSet(sets);
     }
 
     @Override public int hashCode() {
@@ -22,7 +22,8 @@ package org.elasticsearch.common.lucene.search;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
-import org.elasticsearch.common.lucene.docset.DocSets;
+import org.elasticsearch.common.lucene.docset.DocSet;
+import org.elasticsearch.common.lucene.docset.NotDocIdSet;
 import org.elasticsearch.common.lucene.docset.NotDocSet;
 
 import java.io.IOException;
@@ -43,7 +44,11 @@ public class NotFilter extends Filter {
     }
 
     @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-        return new NotDocSet(DocSets.convert(reader, filter.getDocIdSet(reader)), reader.maxDoc());
+        DocIdSet set = filter.getDocIdSet(reader);
+        if (set instanceof DocSet) {
+            return new NotDocSet((DocSet) set, reader.maxDoc());
+        }
+        return new NotDocIdSet(set, reader.maxDoc());
     }
 
     @Override
@@ -24,7 +24,7 @@ import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.elasticsearch.common.collect.Lists;
 import org.elasticsearch.common.lucene.docset.DocSet;
-import org.elasticsearch.common.lucene.docset.DocSets;
+import org.elasticsearch.common.lucene.docset.OrDocIdSet;
 import org.elasticsearch.common.lucene.docset.OrDocSet;
 
 import java.io.IOException;
@@ -47,13 +47,21 @@ public class OrFilter extends Filter {
 
     @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
         if (filters.size() == 1) {
-            return DocSets.convert(reader, filters.get(0).getDocIdSet(reader));
+            return filters.get(0).getDocIdSet(reader);
         }
-        List<DocSet> sets = Lists.newArrayListWithExpectedSize(filters.size());
+        List sets = Lists.newArrayListWithExpectedSize(filters.size());
+        boolean allAreDocSet = true;
         for (Filter filter : filters) {
-            sets.add(DocSets.convert(reader, filter.getDocIdSet(reader)));
+            DocIdSet set = filter.getDocIdSet(reader);
+            if (!(set instanceof DocSet)) {
+                allAreDocSet = false;
+            }
+            sets.add(set);
         }
-        return new OrDocSet(sets);
+        if (allAreDocSet) {
+            return new OrDocSet(sets);
+        }
+        return new OrDocIdSet(sets);
     }
 
     @Override public int hashCode() {