mirror of https://github.com/apache/lucene.git
LUCENE-1506: add FilteredDocIdSet
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@740361 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
62d56d9410
commit
35d2b765be
|
@ -149,6 +149,13 @@ New features
|
||||||
reopen you can change the readOnly of the original reader. (Jason
|
reopen you can change the readOnly of the original reader. (Jason
|
||||||
Rutherglen, Mike McCandless)
|
Rutherglen, Mike McCandless)
|
||||||
|
|
||||||
|
14. LUCENE-1506: Added FilteredDocIdSet, an abstract class which you
|
||||||
|
subclass to implement the "match" method to accept or reject each
|
||||||
|
docID. Unlike ChainedFilter (under contrib/misc),
|
||||||
|
FilteredDocIdSet never requires you to materialize the full
|
||||||
|
bitset. Instead, match() is called on demand per docID. (John
|
||||||
|
Wang via Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract decorator class for a DocIdSet implementation
|
||||||
|
* that provides on-demand filtering/validation
|
||||||
|
* mechanism on a given DocIdSet.
|
||||||
|
*
|
||||||
|
* <p/>
|
||||||
|
*
|
||||||
|
* Technically, this same functionality could be achieved
|
||||||
|
* with ChainedFilter (under contrib/misc), however the
|
||||||
|
* benefit of this class is it never materializes the full
|
||||||
|
* bitset for the filter. Instead, the {@link #match}
|
||||||
|
* method is invoked on-demand, per docID visited during
|
||||||
|
* searching. If you know few docIDs will be visited, and
|
||||||
|
* the logic behind {@link #match} is relatively costly,
|
||||||
|
* this may be a better way to filter than ChainedFilter.
|
||||||
|
*
|
||||||
|
* @see DocIdSet
|
||||||
|
*/
|
||||||
|
|
||||||
|
public abstract class FilteredDocIdSet extends DocIdSet {
|
||||||
|
private final DocIdSet _innerSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
* @param innerSet Underlying DocIdSet
|
||||||
|
*/
|
||||||
|
public FilteredDocIdSet(DocIdSet innerSet) {
|
||||||
|
_innerSet = innerSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validation method to determine whether a docid should be in the result set.
|
||||||
|
* @param docid docid to be tested
|
||||||
|
* @return true if input docid should be in the result set, false otherwise.
|
||||||
|
*/
|
||||||
|
protected abstract boolean match(int docid);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of the contract to build a DocIdSetIterator.
|
||||||
|
* @see DocIdSetIterator
|
||||||
|
* @see FilteredDocIdSetIterator
|
||||||
|
*/
|
||||||
|
// @Override
|
||||||
|
public DocIdSetIterator iterator() throws IOException {
|
||||||
|
return new FilteredDocIdSetIterator(_innerSet.iterator()) {
|
||||||
|
protected boolean match(int docid) {
|
||||||
|
return FilteredDocIdSet.this.match(docid);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,91 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract decorator class of a DocIdSetIterator
|
||||||
|
* implementation that provides on-demand filter/validation
|
||||||
|
* mechanism on an underlying DocIdSetIterator. See {@link
|
||||||
|
* FilteredDocIdSet}.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {
|
||||||
|
protected DocIdSetIterator _innerIter;
|
||||||
|
private int _currentDoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
* @param innerIter Underlying DocIdSetIterator.
|
||||||
|
*/
|
||||||
|
public FilteredDocIdSetIterator(DocIdSetIterator innerIter) {
|
||||||
|
if (innerIter == null) {
|
||||||
|
throw new IllegalArgumentException("null iterator");
|
||||||
|
}
|
||||||
|
_innerIter = innerIter;
|
||||||
|
_currentDoc = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validation method to determine whether a docid should be in the result set.
|
||||||
|
* @param docid docid to be tested
|
||||||
|
* @return true if input docid should be in the result set, false otherwise.
|
||||||
|
* @see #FilteredDocIdSetIterator(DocIdSetIterator).
|
||||||
|
*/
|
||||||
|
abstract protected boolean match(int doc);
|
||||||
|
|
||||||
|
// @Override
|
||||||
|
public final int doc() {
|
||||||
|
return _currentDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override
|
||||||
|
public final boolean next() throws IOException{
|
||||||
|
while (_innerIter.next()) {
|
||||||
|
int doc = _innerIter.doc();
|
||||||
|
if (match(doc)) {
|
||||||
|
_currentDoc = doc;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override
|
||||||
|
public final boolean skipTo(int n) throws IOException{
|
||||||
|
boolean flag = _innerIter.skipTo(n);
|
||||||
|
if (flag) {
|
||||||
|
int doc = _innerIter.doc();
|
||||||
|
if (match(doc)) {
|
||||||
|
_currentDoc = doc;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
while (_innerIter.next()) {
|
||||||
|
int docid = _innerIter.doc();
|
||||||
|
if (match(docid)) {
|
||||||
|
_currentDoc = docid;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return flag;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,93 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestDocIdSet extends LuceneTestCase {
|
||||||
|
public void testFilteredDocIdSet() throws Exception {
|
||||||
|
final int maxdoc=10;
|
||||||
|
final DocIdSet innerSet = new DocIdSet() {
|
||||||
|
|
||||||
|
// @Override
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return new DocIdSetIterator() {
|
||||||
|
|
||||||
|
int docid=-1;
|
||||||
|
//@Override
|
||||||
|
public int doc() {
|
||||||
|
return docid;
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
docid++;
|
||||||
|
return (docid<maxdoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
do {
|
||||||
|
if (!next()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} while (target > doc());
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
DocIdSet filteredSet = new FilteredDocIdSet(innerSet){
|
||||||
|
// @Override
|
||||||
|
protected boolean match(int docid) {
|
||||||
|
return docid%2 == 0; //validate only even docids
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
DocIdSetIterator iter = filteredSet.iterator();
|
||||||
|
ArrayList/*<Integer>*/ list = new ArrayList/*<Integer>*/();
|
||||||
|
if (iter.skipTo(3)) {
|
||||||
|
list.add(new Integer(iter.doc()));
|
||||||
|
while(iter.next()) {
|
||||||
|
list.add(new Integer(iter.doc()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int[] docs = new int[list.size()];
|
||||||
|
int c=0;
|
||||||
|
Iterator/*<Integer>*/ intIter = list.iterator();
|
||||||
|
while(intIter.hasNext()) {
|
||||||
|
docs[c++] = ((Integer) intIter.next()).intValue();
|
||||||
|
}
|
||||||
|
int[] answer = new int[]{4,6,8};
|
||||||
|
boolean same = Arrays.equals(answer, docs);
|
||||||
|
if (!same) {
|
||||||
|
System.out.println("answer: "+Arrays.toString(answer));
|
||||||
|
System.out.println("gotten: "+Arrays.toString(docs));
|
||||||
|
fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue