LUCENE-1506: add FilteredDocIdSet

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@740361 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-02-03 17:53:46 +00:00
parent 62d56d9410
commit 35d2b765be
4 changed files with 263 additions and 0 deletions

View File

@ -149,6 +149,13 @@ New features
reopen you can change the readOnly of the original reader. (Jason reopen you can change the readOnly of the original reader. (Jason
Rutherglen, Mike McCandless) Rutherglen, Mike McCandless)
14. LUCENE-1506: Added FilteredDocIdSet, an abstract class which you
subclass to implement the "match" method to aceept or reject each
docID. Unlike ChainedFilter (under contrib/misc),
FilteredDocIdSet never requires you to materialize the full
bitset. Instead, match() is called on demand per docID. (John
Wang via Mike McCandless)
Optimizations Optimizations
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing 1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

View File

@ -0,0 +1,72 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/**
* Abstract decorator class for a DocIdSet implementation
* that provides on-demand filtering/validation
* mechanism on a given DocIdSet.
*
* <p/>
*
* Technically, this same functionality could be achieved
* with ChainedFilter (under contrib/misc), however the
* benefit of this class is it never materializes the full
* bitset for the filter. Instead, the {@link #match}
* method is invoked on-demand, per docID visited during
* searching. If you know few docIDs will be visited, and
* the logic behind {@link #match} is relatively costly,
* this may be a better way to filter than ChainedFilter.
*
* @see DocIdSet
*/
public abstract class FilteredDocIdSet extends DocIdSet {
private final DocIdSet _innerSet;
/**
* Constructor.
* @param innerSet Underlying DocIdSet
*/
public FilteredDocIdSet(DocIdSet innerSet) {
_innerSet = innerSet;
}
/**
* Validation method to determine whether a docid should be in the result set.
* @param docid docid to be tested
* @return true if input docid should be in the result set, false otherwise.
*/
protected abstract boolean match(int docid);
/**
* Implementation of the contract to build a DocIdSetIterator.
* @see DocIdSetIterator
* @see FilteredDocIdSetIterator
*/
// @Override
public DocIdSetIterator iterator() throws IOException {
return new FilteredDocIdSetIterator(_innerSet.iterator()) {
protected boolean match(int docid) {
return FilteredDocIdSet.this.match(docid);
}
};
}
}

View File

@ -0,0 +1,91 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/**
* Abstract decorator class of a DocIdSetIterator
* implementation that provides on-demand filter/validation
* mechanism on an underlying DocIdSetIterator. See {@link
* FilteredDocIdSet}.
*/
public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {
protected DocIdSetIterator _innerIter;
private int _currentDoc;
/**
* Constructor.
* @param innerIter Underlying DocIdSetIterator.
*/
public FilteredDocIdSetIterator(DocIdSetIterator innerIter) {
if (innerIter == null) {
throw new IllegalArgumentException("null iterator");
}
_innerIter = innerIter;
_currentDoc = -1;
}
/**
* Validation method to determine whether a docid should be in the result set.
* @param docid docid to be tested
* @return true if input docid should be in the result set, false otherwise.
* @see #FilteredDocIdSetIterator(DocIdSetIterator).
*/
abstract protected boolean match(int doc);
// @Override
public final int doc() {
return _currentDoc;
}
// @Override
public final boolean next() throws IOException{
while (_innerIter.next()) {
int doc = _innerIter.doc();
if (match(doc)) {
_currentDoc = doc;
return true;
}
}
return false;
}
// @Override
public final boolean skipTo(int n) throws IOException{
boolean flag = _innerIter.skipTo(n);
if (flag) {
int doc = _innerIter.doc();
if (match(doc)) {
_currentDoc = doc;
return true;
} else {
while (_innerIter.next()) {
int docid = _innerIter.doc();
if (match(docid)) {
_currentDoc = docid;
return true;
}
}
return false;
}
}
return flag;
}
}

View File

@ -0,0 +1,93 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.util.LuceneTestCase;
public class TestDocIdSet extends LuceneTestCase {
public void testFilteredDocIdSet() throws Exception {
final int maxdoc=10;
final DocIdSet innerSet = new DocIdSet() {
// @Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
int docid=-1;
//@Override
public int doc() {
return docid;
}
//@Override
public boolean next() throws IOException {
docid++;
return (docid<maxdoc);
}
//@Override
public boolean skipTo(int target) throws IOException {
do {
if (!next()) {
return false;
}
} while (target > doc());
return true;
}
};
}
};
DocIdSet filteredSet = new FilteredDocIdSet(innerSet){
// @Override
protected boolean match(int docid) {
return docid%2 == 0; //validate only even docids
}
};
DocIdSetIterator iter = filteredSet.iterator();
ArrayList/*<Integer>*/ list = new ArrayList/*<Integer>*/();
if (iter.skipTo(3)) {
list.add(new Integer(iter.doc()));
while(iter.next()) {
list.add(new Integer(iter.doc()));
}
}
int[] docs = new int[list.size()];
int c=0;
Iterator/*<Integer>*/ intIter = list.iterator();
while(intIter.hasNext()) {
docs[c++] = ((Integer) intIter.next()).intValue();
}
int[] answer = new int[]{4,6,8};
boolean same = Arrays.equals(answer, docs);
if (!same) {
System.out.println("answer: "+Arrays.toString(answer));
System.out.println("gotten: "+Arrays.toString(docs));
fail();
}
}
}