LUCENE-1506: add FilteredDocIdSet

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@740361 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-02-03 17:53:46 +00:00
parent 62d56d9410
commit 35d2b765be
4 changed files with 263 additions and 0 deletions

View File

@ -149,6 +149,13 @@ New features
reopen you can change the readOnly of the original reader. (Jason
Rutherglen, Mike McCandless)
14. LUCENE-1506: Added FilteredDocIdSet, an abstract class which you
subclass to implement the "match" method to accept or reject each
docID. Unlike ChainedFilter (under contrib/misc),
FilteredDocIdSet never requires you to materialize the full
bitset. Instead, match() is called on demand per docID. (John
Wang via Mike McCandless)
Optimizations
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

View File

@ -0,0 +1,72 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/**
 * Base class for a {@link DocIdSet} decorator that screens the
 * documents of a wrapped DocIdSet lazily: each candidate docID
 * is handed to {@link #match} as it is encountered rather than
 * up front.
 *
 * <p/>
 *
 * ChainedFilter (under contrib/misc) can express the same
 * filtering, but this class never materializes the full bitset
 * for the filter; {@link #match} runs on demand, once per docID
 * visited during searching. When few docIDs are expected to be
 * visited and {@link #match} is relatively costly, this may be
 * a better way to filter than ChainedFilter.
 *
 * @see DocIdSet
 */
public abstract class FilteredDocIdSet extends DocIdSet {

  /** The wrapped set whose documents are screened by {@link #match}. */
  private final DocIdSet wrapped;

  /**
   * Creates a filtered view over the given set.
   * @param innerSet Underlying DocIdSet
   */
  public FilteredDocIdSet(DocIdSet innerSet) {
    this.wrapped = innerSet;
  }

  /**
   * Decides whether a docid belongs in the result set.
   * @param docid docid to be tested
   * @return true if input docid should be in the result set, false otherwise.
   */
  protected abstract boolean match(int docid);

  /**
   * Builds an iterator that decorates the wrapped set's iterator
   * and forwards every match decision to this set's
   * {@link #match} method.
   * @see DocIdSetIterator
   * @see FilteredDocIdSetIterator
   */
  // @Override
  public DocIdSetIterator iterator() throws IOException {
    final DocIdSetIterator source = wrapped.iterator();
    return new FilteredDocIdSetIterator(source) {
      // Delegate to the enclosing set so subclasses implement match() only once.
      protected boolean match(int candidate) {
        return FilteredDocIdSet.this.match(candidate);
      }
    };
  }
}

View File

@ -0,0 +1,91 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/**
 * Base class for a {@link DocIdSetIterator} decorator that
 * filters an underlying DocIdSetIterator on demand, reporting
 * only the docids accepted by {@link #match}. See {@link
 * FilteredDocIdSet}.
 */
public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {

  /** The wrapped iterator that supplies candidate docids. */
  protected DocIdSetIterator _innerIter;

  /** Most recently accepted docid; -1 until a docid has been accepted. */
  private int lastAccepted;

  /**
   * Constructor.
   * @param innerIter Underlying DocIdSetIterator.
   * @throws IllegalArgumentException if innerIter is null
   */
  public FilteredDocIdSetIterator(DocIdSetIterator innerIter) {
    if (innerIter == null) {
      throw new IllegalArgumentException("null iterator");
    }
    _innerIter = innerIter;
    lastAccepted = -1;
  }

  /**
   * Validation method to determine whether a docid should be in the result set.
   * @param doc docid to be tested
   * @return true if input docid should be in the result set, false otherwise.
   * @see #FilteredDocIdSetIterator(DocIdSetIterator)
   */
  protected abstract boolean match(int doc);

  /**
   * Returns the docid most recently accepted by {@link #match},
   * or -1 if none has been accepted yet.
   */
  // @Override
  public final int doc() {
    return lastAccepted;
  }

  /**
   * Advances the underlying iterator until it yields a docid
   * accepted by {@link #match}.
   * @return true if an accepted docid was found, false once the
   *         underlying iterator is exhausted.
   */
  // @Override
  public final boolean next() throws IOException {
    for (;;) {
      if (!_innerIter.next()) {
        return false;
      }
      final int candidate = _innerIter.doc();
      if (match(candidate)) {
        lastAccepted = candidate;
        return true;
      }
    }
  }

  /**
   * Calls skipTo(n) on the underlying iterator, then settles on
   * the first docid from there onwards that is accepted by
   * {@link #match}.
   * @param n target passed through to the underlying iterator's skipTo
   * @return true if an accepted docid was found, false otherwise.
   */
  // @Override
  public final boolean skipTo(int n) throws IOException {
    if (!_innerIter.skipTo(n)) {
      return false;
    }
    final int candidate = _innerIter.doc();
    if (match(candidate)) {
      lastAccepted = candidate;
      return true;
    }
    // The landing docid was rejected: keep scanning forward exactly as next() does.
    return next();
  }
}

View File

@ -0,0 +1,93 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.util.LuceneTestCase;
public class TestDocIdSet extends LuceneTestCase {

  /**
   * Wraps a set that iterates docids 0..9 in a FilteredDocIdSet
   * accepting only even docids, skips to 3, drains the iterator,
   * and expects to have collected exactly 4, 6 and 8.
   */
  public void testFilteredDocIdSet() throws Exception {
    final int maxdoc = 10;

    // Unfiltered source: yields every docid from 0 up to maxdoc-1.
    final DocIdSet allDocs = new DocIdSet() {
      // @Override
      public DocIdSetIterator iterator() {
        return new DocIdSetIterator() {
          int current = -1;

          //@Override
          public int doc() {
            return current;
          }

          //@Override
          public boolean next() throws IOException {
            return ++current < maxdoc;
          }

          //@Override
          public boolean skipTo(int target) throws IOException {
            // Always advances at least once, then stops at the first docid >= target.
            while (next()) {
              if (doc() >= target) {
                return true;
              }
            }
            return false;
          }
        };
      }
    };

    DocIdSet evenDocs = new FilteredDocIdSet(allDocs) {
      // @Override
      protected boolean match(int docid) {
        return (docid & 1) == 0; //validate only even docids
      }
    };

    // Collect everything the filtered iterator reports from docid 3 onwards.
    DocIdSetIterator iter = evenDocs.iterator();
    ArrayList/*<Integer>*/ collected = new ArrayList/*<Integer>*/();
    boolean hasDoc = iter.skipTo(3);
    while (hasDoc) {
      collected.add(new Integer(iter.doc()));
      hasDoc = iter.next();
    }

    // Unbox into a primitive array so it can be compared with Arrays.equals.
    int[] docs = new int[collected.size()];
    for (int i = 0; i < docs.length; i++) {
      docs[i] = ((Integer) collected.get(i)).intValue();
    }

    int[] answer = new int[] {4, 6, 8};
    if (Arrays.equals(answer, docs)) {
      return;
    }
    System.out.println("answer: " + Arrays.toString(answer));
    System.out.println("gotten: " + Arrays.toString(docs));
    fail();
  }
}