mirror of https://github.com/apache/lucene.git
LUCENE-1506: add FilteredDocIdSet
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@740361 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
62d56d9410
commit
35d2b765be
|
@ -149,6 +149,13 @@ New features
|
|||
reopen you can change the readOnly of the original reader. (Jason
|
||||
Rutherglen, Mike McCandless)
|
||||
|
||||
14. LUCENE-1506: Added FilteredDocIdSet, an abstract class which you
|
||||
subclass to implement the "match" method to aceept or reject each
|
||||
docID. Unlike ChainedFilter (under contrib/misc),
|
||||
FilteredDocIdSet never requires you to materialize the full
|
||||
bitset. Instead, match() is called on demand per docID. (John
|
||||
Wang via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Abstract decorator class for a DocIdSet implementation
|
||||
* that provides on-demand filtering/validation
|
||||
* mechanism on a given DocIdSet.
|
||||
*
|
||||
* <p/>
|
||||
*
|
||||
* Technically, this same functionality could be achieved
|
||||
* with ChainedFilter (under contrib/misc), however the
|
||||
* benefit of this class is it never materializes the full
|
||||
* bitset for the filter. Instead, the {@link #match}
|
||||
* method is invoked on-demand, per docID visited during
|
||||
* searching. If you know few docIDs will be visited, and
|
||||
* the logic behind {@link #match} is relatively costly,
|
||||
* this may be a better way to filter than ChainedFilter.
|
||||
*
|
||||
* @see DocIdSet
|
||||
*/
|
||||
|
||||
public abstract class FilteredDocIdSet extends DocIdSet {
|
||||
private final DocIdSet _innerSet;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param innerSet Underlying DocIdSet
|
||||
*/
|
||||
public FilteredDocIdSet(DocIdSet innerSet) {
|
||||
_innerSet = innerSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validation method to determine whether a docid should be in the result set.
|
||||
* @param docid docid to be tested
|
||||
* @return true if input docid should be in the result set, false otherwise.
|
||||
*/
|
||||
protected abstract boolean match(int docid);
|
||||
|
||||
/**
|
||||
* Implementation of the contract to build a DocIdSetIterator.
|
||||
* @see DocIdSetIterator
|
||||
* @see FilteredDocIdSetIterator
|
||||
*/
|
||||
// @Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
return new FilteredDocIdSetIterator(_innerSet.iterator()) {
|
||||
protected boolean match(int docid) {
|
||||
return FilteredDocIdSet.this.match(docid);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Abstract decorator class of a DocIdSetIterator
|
||||
* implementation that provides on-demand filter/validation
|
||||
* mechanism on an underlying DocIdSetIterator. See {@link
|
||||
* FilteredDocIdSet}.
|
||||
*/
|
||||
|
||||
public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {
|
||||
protected DocIdSetIterator _innerIter;
|
||||
private int _currentDoc;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param innerIter Underlying DocIdSetIterator.
|
||||
*/
|
||||
public FilteredDocIdSetIterator(DocIdSetIterator innerIter) {
|
||||
if (innerIter == null) {
|
||||
throw new IllegalArgumentException("null iterator");
|
||||
}
|
||||
_innerIter = innerIter;
|
||||
_currentDoc = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validation method to determine whether a docid should be in the result set.
|
||||
* @param docid docid to be tested
|
||||
* @return true if input docid should be in the result set, false otherwise.
|
||||
* @see #FilteredDocIdSetIterator(DocIdSetIterator).
|
||||
*/
|
||||
abstract protected boolean match(int doc);
|
||||
|
||||
// @Override
|
||||
public final int doc() {
|
||||
return _currentDoc;
|
||||
}
|
||||
|
||||
// @Override
|
||||
public final boolean next() throws IOException{
|
||||
while (_innerIter.next()) {
|
||||
int doc = _innerIter.doc();
|
||||
if (match(doc)) {
|
||||
_currentDoc = doc;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// @Override
|
||||
public final boolean skipTo(int n) throws IOException{
|
||||
boolean flag = _innerIter.skipTo(n);
|
||||
if (flag) {
|
||||
int doc = _innerIter.doc();
|
||||
if (match(doc)) {
|
||||
_currentDoc = doc;
|
||||
return true;
|
||||
} else {
|
||||
while (_innerIter.next()) {
|
||||
int docid = _innerIter.doc();
|
||||
if (match(docid)) {
|
||||
_currentDoc = docid;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return flag;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestDocIdSet extends LuceneTestCase {
|
||||
public void testFilteredDocIdSet() throws Exception {
|
||||
final int maxdoc=10;
|
||||
final DocIdSet innerSet = new DocIdSet() {
|
||||
|
||||
// @Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return new DocIdSetIterator() {
|
||||
|
||||
int docid=-1;
|
||||
//@Override
|
||||
public int doc() {
|
||||
return docid;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public boolean next() throws IOException {
|
||||
docid++;
|
||||
return (docid<maxdoc);
|
||||
}
|
||||
|
||||
//@Override
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
do {
|
||||
if (!next()) {
|
||||
return false;
|
||||
}
|
||||
} while (target > doc());
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
DocIdSet filteredSet = new FilteredDocIdSet(innerSet){
|
||||
// @Override
|
||||
protected boolean match(int docid) {
|
||||
return docid%2 == 0; //validate only even docids
|
||||
}
|
||||
};
|
||||
|
||||
DocIdSetIterator iter = filteredSet.iterator();
|
||||
ArrayList/*<Integer>*/ list = new ArrayList/*<Integer>*/();
|
||||
if (iter.skipTo(3)) {
|
||||
list.add(new Integer(iter.doc()));
|
||||
while(iter.next()) {
|
||||
list.add(new Integer(iter.doc()));
|
||||
}
|
||||
}
|
||||
|
||||
int[] docs = new int[list.size()];
|
||||
int c=0;
|
||||
Iterator/*<Integer>*/ intIter = list.iterator();
|
||||
while(intIter.hasNext()) {
|
||||
docs[c++] = ((Integer) intIter.next()).intValue();
|
||||
}
|
||||
int[] answer = new int[]{4,6,8};
|
||||
boolean same = Arrays.equals(answer, docs);
|
||||
if (!same) {
|
||||
System.out.println("answer: "+Arrays.toString(answer));
|
||||
System.out.println("gotten: "+Arrays.toString(docs));
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue