mirror of https://github.com/apache/lucene.git
LUCENE-1187: ChainedFilter and BooleanFilter now work with new Filter API and DocIdSetIterator-based filters.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@659635 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08a2eb4665
commit
0ebfcc663e
|
@ -163,6 +163,10 @@ New features
|
||||||
|
|
||||||
13. LUCENE-1166: Decomposition tokenfilter for languages like German and Swedish (Thomas Peuss via Grant Ingersoll)
|
13. LUCENE-1166: Decomposition tokenfilter for languages like German and Swedish (Thomas Peuss via Grant Ingersoll)
|
||||||
|
|
||||||
|
14. LUCENE-1187: ChainedFilter and BooleanFilter now work with new Filter API
|
||||||
|
and DocIdSetIterator-based filters. Backwards-compatibility with old
|
||||||
|
BitSet-based filters is ensured. (Paul Elschot via Michael Busch)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-705: When building a compound file, use
|
1. LUCENE-705: When building a compound file, use
|
||||||
|
|
|
@ -58,7 +58,11 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.search.Filter;
|
import org.apache.lucene.search.Filter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.BitSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
import org.apache.lucene.util.OpenBitSetDISI;
|
||||||
|
import org.apache.lucene.util.SortedVIntList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -79,29 +83,13 @@ import java.util.BitSet;
|
||||||
*/
|
*/
|
||||||
public class ChainedFilter extends Filter
|
public class ChainedFilter extends Filter
|
||||||
{
|
{
|
||||||
/**
|
|
||||||
* {@link BitSet#or}.
|
|
||||||
*/
|
|
||||||
public static final int OR = 0;
|
public static final int OR = 0;
|
||||||
|
|
||||||
/**
|
|
||||||
* {@link BitSet#and}.
|
|
||||||
*/
|
|
||||||
public static final int AND = 1;
|
public static final int AND = 1;
|
||||||
|
|
||||||
/**
|
|
||||||
* {@link BitSet#andNot}.
|
|
||||||
*/
|
|
||||||
public static final int ANDNOT = 2;
|
public static final int ANDNOT = 2;
|
||||||
|
|
||||||
/**
|
|
||||||
* {@link BitSet#xor}.
|
|
||||||
*/
|
|
||||||
public static final int XOR = 3;
|
public static final int XOR = 3;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Logical operation when none is declared. Defaults to
|
* Logical operation when none is declared. Defaults to
|
||||||
* {@link BitSet#or}.
|
* OR.
|
||||||
*/
|
*/
|
||||||
public static int DEFAULT = OR;
|
public static int DEFAULT = OR;
|
||||||
|
|
||||||
|
@ -144,96 +132,95 @@ public class ChainedFilter extends Filter
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@link Filter#bits}.
|
* {@link Filter#getDocIdSet}.
|
||||||
*/
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
|
||||||
{
|
{
|
||||||
|
int[] index = new int[1]; // use array as reference to modifiable int;
|
||||||
|
index[0] = 0; // an object attribute would not be thread safe.
|
||||||
if (logic != -1)
|
if (logic != -1)
|
||||||
return bits(reader, logic);
|
return getDocIdSet(reader, logic, index);
|
||||||
else if (logicArray != null)
|
else if (logicArray != null)
|
||||||
return bits(reader, logicArray);
|
return getDocIdSet(reader, logicArray, index);
|
||||||
else
|
else
|
||||||
return bits(reader, DEFAULT);
|
return getDocIdSet(reader, DEFAULT, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private DocIdSetIterator getDISI(Filter filter, IndexReader reader)
|
||||||
* Delegates to each filter in the chain.
|
throws IOException
|
||||||
* @param reader IndexReader
|
|
||||||
* @param logic Logical operation
|
|
||||||
* @return BitSet
|
|
||||||
*/
|
|
||||||
private BitSet bits(IndexReader reader, int logic) throws IOException
|
|
||||||
{
|
{
|
||||||
BitSet result;
|
return filter.getDocIdSet(reader).iterator();
|
||||||
int i = 0;
|
}
|
||||||
|
|
||||||
|
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
OpenBitSetDISI result;
|
||||||
/**
|
/**
|
||||||
* First AND operation takes place against a completely false
|
* First AND operation takes place against a completely false
|
||||||
* bitset and will always return zero results. Thanks to
|
* bitset and will always return zero results.
|
||||||
* Daniel Armbrust for pointing this out and suggesting workaround.
|
|
||||||
*/
|
*/
|
||||||
if (logic == AND)
|
if (logic == AND)
|
||||||
{
|
{
|
||||||
result = (BitSet) chain[i].bits(reader).clone();
|
result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
|
||||||
++i;
|
++index[0];
|
||||||
}
|
}
|
||||||
else if (logic == ANDNOT)
|
else if (logic == ANDNOT)
|
||||||
{
|
{
|
||||||
result = (BitSet) chain[i].bits(reader).clone();
|
result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
|
||||||
result.flip(0,reader.maxDoc());
|
result.flip(0,reader.maxDoc()); // NOTE: may set bits for deleted docs.
|
||||||
++i;
|
++index[0];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
result = new BitSet(reader.maxDoc());
|
result = new OpenBitSetDISI(reader.maxDoc());
|
||||||
}
|
|
||||||
|
|
||||||
for (; i < chain.length; i++)
|
|
||||||
{
|
|
||||||
doChain(result, reader, logic, chain[i]);
|
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Provide a SortedVIntList when it is definitely smaller than an OpenBitSet */
|
||||||
|
protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
|
||||||
|
return (result.cardinality() < (maxDocs / 9))
|
||||||
|
? (DocIdSet) new SortedVIntList(result)
|
||||||
|
: (DocIdSet) result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delegates to each filter in the chain.
|
||||||
|
* @param reader IndexReader
|
||||||
|
* @param logic Logical operation
|
||||||
|
* @return DocIdSet
|
||||||
|
*/
|
||||||
|
private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
OpenBitSetDISI result = initialResult(reader, logic, index);
|
||||||
|
for (; index[0] < chain.length; index[0]++)
|
||||||
|
{
|
||||||
|
doChain(result, logic, chain[index[0]].getDocIdSet(reader));
|
||||||
|
}
|
||||||
|
return finalResult(result, reader.maxDoc());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delegates to each filter in the chain.
|
* Delegates to each filter in the chain.
|
||||||
* @param reader IndexReader
|
* @param reader IndexReader
|
||||||
* @param logic Logical operation
|
* @param logic Logical operation
|
||||||
* @return BitSet
|
* @return DocIdSet
|
||||||
*/
|
*/
|
||||||
private BitSet bits(IndexReader reader, int[] logic) throws IOException
|
private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index)
|
||||||
|
throws IOException
|
||||||
{
|
{
|
||||||
if (logic.length != chain.length)
|
if (logic.length != chain.length)
|
||||||
throw new IllegalArgumentException("Invalid number of elements in logic array");
|
throw new IllegalArgumentException("Invalid number of elements in logic array");
|
||||||
BitSet result;
|
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
/**
|
OpenBitSetDISI result = initialResult(reader, logic[0], index);
|
||||||
* First AND operation takes place against a completely false
|
for (; index[0] < chain.length; index[0]++)
|
||||||
* bitset and will always return zero results. Thanks to
|
|
||||||
* Daniel Armbrust for pointing this out and suggesting workaround.
|
|
||||||
*/
|
|
||||||
if (logic[0] == AND)
|
|
||||||
{
|
{
|
||||||
result = (BitSet) chain[i].bits(reader).clone();
|
doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader));
|
||||||
++i;
|
|
||||||
}
|
}
|
||||||
else if (logic[0] == ANDNOT)
|
return finalResult(result, reader.maxDoc());
|
||||||
{
|
|
||||||
result = (BitSet) chain[i].bits(reader).clone();
|
|
||||||
result.flip(0,reader.maxDoc());
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result = new BitSet(reader.maxDoc());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; i < chain.length; i++)
|
|
||||||
{
|
|
||||||
doChain(result, reader, logic[i], chain[i]);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString()
|
public String toString()
|
||||||
|
@ -249,26 +236,51 @@ public class ChainedFilter extends Filter
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doChain(BitSet result, IndexReader reader,
|
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
|
||||||
int logic, Filter filter) throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
|
|
||||||
|
if (dis instanceof OpenBitSet) {
|
||||||
|
// optimized case for OpenBitSets
|
||||||
switch (logic)
|
switch (logic)
|
||||||
{
|
{
|
||||||
case OR:
|
case OR:
|
||||||
result.or(filter.bits(reader));
|
result.or((OpenBitSet) dis);
|
||||||
break;
|
break;
|
||||||
case AND:
|
case AND:
|
||||||
result.and(filter.bits(reader));
|
result.and((OpenBitSet) dis);
|
||||||
break;
|
break;
|
||||||
case ANDNOT:
|
case ANDNOT:
|
||||||
result.andNot(filter.bits(reader));
|
result.andNot((OpenBitSet) dis);
|
||||||
break;
|
break;
|
||||||
case XOR:
|
case XOR:
|
||||||
result.xor(filter.bits(reader));
|
result.xor((OpenBitSet) dis);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
doChain(result, reader, DEFAULT, filter);
|
doChain(result, DEFAULT, dis);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DocIdSetIterator disi = dis.iterator();
|
||||||
|
switch (logic)
|
||||||
|
{
|
||||||
|
case OR:
|
||||||
|
result.inPlaceOr(disi);
|
||||||
|
break;
|
||||||
|
case AND:
|
||||||
|
result.inPlaceAnd(disi);
|
||||||
|
break;
|
||||||
|
case ANDNOT:
|
||||||
|
result.inPlaceNot(disi);
|
||||||
|
break;
|
||||||
|
case XOR:
|
||||||
|
result.inPlaceXor(disi);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
doChain(result, DEFAULT, dis);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,11 +19,17 @@ package org.apache.lucene.misc;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.io.IOException;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.store.NoLockFactory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
@ -80,9 +86,40 @@ public class ChainedFilterTest extends TestCase {
|
||||||
new TermQuery(new Term("owner", "sue")));
|
new TermQuery(new Term("owner", "sue")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Filter[] getChainWithOldFilters(Filter[] chain) {
|
||||||
|
Filter[] oldFilters = new Filter[chain.length];
|
||||||
|
for (int i = 0; i < chain.length; i++) {
|
||||||
|
oldFilters[i] = new OldBitSetFilterWrapper(chain[i]);
|
||||||
|
}
|
||||||
|
return oldFilters;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ChainedFilter getChainedFilter(Filter[] chain, int[] logic, boolean old) {
|
||||||
|
if (old) {
|
||||||
|
chain = getChainWithOldFilters(chain);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logic == null) {
|
||||||
|
return new ChainedFilter(chain);
|
||||||
|
} else {
|
||||||
|
return new ChainedFilter(chain, logic);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ChainedFilter getChainedFilter(Filter[] chain, int logic, boolean old) {
|
||||||
|
if (old) {
|
||||||
|
chain = getChainWithOldFilters(chain);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ChainedFilter(chain, logic);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testSingleFilter() throws Exception {
|
public void testSingleFilter() throws Exception {
|
||||||
ChainedFilter chain = new ChainedFilter(
|
for (int mode = 0; mode < 2; mode++) {
|
||||||
new Filter[] {dateFilter});
|
boolean old = (mode==0);
|
||||||
|
|
||||||
|
ChainedFilter chain = getChainedFilter(new Filter[] {dateFilter}, null, old);
|
||||||
|
|
||||||
Hits hits = searcher.search(query, chain);
|
Hits hits = searcher.search(query, chain);
|
||||||
assertEquals(MAX, hits.length());
|
assertEquals(MAX, hits.length());
|
||||||
|
@ -91,65 +128,107 @@ public class ChainedFilterTest extends TestCase {
|
||||||
hits = searcher.search(query, chain);
|
hits = searcher.search(query, chain);
|
||||||
assertEquals(MAX / 2, hits.length());
|
assertEquals(MAX / 2, hits.length());
|
||||||
|
|
||||||
chain = new ChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.AND});
|
chain = getChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.AND}, old);
|
||||||
hits = searcher.search(query, chain);
|
hits = searcher.search(query, chain);
|
||||||
assertEquals(MAX / 2, hits.length());
|
assertEquals(MAX / 2, hits.length());
|
||||||
assertEquals("bob", hits.doc(0).get("owner"));
|
assertEquals("bob", hits.doc(0).get("owner"));
|
||||||
|
|
||||||
chain = new ChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.ANDNOT});
|
chain = getChainedFilter(new Filter[] {bobFilter}, new int[] {ChainedFilter.ANDNOT}, old);
|
||||||
hits = searcher.search(query, chain);
|
hits = searcher.search(query, chain);
|
||||||
assertEquals(MAX / 2, hits.length());
|
assertEquals(MAX / 2, hits.length());
|
||||||
assertEquals("sue", hits.doc(0).get("owner"));
|
assertEquals("sue", hits.doc(0).get("owner"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testOR() throws Exception {
|
public void testOR() throws Exception {
|
||||||
ChainedFilter chain = new ChainedFilter(
|
for (int mode = 0; mode < 2; mode++) {
|
||||||
new Filter[] {sueFilter, bobFilter});
|
boolean old = (mode==0);
|
||||||
|
ChainedFilter chain = getChainedFilter(
|
||||||
|
new Filter[] {sueFilter, bobFilter}, null, old);
|
||||||
|
|
||||||
Hits hits = searcher.search(query, chain);
|
Hits hits = searcher.search(query, chain);
|
||||||
assertEquals("OR matches all", MAX, hits.length());
|
assertEquals("OR matches all", MAX, hits.length());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testAND() throws Exception {
|
public void testAND() throws Exception {
|
||||||
ChainedFilter chain = new ChainedFilter(
|
for (int mode = 0; mode < 2; mode++) {
|
||||||
new Filter[] {dateFilter, bobFilter}, ChainedFilter.AND);
|
boolean old = (mode==0);
|
||||||
|
ChainedFilter chain = getChainedFilter(
|
||||||
|
new Filter[] {dateFilter, bobFilter}, ChainedFilter.AND, old);
|
||||||
|
|
||||||
Hits hits = searcher.search(query, chain);
|
Hits hits = searcher.search(query, chain);
|
||||||
assertEquals("AND matches just bob", MAX / 2, hits.length());
|
assertEquals("AND matches just bob", MAX / 2, hits.length());
|
||||||
assertEquals("bob", hits.doc(0).get("owner"));
|
assertEquals("bob", hits.doc(0).get("owner"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testXOR() throws Exception {
|
public void testXOR() throws Exception {
|
||||||
ChainedFilter chain = new ChainedFilter(
|
for (int mode = 0; mode < 2; mode++) {
|
||||||
new Filter[]{dateFilter, bobFilter}, ChainedFilter.XOR);
|
boolean old = (mode==0);
|
||||||
|
ChainedFilter chain = getChainedFilter(
|
||||||
|
new Filter[]{dateFilter, bobFilter}, ChainedFilter.XOR, old);
|
||||||
|
|
||||||
Hits hits = searcher.search(query, chain);
|
Hits hits = searcher.search(query, chain);
|
||||||
assertEquals("XOR matches sue", MAX / 2, hits.length());
|
assertEquals("XOR matches sue", MAX / 2, hits.length());
|
||||||
assertEquals("sue", hits.doc(0).get("owner"));
|
assertEquals("sue", hits.doc(0).get("owner"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testANDNOT() throws Exception {
|
public void testANDNOT() throws Exception {
|
||||||
ChainedFilter chain = new ChainedFilter(
|
for (int mode = 0; mode < 2; mode++) {
|
||||||
|
boolean old = (mode==0);
|
||||||
|
ChainedFilter chain = getChainedFilter(
|
||||||
new Filter[]{dateFilter, sueFilter},
|
new Filter[]{dateFilter, sueFilter},
|
||||||
new int[] {ChainedFilter.AND, ChainedFilter.ANDNOT});
|
new int[] {ChainedFilter.AND, ChainedFilter.ANDNOT}, old);
|
||||||
|
|
||||||
Hits hits = searcher.search(query, chain);
|
Hits hits = searcher.search(query, chain);
|
||||||
assertEquals("ANDNOT matches just bob",
|
assertEquals("ANDNOT matches just bob",
|
||||||
MAX / 2, hits.length());
|
MAX / 2, hits.length());
|
||||||
assertEquals("bob", hits.doc(0).get("owner"));
|
assertEquals("bob", hits.doc(0).get("owner"));
|
||||||
|
|
||||||
chain = new ChainedFilter(
|
chain = getChainedFilter(
|
||||||
new Filter[]{bobFilter, bobFilter},
|
new Filter[]{bobFilter, bobFilter},
|
||||||
new int[] {ChainedFilter.ANDNOT, ChainedFilter.ANDNOT});
|
new int[] {ChainedFilter.ANDNOT, ChainedFilter.ANDNOT}, old);
|
||||||
|
|
||||||
hits = searcher.search(query, chain);
|
hits = searcher.search(query, chain);
|
||||||
assertEquals("ANDNOT bob ANDNOT bob matches all sues",
|
assertEquals("ANDNOT bob ANDNOT bob matches all sues",
|
||||||
MAX / 2, hits.length());
|
MAX / 2, hits.length());
|
||||||
assertEquals("sue", hits.doc(0).get("owner"));
|
assertEquals("sue", hits.doc(0).get("owner"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private Date parseDate(String s) throws ParseException {
|
private Date parseDate(String s) throws ParseException {
|
||||||
return new SimpleDateFormat("yyyy MMM dd", Locale.US).parse(s);
|
return new SimpleDateFormat("yyyy MMM dd", Locale.US).parse(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWithCachingFilter() throws Exception {
|
||||||
|
for (int mode = 0; mode < 2; mode++) {
|
||||||
|
boolean old = (mode==0);
|
||||||
|
Directory dir = new RAMDirectory();
|
||||||
|
Analyzer analyzer = new WhitespaceAnalyzer();
|
||||||
|
|
||||||
|
IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.LIMITED);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
Searcher searcher = new IndexSearcher(dir);
|
||||||
|
|
||||||
|
Query query = new TermQuery(new Term("none", "none"));
|
||||||
|
|
||||||
|
QueryWrapperFilter queryFilter = new QueryWrapperFilter(query);
|
||||||
|
CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);
|
||||||
|
|
||||||
|
searcher.search(query, cachingFilter, 1);
|
||||||
|
|
||||||
|
CachingWrapperFilter cachingFilter2 = new CachingWrapperFilter(queryFilter);
|
||||||
|
Filter[] chain = new Filter[2];
|
||||||
|
chain[0] = cachingFilter;
|
||||||
|
chain[1] = cachingFilter2;
|
||||||
|
ChainedFilter cf = new ChainedFilter(chain);
|
||||||
|
|
||||||
|
// throws java.lang.ClassCastException: org.apache.lucene.util.OpenBitSet cannot be cast to java.util.BitSet
|
||||||
|
searcher.search(new MatchAllDocsQuery(), cf, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,10 @@ import java.util.BitSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
import org.apache.lucene.util.OpenBitSetDISI;
|
||||||
|
import org.apache.lucene.util.SortedVIntList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A container Filter that allows Boolean composition of Filters.
|
* A container Filter that allows Boolean composition of Filters.
|
||||||
|
@ -37,88 +41,91 @@ import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
|
||||||
public class BooleanFilter extends Filter
|
public class BooleanFilter extends Filter
|
||||||
{
|
{
|
||||||
//ArrayList of SHOULD filters
|
|
||||||
ArrayList shouldFilters = null;
|
ArrayList shouldFilters = null;
|
||||||
//ArrayList of NOT filters
|
|
||||||
ArrayList notFilters = null;
|
ArrayList notFilters = null;
|
||||||
//ArrayList of MUST filters
|
|
||||||
ArrayList mustFilters = null;
|
ArrayList mustFilters = null;
|
||||||
|
|
||||||
|
private DocIdSetIterator getDISI(ArrayList filters, int index, IndexReader reader)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
return ((Filter)filters.get(index)).getDocIdSet(reader).iterator();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the a BitSet representing the Boolean composition
|
* Returns a DocIdSet representing the Boolean composition
|
||||||
* of the filters that have been added.
|
* of the filters that have been added.
|
||||||
*/
|
*/
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
|
||||||
|
{
|
||||||
|
OpenBitSetDISI res = null;
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException
|
if (shouldFilters != null) {
|
||||||
{
|
for (int i = 0; i < shouldFilters.size(); i++) {
|
||||||
//create a new bitSet
|
if (res == null) {
|
||||||
BitSet returnBits = null;
|
res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc());
|
||||||
|
} else {
|
||||||
//SHOULD filters
|
DocIdSet dis = ((Filter)shouldFilters.get(i)).getDocIdSet(reader);
|
||||||
if (shouldFilters!=null)
|
if(dis instanceof OpenBitSet) {
|
||||||
{
|
// optimized case for OpenBitSets
|
||||||
returnBits = ((Filter)shouldFilters.get(0)).bits(reader);
|
res.or((OpenBitSet) dis);
|
||||||
// avoid changing the original bitset - it may be cached
|
} else {
|
||||||
returnBits=(BitSet) returnBits.clone();
|
res.inPlaceOr(getDISI(shouldFilters, i, reader));
|
||||||
if (shouldFilters.size() > 1)
|
}
|
||||||
{
|
|
||||||
for (int i = 1; i < shouldFilters.size(); i++)
|
|
||||||
{
|
|
||||||
returnBits.or(((Filter)shouldFilters.get(i)).bits(reader));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//NOT filters
|
if (notFilters!=null) {
|
||||||
if (notFilters!=null)
|
for (int i = 0; i < notFilters.size(); i++) {
|
||||||
{
|
if (res == null) {
|
||||||
for (int i = 0; i < notFilters.size(); i++)
|
res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc());
|
||||||
{
|
res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
|
||||||
BitSet notBits=((Filter)notFilters.get(i)).bits(reader);
|
} else {
|
||||||
if(returnBits==null)
|
DocIdSet dis = ((Filter)notFilters.get(i)).getDocIdSet(reader);
|
||||||
{
|
if(dis instanceof OpenBitSet) {
|
||||||
returnBits=(BitSet) notBits.clone();
|
// optimized case for OpenBitSets
|
||||||
returnBits.flip(0,reader.maxDoc());
|
res.andNot((OpenBitSet) dis);
|
||||||
|
} else {
|
||||||
|
res.inPlaceNot(getDISI(notFilters, i, reader));
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
returnBits.andNot(notBits);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//MUST filters
|
if (mustFilters!=null) {
|
||||||
if (mustFilters!=null)
|
for (int i = 0; i < mustFilters.size(); i++) {
|
||||||
{
|
if (res == null) {
|
||||||
for (int i = 0; i < mustFilters.size(); i++)
|
res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc());
|
||||||
{
|
} else {
|
||||||
BitSet mustBits=((Filter)mustFilters.get(i)).bits(reader);
|
DocIdSet dis = ((Filter)mustFilters.get(i)).getDocIdSet(reader);
|
||||||
if(returnBits==null)
|
if(dis instanceof OpenBitSet) {
|
||||||
{
|
// optimized case for OpenBitSets
|
||||||
if(mustFilters.size()==1)
|
res.and((OpenBitSet) dis);
|
||||||
{
|
} else {
|
||||||
returnBits=mustBits;
|
res.inPlaceAnd(getDISI(mustFilters, i, reader));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res !=null)
|
||||||
|
return finalResult(res, reader.maxDoc());
|
||||||
|
|
||||||
|
if (emptyDocIdSet == null)
|
||||||
|
emptyDocIdSet = new OpenBitSetDISI(1);
|
||||||
|
|
||||||
|
return emptyDocIdSet;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
/** Provide a SortedVIntList when it is definitely smaller than an OpenBitSet */
|
||||||
//don't mangle the bitset
|
protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
|
||||||
returnBits=(BitSet) mustBits.clone();
|
return (result.cardinality() < (maxDocs / 9))
|
||||||
}
|
? (DocIdSet) new SortedVIntList(result)
|
||||||
}
|
: (DocIdSet) result;
|
||||||
else
|
|
||||||
{
|
|
||||||
returnBits.and(mustBits);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(returnBits==null)
|
|
||||||
{
|
|
||||||
returnBits=new BitSet(reader.maxDoc());
|
|
||||||
}
|
|
||||||
return returnBits;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static DocIdSet emptyDocIdSet = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a new FilterClause to the Boolean Filter container
|
* Adds a new FilterClause to the Boolean Filter container
|
||||||
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
|
* @param filterClause A FilterClause object containing a Filter and an Occur parameter
|
||||||
|
@ -126,47 +133,44 @@ public class BooleanFilter extends Filter
|
||||||
|
|
||||||
public void add(FilterClause filterClause)
|
public void add(FilterClause filterClause)
|
||||||
{
|
{
|
||||||
if (filterClause.getOccur().equals(Occur.MUST))
|
if (filterClause.getOccur().equals(Occur.MUST)) {
|
||||||
{
|
if (mustFilters==null) {
|
||||||
if(mustFilters==null)
|
|
||||||
{
|
|
||||||
mustFilters=new ArrayList();
|
mustFilters=new ArrayList();
|
||||||
}
|
}
|
||||||
mustFilters.add(filterClause.getFilter());
|
mustFilters.add(filterClause.getFilter());
|
||||||
}
|
}
|
||||||
if (filterClause.getOccur().equals(Occur.SHOULD))
|
if (filterClause.getOccur().equals(Occur.SHOULD)) {
|
||||||
{
|
if (shouldFilters==null) {
|
||||||
if(shouldFilters==null)
|
|
||||||
{
|
|
||||||
shouldFilters=new ArrayList();
|
shouldFilters=new ArrayList();
|
||||||
}
|
}
|
||||||
shouldFilters.add(filterClause.getFilter());
|
shouldFilters.add(filterClause.getFilter());
|
||||||
}
|
}
|
||||||
if (filterClause.getOccur().equals(Occur.MUST_NOT))
|
if (filterClause.getOccur().equals(Occur.MUST_NOT)) {
|
||||||
{
|
if (notFilters==null) {
|
||||||
if(notFilters==null)
|
|
||||||
{
|
|
||||||
notFilters=new ArrayList();
|
notFilters=new ArrayList();
|
||||||
}
|
}
|
||||||
notFilters.add(filterClause.getFilter());
|
notFilters.add(filterClause.getFilter());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean equalFilters(ArrayList filters1, ArrayList filters2)
|
||||||
|
{
|
||||||
|
return (filters1 == filters2) ||
|
||||||
|
((filters1 != null) && filters1.equals(filters2));
|
||||||
|
}
|
||||||
|
|
||||||
public boolean equals(Object obj)
|
public boolean equals(Object obj)
|
||||||
{
|
{
|
||||||
if(this == obj)
|
if (this == obj)
|
||||||
return true;
|
return true;
|
||||||
if((obj == null) || (obj.getClass() != this.getClass()))
|
|
||||||
|
if ((obj == null) || (obj.getClass() != this.getClass()))
|
||||||
return false;
|
return false;
|
||||||
BooleanFilter test = (BooleanFilter)obj;
|
|
||||||
return (notFilters == test.notFilters||
|
BooleanFilter other = (BooleanFilter)obj;
|
||||||
(notFilters!= null && notFilters.equals(test.notFilters)))
|
return equalFilters(notFilters, other.notFilters)
|
||||||
&&
|
&& equalFilters(mustFilters, other.mustFilters)
|
||||||
(mustFilters == test.mustFilters||
|
&& equalFilters(shouldFilters, other.shouldFilters);
|
||||||
(mustFilters!= null && mustFilters.equals(test.mustFilters)))
|
|
||||||
&&
|
|
||||||
(shouldFilters == test.shouldFilters||
|
|
||||||
(shouldFilters!= null && shouldFilters.equals(test.shouldFilters)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int hashCode()
|
public int hashCode()
|
||||||
|
@ -178,42 +182,25 @@ public class BooleanFilter extends Filter
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Prints a user-readable version of this filter. */
|
/** Prints a user-readable version of this filter. */
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
StringBuffer buffer = new StringBuffer();
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
|
||||||
buffer.append("BooleanFilter(");
|
buffer.append("BooleanFilter(");
|
||||||
|
appendFilters(shouldFilters, "", buffer);
|
||||||
appendFilters(shouldFilters, null, buffer);
|
|
||||||
appendFilters(mustFilters, "+", buffer);
|
appendFilters(mustFilters, "+", buffer);
|
||||||
appendFilters(notFilters, "-", buffer);
|
appendFilters(notFilters, "-", buffer);
|
||||||
|
|
||||||
buffer.append(")");
|
buffer.append(")");
|
||||||
|
|
||||||
return buffer.toString();
|
return buffer.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void appendFilters(ArrayList filters, String occurString,
|
private void appendFilters(ArrayList filters, String occurString, StringBuffer buffer)
|
||||||
StringBuffer buffer)
|
|
||||||
{
|
|
||||||
if (filters == null)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (int i = 0; i < filters.size(); i++)
|
|
||||||
{
|
|
||||||
Filter filter = (Filter) filters.get(i);
|
|
||||||
if (occurString != null)
|
|
||||||
{
|
|
||||||
buffer.append(occurString);
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer.append(filter);
|
|
||||||
|
|
||||||
if (i < filters.size() - 1)
|
|
||||||
{
|
{
|
||||||
|
if (filters != null) {
|
||||||
|
for (int i = 0; i < filters.size(); i++) {
|
||||||
buffer.append(' ');
|
buffer.append(' ');
|
||||||
|
buffer.append(occurString);
|
||||||
|
buffer.append(filters.get(i).toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermDocs;
|
import org.apache.lucene.index.TermDocs;
|
||||||
import org.apache.lucene.index.TermEnum;
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
public class DuplicateFilter extends Filter
|
public class DuplicateFilter extends Filter
|
||||||
{
|
{
|
||||||
|
@ -66,7 +67,7 @@ public class DuplicateFilter extends Filter
|
||||||
this.processingMode = processingMode;
|
this.processingMode = processingMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BitSet bits(IndexReader reader) throws IOException
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
|
||||||
{
|
{
|
||||||
if(processingMode==PM_FAST_INVALIDATION)
|
if(processingMode==PM_FAST_INVALIDATION)
|
||||||
{
|
{
|
||||||
|
@ -78,10 +79,10 @@ public class DuplicateFilter extends Filter
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private BitSet correctBits(IndexReader reader) throws IOException
|
private OpenBitSet correctBits(IndexReader reader) throws IOException
|
||||||
{
|
{
|
||||||
|
|
||||||
BitSet bits=new BitSet(reader.maxDoc()); //assume all are INvalid
|
OpenBitSet bits=new OpenBitSet(reader.maxDoc()); //assume all are INvalid
|
||||||
Term startTerm=new Term(fieldName,"");
|
Term startTerm=new Term(fieldName,"");
|
||||||
TermEnum te = reader.terms(startTerm);
|
TermEnum te = reader.terms(startTerm);
|
||||||
if(te!=null)
|
if(te!=null)
|
||||||
|
@ -117,10 +118,10 @@ public class DuplicateFilter extends Filter
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
private BitSet fastBits(IndexReader reader) throws IOException
|
private OpenBitSet fastBits(IndexReader reader) throws IOException
|
||||||
{
|
{
|
||||||
|
|
||||||
BitSet bits=new BitSet(reader.maxDoc());
|
OpenBitSet bits=new OpenBitSet(reader.maxDoc());
|
||||||
bits.set(0,reader.maxDoc()); //assume all are valid
|
bits.set(0,reader.maxDoc()); //assume all are valid
|
||||||
Term startTerm=new Term(fieldName,"");
|
Term startTerm=new Term(fieldName,"");
|
||||||
TermEnum te = reader.terms(startTerm);
|
TermEnum te = reader.terms(startTerm);
|
||||||
|
@ -143,7 +144,7 @@ public class DuplicateFilter extends Filter
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
lastDoc=td.doc();
|
lastDoc=td.doc();
|
||||||
bits.set(lastDoc,false);
|
bits.clear(lastDoc);
|
||||||
}while(td.next());
|
}while(td.next());
|
||||||
if(keepMode==KM_USE_LAST_OCCURRENCE)
|
if(keepMode==KM_USE_LAST_OCCURRENCE)
|
||||||
{
|
{
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.util.TreeSet;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermDocs;
|
import org.apache.lucene.index.TermDocs;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a filter for docs matching any of the terms added to this class.
|
* Constructs a filter for docs matching any of the terms added to this class.
|
||||||
|
@ -50,11 +51,11 @@ public class TermsFilter extends Filter
|
||||||
}
|
}
|
||||||
|
|
||||||
/* (non-Javadoc)
|
/* (non-Javadoc)
|
||||||
* @see org.apache.lucene.search.Filter#bits(org.apache.lucene.index.IndexReader)
|
* @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
|
||||||
*/
|
*/
|
||||||
public BitSet bits(IndexReader reader) throws IOException
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException
|
||||||
{
|
{
|
||||||
BitSet result=new BitSet(reader.maxDoc());
|
OpenBitSet result=new OpenBitSet(reader.maxDoc());
|
||||||
TermDocs td = reader.termDocs();
|
TermDocs td = reader.termDocs();
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.FilterClause;
|
import org.apache.lucene.search.FilterClause;
|
||||||
import org.apache.lucene.search.RangeFilter;
|
import org.apache.lucene.search.RangeFilter;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.DocIdBitSet;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -66,100 +67,141 @@ public class BooleanFilterTest extends TestCase
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Filter getRangeFilter(String field,String lowerPrice, String upperPrice)
|
private Filter getRangeFilter(String field,String lowerPrice, String upperPrice, boolean old)
|
||||||
{
|
{
|
||||||
return new RangeFilter(field,lowerPrice,upperPrice,true,true);
|
Filter f = new RangeFilter(field,lowerPrice,upperPrice,true,true);
|
||||||
|
if (old) {
|
||||||
|
return new OldBitSetFilterWrapper(f);
|
||||||
}
|
}
|
||||||
private TermsFilter getTermsFilter(String field,String text)
|
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
private Filter getTermsFilter(String field,String text, boolean old)
|
||||||
{
|
{
|
||||||
TermsFilter tf=new TermsFilter();
|
TermsFilter tf=new TermsFilter();
|
||||||
tf.addTerm(new Term(field,text));
|
tf.addTerm(new Term(field,text));
|
||||||
|
if (old) {
|
||||||
|
return new OldBitSetFilterWrapper(tf);
|
||||||
|
}
|
||||||
|
|
||||||
return tf;
|
return tf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void tstFilterCard(String mes, int expected, Filter filt)
|
||||||
|
throws Throwable
|
||||||
|
{
|
||||||
|
DocIdSetIterator disi = filt.getDocIdSet(reader).iterator();
|
||||||
|
int actual = 0;
|
||||||
|
while (disi.next()) {
|
||||||
|
actual++;
|
||||||
|
}
|
||||||
|
assertEquals(mes, expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testShould() throws Throwable
|
public void testShould() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getTermsFilter("price","030", old),BooleanClause.Occur.SHOULD));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("Should retrieves only 1 doc",1,booleanFilter);
|
||||||
assertEquals("Should retrieves only 1 doc",1,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testShoulds() throws Throwable
|
public void testShoulds() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("Shoulds are Ored together",5,booleanFilter);
|
||||||
assertEquals("Shoulds are Ored together",5,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
public void testShouldsAndMustNot() throws Throwable
|
public void testShouldsAndMustNot() throws Throwable
|
||||||
{
|
{
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
for (int i = 0; i < 2; i++) {
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
|
boolean old = (i==0);
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
|
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"),BooleanClause.Occur.MUST_NOT));
|
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
|
||||||
assertEquals("Shoulds Ored but AndNot",4,bits.cardinality());
|
|
||||||
|
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe"),BooleanClause.Occur.MUST_NOT));
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
bits = booleanFilter.bits(reader);
|
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
|
||||||
assertEquals("Shoulds Ored but AndNots",3,bits.cardinality());
|
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
|
||||||
|
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N", old),BooleanClause.Occur.MUST_NOT));
|
||||||
|
tstFilterCard("Shoulds Ored but AndNot",4,booleanFilter);
|
||||||
|
|
||||||
|
booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe", old),BooleanClause.Occur.MUST_NOT));
|
||||||
|
tstFilterCard("Shoulds Ored but AndNots",3,booleanFilter);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
public void testShouldsAndMust() throws Throwable
|
public void testShouldsAndMust() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("Shoulds Ored but MUST",3,booleanFilter);
|
||||||
assertEquals("Shoulds Ored but MUST",3,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
public void testShouldsAndMusts() throws Throwable
|
public void testShouldsAndMusts() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
|
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","010", "020", old),BooleanClause.Occur.SHOULD));
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","020", "030", old),BooleanClause.Occur.SHOULD));
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getRangeFilter("date","20040101", "20041231", old),BooleanClause.Occur.MUST));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("Shoulds Ored but MUSTs ANDED",1,booleanFilter);
|
||||||
assertEquals("Shoulds Ored but MUSTs ANDED",1,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
public void testShouldsAndMustsAndMustNot() throws Throwable
|
public void testShouldsAndMustsAndMustNot() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
|
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040"),BooleanClause.Occur.SHOULD));
|
booleanFilter.add(new FilterClause(getRangeFilter("price","030", "040", old),BooleanClause.Occur.SHOULD));
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
|
||||||
booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getRangeFilter("date","20050101", "20051231", old),BooleanClause.Occur.MUST));
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT));
|
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST_NOT));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("Shoulds Ored but MUSTs ANDED and MustNot",0,booleanFilter);
|
||||||
assertEquals("Shoulds Ored but MUSTs ANDED and MustNot",0,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testJustMust() throws Throwable
|
public void testJustMust() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
|
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin", old),BooleanClause.Occur.MUST));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("MUST",3,booleanFilter);
|
||||||
assertEquals("MUST",3,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
public void testJustMustNot() throws Throwable
|
public void testJustMustNot() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
|
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST_NOT));
|
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST_NOT));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("MUST_NOT",4,booleanFilter);
|
||||||
assertEquals("MUST_NOT",4,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
public void testMustAndMustNot() throws Throwable
|
public void testMustAndMustNot() throws Throwable
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
boolean old = (i==0);
|
||||||
|
|
||||||
BooleanFilter booleanFilter = new BooleanFilter();
|
BooleanFilter booleanFilter = new BooleanFilter();
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N"),BooleanClause.Occur.MUST));
|
booleanFilter.add(new FilterClause(getTermsFilter("inStock","N", old),BooleanClause.Occur.MUST));
|
||||||
booleanFilter.add(new FilterClause(getTermsFilter("price","030"),BooleanClause.Occur.MUST_NOT));
|
booleanFilter.add(new FilterClause(getTermsFilter("price","030", old),BooleanClause.Occur.MUST_NOT));
|
||||||
BitSet bits = booleanFilter.bits(reader);
|
tstFilterCard("MUST_NOT wins over MUST for same docs",0,booleanFilter);
|
||||||
assertEquals("MUST_NOT wins over MUST for same docs",0,bits.cardinality());
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -150,8 +150,8 @@ public class ParallelMultiSearcher extends MultiSearcher {
|
||||||
|
|
||||||
/** Lower-level search API.
|
/** Lower-level search API.
|
||||||
*
|
*
|
||||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
* <p>{@link HitCollector#collect(int,float)} is called for every matching
|
||||||
* scoring document.
|
* document.
|
||||||
*
|
*
|
||||||
* <p>Applications should only use this if they need <i>all</i> of the
|
* <p>Applications should only use this if they need <i>all</i> of the
|
||||||
* matching documents. The high-level search API ({@link
|
* matching documents. The high-level search API ({@link
|
||||||
|
|
|
@ -33,7 +33,7 @@ import java.io.IOException; // for javadoc
|
||||||
*
|
*
|
||||||
* <p>Queries, filters and sort criteria are designed to be compact so that
|
* <p>Queries, filters and sort criteria are designed to be compact so that
|
||||||
* they may be efficiently passed to a remote index, with only the top-scoring
|
* they may be efficiently passed to a remote index, with only the top-scoring
|
||||||
* hits being returned, rather than every non-zero scoring hit.
|
* hits being returned, rather than every matching hit.
|
||||||
*/
|
*/
|
||||||
public interface Searchable extends java.rmi.Remote {
|
public interface Searchable extends java.rmi.Remote {
|
||||||
/** Lower-level search API.
|
/** Lower-level search API.
|
||||||
|
|
|
@ -88,8 +88,8 @@ public abstract class Searcher implements Searchable {
|
||||||
|
|
||||||
/** Lower-level search API.
|
/** Lower-level search API.
|
||||||
*
|
*
|
||||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
* <p>{@link HitCollector#collect(int,float)} is called for every matching
|
||||||
* scoring document.
|
* document.
|
||||||
*
|
*
|
||||||
* <p>Applications should only use this if they need <i>all</i> of the
|
* <p>Applications should only use this if they need <i>all</i> of the
|
||||||
* matching documents. The high-level search API ({@link
|
* matching documents. The high-level search API ({@link
|
||||||
|
@ -107,8 +107,8 @@ public abstract class Searcher implements Searchable {
|
||||||
|
|
||||||
/** Lower-level search API.
|
/** Lower-level search API.
|
||||||
*
|
*
|
||||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
* <p>{@link HitCollector#collect(int,float)} is called for every matching
|
||||||
* scoring document.
|
* document.
|
||||||
* <br>HitCollector-based access to remote indexes is discouraged.
|
* <br>HitCollector-based access to remote indexes is discouraged.
|
||||||
*
|
*
|
||||||
* <p>Applications should only use this if they need <i>all</i> of the
|
* <p>Applications should only use this if they need <i>all</i> of the
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
|
public class OpenBitSetDISI extends OpenBitSet {
|
||||||
|
|
||||||
|
/** Construct an OpenBitSetDISI with its bits set
|
||||||
|
* from the doc ids of the given DocIdSetIterator.
|
||||||
|
* Also give a maximum size one larger than the largest doc id for which a
|
||||||
|
* bit may ever be set on this OpenBitSetDISI.
|
||||||
|
*/
|
||||||
|
public OpenBitSetDISI(DocIdSetIterator disi, int maxSize) throws IOException {
|
||||||
|
super(maxSize);
|
||||||
|
inPlaceOr(disi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Construct an OpenBitSetDISI with no bits set, and a given maximum size
|
||||||
|
* one larger than the largest doc id for which a bit may ever be set
|
||||||
|
* on this OpenBitSetDISI.
|
||||||
|
*/
|
||||||
|
public OpenBitSetDISI(int maxSize) {
|
||||||
|
super(maxSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform an inplace OR with the doc ids from a given DocIdSetIterator,
|
||||||
|
* setting the bit for each such doc id.
|
||||||
|
* These doc ids should be smaller than the maximum size passed to the
|
||||||
|
* constructor.
|
||||||
|
*/
|
||||||
|
public void inPlaceOr(DocIdSetIterator disi) throws IOException {
|
||||||
|
while (disi.next() && (disi.doc() < size())) {
|
||||||
|
fastSet(disi.doc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform an inplace AND with the doc ids from a given DocIdSetIterator,
|
||||||
|
* leaving only the bits set for which the doc ids are in common.
|
||||||
|
* These doc ids should be smaller than the maximum size passed to the
|
||||||
|
* constructor.
|
||||||
|
*/
|
||||||
|
public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
|
||||||
|
int index = nextSetBit(0);
|
||||||
|
int lastNotCleared = -1;
|
||||||
|
while ((index != -1) && disi.skipTo(index)) {
|
||||||
|
while ((index != -1) && (index < disi.doc())) {
|
||||||
|
fastClear(index);
|
||||||
|
index = nextSetBit(index + 1);
|
||||||
|
}
|
||||||
|
if (index == disi.doc()) {
|
||||||
|
lastNotCleared = index;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
assert (index == -1) || (index > disi.doc());
|
||||||
|
}
|
||||||
|
clear(lastNotCleared+1, size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
|
||||||
|
* clearing all the bits for each such doc id.
|
||||||
|
* These doc ids should be smaller than the maximum size passed to the
|
||||||
|
* constructor.
|
||||||
|
*/
|
||||||
|
public void inPlaceNot(DocIdSetIterator disi) throws IOException {
|
||||||
|
while (disi.next() && (disi.doc() < size())) {
|
||||||
|
fastClear(disi.doc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
|
||||||
|
* flipping all the bits for each such doc id.
|
||||||
|
* These doc ids should be smaller than the maximum size passed to the
|
||||||
|
* constructor.
|
||||||
|
*/
|
||||||
|
public void inPlaceXor(DocIdSetIterator disi) throws IOException {
|
||||||
|
while (disi.next() && (disi.doc() < size())) {
|
||||||
|
fastFlip(disi.doc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper class used for testing compatibility with old BitSet-based filters.
|
||||||
|
* Does not override {@link Filter#getDocIdSet(IndexReader)} and thus ensures
|
||||||
|
* that {@link #bits(IndexReader)} is called.
|
||||||
|
*
|
||||||
|
* @deprecated This class will be removed together with the
|
||||||
|
* {@link Filter#bits(IndexReader)} method in Lucene 3.0.
|
||||||
|
*/
|
||||||
|
public class OldBitSetFilterWrapper extends Filter {
|
||||||
|
private Filter filter;
|
||||||
|
|
||||||
|
public OldBitSetFilterWrapper(Filter filter) {
|
||||||
|
this.filter = filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BitSet bits(IndexReader reader) throws IOException {
|
||||||
|
BitSet bits = new BitSet(reader.maxDoc());
|
||||||
|
DocIdSetIterator it = filter.getDocIdSet(reader).iterator();
|
||||||
|
while(it.next()) {
|
||||||
|
bits.set(it.doc());
|
||||||
|
}
|
||||||
|
return bits;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue