LUCENE-6025: Add BitSet.prevSetBit and cut over the join module to this API.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1635531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-10-30 16:06:17 +00:00
parent de9490a0e5
commit 347e4447eb
25 changed files with 502 additions and 471 deletions

View File

@ -67,18 +67,12 @@ public class CachingWrapperFilter extends Filter implements Accountable {
* instance is used as a placeholder in the cache instead of the <code>null</code> value.
*/
protected DocIdSet docIdSetToCache(DocIdSet docIdSet, LeafReader reader) throws IOException {
if (docIdSet == null) {
// this is better than returning null, as the nonnull result can be cached
return EMPTY;
} else if (docIdSet.isCacheable()) {
if (docIdSet == null || docIdSet.isCacheable()) {
return docIdSet;
} else {
final DocIdSetIterator it = docIdSet.iterator();
// null is allowed to be returned by iterator(),
// in this case we wrap with the sentinel set,
// which is cacheable.
if (it == null) {
return EMPTY;
return null;
} else {
return cacheImpl(it, reader);
}
@ -106,6 +100,10 @@ public class CachingWrapperFilter extends Filter implements Accountable {
} else {
missCount++;
docIdSet = docIdSetToCache(filter.getDocIdSet(context, null), reader);
if (docIdSet == null) {
// We use EMPTY as a sentinel for the empty set, which is cacheable
docIdSet = EMPTY;
}
assert docIdSet.isCacheable();
cache.put(key, docIdSet);
}

View File

@ -25,8 +25,8 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.DocIdSetBuilder;
/**
* A wrapper for {@link MultiTermQuery}, that exposes its
@ -100,7 +100,7 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filte
final TermsEnum termsEnum = query.getTermsEnum(terms);
assert termsEnum != null;
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
DocsEnum docs = null;
while (termsEnum.next() != null) {
docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
@ -75,4 +77,113 @@ public class BitDocIdSet extends DocIdSet {
return getClass().getSimpleName() + "(set=" + set + ",cost=" + cost + ")";
}
/**
 * A builder of {@link DocIdSet}s that supports random access.
 * Starts with a sparse representation and upgrades to a dense
 * {@link FixedBitSet} once the estimated cardinality crosses a threshold.
 * @lucene.internal
 */
public static final class Builder {

  private final int maxDoc;
  // cardinality threshold (about maxDoc/1024) over which we switch to a dense set
  private final int threshold;
  private SparseFixedBitSet sparseSet;
  private FixedBitSet denseSet;

  // we cache an upper bound of the cost of this builder so that we don't have
  // to re-compute approximateCardinality on the sparse set every time
  private long costUpperBound;

  /** Create a new instance that can hold <code>maxDoc</code> documents and is optionally <code>full</code>. */
  public Builder(int maxDoc, boolean full) {
    this.maxDoc = maxDoc;
    threshold = maxDoc >>> 10;
    if (full) {
      denseSet = new FixedBitSet(maxDoc);
      denseSet.set(0, maxDoc);
    }
  }

  /** Create a new empty instance. */
  public Builder(int maxDoc) {
    this(maxDoc, false);
  }

  /**
   * Add the content of the provided {@link DocIdSetIterator} to this builder.
   */
  public void or(DocIdSetIterator it) throws IOException {
    if (denseSet != null) {
      // already upgraded
      denseSet.or(it);
      return;
    }

    final long itCost = it.cost();
    costUpperBound += itCost;
    if (costUpperBound >= threshold) {
      // the cheap running bound crossed the threshold: recompute a tighter
      // bound from the real cardinality before committing to an upgrade
      costUpperBound = (sparseSet == null ? 0 : sparseSet.approximateCardinality()) + itCost;

      if (costUpperBound >= threshold) {
        // upgrade to the dense representation and fold in the sparse bits
        denseSet = new FixedBitSet(maxDoc);
        denseSet.or(it);
        if (sparseSet != null) {
          denseSet.or(new BitSetIterator(sparseSet, 0L));
        }
        return;
      }
    }

    // we are still sparse
    if (sparseSet == null) {
      sparseSet = new SparseFixedBitSet(maxDoc);
    }
    sparseSet.or(it);
  }

  /**
   * Removes from this builder documents that are not contained in <code>it</code>.
   */
  public void and(DocIdSetIterator it) throws IOException {
    if (denseSet != null) {
      denseSet.and(it);
    } else if (sparseSet != null) {
      sparseSet.and(it);
    }
  }

  /**
   * Removes from this builder documents that are contained in <code>it</code>.
   */
  public void andNot(DocIdSetIterator it) throws IOException {
    if (denseSet != null) {
      denseSet.andNot(it);
    } else if (sparseSet != null) {
      // BUGFIX: the original second branch re-tested denseSet != null and called
      // denseSet.andNot, which was dead code — sparse bits were never cleared
      sparseSet.andNot(it);
    }
  }

  /**
   * Build a {@link DocIdSet} that contains all doc ids that have been added.
   * This method may return <tt>null</tt> if no documents were added to this
   * builder.
   * NOTE: this is a destructive operation, the builder should not be used
   * anymore after this method has been called.
   */
  public BitDocIdSet build() {
    final BitDocIdSet result;
    if (denseSet != null) {
      result = new BitDocIdSet(denseSet);
    } else if (sparseSet != null) {
      result = new BitDocIdSet(sparseSet);
    } else {
      result = null;
    }
    // release the underlying bit sets so this builder cannot be reused
    denseSet = null;
    sparseSet = null;
    costUpperBound = 0;
    return result;
  }
}
}

View File

@ -54,10 +54,15 @@ public abstract class BitSet implements MutableBits, Accountable {
return cardinality();
}
/** Returns the index of the last set bit before or on the index specified.
* -1 is returned if there are no more set bits.
*/
public abstract int prevSetBit(int index);
/** Returns the index of the first set bit starting at the index specified.
* {@link DocIdSetIterator#NO_MORE_DOCS} is returned if there are no more set bits.
*/
public abstract int nextSetBit(int i);
public abstract int nextSetBit(int index);
/** Assert that the current doc is -1. */
protected final void assertUnpositioned(DocIdSetIterator iter) {

View File

@ -1,132 +0,0 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
/**
 * A builder of {@link DocIdSet}s that supports random access.
 * Starts with a sparse representation and upgrades to a dense
 * {@link FixedBitSet} once the estimated cardinality crosses a threshold.
 * @lucene.internal
 */
public final class DocIdSetBuilder {

  private final int maxDoc;
  // cardinality threshold (about maxDoc/1024) over which we switch to a dense set
  private final int threshold;
  private SparseFixedBitSet sparseSet;
  private FixedBitSet denseSet;

  // we cache an upper bound of the cost of this builder so that we don't have
  // to re-compute approximateCardinality on the sparse set every time
  private long costUpperBound;

  /** Create a new instance that can hold <code>maxDoc</code> documents and is optionally <code>full</code>. */
  public DocIdSetBuilder(int maxDoc, boolean full) {
    this.maxDoc = maxDoc;
    threshold = maxDoc >>> 10;
    if (full) {
      denseSet = new FixedBitSet(maxDoc);
      denseSet.set(0, maxDoc);
    }
  }

  /** Create a new empty instance. */
  public DocIdSetBuilder(int maxDoc) {
    this(maxDoc, false);
  }

  /**
   * Add the content of the provided {@link DocIdSetIterator} to this builder.
   */
  public void or(DocIdSetIterator it) throws IOException {
    if (denseSet != null) {
      // already upgraded
      denseSet.or(it);
      return;
    }

    final long itCost = it.cost();
    costUpperBound += itCost;
    if (costUpperBound >= threshold) {
      // the cheap running bound crossed the threshold: recompute a tighter
      // bound from the real cardinality before committing to an upgrade
      costUpperBound = (sparseSet == null ? 0 : sparseSet.approximateCardinality()) + itCost;

      if (costUpperBound >= threshold) {
        // upgrade to the dense representation and fold in the sparse bits
        denseSet = new FixedBitSet(maxDoc);
        denseSet.or(it);
        if (sparseSet != null) {
          denseSet.or(new BitSetIterator(sparseSet, 0L));
        }
        return;
      }
    }

    // we are still sparse
    if (sparseSet == null) {
      sparseSet = new SparseFixedBitSet(maxDoc);
    }
    sparseSet.or(it);
  }

  /**
   * Removes from this builder documents that are not contained in <code>it</code>.
   */
  public void and(DocIdSetIterator it) throws IOException {
    if (denseSet != null) {
      denseSet.and(it);
    } else if (sparseSet != null) {
      sparseSet.and(it);
    }
  }

  /**
   * Removes from this builder documents that are contained in <code>it</code>.
   */
  public void andNot(DocIdSetIterator it) throws IOException {
    if (denseSet != null) {
      denseSet.andNot(it);
    } else if (sparseSet != null) {
      // BUGFIX: the original second branch re-tested denseSet != null and called
      // denseSet.andNot, which was dead code — sparse bits were never cleared
      sparseSet.andNot(it);
    }
  }

  /**
   * Build a {@link DocIdSet} that contains all doc ids that have been added.
   * This method may return <tt>null</tt> if no documents were added to this
   * builder.
   * NOTE: this is a destructive operation, the builder should not be used
   * anymore after this method has been called.
   */
  public DocIdSet build() {
    final DocIdSet result;
    if (denseSet != null) {
      result = new BitDocIdSet(denseSet);
    } else if (sparseSet != null) {
      result = new BitDocIdSet(sparseSet);
    } else {
      result = null;
    }
    // release the underlying bit sets so this builder cannot be reused
    denseSet = null;
    sparseSet = null;
    costUpperBound = 0;
    return result;
  }
}

View File

@ -203,9 +203,7 @@ public final class FixedBitSet extends BitSet implements MutableBits, Accountabl
return DocIdSetIterator.NO_MORE_DOCS;
}
/** Returns the index of the last set bit before or on the index specified.
* -1 is returned if there are no more set bits.
*/
@Override
public int prevSetBit(int index) {
assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
int i = index >> 6;

View File

@ -290,59 +290,70 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
assert i < length;
final int i4096 = i >>> 12;
final long index = indices[i4096];
final long[] bitArray = this.bits[i4096];
int i64 = i >>> 6;
long indexBits = index >>> i64;
if (indexBits == 0) {
// if the index is zero, it means that there is no value in the
// current block, so return the first document of the next block
// or
// if neither the i64-th bit or any other bit on its left is set then
// it means that there are no more documents in this block, go to the
// next one
return firstDoc(i4096 + 1);
} else {
// We know we still have some 64-bits blocks that have bits set, let's
// advance to the next one by skipping trailing zeros of the index
int i1 = i & 0x3F;
int trailingZeros = Long.numberOfTrailingZeros(indexBits);
if (trailingZeros != 0) {
// no bits in the current long, go to the next one
i64 += trailingZeros;
i1 = 0;
int o = Long.bitCount(index & ((1L << i64) - 1));
if ((index & (1L << i64)) != 0) {
// There is at least one bit that is set in the current long, check if
// one of them is after i
final long bits = bitArray[o] >>> i; // shifts are mod 64
if (bits != 0) {
return i + Long.numberOfTrailingZeros(bits);
}
// So now we are on a sub 64-bits block that has values
assert (index & (1L << i64)) != 0;
// we count the number of ones on the left of i64 to figure out the
// index of the long that contains the bits we are interested in
int longIndex = Long.bitCount(index & ((1L << i64) - 1)); // shifts are mod 64 in java
final long[] longArray = bits[i4096];
assert longArray[longIndex] != 0;
long bits = longArray[longIndex] >>> i1; // shifts are mod 64 in java
if (bits != 0L) {
// hurray, we found some non-zero bits, this gives us the next document:
i1 += Long.numberOfTrailingZeros(bits);
return (i4096 << 12) | ((i64 & 0x3F) << 6) | i1;
}
// otherwise it means that although we were on a sub-64 block that contains
// documents, all documents of this sub-block have already been consumed
// so two cases:
indexBits = index >>> i64 >>> 1; // we don't shift by (i64+1) otherwise we might shift by a multiple of 64 which is a no-op
if (indexBits == 0) {
// Case 1: this was the last long of the block of 4096 bits, then go
// to the next block
return firstDoc(i4096 + 1);
}
// Case 2: go to the next sub 64-bits block in the current block of 4096 bits
// by skipping trailing zeros of the index
trailingZeros = Long.numberOfTrailingZeros(indexBits);
i64 += 1 + trailingZeros;
bits = longArray[longIndex + 1];
assert bits != 0;
i1 = Long.numberOfTrailingZeros(bits);
return (i4096 << 12) | ((i64 & 0x3F) << 6) | i1;
o += 1;
}
final long indexBits = index >>> i64 >>> 1;
if (indexBits == 0) {
// no more bits are set in the current block of 4096 bits, go to the next one
return firstDoc(i4096 + 1);
}
// there are still set bits
i64 += 1 + Long.numberOfTrailingZeros(indexBits);
final long bits = bitArray[o];
return (i64 << 6) | Long.numberOfTrailingZeros(bits);
}
/** Return the last document that occurs on or before the provided block index.
 *  Returns -1 if no bit is set at or before that block. */
private int lastDoc(int i4096) {
  long index;
  // walk the blocks of 4096 bits backwards until a non-empty one is found
  while (i4096 >= 0) {
    index = indices[i4096];
    if (index != 0) {
      // highest 64-bit word of this block that contains a set bit
      final int i64 = 63 - Long.numberOfLeadingZeros(index);
      // words are stored compactly: with bitCount(index) words allocated,
      // the word for the highest set index bit is the last one
      final long bits = this.bits[i4096][Long.bitCount(index) - 1];
      // compose block | word | highest set bit of that word
      return (i4096 << 12) | (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
    }
    i4096 -= 1;
  }
  // no set bit at or before the requested block
  return -1;
}
@Override
public int prevSetBit(int i) {
  assert i >= 0;
  // index of the 4096-bit block that contains bit i
  final int i4096 = i >>> 12;
  final long index = indices[i4096];
  final long[] bitArray = this.bits[i4096];
  // NOTE: i64 is the GLOBAL 64-bit word index here; it is reassigned to a
  // block-local (0..63) word index further below
  int i64 = i >>> 6;
  // index bits of the words strictly before the word containing i
  // (shift counts are mod 64 in Java, so i64 is effectively i64 & 0x3F)
  final long indexBits = index & ((1L << i64) - 1);
  // number of allocated words before the word containing i
  final int o = Long.bitCount(indexBits);
  if ((index & (1L << i64)) != 0) {
    // There is at least one bit that is set in the same long, check if there
    // is one bit that is set that is lower than i
    // mask keeps bits 0..(i mod 64) inclusive; the double shift avoids the
    // no-op shift that (1L << (i+1)) would be when i mod 64 == 63
    final long bits = bitArray[o] & ((1L << i << 1) - 1);
    if (bits != 0) {
      // i64 is still the global word index, so no (i4096 << 12) term is needed
      return (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
    }
  }
  if (indexBits == 0) {
    // no more bits are set in this block, go find the last bit in the
    // previous block
    return lastDoc(i4096 - 1);
  }
  // go to the previous long
  // i64 becomes the block-local index of the nearest earlier non-empty word
  i64 = 63 - Long.numberOfLeadingZeros(indexBits);
  final long bits = bitArray[o - 1];
  return (i4096 << 12) | (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
}
/** Return the long bits at the given <code>i64</code> index. */

View File

@ -25,7 +25,7 @@ import org.apache.lucene.search.DocIdSetIterator;
public class TestDocIdSetBuilder extends LuceneTestCase {
public void testEmpty() throws IOException {
assertEquals(null, new DocIdSetBuilder(1 + random().nextInt(1000)).build());
assertEquals(null, new BitDocIdSet.Builder(1 + random().nextInt(1000)).build());
}
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
@ -47,7 +47,7 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
public void testFull() throws IOException {
final int maxDoc = 1 + random().nextInt(1000);
DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc, true);
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc, true);
DocIdSet set = builder.build();
DocIdSetIterator it = set.iterator();
for (int i = 0; i < maxDoc; ++i) {
@ -57,7 +57,7 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
public void testSparse() throws IOException {
final int maxDoc = 1000000 + random().nextInt(1000000);
DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
final int numIterators = 1 + random().nextInt(10);
final FixedBitSet ref = new FixedBitSet(maxDoc);
for (int i = 0; i < numIterators; ++i) {
@ -76,7 +76,7 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
public void testDense() throws IOException {
final int maxDoc = 1000000 + random().nextInt(1000000);
DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
final int numIterators = 1 + random().nextInt(10);
final FixedBitSet ref = new FixedBitSet(maxDoc);
if (random().nextBoolean()) {

View File

@ -0,0 +1,96 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitDocIdSet;
/**
 * A filter wrapper that converts the produced doc id sets into
 * {@link BitDocIdSet}s if necessary and caches them.
 */
public class BitDocIdSetCachingWrapperFilter extends BitDocIdSetFilter implements Accountable {

  private final CachingWrapperFilter filter;

  /** Sole constructor. */
  public BitDocIdSetCachingWrapperFilter(Filter filter) {
    super();
    this.filter = new CachingWrapperFilter(filter) {
      @Override
      protected BitDocIdSet docIdSetToCache(DocIdSet docIdSet, LeafReader reader) throws IOException {
        // nothing to do: either no matches, or already the representation we cache
        if (docIdSet == null || docIdSet instanceof BitDocIdSet) {
          return (BitDocIdSet) docIdSet;
        }
        // this is different from CachingWrapperFilter: even a cacheable
        // DocIdSet is converted to a bit set, since all cached filters are
        // required to be BitDocIdSets
        final DocIdSetIterator disi = docIdSet.iterator();
        if (disi == null) {
          return null;
        }
        final BitDocIdSet.Builder builder = new BitDocIdSet.Builder(reader.maxDoc());
        builder.or(disi);
        return builder.build();
      }
    };
  }

  @Override
  public BitDocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
    // the delegate's docIdSetToCache guarantees a BitDocIdSet (or null)
    return (BitDocIdSet) filter.getDocIdSet(context, null);
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ filter.hashCode();
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof BitDocIdSetCachingWrapperFilter)) {
      return false;
    }
    final BitDocIdSetCachingWrapperFilter other = (BitDocIdSetCachingWrapperFilter) obj;
    return filter.equals(other.filter);
  }

  @Override
  public String toString() {
    return filter.toString();
  }

  @Override
  public long ramBytesUsed() {
    return filter.ramBytesUsed();
  }

  @Override
  public Iterable<? extends Accountable> getChildResources() {
    return filter.getChildResources();
  }
}

View File

@ -0,0 +1,48 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
/**
 * A {@link Filter} that produces {@link BitDocIdSet}s.
 */
public abstract class BitDocIdSetFilter extends Filter {

  /** Sole constructor, typically called from sub-classes. */
  protected BitDocIdSetFilter() {}

  /**
   * Same as {@link #getDocIdSet(LeafReaderContext, Bits)} but does not take
   * acceptDocs into account and guarantees to return a {@link BitDocIdSet}.
   */
  public abstract BitDocIdSet getDocIdSet(LeafReaderContext context) throws IOException;

  @Override
  public final DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
    // delegate to the bit-set-producing method, then apply acceptDocs on top
    final BitDocIdSet set = getDocIdSet(context);
    return BitsFilteredDocIdSet.wrap(set, acceptDocs);
  }
}

View File

@ -1,63 +0,0 @@
package org.apache.lucene.search.join;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.search.DocIdSet.EMPTY;
import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;
/** A {@link CachingWrapperFilter} that caches sets using a {@link FixedBitSet},
 * as required for joins. */
public final class FixedBitSetCachingWrapperFilter extends CachingWrapperFilter {

  /** Sole constructor, see {@link CachingWrapperFilter#CachingWrapperFilter(Filter)}. */
  public FixedBitSetCachingWrapperFilter(Filter filter) {
    super(filter);
  }

  @Override
  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, LeafReader reader)
      throws IOException {
    if (docIdSet == null) {
      // cache the (cacheable) sentinel rather than null
      return EMPTY;
    }
    if (docIdSet instanceof BitDocIdSet) {
      // this is different from CachingWrapperFilter: even when the DocIdSet is
      // cacheable, it must be backed by a FixedBitSet, since all cached
      // filters are required to be FixedBitSets
      return docIdSet;
    }
    final DocIdSetIterator disi = docIdSet.iterator();
    if (disi == null) {
      return EMPTY;
    }
    // materialize the iterator into a FixedBitSet
    final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
    bits.or(disi);
    return new BitDocIdSet(bits);
  }
}

View File

@ -25,16 +25,15 @@ import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
/**
* Just like {@link ToParentBlockJoinQuery}, except this
@ -52,7 +51,7 @@ public class ToChildBlockJoinQuery extends Query {
* when the parent query incorrectly returns child docs. */
static final String INVALID_QUERY_MESSAGE = "Parent query yields document which is not matched by parents filter, docID=";
private final Filter parentsFilter;
private final BitDocIdSetFilter parentsFilter;
private final Query parentQuery;
// If we are rewritten, this is the original parentQuery we
@ -67,12 +66,10 @@ public class ToChildBlockJoinQuery extends Query {
* Create a ToChildBlockJoinQuery.
*
* @param parentQuery Query that matches parent documents
* @param parentsFilter Filter (must produce FixedBitSet
* per-segment, like {@link FixedBitSetCachingWrapperFilter})
* identifying the parent documents.
* @param parentsFilter Filter identifying the parent documents.
* @param doScores true if parent scores should be calculated
*/
public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, boolean doScores) {
public ToChildBlockJoinQuery(Query parentQuery, BitDocIdSetFilter parentsFilter, boolean doScores) {
super();
this.origParentQuery = parentQuery;
this.parentQuery = parentQuery;
@ -80,7 +77,7 @@ public class ToChildBlockJoinQuery extends Query {
this.doScores = doScores;
}
private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, Filter parentsFilter, boolean doScores) {
private ToChildBlockJoinQuery(Query origParentQuery, Query parentQuery, BitDocIdSetFilter parentsFilter, boolean doScores) {
super();
this.origParentQuery = origParentQuery;
this.parentQuery = parentQuery;
@ -96,10 +93,10 @@ public class ToChildBlockJoinQuery extends Query {
private static class ToChildBlockJoinWeight extends Weight {
private final Query joinQuery;
private final Weight parentWeight;
private final Filter parentsFilter;
private final BitDocIdSetFilter parentsFilter;
private final boolean doScores;
public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, Filter parentsFilter, boolean doScores) {
public ToChildBlockJoinWeight(Query joinQuery, Weight parentWeight, BitDocIdSetFilter parentsFilter, boolean doScores) {
super();
this.joinQuery = joinQuery;
this.parentWeight = parentWeight;
@ -134,22 +131,15 @@ public class ToChildBlockJoinQuery extends Query {
return null;
}
// NOTE: we cannot pass acceptDocs here because this
// will (most likely, justifiably) cause the filter to
// not return a FixedBitSet but rather a
// BitsFilteredDocIdSet. Instead, we filter by
// acceptDocs when we score:
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
// NOTE: this doesn't take acceptDocs into account, the responsibility
// to not match deleted docs is on the scorer
final BitDocIdSet parents = parentsFilter.getDocIdSet(readerContext);
if (parents == null) {
// No matches
// No parents
return null;
}
if (!(parents.bits() instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents.bits());
}
return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents.bits(), doScores, acceptDocs);
return new ToChildBlockJoinScorer(this, parentScorer, parents.bits(), doScores, acceptDocs);
}
@Override
@ -167,7 +157,7 @@ public class ToChildBlockJoinQuery extends Query {
static class ToChildBlockJoinScorer extends Scorer {
private final Scorer parentScorer;
private final FixedBitSet parentBits;
private final BitSet parentBits;
private final boolean doScores;
private final Bits acceptDocs;
@ -177,7 +167,7 @@ public class ToChildBlockJoinQuery extends Query {
private int childDoc = -1;
private int parentDoc;
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, boolean doScores, Bits acceptDocs) {
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, BitSet parentBits, boolean doScores, Bits acceptDocs) {
super(weight);
this.doScores = doScores;
this.parentBits = parentBits;

View File

@ -20,11 +20,10 @@ package org.apache.lucene.search.join;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BitSet;
/**
* A field comparator that allows parent documents to be sorted by fields
@ -34,15 +33,15 @@ import org.apache.lucene.util.FixedBitSet;
*/
public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<Object> {
private final Filter parentFilter;
private final Filter childFilter;
private final BitDocIdSetFilter parentFilter;
private final BitDocIdSetFilter childFilter;
final int spareSlot;
FieldComparator<Object> wrappedComparator;
FixedBitSet parentDocuments;
FixedBitSet childDocuments;
BitSet parentDocuments;
BitSet childDocuments;
ToParentBlockJoinFieldComparator(FieldComparator<Object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) {
ToParentBlockJoinFieldComparator(FieldComparator<Object> wrappedComparator, BitDocIdSetFilter parentFilter, BitDocIdSetFilter childFilter, int spareSlot) {
this.wrappedComparator = wrappedComparator;
this.parentFilter = parentFilter;
this.childFilter = childFilter;
@ -66,50 +65,22 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
@Override
public FieldComparator<Object> setNextReader(LeafReaderContext context) throws IOException {
DocIdSet innerDocuments = childFilter.getDocIdSet(context, null);
if (isEmpty(innerDocuments)) {
this.childDocuments = null;
} else if (innerDocuments.bits() instanceof FixedBitSet) {
this.childDocuments = (FixedBitSet) innerDocuments.bits();
BitDocIdSet children = childFilter.getDocIdSet(context);
if (children == null) {
childDocuments = null;
} else {
DocIdSetIterator iterator = innerDocuments.iterator();
if (iterator != null) {
this.childDocuments = toFixedBitSet(iterator, context.reader().maxDoc());
} else {
childDocuments = null;
}
childDocuments = children.bits();
}
DocIdSet rootDocuments = parentFilter.getDocIdSet(context, null);
if (isEmpty(rootDocuments)) {
this.parentDocuments = null;
} else if (rootDocuments.bits() instanceof FixedBitSet) {
this.parentDocuments = (FixedBitSet) rootDocuments.bits();
BitDocIdSet parents = parentFilter.getDocIdSet(context);
if (parents == null) {
parentDocuments = null;
} else {
DocIdSetIterator iterator = rootDocuments.iterator();
if (iterator != null) {
this.parentDocuments = toFixedBitSet(iterator, context.reader().maxDoc());
} else {
this.parentDocuments = null;
}
parentDocuments = parents.bits();
}
wrappedComparator = wrappedComparator.setNextReader(context);
return this;
}
private static boolean isEmpty(DocIdSet set) {
return set == null;
}
private static FixedBitSet toFixedBitSet(DocIdSetIterator iterator, int numBits) throws IOException {
FixedBitSet set = new FixedBitSet(numBits);
int doc;
while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
set.set(doc);
}
return set;
}
@Override
public Object value(int slot) {
return wrappedComparator.value(slot);
@ -125,12 +96,12 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
* Create ToParentBlockJoinFieldComparator.Lowest
*
* @param wrappedComparator The {@link FieldComparator} on the child / nested level.
* @param parentFilter Filter (must produce FixedBitSet per-segment) that identifies the parent documents.
* @param parentFilter Filter that identifies the parent documents.
* @param childFilter Filter that defines which child / nested documents participates in sorting.
* @param spareSlot The extra slot inside the wrapped comparator that is used to compare which nested document
* inside the parent document scope is most competitive.
*/
public Lowest(FieldComparator<Object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) {
public Lowest(FieldComparator<Object> wrappedComparator, BitDocIdSetFilter parentFilter, BitDocIdSetFilter childFilter, int spareSlot) {
super(wrappedComparator, parentFilter, childFilter, spareSlot);
}
@ -244,12 +215,12 @@ public abstract class ToParentBlockJoinFieldComparator extends FieldComparator<O
* Create ToParentBlockJoinFieldComparator.Highest
*
* @param wrappedComparator The {@link FieldComparator} on the child / nested level.
* @param parentFilter Filter (must produce FixedBitSet per-segment) that identifies the parent documents.
* @param parentFilter Filter that identifies the parent documents.
* @param childFilter Filter that defines which child / nested documents participates in sorting.
* @param spareSlot The extra slot inside the wrapped comparator that is used to compare which nested document
* inside the parent document scope is most competitive.
*/
public Highest(FieldComparator<Object> wrappedComparator, Filter parentFilter, Filter childFilter, int spareSlot) {
public Highest(FieldComparator<Object> wrappedComparator, BitDocIdSetFilter parentFilter, BitDocIdSetFilter childFilter, int spareSlot) {
super(wrappedComparator, parentFilter, childFilter, spareSlot);
}

View File

@ -28,18 +28,17 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
/**
* This query requires that you index
@ -49,7 +48,7 @@ import org.apache.lucene.util.FixedBitSet;
* child documents must appear first, ending with the parent
* document. At search time you provide a Filter
* identifying the parents, however this Filter must provide
* an {@link FixedBitSet} per sub-reader.
* a {@link BitSet} per sub-reader.
*
* <p>Once the block index is built, use this query to wrap
* any sub-query matching only child docs and join matches in that
@ -85,7 +84,7 @@ import org.apache.lucene.util.FixedBitSet;
*/
public class ToParentBlockJoinQuery extends Query {
private final Filter parentsFilter;
private final BitDocIdSetFilter parentsFilter;
private final Query childQuery;
// If we are rewritten, this is the original childQuery we
@ -99,13 +98,11 @@ public class ToParentBlockJoinQuery extends Query {
/** Create a ToParentBlockJoinQuery.
*
* @param childQuery Query matching child documents.
* @param parentsFilter Filter (must produce FixedBitSet
* per-segment, like {@link FixedBitSetCachingWrapperFilter})
* identifying the parent documents.
* @param parentsFilter Filter identifying the parent documents.
* @param scoreMode How to aggregate multiple child scores
* into a single parent score.
**/
public ToParentBlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
public ToParentBlockJoinQuery(Query childQuery, BitDocIdSetFilter parentsFilter, ScoreMode scoreMode) {
super();
this.origChildQuery = childQuery;
this.childQuery = childQuery;
@ -113,7 +110,7 @@ public class ToParentBlockJoinQuery extends Query {
this.scoreMode = scoreMode;
}
private ToParentBlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
private ToParentBlockJoinQuery(Query origChildQuery, Query childQuery, BitDocIdSetFilter parentsFilter, ScoreMode scoreMode) {
super();
this.origChildQuery = origChildQuery;
this.childQuery = childQuery;
@ -129,10 +126,10 @@ public class ToParentBlockJoinQuery extends Query {
private static class BlockJoinWeight extends Weight {
private final Query joinQuery;
private final Weight childWeight;
private final Filter parentsFilter;
private final BitDocIdSetFilter parentsFilter;
private final ScoreMode scoreMode;
public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) {
public BlockJoinWeight(Query joinQuery, Weight childWeight, BitDocIdSetFilter parentsFilter, ScoreMode scoreMode) {
super();
this.joinQuery = joinQuery;
this.childWeight = childWeight;
@ -172,22 +169,16 @@ public class ToParentBlockJoinQuery extends Query {
return null;
}
// NOTE: we cannot pass acceptDocs here because this
// will (most likely, justifiably) cause the filter to
// not return a FixedBitSet but rather a
// BitsFilteredDocIdSet. Instead, we filter by
// acceptDocs when we score:
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
// NOTE: this does not take accept docs into account, the responsibility
// to not match deleted docs is on the scorer
final BitDocIdSet parents = parentsFilter.getDocIdSet(readerContext);
if (parents == null) {
// No matches
return null;
}
if (!(parents.bits() instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents.bits());
}
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents.bits(), firstChildDoc, scoreMode, acceptDocs);
return new BlockJoinScorer(this, childScorer, parents.bits(), firstChildDoc, scoreMode, acceptDocs);
}
@Override
@ -207,7 +198,7 @@ public class ToParentBlockJoinQuery extends Query {
static class BlockJoinScorer extends Scorer {
private final Scorer childScorer;
private final FixedBitSet parentBits;
private final BitSet parentBits;
private final ScoreMode scoreMode;
private final Bits acceptDocs;
private int parentDoc = -1;
@ -219,7 +210,7 @@ public class ToParentBlockJoinQuery extends Query {
private float[] pendingChildScores;
private int childDocUpto;
public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) {
public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Bits acceptDocs) {
super(weight);
//System.out.println("Q.init firstChildDoc=" + firstChildDoc);
this.parentBits = parentBits;

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search.join;
*/
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.SortField;
import java.io.IOException;
@ -32,8 +31,8 @@ import java.io.IOException;
public class ToParentBlockJoinSortField extends SortField {
private final boolean order;
private final Filter parentFilter;
private final Filter childFilter;
private final BitDocIdSetFilter parentFilter;
private final BitDocIdSetFilter childFilter;
/**
* Create ToParentBlockJoinSortField. The parent document ordering is based on child document ordering (reverse).
@ -44,7 +43,7 @@ public class ToParentBlockJoinSortField extends SortField {
* @param parentFilter Filter that identifies the parent documents.
* @param childFilter Filter that defines which child documents participates in sorting.
*/
public ToParentBlockJoinSortField(String field, Type type, boolean reverse, Filter parentFilter, Filter childFilter) {
public ToParentBlockJoinSortField(String field, Type type, boolean reverse, BitDocIdSetFilter parentFilter, BitDocIdSetFilter childFilter) {
super(field, type, reverse);
this.order = reverse;
this.parentFilter = parentFilter;
@ -61,7 +60,7 @@ public class ToParentBlockJoinSortField extends SortField {
* @param parentFilter Filter that identifies the parent documents.
* @param childFilter Filter that defines which child documents participates in sorting.
*/
public ToParentBlockJoinSortField(String field, Type type, boolean reverse, boolean order, Filter parentFilter, Filter childFilter) {
public ToParentBlockJoinSortField(String field, Type type, boolean reverse, boolean order, BitDocIdSetFilter parentFilter, BitDocIdSetFilter childFilter) {
super(field, type, reverse);
this.order = order;
this.parentFilter = parentFilter;

View File

@ -93,7 +93,7 @@ public class TestBlockJoin extends LuceneTestCase {
w.close();
assertTrue(r.leaves().size() > 1);
IndexSearcher s = new IndexSearcher(r);
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BooleanQuery childQuery = new BooleanQuery();
childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
@ -145,7 +145,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
@ -236,7 +236,7 @@ public class TestBlockJoin extends LuceneTestCase {
// iterations:
qc.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
int h1 = qc.hashCode();
Query qw1 = qc.rewrite(r);
@ -297,7 +297,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
@ -317,7 +317,7 @@ public class TestBlockJoin extends LuceneTestCase {
assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);
// not found test
assertEquals("noone live there", 0, s.search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
assertEquals("noone live there", 0, s.search(childJoinQuery, new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);
// apply the UK filter by the searcher
@ -355,11 +355,11 @@ public class TestBlockJoin extends LuceneTestCase {
}
}
private StoredDocument getParentDoc(IndexReader reader, Filter parents, int childDocID) throws IOException {
private StoredDocument getParentDoc(IndexReader reader, BitDocIdSetFilter parents, int childDocID) throws IOException {
final List<LeafReaderContext> leaves = reader.leaves();
final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
final LeafReaderContext leaf = leaves.get(subIndex);
final FixedBitSet bits = (FixedBitSet) parents.getDocIdSet(leaf, null).bits();
final BitSet bits = (BitSet) parents.getDocIdSet(leaf).bits();
return leaf.reader().document(bits.nextSetBit(childDocID - leaf.docBase));
}
@ -370,7 +370,7 @@ public class TestBlockJoin extends LuceneTestCase {
w.close();
IndexSearcher s = newSearcher(r);
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new QueryWrapperFilter(new MatchAllDocsQuery()), ScoreMode.Avg);
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new MatchAllDocsQuery())), ScoreMode.Avg);
QueryUtils.check(random(), q, s);
s.search(q, 10);
BooleanQuery bq = new BooleanQuery();
@ -409,7 +409,7 @@ public class TestBlockJoin extends LuceneTestCase {
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(
NumericRangeQuery.newIntRange("year", 1990, 2010, true, true),
new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))),
new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))),
ScoreMode.Total
);
@ -635,7 +635,7 @@ public class TestBlockJoin extends LuceneTestCase {
final IndexSearcher joinS = new IndexSearcher(joinR);
final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
final BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
final int iters = 200*RANDOM_MULTIPLIER;
@ -902,7 +902,7 @@ public class TestBlockJoin extends LuceneTestCase {
childJoinQuery2 = parentJoinQuery2;
final Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
childJoinFilter2 = random().nextBoolean()
? new FixedBitSetCachingWrapperFilter(f): f;
? new BitDocIdSetCachingWrapperFilter(f): f;
} else {
childJoinFilter2 = null;
// AND child field w/ parent query:
@ -923,7 +923,7 @@ public class TestBlockJoin extends LuceneTestCase {
childQuery2 = parentQuery2;
final Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
childFilter2 = random().nextBoolean()
? new FixedBitSetCachingWrapperFilter(f): f;
? new BitDocIdSetCachingWrapperFilter(f): f;
} else {
childFilter2 = null;
final BooleanQuery bq2 = new BooleanQuery();
@ -1062,7 +1062,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childJobQuery = new BooleanQuery();
@ -1142,7 +1142,7 @@ public class TestBlockJoin extends LuceneTestCase {
w.close();
IndexSearcher s = newSearcher(r);
Query tq = new TermQuery(new Term("child", "1"));
Filter parentFilter = new FixedBitSetCachingWrapperFilter(
BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
new QueryWrapperFilter(
new TermQuery(new Term("parent", "1"))));
@ -1176,7 +1176,7 @@ public class TestBlockJoin extends LuceneTestCase {
w.close();
IndexSearcher s = newSearcher(r);
Query tq = new TermQuery(new Term("child", "2"));
Filter parentFilter = new FixedBitSetCachingWrapperFilter(
BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
new QueryWrapperFilter(
new TermQuery(new Term("isparent", "yes"))));
@ -1210,7 +1210,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexSearcher s = new IndexSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
@ -1313,7 +1313,7 @@ public class TestBlockJoin extends LuceneTestCase {
w.close();
Query childQuery = new TermQuery(new Term("childText", "text"));
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery parentQuery = new BooleanQuery();
parentQuery.add(childJoinQuery, Occur.SHOULD);
@ -1378,7 +1378,7 @@ public class TestBlockJoin extends LuceneTestCase {
// never matches:
Query childQuery = new TermQuery(new Term("childText", "bogus"));
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery parentQuery = new BooleanQuery();
parentQuery.add(childJoinQuery, Occur.SHOULD);
@ -1443,7 +1443,7 @@ public class TestBlockJoin extends LuceneTestCase {
// illegally matches parent:
Query childQuery = new TermQuery(new Term("parentText", "text"));
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery parentQuery = new BooleanQuery();
parentQuery.add(childJoinQuery, Occur.SHOULD);
@ -1495,7 +1495,7 @@ public class TestBlockJoin extends LuceneTestCase {
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
BitDocIdSetFilter parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
Query parentQuery = new TermQuery(new Term("parent", "2"));

View File

@ -231,17 +231,17 @@ public class TestBlockJoinSorting extends LuceneTestCase {
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w.w, false));
w.close();
Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("__type", "parent"))));
BitDocIdSetFilter childFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new PrefixQuery(new Term("field2"))));
ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(
new FilteredQuery(new MatchAllDocsQuery(), childFilter),
new FixedBitSetCachingWrapperFilter(parentFilter),
new BitDocIdSetCachingWrapperFilter(parentFilter),
ScoreMode.None
);
// Sort by field ascending, order first
ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField(
"field2", SortField.Type.STRING, false, wrap(parentFilter), wrap(childFilter)
"field2", SortField.Type.STRING, false, parentFilter, childFilter
);
Sort sort = new Sort(sortField);
TopFieldDocs topDocs = searcher.search(query, 5, sort);
@ -260,7 +260,7 @@ public class TestBlockJoinSorting extends LuceneTestCase {
// Sort by field ascending, order last
sortField = new ToParentBlockJoinSortField(
"field2", SortField.Type.STRING, false, true, wrap(parentFilter), wrap(childFilter)
"field2", SortField.Type.STRING, false, true, parentFilter, childFilter
);
sort = new Sort(sortField);
topDocs = searcher.search(query, 5, sort);
@ -279,7 +279,7 @@ public class TestBlockJoinSorting extends LuceneTestCase {
// Sort by field descending, order last
sortField = new ToParentBlockJoinSortField(
"field2", SortField.Type.STRING, true, wrap(parentFilter), wrap(childFilter)
"field2", SortField.Type.STRING, true, parentFilter, childFilter
);
sort = new Sort(sortField);
topDocs = searcher.search(query, 5, sort);
@ -297,14 +297,14 @@ public class TestBlockJoinSorting extends LuceneTestCase {
assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
// Sort by field descending, order last, sort filter (filter_1:T)
childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
childFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T")))));
query = new ToParentBlockJoinQuery(
new FilteredQuery(new MatchAllDocsQuery(), childFilter),
new FixedBitSetCachingWrapperFilter(parentFilter),
new BitDocIdSetCachingWrapperFilter(parentFilter),
ScoreMode.None
);
sortField = new ToParentBlockJoinSortField(
"field2", SortField.Type.STRING, true, wrap(parentFilter), wrap(childFilter)
"field2", SortField.Type.STRING, true, parentFilter, childFilter
);
sort = new Sort(sortField);
topDocs = searcher.search(query, 5, sort);
@ -325,8 +325,4 @@ public class TestBlockJoinSorting extends LuceneTestCase {
dir.close();
}
private Filter wrap(Filter filter) {
return random().nextBoolean() ? new FixedBitSetCachingWrapperFilter(filter) : filter;
}
}

View File

@ -17,6 +17,9 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -27,7 +30,6 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
@ -41,9 +43,6 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.ArrayList;
import java.util.List;
public class TestBlockJoinValidation extends LuceneTestCase {
public static final int AMOUNT_OF_SEGMENTS = 5;
@ -54,7 +53,7 @@ public class TestBlockJoinValidation extends LuceneTestCase {
private Directory directory;
private IndexReader indexReader;
private IndexSearcher indexSearcher;
private Filter parentsFilter;
private BitDocIdSetFilter parentsFilter;
@Rule
public ExpectedException thrown = ExpectedException.none();
@ -72,7 +71,7 @@ public class TestBlockJoinValidation extends LuceneTestCase {
indexReader = DirectoryReader.open(indexWriter, random().nextBoolean());
indexWriter.close();
indexSearcher = new IndexSearcher(indexReader);
parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*"))));
parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(new WildcardQuery(new Term("parent", "*"))));
}
@Test

View File

@ -64,6 +64,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -452,7 +453,7 @@ public class TestJoinUtil extends LuceneTestCase {
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
FixedBitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
final Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
@ -474,7 +475,7 @@ public class TestJoinUtil extends LuceneTestCase {
}
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, false);
indexSearcher.search(joinQuery, new SimpleCollector() {
@ -859,7 +860,7 @@ public class TestJoinUtil extends LuceneTestCase {
return new TopDocs(hits.size(), scoreDocs, hits.isEmpty() ? Float.NaN : hits.get(0).getValue().score(scoreMode));
}
private FixedBitSet createExpectedResult(String queryValue, boolean from, IndexReader topLevelReader, IndexIterationContext context) throws IOException {
private BitSet createExpectedResult(String queryValue, boolean from, IndexReader topLevelReader, IndexIterationContext context) throws IOException {
final Map<String, List<RandomDoc>> randomValueDocs;
final Map<String, List<RandomDoc>> linkValueDocuments;
if (from) {
@ -870,7 +871,7 @@ public class TestJoinUtil extends LuceneTestCase {
linkValueDocuments = context.fromDocuments;
}
FixedBitSet expectedResult = new FixedBitSet(topLevelReader.maxDoc());
BitSet expectedResult = new FixedBitSet(topLevelReader.maxDoc());
List<RandomDoc> matchingDocs = randomValueDocs.get(queryValue);
if (matchingDocs == null) {
return new FixedBitSet(topLevelReader.maxDoc());

View File

@ -19,8 +19,8 @@ package org.apache.lucene.queries;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -29,8 +29,8 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.DocIdSetBuilder;
/**
* A container Filter that allows Boolean composition of Filters.
@ -51,7 +51,7 @@ public class BooleanFilter extends Filter implements Iterable<FilterClause> {
*/
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
DocIdSetBuilder res = null;
BitDocIdSet.Builder res = null;
final LeafReader reader = context.reader();
boolean hasShouldClauses = false;
@ -61,7 +61,7 @@ public class BooleanFilter extends Filter implements Iterable<FilterClause> {
final DocIdSetIterator disi = getDISI(fc.getFilter(), context);
if (disi == null) continue;
if (res == null) {
res = new DocIdSetBuilder(reader.maxDoc());
res = new BitDocIdSet.Builder(reader.maxDoc());
}
res.or(disi);
}
@ -73,7 +73,7 @@ public class BooleanFilter extends Filter implements Iterable<FilterClause> {
if (fc.getOccur() == Occur.MUST_NOT) {
if (res == null) {
assert !hasShouldClauses;
res = new DocIdSetBuilder(reader.maxDoc(), true); // NOTE: may set bits on deleted docs
res = new BitDocIdSet.Builder(reader.maxDoc(), true); // NOTE: may set bits on deleted docs
}
final DocIdSetIterator disi = getDISI(fc.getFilter(), context);
if (disi != null) {
@ -89,7 +89,7 @@ public class BooleanFilter extends Filter implements Iterable<FilterClause> {
return null; // no documents can match
}
if (res == null) {
res = new DocIdSetBuilder(reader.maxDoc());
res = new BitDocIdSet.Builder(reader.maxDoc());
res.or(disi);
} else {
res.and(disi);

View File

@ -34,9 +34,9 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
/**
* Constructs a filter for docs matching any of the terms added to this class.
@ -183,7 +183,7 @@ public final class TermsFilter extends Filter {
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
final LeafReader reader = context.reader();
DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(reader.maxDoc());
final Fields fields = reader.fields();
final BytesRef spare = new BytesRef(this.termsBytes);
if (fields == null) {

View File

@ -74,6 +74,30 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
}
}
/** Test {@link BitSet#prevSetBit(int)}. */
public void testPrevSetBit() throws IOException {
  final int numBits = 1 + random().nextInt(100000);
  final float[] loadFactors = new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f};
  for (float loadFactor : loadFactors) {
    // Reference set backed by java.util.BitSet vs. the implementation under test.
    BitSet reference = new JavaUtilBitSet(randomSet(numBits, loadFactor), numBits);
    T actual = copyOf(reference, numBits);
    for (int bit = 0; bit < numBits; ++bit) {
      assertEquals(String.valueOf(bit), reference.prevSetBit(bit), actual.prevSetBit(bit));
    }
  }
}
/** Test {@link BitSet#nextSetBit(int)}. */
public void testNextSetBit() throws IOException {
  final int numBits = 1 + random().nextInt(100000);
  for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
    BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
    T set2 = copyOf(set1, numBits);
    for (int i = 0; i < numBits; ++i) {
      // Pass the probed index as the failure message so a mismatch pinpoints the
      // offending bit, consistent with testPrevSetBit.
      assertEquals(Integer.toString(i), set1.nextSetBit(i), set2.nextSetBit(i));
    }
  }
}
/** Test the {@link BitSet#set} method. */
public void testSet() throws IOException {
final int numBits = 1 + random().nextInt(100000);
@ -239,6 +263,11 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
return bitSet.cardinality();
}
@Override
public int prevSetBit(int index) {
  // Delegate to java.util.BitSet.previousSetBit: returns the index of the nearest
  // set bit at or before {@code index}, or -1 if no such bit exists.
  return bitSet.previousSetBit(index);
}
@Override
public int nextSetBit(int i) {
int next = bitSet.nextSetBit(i);

View File

@ -17,23 +17,15 @@
package org.apache.solr.response.transform;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.join.FixedBitSetCachingWrapperFilter;
import org.apache.lucene.search.join.BitDocIdSetCachingWrapperFilter;
import org.apache.lucene.search.join.BitDocIdSetFilter;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
@ -83,10 +75,10 @@ public class ChildDocTransformerFactory extends TransformerFactory {
String childFilter = params.get( "childFilter" );
int limit = params.getInt( "limit", 10 );
Filter parentsFilter = null;
BitDocIdSetFilter parentsFilter = null;
try {
Query parentFilterQuery = QParser.getParser( parentFilter, null, req).getQuery();
parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(parentFilterQuery));
parentsFilter = new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(parentFilterQuery));
} catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent filter query" );
}
@ -108,11 +100,11 @@ class ChildDocTransformer extends TransformerWithContext {
private final String name;
private final SchemaField idField;
private final IndexSchema schema;
private Filter parentsFilter;
private BitDocIdSetFilter parentsFilter;
private Query childFilterQuery;
private int limit;
public ChildDocTransformer( String name, final Filter parentsFilter,
public ChildDocTransformer( String name, final BitDocIdSetFilter parentsFilter,
final SchemaField idField, IndexSchema schema,
final Query childFilterQuery, int limit) {
this.name = name;

View File

@ -17,12 +17,11 @@
package org.apache.solr.search.join;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.join.FixedBitSetCachingWrapperFilter;
import org.apache.lucene.search.join.BitDocIdSetCachingWrapperFilter;
import org.apache.lucene.search.join.BitDocIdSetFilter;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.common.params.SolrParams;
@@ -67,27 +66,27 @@ class BlockJoinParentQParser extends QParser {
return new ToParentBlockJoinQuery(query, getFilter(parentList), ScoreMode.None);
}
// NOTE(review): this span is a diff interleave — the +/- markers were stripped, so
// both the pre-patch (Filter-based) and post-patch (BitDocIdSetFilter-based) lines
// of getFilter appear together. Comments below tag which version each line is from;
// do not read this as one valid method body.
protected Filter getFilter(Query parentList) {
// ^ old (removed) signature; the LUCENE-6025 replacement follows:
BitDocIdSetFilter getFilter(Query parentList) {
// Look up a previously built parent filter in the configured Solr user cache.
SolrCache parentCache = req.getSearcher().getCache(CACHE_NAME);
// lazily retrieve from solr cache
Filter filter = null;
if (parentCache != null) {
filter = (Filter) parentCache.get(parentList);
}
Filter result;
if (filter == null) {
// ^ old (removed) logic: any cached Filter was reused as-is. The new logic below
// additionally requires the cached entry to be a BitDocIdSetFilter, rebuilding otherwise.
BitDocIdSetFilter result;
if (filter instanceof BitDocIdSetFilter) {
result = (BitDocIdSetFilter) filter;
} else {
// Cache miss (or cached entry of the wrong type): build a fresh parent filter
// and store it back into the cache when a cache is configured.
result = createParentFilter(parentList);
if (parentCache != null) {
parentCache.put(parentList, result);
}
} else {
result = filter;
}
// ^ this trailing else-branch belongs to the old (removed) version only.
return result;
}
// NOTE(review): diff interleave — old and new versions of createParentFilter are
// both present without +/- markers. Builds a caching parent-bitset filter around
// the given parent query.
protected Filter createParentFilter(Query parentQ) {
return new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(parentQ));
// ^ old (removed) version. New version narrows visibility to private and returns
// the typed BitDocIdSetFilter introduced by LUCENE-6025:
private BitDocIdSetFilter createParentFilter(Query parentQ) {
return new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(parentQ));
}
}

View File

@@ -1,11 +1,31 @@
package org.apache.solr.update;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.join.FixedBitSetCachingWrapperFilter;
import org.apache.lucene.search.join.BitDocIdSetCachingWrapperFilter;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.solr.SolrTestCaseJ4;
@@ -27,34 +47,6 @@ import org.junit.BeforeClass;
import org.junit.Test;
import org.xml.sax.SAXException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
@@ -568,7 +560,7 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {
// NOTE(review): diff interleave — the two new TermQuery(...) lines below are the
// old (FixedBitSetCachingWrapperFilter) and new (BitDocIdSetCachingWrapperFilter)
// variants of the same statement; only one exists in each version of the file.
// Builds a block-join query matching parents of children that contain childTerm.
protected ToParentBlockJoinQuery join(final String childTerm) {
return new ToParentBlockJoinQuery(
new TermQuery(new Term(child, childTerm)), new FixedBitSetCachingWrapperFilter(new TermRangeFilter(parent,
new TermQuery(new Term(child, childTerm)), new BitDocIdSetCachingWrapperFilter(new TermRangeFilter(parent,
null, null, false, false)), ScoreMode.None);
}