mirror of https://github.com/apache/lucene.git
LUCENE-7871: fix false positive match in BlockJoinSelector when children have no value.
- introducing BlockJoinSelector.wrap methods accepting children as DISI. - extracting ToParentDocValues
This commit is contained in:
parent
d070ca6051
commit
706d201815
|
@ -16,18 +16,17 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search.join;
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
import org.apache.lucene.index.SortedNumericDocValues;
|
import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.search.SortedNumericSelector;
|
import org.apache.lucene.search.SortedNumericSelector;
|
||||||
import org.apache.lucene.search.SortedSetSelector;
|
import org.apache.lucene.search.SortedSetSelector;
|
||||||
import org.apache.lucene.util.BitSet;
|
import org.apache.lucene.util.BitSet;
|
||||||
|
import org.apache.lucene.util.BitSetIterator;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
|
|
||||||
/** Select a value from a block of documents.
|
/** Select a value from a block of documents.
|
||||||
* @lucene.internal */
|
* @lucene.internal */
|
||||||
|
@ -78,7 +77,15 @@ public class BlockJoinSelector {
|
||||||
/** Wraps the provided {@link SortedSetDocValues} in order to only select
|
/** Wraps the provided {@link SortedSetDocValues} in order to only select
|
||||||
* one value per parent among its {@code children} using the configured
|
* one value per parent among its {@code children} using the configured
|
||||||
* {@code selection} type. */
|
* {@code selection} type. */
|
||||||
|
@Deprecated
|
||||||
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selection, BitSet parents, BitSet children) {
|
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selection, BitSet parents, BitSet children) {
|
||||||
|
return wrap(sortedSet, selection, parents, toIter(children));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Wraps the provided {@link SortedSetDocValues} in order to only select
|
||||||
|
* one value per parent among its {@code children} using the configured
|
||||||
|
* {@code selection} type. */
|
||||||
|
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selection, BitSet parents, DocIdSetIterator children) {
|
||||||
SortedDocValues values;
|
SortedDocValues values;
|
||||||
switch (selection) {
|
switch (selection) {
|
||||||
case MIN:
|
case MIN:
|
||||||
|
@ -92,162 +99,42 @@ public class BlockJoinSelector {
|
||||||
}
|
}
|
||||||
return wrap(values, selection, parents, children);
|
return wrap(values, selection, parents, children);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Wraps the provided {@link SortedDocValues} in order to only select
|
||||||
|
* one value per parent among its {@code children} using the configured
|
||||||
|
* {@code selection} type. */
|
||||||
|
@Deprecated
|
||||||
|
public static SortedDocValues wrap(final SortedDocValues values, Type selection, BitSet parents, BitSet children) {
|
||||||
|
return wrap(values, selection, parents, toIter(children));
|
||||||
|
}
|
||||||
|
|
||||||
/** Wraps the provided {@link SortedDocValues} in order to only select
|
/** Wraps the provided {@link SortedDocValues} in order to only select
|
||||||
* one value per parent among its {@code children} using the configured
|
* one value per parent among its {@code children} using the configured
|
||||||
* {@code selection} type. */
|
* {@code selection} type. */
|
||||||
public static SortedDocValues wrap(final SortedDocValues values, Type selection, BitSet parents, BitSet children) {
|
public static SortedDocValues wrap(final SortedDocValues values, Type selection, BitSet parents, DocIdSetIterator children) {
|
||||||
if (values.docID() != -1) {
|
if (values.docID() != -1) {
|
||||||
throw new IllegalArgumentException("values iterator was already consumed: values.docID=" + values.docID());
|
throw new IllegalArgumentException("values iterator was already consumed: values.docID=" + values.docID());
|
||||||
}
|
}
|
||||||
return new SortedDocValues() {
|
return ToParentDocValues.wrap(values, selection, parents, children);
|
||||||
|
|
||||||
private int ord = -1;
|
|
||||||
private int docID = -1;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docID() {
|
|
||||||
return docID;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int nextDoc() throws IOException {
|
|
||||||
assert docID != NO_MORE_DOCS;
|
|
||||||
|
|
||||||
if (values.docID() == -1) {
|
|
||||||
if (values.nextDoc() == NO_MORE_DOCS) {
|
|
||||||
docID = NO_MORE_DOCS;
|
|
||||||
return docID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (values.docID() == NO_MORE_DOCS) {
|
|
||||||
docID = NO_MORE_DOCS;
|
|
||||||
return docID;
|
|
||||||
}
|
|
||||||
|
|
||||||
int nextParentDocID = parents.nextSetBit(values.docID());
|
|
||||||
ord = values.ordValue();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
int childDocID = values.nextDoc();
|
|
||||||
assert childDocID != nextParentDocID;
|
|
||||||
if (childDocID > nextParentDocID) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (children.get(childDocID) == false) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (selection == Type.MIN) {
|
|
||||||
ord = Math.min(ord, values.ordValue());
|
|
||||||
} else if (selection == Type.MAX) {
|
|
||||||
ord = Math.max(ord, values.ordValue());
|
|
||||||
} else {
|
|
||||||
throw new AssertionError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
docID = nextParentDocID;
|
|
||||||
return docID;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advance(int target) throws IOException {
|
|
||||||
if (target >= parents.length()) {
|
|
||||||
docID = NO_MORE_DOCS;
|
|
||||||
return docID;
|
|
||||||
}
|
|
||||||
if (target == 0) {
|
|
||||||
assert docID() == -1;
|
|
||||||
return nextDoc();
|
|
||||||
}
|
|
||||||
int prevParentDocID = parents.prevSetBit(target-1);
|
|
||||||
if (values.docID() <= prevParentDocID) {
|
|
||||||
values.advance(prevParentDocID+1);
|
|
||||||
}
|
|
||||||
return nextDoc();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean advanceExact(int targetParentDocID) throws IOException {
|
|
||||||
if (targetParentDocID < docID) {
|
|
||||||
throw new IllegalArgumentException("target must be after the current document: current=" + docID + " target=" + targetParentDocID);
|
|
||||||
}
|
|
||||||
int previousDocId = docID;
|
|
||||||
docID = targetParentDocID;
|
|
||||||
if (targetParentDocID == previousDocId) {
|
|
||||||
return ord != -1;
|
|
||||||
}
|
|
||||||
docID = targetParentDocID;
|
|
||||||
ord = -1;
|
|
||||||
if (parents.get(targetParentDocID) == false) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int prevParentDocId = docID == 0 ? -1 : parents.prevSetBit(docID - 1);
|
|
||||||
int childDoc = values.docID();
|
|
||||||
if (childDoc <= prevParentDocId) {
|
|
||||||
childDoc = values.advance(prevParentDocId + 1);
|
|
||||||
}
|
|
||||||
if (childDoc >= docID) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean hasValue = false;
|
|
||||||
for (int doc = values.docID(); doc < docID; doc = values.nextDoc()) {
|
|
||||||
if (children.get(doc)) {
|
|
||||||
ord = values.ordValue();
|
|
||||||
hasValue = true;
|
|
||||||
values.nextDoc();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (hasValue == false) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int doc = values.docID(); doc < docID; doc = values.nextDoc()) {
|
|
||||||
if (children.get(doc)) {
|
|
||||||
switch (selection) {
|
|
||||||
case MIN:
|
|
||||||
ord = Math.min(ord, values.ordValue());
|
|
||||||
break;
|
|
||||||
case MAX:
|
|
||||||
ord = Math.max(ord, values.ordValue());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new AssertionError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int ordValue() {
|
|
||||||
return ord;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public BytesRef lookupOrd(int ord) throws IOException {
|
|
||||||
return values.lookupOrd(ord);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getValueCount() {
|
|
||||||
return values.getValueCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return values.cost();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Wraps the provided {@link SortedNumericDocValues} in order to only select
|
/** Wraps the provided {@link SortedNumericDocValues} in order to only select
|
||||||
* one value per parent among its {@code children} using the configured
|
* one value per parent among its {@code children} using the configured
|
||||||
* {@code selection} type. */
|
* {@code selection} type. */
|
||||||
|
@Deprecated
|
||||||
public static NumericDocValues wrap(SortedNumericDocValues sortedNumerics, Type selection, BitSet parents, BitSet children) {
|
public static NumericDocValues wrap(SortedNumericDocValues sortedNumerics, Type selection, BitSet parents, BitSet children) {
|
||||||
|
return wrap(sortedNumerics, selection, parents, toIter(children));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** creates an interator for the given bitset */
|
||||||
|
protected static BitSetIterator toIter(BitSet children) {
|
||||||
|
return new BitSetIterator(children, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Wraps the provided {@link SortedNumericDocValues} in order to only select
|
||||||
|
* one value per parent among its {@code children} using the configured
|
||||||
|
* {@code selection} type. */
|
||||||
|
public static NumericDocValues wrap(SortedNumericDocValues sortedNumerics, Type selection, BitSet parents, DocIdSetIterator children) {
|
||||||
NumericDocValues values;
|
NumericDocValues values;
|
||||||
switch (selection) {
|
switch (selection) {
|
||||||
case MIN:
|
case MIN:
|
||||||
|
@ -265,145 +152,18 @@ public class BlockJoinSelector {
|
||||||
/** Wraps the provided {@link NumericDocValues}, iterating over only
|
/** Wraps the provided {@link NumericDocValues}, iterating over only
|
||||||
* child documents, in order to only select one value per parent among
|
* child documents, in order to only select one value per parent among
|
||||||
* its {@code children} using the configured {@code selection} type. */
|
* its {@code children} using the configured {@code selection} type. */
|
||||||
|
@Deprecated
|
||||||
public static NumericDocValues wrap(final NumericDocValues values, Type selection, BitSet parents, BitSet children) {
|
public static NumericDocValues wrap(final NumericDocValues values, Type selection, BitSet parents, BitSet children) {
|
||||||
return new NumericDocValues() {
|
return wrap(values,selection, parents, toIter(children));
|
||||||
|
|
||||||
private int parentDocID = -1;
|
|
||||||
private long value;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int nextDoc() throws IOException {
|
|
||||||
|
|
||||||
if (parentDocID == -1) {
|
|
||||||
values.nextDoc();
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
|
|
||||||
// TODO: make this crazy loop more efficient
|
|
||||||
|
|
||||||
int childDocID = values.docID();
|
|
||||||
if (childDocID == NO_MORE_DOCS) {
|
|
||||||
parentDocID = NO_MORE_DOCS;
|
|
||||||
return parentDocID;
|
|
||||||
}
|
|
||||||
if (children.get(childDocID) == false) {
|
|
||||||
values.nextDoc();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert parents.get(childDocID) == false;
|
|
||||||
|
|
||||||
parentDocID = parents.nextSetBit(childDocID);
|
|
||||||
value = values.longValue();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
childDocID = values.nextDoc();
|
|
||||||
assert childDocID != parentDocID;
|
|
||||||
if (childDocID > parentDocID) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (selection) {
|
|
||||||
case MIN:
|
|
||||||
value = Math.min(value, values.longValue());
|
|
||||||
break;
|
|
||||||
case MAX:
|
|
||||||
value = Math.max(value, values.longValue());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new AssertionError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return parentDocID;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advance(int targetParentDocID) throws IOException {
|
|
||||||
if (targetParentDocID <= parentDocID) {
|
|
||||||
throw new IllegalArgumentException("target must be after the current document: current=" + parentDocID + " target=" + targetParentDocID);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (targetParentDocID == 0) {
|
|
||||||
return nextDoc();
|
|
||||||
}
|
|
||||||
|
|
||||||
int firstChild = parents.prevSetBit(targetParentDocID - 1) + 1;
|
|
||||||
if (values.advance(firstChild) == NO_MORE_DOCS) {
|
|
||||||
parentDocID = NO_MORE_DOCS;
|
|
||||||
return parentDocID;
|
|
||||||
} else {
|
|
||||||
return nextDoc();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean advanceExact(int targetParentDocID) throws IOException {
|
|
||||||
if (targetParentDocID <= parentDocID) {
|
|
||||||
throw new IllegalArgumentException("target must be after the current document: current=" + parentDocID + " target=" + targetParentDocID);
|
|
||||||
}
|
|
||||||
parentDocID = targetParentDocID;
|
|
||||||
if (parents.get(targetParentDocID) == false) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int prevParentDocId = parentDocID == 0 ? -1 : parents.prevSetBit(parentDocID - 1);
|
|
||||||
int childDoc = values.docID();
|
|
||||||
if (childDoc <= prevParentDocId) {
|
|
||||||
childDoc = values.advance(prevParentDocId + 1);
|
|
||||||
}
|
|
||||||
if (childDoc >= parentDocID) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean hasValue = false;
|
|
||||||
for (int doc = values.docID(); doc < parentDocID; doc = values.nextDoc()) {
|
|
||||||
if (children.get(doc)) {
|
|
||||||
value = values.longValue();
|
|
||||||
hasValue = true;
|
|
||||||
values.nextDoc();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (hasValue == false) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int doc = values.docID(); doc < parentDocID; doc = values.nextDoc()) {
|
|
||||||
if (children.get(doc)) {
|
|
||||||
switch (selection) {
|
|
||||||
case MIN:
|
|
||||||
value = Math.min(value, values.longValue());
|
|
||||||
break;
|
|
||||||
case MAX:
|
|
||||||
value = Math.max(value, values.longValue());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new AssertionError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long longValue() {
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docID() {
|
|
||||||
return parentDocID;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return values.cost();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Wraps the provided {@link NumericDocValues}, iterating over only
|
||||||
|
* child documents, in order to only select one value per parent among
|
||||||
|
* its {@code children} using the configured {@code selection} type. */
|
||||||
|
public static NumericDocValues wrap(final NumericDocValues values, Type selection, BitSet parents, DocIdSetIterator children) {
|
||||||
|
if (values.docID() != -1) {
|
||||||
|
throw new IllegalArgumentException("values iterator was already consumed: values.docID=" + values.docID());
|
||||||
|
}
|
||||||
|
return ToParentDocValues.wrap(values,selection, parents, children);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,296 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.search.ConjunctionDISI;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.join.BlockJoinSelector.Type;
|
||||||
|
import org.apache.lucene.util.BitSet;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
class ToParentDocValues extends DocIdSetIterator {
|
||||||
|
|
||||||
|
interface Accumulator{
|
||||||
|
void reset() throws IOException;
|
||||||
|
void increment() throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class SortedDVs extends SortedDocValues implements Accumulator{
|
||||||
|
private final SortedDocValues values;
|
||||||
|
private final BlockJoinSelector.Type selection;
|
||||||
|
private int ord = -1;
|
||||||
|
private final ToParentDocValues iter;
|
||||||
|
|
||||||
|
private SortedDVs(SortedDocValues values, BlockJoinSelector.Type selection, BitSet parents, DocIdSetIterator children) {
|
||||||
|
this.values = values;
|
||||||
|
this.selection = selection;
|
||||||
|
this.iter = new ToParentDocValues(values,parents, children, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return iter.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public
|
||||||
|
void reset() throws IOException {
|
||||||
|
ord = values.ordValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public
|
||||||
|
void increment() throws IOException {
|
||||||
|
if (selection == BlockJoinSelector.Type.MIN) {
|
||||||
|
ord = Math.min(ord, values.ordValue());
|
||||||
|
} else if (selection == BlockJoinSelector.Type.MAX) {
|
||||||
|
ord = Math.max(ord, values.ordValue());
|
||||||
|
} else {
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return iter.nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
return iter.advance(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean advanceExact(int targetParentDocID) throws IOException {
|
||||||
|
return iter.advanceExact(targetParentDocID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int ordValue() {
|
||||||
|
return ord;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef lookupOrd(int ord) throws IOException {
|
||||||
|
return values.lookupOrd(ord);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getValueCount() {
|
||||||
|
return values.getValueCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return values.cost();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static private final class NumDV extends NumericDocValues implements Accumulator{
|
||||||
|
private final NumericDocValues values;
|
||||||
|
private long value;
|
||||||
|
private final BlockJoinSelector.Type selection;
|
||||||
|
|
||||||
|
private final ToParentDocValues iter;
|
||||||
|
|
||||||
|
private NumDV(NumericDocValues values, BlockJoinSelector.Type selection, BitSet parents, DocIdSetIterator children) {
|
||||||
|
this.values = values;
|
||||||
|
this.selection = selection;
|
||||||
|
iter = new ToParentDocValues(values, parents, children, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
value = values.longValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void increment() throws IOException {
|
||||||
|
switch (selection) {
|
||||||
|
case MIN:
|
||||||
|
value = Math.min(value, values.longValue());
|
||||||
|
break;
|
||||||
|
case MAX:
|
||||||
|
value = Math.max(value, values.longValue());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return iter.nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int targetParentDocID) throws IOException {
|
||||||
|
return iter.advance(targetParentDocID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean advanceExact(int targetParentDocID) throws IOException {
|
||||||
|
return iter.advanceExact(targetParentDocID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long longValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return iter.docID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return values.cost();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ToParentDocValues(DocIdSetIterator values, BitSet parents, DocIdSetIterator children, Accumulator collect) {
|
||||||
|
this.parents = parents;
|
||||||
|
childWithValues = ConjunctionDISI.intersectIterators(Arrays.asList(children, values));
|
||||||
|
this.collector = collect;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final private BitSet parents;
|
||||||
|
private int docID = -1;
|
||||||
|
final private Accumulator collector;
|
||||||
|
boolean seen=false;
|
||||||
|
private DocIdSetIterator childWithValues;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
assert docID != NO_MORE_DOCS;
|
||||||
|
|
||||||
|
assert childWithValues.docID()!=docID || docID==-1;
|
||||||
|
if (childWithValues.docID()<docID || docID==-1) {
|
||||||
|
childWithValues.nextDoc();
|
||||||
|
}
|
||||||
|
if (childWithValues.docID() == NO_MORE_DOCS) {
|
||||||
|
docID = NO_MORE_DOCS;
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
assert parents.get(childWithValues.docID()) == false;
|
||||||
|
|
||||||
|
int nextParentDocID = parents.nextSetBit(childWithValues.docID());
|
||||||
|
collector.reset();
|
||||||
|
seen=true;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int childDocID = childWithValues.nextDoc();
|
||||||
|
assert childDocID != nextParentDocID;
|
||||||
|
if (childDocID > nextParentDocID) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
collector.increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
docID = nextParentDocID;
|
||||||
|
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
if (target >= parents.length()) {
|
||||||
|
docID = NO_MORE_DOCS;
|
||||||
|
return docID;
|
||||||
|
}
|
||||||
|
if (target == 0) {
|
||||||
|
assert docID() == -1;
|
||||||
|
return nextDoc();
|
||||||
|
}
|
||||||
|
int prevParentDocID = parents.prevSetBit(target-1);
|
||||||
|
if (childWithValues.docID() <= prevParentDocID) {
|
||||||
|
childWithValues.advance(prevParentDocID+1);
|
||||||
|
}
|
||||||
|
return nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public boolean advanceExact(int targetParentDocID) throws IOException {
|
||||||
|
if (targetParentDocID < docID) {
|
||||||
|
throw new IllegalArgumentException("target must be after the current document: current=" + docID + " target=" + targetParentDocID);
|
||||||
|
}
|
||||||
|
int previousDocId = docID;
|
||||||
|
docID = targetParentDocID;
|
||||||
|
if (targetParentDocID == previousDocId) {
|
||||||
|
return seen;//ord != -1; rlly???
|
||||||
|
}
|
||||||
|
docID = targetParentDocID;
|
||||||
|
seen =false;
|
||||||
|
//ord = -1;
|
||||||
|
if (parents.get(targetParentDocID) == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int prevParentDocId = docID == 0 ? -1 : parents.prevSetBit(docID - 1);
|
||||||
|
int childDoc = childWithValues.docID();
|
||||||
|
if (childDoc <= prevParentDocId) {
|
||||||
|
childDoc = childWithValues.advance(prevParentDocId + 1);
|
||||||
|
}
|
||||||
|
if (childDoc >= docID) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (childWithValues.docID() < docID) {
|
||||||
|
collector.reset();
|
||||||
|
seen=true;
|
||||||
|
childWithValues.nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seen == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int doc = childWithValues.docID(); doc < docID; doc = childWithValues.nextDoc()) {
|
||||||
|
collector.increment();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static NumericDocValues wrap(NumericDocValues values, Type selection, BitSet parents2,
|
||||||
|
DocIdSetIterator children) {
|
||||||
|
return new ToParentDocValues.NumDV(values,selection, parents2, children);
|
||||||
|
}
|
||||||
|
|
||||||
|
static SortedDocValues wrap(SortedDocValues values, Type selection, BitSet parents2,
|
||||||
|
DocIdSetIterator children) {
|
||||||
|
return new ToParentDocValues.SortedDVs(values, selection, parents2, children);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -16,20 +16,22 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search.join;
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.BitSet;
|
import org.apache.lucene.util.BitSet;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
|
||||||
|
|
||||||
public class TestBlockJoinSelector extends LuceneTestCase {
|
public class TestBlockJoinSelector extends LuceneTestCase {
|
||||||
|
|
||||||
public void testDocsWithValue() {
|
public void testDocsWithValue() {
|
||||||
|
@ -64,6 +66,48 @@ public class TestBlockJoinSelector extends LuceneTestCase {
|
||||||
assertFalse(docsWithValue.get(19));
|
assertFalse(docsWithValue.get(19));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void assertNoMoreDoc(DocIdSetIterator sdv, int maxDoc) throws IOException{
|
||||||
|
Random r = random();
|
||||||
|
if(r.nextBoolean()){
|
||||||
|
assertEquals(NO_MORE_DOCS, sdv.nextDoc());
|
||||||
|
} else {
|
||||||
|
if (r.nextBoolean()) {
|
||||||
|
assertEquals(NO_MORE_DOCS, sdv.advance(sdv.docID()+random().nextInt(maxDoc-sdv.docID())));
|
||||||
|
} else {
|
||||||
|
final int noMatchDoc = sdv.docID()+random().nextInt(maxDoc-sdv.docID()-1)+1;
|
||||||
|
assertFalse(advanceExact(sdv,noMatchDoc));
|
||||||
|
assertEquals(noMatchDoc, sdv.docID());
|
||||||
|
if (r.nextBoolean()){
|
||||||
|
assertEquals(NO_MORE_DOCS, sdv.nextDoc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nextDoc(DocIdSetIterator sdv, int docId) throws IOException{
|
||||||
|
Random r = random();
|
||||||
|
if(r.nextBoolean()){
|
||||||
|
return sdv.nextDoc();
|
||||||
|
} else {
|
||||||
|
if (r.nextBoolean()) {
|
||||||
|
return sdv.advance(sdv.docID()+random().nextInt(docId-sdv.docID()-1)+1);
|
||||||
|
} else {
|
||||||
|
if (r.nextBoolean()){
|
||||||
|
final int noMatchDoc = sdv.docID()+random().nextInt(docId-sdv.docID()-1)+1;
|
||||||
|
assertFalse(advanceExact(sdv,noMatchDoc));
|
||||||
|
assertEquals(noMatchDoc, sdv.docID());
|
||||||
|
}
|
||||||
|
assertTrue(advanceExact(sdv,docId));
|
||||||
|
return sdv.docID();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean advanceExact(DocIdSetIterator sdv, int target) throws IOException {
|
||||||
|
return sdv instanceof SortedDocValues ? ((SortedDocValues) sdv).advanceExact(target)
|
||||||
|
: ((NumericDocValues) sdv).advanceExact(target);
|
||||||
|
}
|
||||||
|
|
||||||
public void testSortedSelector() throws IOException {
|
public void testSortedSelector() throws IOException {
|
||||||
final BitSet parents = new FixedBitSet(20);
|
final BitSet parents = new FixedBitSet(20);
|
||||||
parents.set(0);
|
parents.set(0);
|
||||||
|
@ -89,22 +133,18 @@ public class TestBlockJoinSelector extends LuceneTestCase {
|
||||||
ords[18] = 10;
|
ords[18] = 10;
|
||||||
|
|
||||||
final SortedDocValues mins = BlockJoinSelector.wrap(DocValues.singleton(new CannedSortedDocValues(ords)), BlockJoinSelector.Type.MIN, parents, children);
|
final SortedDocValues mins = BlockJoinSelector.wrap(DocValues.singleton(new CannedSortedDocValues(ords)), BlockJoinSelector.Type.MIN, parents, children);
|
||||||
assertEquals(5, mins.nextDoc());
|
assertEquals(5, nextDoc(mins,5));
|
||||||
assertEquals(3, mins.ordValue());
|
assertEquals(3, mins.ordValue());
|
||||||
assertEquals(15, mins.nextDoc());
|
assertEquals(15, nextDoc(mins,15));
|
||||||
assertEquals(10, mins.ordValue());
|
assertEquals(10, mins.ordValue());
|
||||||
assertEquals(19, mins.nextDoc());
|
assertNoMoreDoc(mins, 20);
|
||||||
assertEquals(10, mins.ordValue());
|
|
||||||
assertEquals(NO_MORE_DOCS, mins.nextDoc());
|
|
||||||
|
|
||||||
final SortedDocValues maxs = BlockJoinSelector.wrap(DocValues.singleton(new CannedSortedDocValues(ords)), BlockJoinSelector.Type.MAX, parents, children);
|
final SortedDocValues maxs = BlockJoinSelector.wrap(DocValues.singleton(new CannedSortedDocValues(ords)), BlockJoinSelector.Type.MAX, parents, children);
|
||||||
assertEquals(5, maxs.nextDoc());
|
assertEquals(5, nextDoc(maxs,5));
|
||||||
assertEquals(7, maxs.ordValue());
|
assertEquals(7, maxs.ordValue());
|
||||||
assertEquals(15, maxs.nextDoc());
|
assertEquals(15, nextDoc(maxs,15));
|
||||||
assertEquals(10, maxs.ordValue());
|
assertEquals(10, maxs.ordValue());
|
||||||
assertEquals(19, maxs.nextDoc());
|
assertNoMoreDoc( maxs,20);
|
||||||
assertEquals(10, maxs.ordValue());
|
|
||||||
assertEquals(NO_MORE_DOCS, maxs.nextDoc());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CannedSortedDocValues extends SortedDocValues {
|
private static class CannedSortedDocValues extends SortedDocValues {
|
||||||
|
@ -207,18 +247,18 @@ public class TestBlockJoinSelector extends LuceneTestCase {
|
||||||
longs[18] = 10;
|
longs[18] = 10;
|
||||||
|
|
||||||
final NumericDocValues mins = BlockJoinSelector.wrap(DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)), BlockJoinSelector.Type.MIN, parents, children);
|
final NumericDocValues mins = BlockJoinSelector.wrap(DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)), BlockJoinSelector.Type.MIN, parents, children);
|
||||||
assertEquals(5, mins.nextDoc());
|
assertEquals(5, nextDoc(mins,5));
|
||||||
assertEquals(3, mins.longValue());
|
assertEquals(3, mins.longValue());
|
||||||
assertEquals(15, mins.nextDoc());
|
assertEquals(15, nextDoc(mins,15));
|
||||||
assertEquals(10, mins.longValue());
|
assertEquals(10, mins.longValue());
|
||||||
assertEquals(NO_MORE_DOCS, mins.nextDoc());
|
assertNoMoreDoc(mins, 20);
|
||||||
|
|
||||||
final NumericDocValues maxs = BlockJoinSelector.wrap(DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)), BlockJoinSelector.Type.MAX, parents, children);
|
final NumericDocValues maxs = BlockJoinSelector.wrap(DocValues.singleton(new CannedNumericDocValues(longs, docsWithValue)), BlockJoinSelector.Type.MAX, parents, children);
|
||||||
assertEquals(5, maxs.nextDoc());
|
assertEquals(5, nextDoc(maxs, 5));
|
||||||
assertEquals(7, maxs.longValue());
|
assertEquals(7, maxs.longValue());
|
||||||
assertEquals(15, maxs.nextDoc());
|
assertEquals(15, nextDoc(maxs, 15));
|
||||||
assertEquals(10, maxs.longValue());
|
assertEquals(10, maxs.longValue());
|
||||||
assertEquals(NO_MORE_DOCS, maxs.nextDoc());
|
assertNoMoreDoc(maxs, 20);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CannedNumericDocValues extends NumericDocValues {
|
private static class CannedNumericDocValues extends NumericDocValues {
|
||||||
|
|
|
@ -501,6 +501,9 @@ when using one of Exact*StatsCache (Mikhail Khludnev)
|
||||||
* SOLR-10910: Clean up a few details left over from pluggable transient core and untangling
|
* SOLR-10910: Clean up a few details left over from pluggable transient core and untangling
|
||||||
CoreDescriptor/CoreContainer references (Erick Erickson)
|
CoreDescriptor/CoreContainer references (Erick Erickson)
|
||||||
|
|
||||||
|
* LUCENE-7871: fix false positive match in BlockJoinSelector when children have no value, introducing
|
||||||
|
wrap methods accepting children as DISI. Extracting ToParentDocValues (Mikhail Khludnev)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1)
|
* SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1)
|
||||||
|
|
Loading…
Reference in New Issue