mirror of https://github.com/apache/lucene.git

LUCENE-3171: add modules/join to enable joining parent + child documents when indexed as a doc block

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140851 13f79535-47bb-0310-9956-ffa450edef68
parent 2745612a4c
commit 4a3b510739
@@ -66,6 +66,12 @@ New Features
   highlighting speed up. Use FastVectorHighlighter.setPhraseLimit() to set limit
   (e.g. 5000). (Mike Sokolov via Koji Sekiguchi)
 
+* LUCENE-3171: Added BlockJoinQuery and BlockJoinCollector, under the
+  new contrib/join module, to enable searches that require joining
+  between parent and child documents. Joined (children + parent)
+  documents must be indexed as a document block, using
+  IndexWriter.add/updateDocuments (Mark Harwood, Mike McCandless)
+
 API Changes
 
 Bug Fixes
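A minimal end-to-end sketch of the new join APIs (hedged: `writer` and `searcher` are an open IndexWriter/IndexSearcher, the field names are illustrative, and the CachingWrapperFilter + QueryWrapperFilter combination is just one way to obtain a parents Filter that yields an OpenBitSet per segment, as BlockJoinQuery below requires):

    // Index one child + parent block; the parent must come last in the block.
    List<Document> block = new ArrayList<Document>();
    Document child = new Document();
    child.add(new Field("skill", "java", Field.Store.NO, Field.Index.NOT_ANALYZED));
    block.add(child);
    Document parent = new Document();
    parent.add(new Field("docType", "resume", Field.Store.NO, Field.Index.NOT_ANALYZED));
    block.add(parent);
    writer.addDocuments(block);   // indexes the block atomically

    // Join child matches up to their parent documents:
    Filter parentsFilter = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
    BlockJoinQuery join = new BlockJoinQuery(
        new TermQuery(new Term("skill", "java")),
        parentsFilter, BlockJoinQuery.ScoreMode.Avg);

    // trackScores=true so child hits can later be sorted by relevance:
    BlockJoinCollector collector = new BlockJoinCollector(Sort.RELEVANCE, 10, true, false);
    searcher.search(join, collector);
    TopGroups<Integer> groups = collector.getTopGroups(join, null, 0, 10, 0, true);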
@@ -793,8 +793,13 @@ public abstract class FieldComparator<T> {
     @Override
     public void setScorer(Scorer scorer) {
       // wrap with a ScoreCachingWrappingScorer so that successive calls to
-      // score() will not incur score computation over and over again.
-      this.scorer = new ScoreCachingWrappingScorer(scorer);
+      // score() will not incur score computation over and
+      // over again.
+      if (!(scorer instanceof ScoreCachingWrappingScorer)) {
+        this.scorer = new ScoreCachingWrappingScorer(scorer);
+      } else {
+        this.scorer = scorer;
+      }
     }
 
     @Override
@@ -31,12 +31,12 @@ import org.apache.lucene.util.PriorityQueue;
  * @see IndexSearcher#search(Query,Filter,int,Sort)
  * @see FieldCache
  */
-public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueue.Entry> {
+public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {
 
-  final static class Entry extends ScoreDoc {
-    int slot;
+  public static class Entry extends ScoreDoc {
+    public int slot;
 
-    Entry(int slot, int doc, float score) {
+    public Entry(int slot, int doc, float score) {
       super(doc, score);
       this.slot = slot;
     }

@@ -51,7 +51,7 @@ public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueu
    * An implementation of {@link FieldValueHitQueue} which is optimized in case
    * there is just one comparator.
    */
-  private static final class OneComparatorFieldValueHitQueue extends FieldValueHitQueue {
+  private static final class OneComparatorFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
     private final int oneReverseMul;
 
     public OneComparatorFieldValueHitQueue(SortField[] fields, int size)

@@ -92,7 +92,7 @@ public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueu
    * An implementation of {@link FieldValueHitQueue} which is optimized in case
    * there is more than one comparator.
    */
-  private static final class MultiComparatorsFieldValueHitQueue extends FieldValueHitQueue {
+  private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
 
     public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size)
         throws IOException {

@@ -156,24 +156,28 @@ public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueu
    *          The number of hits to retain. Must be greater than zero.
    * @throws IOException
    */
-  public static FieldValueHitQueue create(SortField[] fields, int size) throws IOException {
+  public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) throws IOException {
 
     if (fields.length == 0) {
       throw new IllegalArgumentException("Sort must contain at least one field");
     }
 
     if (fields.length == 1) {
-      return new OneComparatorFieldValueHitQueue(fields, size);
+      return new OneComparatorFieldValueHitQueue<T>(fields, size);
     } else {
-      return new MultiComparatorsFieldValueHitQueue(fields, size);
+      return new MultiComparatorsFieldValueHitQueue<T>(fields, size);
     }
   }
 
-  FieldComparator[] getComparators() { return comparators; }
+  public FieldComparator[] getComparators() {
+    return comparators;
+  }
 
-  int[] getReverseMul() { return reverseMul; }
+  public int[] getReverseMul() {
+    return reverseMul;
+  }
 
-  protected void setComparator(int pos, FieldComparator comparator) {
+  public void setComparator(int pos, FieldComparator comparator) {
     if (pos==0) firstComparator = comparator;
     comparators[pos] = comparator;
   }
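The point of the new type parameter is to let a collector queue its own Entry subclass (BlockJoinCollector below does exactly this with its OneGroup entries). A minimal sketch of the typed factory, assuming a plain score sort:

    SortField[] sortFields = new SortField[] { SortField.FIELD_SCORE };
    // create() now returns a queue typed to the caller's Entry subclass;
    // with the base Entry type it behaves exactly as before.
    FieldValueHitQueue<FieldValueHitQueue.Entry> pq =
        FieldValueHitQueue.create(sortFields, 10);
    pq.add(new FieldValueHitQueue.Entry(0 /*slot*/, 42 /*doc*/, 1.5f /*score*/));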
@@ -48,9 +48,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     FieldComparator comparator;
     final int reverseMul;
-    final FieldValueHitQueue queue;
+    final FieldValueHitQueue<Entry> queue;
 
-    public OneComparatorNonScoringCollector(FieldValueHitQueue queue,
+    public OneComparatorNonScoringCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
       this.queue = queue;

@@ -113,7 +113,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
   private static class OutOfOrderOneComparatorNonScoringCollector extends
       OneComparatorNonScoringCollector {
 
-    public OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueue queue,
+    public OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -160,7 +160,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     Scorer scorer;
 
-    public OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue,
+    public OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -221,7 +221,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
       OneComparatorScoringNoMaxScoreCollector {
 
     public OutOfOrderOneComparatorScoringNoMaxScoreCollector(
-        FieldValueHitQueue queue, int numHits, boolean fillFields)
+        FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields)
         throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -274,7 +274,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     Scorer scorer;
 
-    public OneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue,
+    public OneComparatorScoringMaxScoreCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
       // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.

@@ -334,7 +334,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
   private static class OutOfOrderOneComparatorScoringMaxScoreCollector extends
       OneComparatorScoringMaxScoreCollector {
 
-    public OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue,
+    public OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -384,8 +384,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     final FieldComparator[] comparators;
     final int[] reverseMul;
-    final FieldValueHitQueue queue;
-    public MultiComparatorNonScoringCollector(FieldValueHitQueue queue,
+    final FieldValueHitQueue<Entry> queue;
+    public MultiComparatorNonScoringCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
       this.queue = queue;

@@ -471,7 +471,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
   private static class OutOfOrderMultiComparatorNonScoringCollector extends
       MultiComparatorNonScoringCollector {
 
-    public OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueue queue,
+    public OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -540,7 +540,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     Scorer scorer;
 
-    public MultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue,
+    public MultiComparatorScoringMaxScoreCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
       // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.

@@ -619,7 +619,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
   private final static class OutOfOrderMultiComparatorScoringMaxScoreCollector
       extends MultiComparatorScoringMaxScoreCollector {
 
-    public OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue,
+    public OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -692,7 +692,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     Scorer scorer;
 
-    public MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue,
+    public MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue<Entry> queue,
         int numHits, boolean fillFields) throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -771,7 +771,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
       extends MultiComparatorScoringNoMaxScoreCollector {
 
     public OutOfOrderMultiComparatorScoringNoMaxScoreCollector(
-        FieldValueHitQueue queue, int numHits, boolean fillFields)
+        FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields)
         throws IOException {
       super(queue, numHits, fillFields);
     }

@@ -917,7 +917,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
       throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
     }
 
-    FieldValueHitQueue queue = FieldValueHitQueue.create(sort.fields, numHits);
+    FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
     if (queue.getComparators().length == 1) {
       if (docsScoredInOrder) {
         if (trackMaxScore) {

@@ -972,7 +972,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
   protected void populateResults(ScoreDoc[] results, int howMany) {
     if (fillFields) {
       // avoid casting if unnecessary.
-      FieldValueHitQueue queue = (FieldValueHitQueue) pq;
+      FieldValueHitQueue<Entry> queue = (FieldValueHitQueue<Entry>) pq;
       for (int i = howMany - 1; i >= 0; i--) {
         results[i] = queue.fillFields(queue.pop());
       }

@@ -993,12 +993,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
     }
 
     // If this is a maxScoring tracking collector and there were no results,
-    return new TopFieldDocs(totalHits, results, ((FieldValueHitQueue) pq).getFields(), maxScore);
+    return new TopFieldDocs(totalHits, results, ((FieldValueHitQueue<Entry>) pq).getFields(), maxScore);
   }
 
   @Override
   public boolean acceptsDocsOutOfOrder() {
     return false;
   }
 
 }
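For context, these specialized collectors are normally reached through the factory; a hedged sketch (`searcher` and `query` assumed, the sort field name is illustrative):

    Sort sort = new Sort(new SortField("date", SortField.LONG));
    // create() picks one of the subclasses above based on comparator count,
    // score tracking, and whether docs arrive in order:
    TopFieldCollector tfc = TopFieldCollector.create(sort, 10,
        true  /* fillFields */,
        false /* trackDocScores */,
        false /* trackMaxScore */,
        true  /* docsScoredInOrder */);
    searcher.search(query, tfc);
    TopDocs results = tfc.topDocs();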
@@ -393,6 +393,56 @@ public final class ArrayUtil {
     return array;
   }
 
+  public static int[][] grow(int[][] array, int minSize) {
+    if (array.length < minSize) {
+      int[][] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else {
+      return array;
+    }
+  }
+
+  public static int[][] grow(int[][] array) {
+    return grow(array, 1 + array.length);
+  }
+
+  public static int[][] shrink(int[][] array, int targetSize) {
+    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+    if (newSize != array.length) {
+      int[][] newArray = new int[newSize][];
+      System.arraycopy(array, 0, newArray, 0, newSize);
+      return newArray;
+    } else {
+      return array;
+    }
+  }
+
+  public static float[][] grow(float[][] array, int minSize) {
+    if (array.length < minSize) {
+      float[][] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else {
+      return array;
+    }
+  }
+
+  public static float[][] grow(float[][] array) {
+    return grow(array, 1 + array.length);
+  }
+
+  public static float[][] shrink(float[][] array, int targetSize) {
+    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+    if (newSize != array.length) {
+      float[][] newArray = new float[newSize][];
+      System.arraycopy(array, 0, newArray, 0, newSize);
+      return newArray;
+    } else {
+      return array;
+    }
+  }
+
 /**
  * Returns hash of chars in range start (inclusive) to
  * end (inclusive)
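A hedged usage sketch of the new two-dimensional grow (mirroring how BlockJoinCollector below uses it for its per-join child-doc arrays; `numJoins` is a hypothetical caller-side count):

    int[][] childDocs = new int[5][];
    int numJoins = 8; // hypothetical
    if (childDocs.length < numJoins) {
      // grow() over-allocates via oversize(), so repeated growth is amortized;
      // existing rows are carried over by reference, new slots are null.
      childDocs = ArrayUtil.grow(childDocs, numJoins);
    }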
@@ -617,6 +667,7 @@ public final class ArrayUtil {
    */
   public static <T> void mergeSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
     if (toIndex-fromIndex <= 1) return;
+    //System.out.println("SORT: " + (toIndex-fromIndex));
     getSorter(a, comp).mergeSort(fromIndex, toIndex-1);
   }
 
@@ -35,6 +35,7 @@ import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
@@ -491,4 +492,25 @@ public class _TestUtil {
       }
     }
   }
+
+  // NOTE: this is likely buggy, and cannot clone fields
+  // with tokenStreamValues, etc. Use at your own risk!!
+
+  // TODO: is there a pre-existing way to do this!!!
+  public static Document cloneDocument(Document doc1) {
+    final Document doc2 = new Document();
+    for(Fieldable f : doc1.getFields()) {
+      Field field1 = (Field) f;
+
+      Field field2 = new Field(field1.name(),
+                               field1.stringValue(),
+                               field1.isStored() ? Field.Store.YES : Field.Store.NO,
+                               field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
+      field2.setOmitNorms(field1.getOmitNorms());
+      field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions());
+      doc2.add(field2);
+    }
+
+    return doc2;
+  }
 }
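A hedged sketch of the helper in a test (it copies the stored/indexed/analyzed flags plus the omit-norms and omit-tf settings, but, per the NOTE above, not token-stream or binary values):

    Document original = new Document();
    original.add(new Field("docid", "17", Field.Store.YES, Field.Index.NOT_ANALYZED));
    Document copy = _TestUtil.cloneDocument(original);
    assert copy.getFields().size() == original.getFields().size();
    assert copy.get("docid").equals(original.get("docid"));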
@@ -33,7 +33,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;

@@ -69,28 +68,6 @@ public class TestNRTThreads extends LuceneTestCase {
     }
   }
 
-  // TODO: is there a pre-existing way to do this!!!
-  private Document cloneDoc(Document doc1) {
-    final Document doc2 = new Document();
-    for(Fieldable f : doc1.getFields()) {
-      Field field1 = (Field) f;
-
-      Field field2 = new Field(field1.name(),
-                               field1.stringValue(),
-                               field1.isStored() ? Field.Store.YES : Field.Store.NO,
-                               field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
-      if (field1.getOmitNorms()) {
-        field2.setOmitNorms(true);
-      }
-      if (field1.getOmitTermFreqAndPositions()) {
-        field2.setOmitTermFreqAndPositions(true);
-      }
-      doc2.add(field2);
-    }
-
-    return doc2;
-  }
-
   @Test
   public void testNRTThreads() throws Exception {
 

@@ -218,7 +195,7 @@ public class TestNRTThreads extends LuceneTestCase {
 
               allSubDocs.add(subDocs);
               doc.add(packIDField);
-              docsList.add(cloneDoc(doc));
+              docsList.add(_TestUtil.cloneDocument(doc));
              docIDs.add(doc.get("docid"));
 
               final int maxDocCount = _TestUtil.nextInt(random, 1, 10);

@@ -227,7 +204,7 @@ public class TestNRTThreads extends LuceneTestCase {
                 if (doc == null) {
                   break;
                 }
-                docsList.add(cloneDoc(doc));
+                docsList.add(_TestUtil.cloneDocument(doc));
                 docIDs.add(doc.get("docid"));
               }
               addCount.addAndGet(docsList.size());
@@ -26,6 +26,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
         <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>

@@ -39,6 +40,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
         <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>

@@ -52,6 +54,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
        <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>

@@ -65,6 +68,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
         <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>

@@ -79,6 +83,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
         <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>

@@ -91,6 +96,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
         <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>

@@ -105,6 +111,7 @@
         <fileset dir="common" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
         <fileset dir="queries" includes="build.xml" />
+        <fileset dir="join" includes="build.xml" />
         <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<project name="join" default="default">
+  <description>
+    Queries and collectors for performing joins
+  </description>
+
+  <property name="build.dir" location="build/" />
+  <import file="../../lucene/contrib/contrib-build.xml"/>
+
+  <property name="build.dir" location="build/" />
+  <property name="dist.dir" location="dist/" />
+
+  <module-uptodate name="grouping" jarfile="${common.dir}/../modules/grouping/build/lucene-grouping-${version}.jar"
+                   property="grouping.uptodate" classpath.property="grouping.jar"/>
+
+  <path id="classpath">
+    <pathelement path="${grouping.jar}"/>
+    <path refid="base.classpath"/>
+  </path>
+
+  <path id="run.classpath">
+    <path refid="classpath"/>
+    <pathelement location="${build.dir}/classes/java"/>
+  </path>
+
+  <property name="build.dir" location="build/" />
+  <property name="dist.dir" location="dist/" />
+  <property name="maven.dist.dir" location="../dist/maven" />
+
+  <target name="compile-grouping" unless="grouping.uptodate">
+    <subant target="default">
+      <fileset dir="${common.dir}/../modules/grouping" includes="build.xml"/>
+    </subant>
+  </target>
+
+  <target name="init" depends="contrib-build.init,compile-grouping"/>
+
+  <target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven" />
+</project>
@@ -0,0 +1,472 @@
+package org.apache.lucene.search.join;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;      // javadocs
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.FieldValueHitQueue;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreCachingWrappingScorer;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.grouping.GroupDocs;
+import org.apache.lucene.search.grouping.TopGroups;
+import org.apache.lucene.util.ArrayUtil;
+
+
+/** Collects parent document hits for a Query containing one or more
+ *  BlockJoinQuery clauses, sorted by the
+ *  specified parent Sort.  Note that this cannot perform
+ *  arbitrary joins; rather, it requires that all joined
+ *  documents are indexed as a doc block (using {@link
+ *  IndexWriter#addDocuments} or {@link
+ *  IndexWriter#updateDocuments}).  Ie, the join is computed
+ *  at index time.
+ *
+ *  <p>The parent Sort must only use
+ *  fields from the parent documents; sorting by field in
+ *  the child documents is not supported.</p>
+ *
+ *  <p>You should only use this
+ *  collector if one or more of the clauses in the query is
+ *  a {@link BlockJoinQuery}.  This collector will find those query
+ *  clauses and record the matching child documents for the
+ *  top scoring parent documents.</p>
+ *
+ *  <p>Multiple joins (star join) and nested joins and a mix
+ *  of the two are allowed, as long as in all cases the
+ *  documents corresponding to a single row of each joined
+ *  parent table were indexed as a doc block.</p>
+ *
+ *  <p>For the simple star join you can retrieve the
+ *  {@link TopGroups} instance containing each {@link BlockJoinQuery}'s
+ *  matching child documents for the top parent groups,
+ *  using {@link #getTopGroups}.  Ie,
+ *  a single query, which will contain two or more
+ *  {@link BlockJoinQuery}'s as clauses representing the star join,
+ *  can then retrieve two or more {@link TopGroups} instances.</p>
+ *
+ *  <p>For nested joins, the query will run correctly (ie,
+ *  match the right parent and child documents), however,
+ *  because TopGroups is currently unable to support nesting
+ *  (each group is not able to hold another TopGroups), you
+ *  are only able to retrieve the TopGroups of the first
+ *  join.  The TopGroups of the nested joins will not be
+ *  correct.
+ *
+ *  See {@link org.apache.lucene.search.join} for a code
+ *  sample.
+ *
+ *  @lucene.experimental
+ */
+public class BlockJoinCollector extends Collector {
+
+  private final Sort sort;
+
+  // Maps each BlockJoinQuery instance to its "slot" in
+  // joinScorers and in OneGroup's cached doc/scores/count:
+  private final Map<Query,Integer> joinQueryID = new HashMap<Query,Integer>();
+  private final int numParentHits;
+  private final FieldValueHitQueue<OneGroup> queue;
+  private final FieldComparator[] comparators;
+  private final int[] reverseMul;
+  private final int compEnd;
+  private final boolean trackMaxScore;
+  private final boolean trackScores;
+
+  private int docBase;
+  private BlockJoinQuery.BlockJoinScorer[] joinScorers = new BlockJoinQuery.BlockJoinScorer[0];
+  private IndexReader.AtomicReaderContext currentReaderContext;
+  private Scorer scorer;
+  private boolean queueFull;
+
+  private OneGroup bottom;
+  private int totalHitCount;
+  private float maxScore = Float.NaN;
+
+  /* Creates a BlockJoinCollector.  The provided sort must
+   * not be null. */
+  public BlockJoinCollector(Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore) throws IOException {
+    // TODO: allow null sort to be specialized to relevance
+    // only collector
+    this.sort = sort;
+    this.trackMaxScore = trackMaxScore;
+    this.trackScores = trackScores;
+    this.numParentHits = numParentHits;
+    queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
+    comparators = queue.getComparators();
+    reverseMul = queue.getReverseMul();
+    compEnd = comparators.length - 1;
+  }
+
+  private static final class OneGroup extends FieldValueHitQueue.Entry {
+    public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
+      super(comparatorSlot, parentDoc, parentScore);
+      docs = new int[numJoins][];
+      for(int joinID=0;joinID<numJoins;joinID++) {
+        docs[joinID] = new int[5];
+      }
+      if (doScores) {
+        scores = new float[numJoins][];
+        for(int joinID=0;joinID<numJoins;joinID++) {
+          scores[joinID] = new float[5];
+        }
+      }
+      counts = new int[numJoins];
+    }
+    AtomicReaderContext readerContext;
+    int[][] docs;
+    float[][] scores;
+    int[] counts;
+  };
+
+  @Override
+  public void collect(int parentDoc) throws IOException {
+    //System.out.println("C parentDoc=" + parentDoc);
+    totalHitCount++;
+
+    float score = Float.NaN;
+
+    if (trackMaxScore) {
+      score = scorer.score();
+      if (score > maxScore) {
+        maxScore = score;
+      }
+    }
+
+    // TODO: we could sweep all joinScorers here and
+    // aggregate total child hit count, so we can fill this
+    // in getTopGroups (we wire it to 0 now)
+
+    if (queueFull) {
+      //System.out.println("  queueFull");
+      // Fastmatch: return if this hit is not competitive
+      for (int i = 0;; i++) {
+        final int c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
+        if (c < 0) {
+          // Definitely not competitive.
+          //System.out.println("    skip");
+          return;
+        } else if (c > 0) {
+          // Definitely competitive.
+          break;
+        } else if (i == compEnd) {
+          // Here c=0. If we're at the last comparator, this doc is not
+          // competitive, since docs are visited in doc Id order, which means
+          // this doc cannot compete with any other document in the queue.
+          //System.out.println("    skip");
+          return;
+        }
+      }
+
+      //System.out.println("    competes!  doc=" + (docBase + parentDoc));
+
+      // This hit is competitive - replace bottom element in queue & adjustTop
+      for (int i = 0; i < comparators.length; i++) {
+        comparators[i].copy(bottom.slot, parentDoc);
+      }
+      if (!trackMaxScore && trackScores) {
+        score = scorer.score();
+      }
+      bottom.doc = docBase + parentDoc;
+      bottom.readerContext = currentReaderContext;
+      bottom.score = score;
+      copyGroups(bottom);
+      bottom = queue.updateTop();
+
+      for (int i = 0; i < comparators.length; i++) {
+        comparators[i].setBottom(bottom.slot);
+      }
+    } else {
+      // Startup transient: queue is not yet full:
+      final int comparatorSlot = totalHitCount - 1;
+
+      // Copy hit into queue
+      for (int i = 0; i < comparators.length; i++) {
+        comparators[i].copy(comparatorSlot, parentDoc);
+      }
+      //System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
+      final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores);
+      og.readerContext = currentReaderContext;
+      copyGroups(og);
+      bottom = queue.add(og);
+      queueFull = totalHitCount == numParentHits;
+      if (queueFull) {
+        // End of startup transient: queue just filled up:
+        for (int i = 0; i < comparators.length; i++) {
+          comparators[i].setBottom(bottom.slot);
+        }
+      }
+    }
+  }
+
+  // Pulls out child doc and scores for all join queries:
+  private void copyGroups(OneGroup og) {
+    // While rare, it's possible top arrays could be too
+    // short if join query had null scorer on first
+    // segment(s) but then became non-null on later segments
+    final int numSubScorers = joinScorers.length;
+    if (og.docs.length < numSubScorers) {
+      // While rare, this could happen if join query had
+      // null scorer on first segment(s) but then became
+      // non-null on later segments
+      og.docs = ArrayUtil.grow(og.docs);
+    }
+    if (og.counts.length < numSubScorers) {
+      og.counts = ArrayUtil.grow(og.counts);
+    }
+    if (trackScores && og.scores.length < numSubScorers) {
+      og.scores = ArrayUtil.grow(og.scores);
+    }
+
+    //System.out.println("copyGroups parentDoc=" + og.doc);
+    for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
+      final BlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
+      //System.out.println("  scorer=" + joinScorer);
+      if (joinScorer != null) {
+        og.counts[scorerIDX] = joinScorer.getChildCount();
+        //System.out.println("    count=" + og.counts[scorerIDX]);
+        og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
+        /*
+          for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+            System.out.println("      docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
+          }
+        */
+        if (trackScores) {
+          og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
+        }
+      }
+    }
+  }
+
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    currentReaderContext = context;
+    docBase = context.docBase;
+    for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
+      queue.setComparator(compIDX, comparators[compIDX].setNextReader(context));
+    }
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return false;
+  }
+
+  @Override
+  public void setScorer(Scorer scorer) {
+    //System.out.println("C.setScorer scorer=" + scorer);
+    // Since we invoke .score(), and the comparators likely
+    // do as well, cache it so it's only "really" computed
+    // once:
+    this.scorer = new ScoreCachingWrappingScorer(scorer);
+    for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
+      comparators[compIDX].setScorer(this.scorer);
+    }
+    Arrays.fill(joinScorers, null);
+
+    // Find any BlockJoinScorers out there:
+    scorer.visitScorers(new Scorer.ScorerVisitor<Query,Query,Scorer>() {
+      private void enroll(BlockJoinQuery query, BlockJoinQuery.BlockJoinScorer scorer) {
+        final Integer slot = joinQueryID.get(query);
+        if (slot == null) {
+          joinQueryID.put(query, joinScorers.length);
+          //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
+          final BlockJoinQuery.BlockJoinScorer[] newArray = new BlockJoinQuery.BlockJoinScorer[1+joinScorers.length];
+          System.arraycopy(joinScorers, 0, newArray, 0, joinScorers.length);
+          joinScorers = newArray;
+          joinScorers[joinScorers.length-1] = scorer;
+        } else {
+          joinScorers[slot] = scorer;
+        }
+      }
+
+      @Override
+      public void visitOptional(Query parent, Query child, Scorer scorer) {
+        //System.out.println("visitOpt");
+        if (child instanceof BlockJoinQuery) {
+          enroll((BlockJoinQuery) child,
+                 (BlockJoinQuery.BlockJoinScorer) scorer);
+        }
+      }
+
+      @Override
+      public void visitRequired(Query parent, Query child, Scorer scorer) {
+        //System.out.println("visitReq parent=" + parent + " child=" + child + " scorer=" + scorer);
+        if (child instanceof BlockJoinQuery) {
+          enroll((BlockJoinQuery) child,
+                 (BlockJoinQuery.BlockJoinScorer) scorer);
+        }
+      }
+
+      @Override
+      public void visitProhibited(Query parent, Query child, Scorer scorer) {
+        //System.out.println("visitProh");
+        if (child instanceof BlockJoinQuery) {
+          enroll((BlockJoinQuery) child,
+                 (BlockJoinQuery.BlockJoinScorer) scorer);
+        }
+      }
+    });
+  }
+
+  private final static class FakeScorer extends Scorer {
+
+    float score;
+    int doc;
+
+    public FakeScorer() {
+      super((Weight) null);
+    }
+
+    @Override
+    public float score() {
+      return score;
+    }
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    @Override
+    public int advance(int target) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int nextDoc() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  private OneGroup[] sortedGroups;
+
+  private void sortQueue() {
+    sortedGroups = new OneGroup[queue.size()];
+    for(int downTo=queue.size()-1;downTo>=0;downTo--) {
+      sortedGroups[downTo] = queue.pop();
+    }
+  }
+
+  /** Return the TopGroups for the specified
+   *  BlockJoinQuery.  The groupValue of each GroupDocs will
+   *  be the parent docID for that group.  Note that the
+   *  {@link GroupDocs#totalHits}, which would be the
+   *  total number of child documents matching that parent,
+   *  is not computed (will always be 0).  Returns null if
+   *  no groups matched. */
+  @SuppressWarnings("unchecked")
+  public TopGroups<Integer> getTopGroups(BlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields)
+
+    throws IOException {
+
+    final Integer _slot = joinQueryID.get(query);
+    if (_slot == null) {
+      if (totalHitCount == 0) {
+        return null;
+      } else {
+        throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery");
+      }
+    }
+
+    // unbox once
+    final int slot = _slot;
+
+    if (offset >= queue.size()) {
+      return null;
+    }
+    int totalGroupedHitCount = 0;
+
+    if (sortedGroups == null) {
+      sortQueue();
+    }
+
+    final FakeScorer fakeScorer = new FakeScorer();
+
+    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
+
+    for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
+      final OneGroup og = sortedGroups[groupIDX];
+
+      // At this point we hold all docs w/ in each group,
+      // unsorted; we now sort them:
+      final TopDocsCollector collector;
+      if (withinGroupSort == null) {
+        // Sort by score
+        if (!trackScores) {
+          throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
+        }
+        collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
+      } else {
+        // Sort by fields
+        collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, trackScores, trackMaxScore, true);
+      }
+
+      collector.setScorer(fakeScorer);
+      collector.setNextReader(og.readerContext);
+      final int numChildDocs = og.counts[slot];
+      for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
+        final int doc = og.docs[slot][docIDX];
+        fakeScorer.doc = doc;
+        if (trackScores) {
+          fakeScorer.score = og.scores[slot][docIDX];
+        }
+        collector.collect(doc);
+      }
+      totalGroupedHitCount += numChildDocs;
+
+      final Object[] groupSortValues;
+
+      if (fillSortFields) {
+        groupSortValues = new Object[comparators.length];
+        for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
+          groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
+        }
+      } else {
+        groupSortValues = null;
+      }
+
+      final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);
+
+      groups[groupIDX-offset] = new GroupDocs<Integer>(topDocs.getMaxScore(),
+                                                       og.counts[slot],
+                                                       topDocs.scoreDocs,
+                                                       og.doc,
+                                                       groupSortValues);
+    }
+
+    return new TopGroups<Integer>(new TopGroups<Integer>(sort.getSort(),
+                                                         withinGroupSort == null ? null : withinGroupSort.getSort(),
+                                                         0, totalGroupedHitCount, groups),
+                                  totalHitCount);
+  }
+}
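A hedged sketch of consuming this collector's output (continuing the `join`/`collector`/`searcher` names from the sketch after the CHANGES entry; TopGroups and GroupDocs come from the grouping module this one depends on):

    TopGroups<Integer> joinResults = collector.getTopGroups(join, null, 0, 10, 0, true);
    if (joinResults != null) {                       // null if no groups matched
      for (GroupDocs<Integer> group : joinResults.groups) {
        int parentDocID = group.groupValue;          // parent docID of this block
        for (ScoreDoc childHit : group.scoreDocs) {  // top child hits under it
          Document childDoc = searcher.doc(childHit.doc);
        }
      }
    }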
@ -0,0 +1,410 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter; // javadocs
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.DocIdSet;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.Filter;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.search.grouping.TopGroups;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This query requires that you index
|
||||||
|
* children and parent docs as a single block, using the
|
||||||
|
* {@link IndexWriter#addDocuments} or {@link
|
||||||
|
* IndexWriter#updateDocuments} API. In each block, the
|
||||||
|
* child documents must appear first, ending with the parent
|
||||||
|
* document. At search time you provide a Filter
|
||||||
|
* identifying the parents, however this Filter must provide
|
||||||
|
* an {@link OpenBitSet} per sub-reader.
|
||||||
|
*
|
||||||
|
* <p>Once the block index is built, use this query to wrap
|
||||||
|
* any sub-query matching only child docs and join matches in that
|
||||||
|
* child document space up to the parent document space.
|
||||||
|
* You can then use this Query as a clause with
|
||||||
|
* other queries in the parent document space.</p>
|
||||||
|
*
|
||||||
|
* <p>The child documents must be orthogonal to the parent
|
||||||
|
* documents: the wrapped child query must never
|
||||||
|
* return a parent document.</p>
|
||||||
|
*
|
||||||
|
* If you'd like to retrieve {@link TopGroups} for the
|
||||||
|
* resulting query, use the {@link BlockJoinCollector}.
|
||||||
|
* Note that this is not necessary, ie, if you simply want
|
||||||
|
* to collect the parent documents and don't need to see
|
||||||
|
* which child documents matched under that parent, then
|
||||||
|
* you can use any collector.
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: If the overall query contains parent-only
|
||||||
|
* matches, for example you OR a parent-only query with a
|
||||||
|
* joined child-only query, then the resulting collected documents
|
||||||
|
* will be correct, however the {@link TopGroups} you get
|
||||||
|
* from {@link BlockJoinCollector} will not contain every
|
||||||
|
* child for parents that had matched.
|
||||||
|
*
|
||||||
|
* <p>See {@link org.apache.lucene.search.join} for an
|
||||||
|
* overview. </p>
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class BlockJoinQuery extends Query {
|
||||||
|
|
||||||
|
public static enum ScoreMode {None, Avg, Max, Total};
|
||||||
|
|
||||||
|
private final Filter parentsFilter;
|
||||||
|
private final Query childQuery;
|
||||||
|
|
||||||
|
// If we are rewritten, this is the original childQuery we
|
||||||
|
// were passed; we use this for .equals() and
|
||||||
|
// .hashCode(). This makes rewritten query equal the
|
||||||
|
// original, so that user does not have to .rewrite() their
|
||||||
|
// query before searching:
|
||||||
|
private final Query origChildQuery;
|
||||||
|
private final ScoreMode scoreMode;
|
||||||
|
|
||||||
|
public BlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
|
||||||
|
super();
|
||||||
|
this.origChildQuery = childQuery;
|
||||||
|
this.childQuery = childQuery;
|
||||||
|
this.parentsFilter = parentsFilter;
|
||||||
|
this.scoreMode = scoreMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
|
||||||
|
super();
|
||||||
|
this.origChildQuery = origChildQuery;
|
||||||
|
this.childQuery = childQuery;
|
||||||
|
this.parentsFilter = parentsFilter;
|
||||||
|
this.scoreMode = scoreMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Weight createWeight(IndexSearcher searcher) throws IOException {
|
||||||
|
return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class BlockJoinWeight extends Weight {
|
||||||
|
private final Query joinQuery;
|
||||||
|
private final Weight childWeight;
|
||||||
|
private final Filter parentsFilter;
|
||||||
|
private final ScoreMode scoreMode;
|
||||||
|
|
||||||
|
public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) {
|
||||||
|
super();
|
||||||
|
this.joinQuery = joinQuery;
|
||||||
|
this.childWeight = childWeight;
|
||||||
|
this.parentsFilter = parentsFilter;
|
||||||
|
this.scoreMode = scoreMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query getQuery() {
|
||||||
|
return joinQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getValue() {
|
||||||
|
return childWeight.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float sumOfSquaredWeights() throws IOException {
|
||||||
|
return childWeight.sumOfSquaredWeights();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void normalize(float norm) {
|
||||||
|
childWeight.normalize(norm);
|
||||||
|
}
|
||||||
|
|
||||||
|
    @Override
    public Scorer scorer(AtomicReaderContext readerContext, ScorerContext context) throws IOException {
      // Pass scoreDocsInOrder true, topScorer false to our sub:
      final Scorer childScorer = childWeight.scorer(readerContext, ScorerContext.def().scoreDocsInOrder(true).topScorer(false));

      if (childScorer == null) {
        // No matches
        return null;
      }

      final int firstChildDoc = childScorer.nextDoc();
      if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
        // No matches
        return null;
      }

      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext);
      // TODO: once we do random-access filters we can
      // generalize this:
      if (parents == null) {
        // No matches
        return null;
      }
      if (!(parents instanceof OpenBitSet)) {
        throw new IllegalStateException("parentFilter must return OpenBitSet; got " + parents);
      }

      return new BlockJoinScorer(this, childScorer, (OpenBitSet) parents, firstChildDoc, scoreMode);
    }

    @Override
    public Explanation explain(AtomicReaderContext reader, int doc) throws IOException {
      // TODO
      throw new UnsupportedOperationException(getClass().getName() +
                                              " cannot explain match on parent document");
    }

    @Override
    public boolean scoresDocsOutOfOrder() {
      return false;
    }
  }

  static class BlockJoinScorer extends Scorer {
    private final Scorer childScorer;
    private final OpenBitSet parentBits;
    private final ScoreMode scoreMode;
    private int parentDoc;
    private float parentScore;
    private int nextChildDoc;

    private int[] pendingChildDocs = new int[5];
    private float[] pendingChildScores;
    private int childDocUpto;

    public BlockJoinScorer(Weight weight, Scorer childScorer, OpenBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
      super(weight);
      //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
      this.parentBits = parentBits;
      this.childScorer = childScorer;
      this.scoreMode = scoreMode;
      if (scoreMode != ScoreMode.None) {
        pendingChildScores = new float[5];
      }
      nextChildDoc = firstChildDoc;
    }

    @Override
    public void visitSubScorers(Query parent, BooleanClause.Occur relationship,
                                ScorerVisitor<Query, Query, Scorer> visitor) {
      super.visitSubScorers(parent, relationship, visitor);
      //childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor);
      childScorer.visitScorers(visitor);
    }

    int getChildCount() {
      return childDocUpto;
    }

    int[] swapChildDocs(int[] other) {
      final int[] ret = pendingChildDocs;
      if (other == null) {
        pendingChildDocs = new int[5];
      } else {
        pendingChildDocs = other;
      }
      return ret;
    }

    float[] swapChildScores(float[] other) {
      if (scoreMode == ScoreMode.None) {
        throw new IllegalStateException("ScoreMode is None");
      }
      final float[] ret = pendingChildScores;
      if (other == null) {
        pendingChildScores = new float[5];
      } else {
        pendingChildScores = other;
      }
      return ret;
    }

    @Override
    public int nextDoc() throws IOException {
      //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);

      if (nextChildDoc == NO_MORE_DOCS) {
        //System.out.println("  end");
        return parentDoc = NO_MORE_DOCS;
      }

      // Gather all children sharing the same parent as nextChildDoc
      parentDoc = parentBits.nextSetBit(nextChildDoc);
      //System.out.println("  parentDoc=" + parentDoc);
      assert parentDoc != -1;

      float totalScore = 0;
      float maxScore = Float.NEGATIVE_INFINITY;

      childDocUpto = 0;
      do {
        //System.out.println("  c=" + nextChildDoc);
        if (pendingChildDocs.length == childDocUpto) {
          pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
          if (scoreMode != ScoreMode.None) {
            pendingChildScores = ArrayUtil.grow(pendingChildScores);
          }
        }
        pendingChildDocs[childDocUpto] = nextChildDoc;
        if (scoreMode != ScoreMode.None) {
          // TODO: specialize this into dedicated classes per-scoreMode
          final float childScore = childScorer.score();
          pendingChildScores[childDocUpto] = childScore;
          maxScore = Math.max(childScore, maxScore);
          totalScore += childScore;
        }
        childDocUpto++;
        nextChildDoc = childScorer.nextDoc();
      } while (nextChildDoc < parentDoc);
      //System.out.println("  nextChildDoc=" + nextChildDoc);

      // Parent & child docs are supposed to be orthogonal:
      assert nextChildDoc != parentDoc;

      switch(scoreMode) {
      case Avg:
        parentScore = totalScore / childDocUpto;
        break;
      case Max:
        parentScore = maxScore;
        break;
      case Total:
        parentScore = totalScore;
        break;
      case None:
        break;
      }

      //System.out.println("  return parentDoc=" + parentDoc);
      return parentDoc;
    }

    @Override
    public int docID() {
      return parentDoc;
    }

    @Override
    public float score() throws IOException {
      return parentScore;
    }

    @Override
    public int advance(int parentTarget) throws IOException {

      //System.out.println("Q.advance parentTarget=" + parentTarget);
      if (parentTarget == NO_MORE_DOCS) {
        return parentDoc = NO_MORE_DOCS;
      }

      // Find the parent just before parentTarget, so we can then
      // advance the child scorer past that parent's block:
      final int prevParentDoc = parentBits.prevSetBit(parentTarget-1);

      //System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
      assert prevParentDoc >= parentDoc;
      if (prevParentDoc > nextChildDoc) {
        nextChildDoc = childScorer.advance(prevParentDoc);
        //System.out.println("  childScorer advanced to child docID=" + nextChildDoc);
      //} else {
        //System.out.println("  skip childScorer advance");
      }

      // Parent & child docs are supposed to be orthogonal:
      assert nextChildDoc != prevParentDoc;

      final int nd = nextDoc();
      //System.out.println("  return nextParentDoc=" + nd);
      return nd;
    }
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    childQuery.extractTerms(terms);
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    final Query childRewrite = childQuery.rewrite(reader);
    if (childRewrite != childQuery) {
      return new BlockJoinQuery(childQuery,
                                childRewrite,
                                parentsFilter,
                                scoreMode);
    } else {
      return this;
    }
  }

  @Override
  public String toString(String field) {
    return "BlockJoinQuery ("+childQuery.toString()+")";
  }

  @Override
  public void setBoost(float boost) {
    throw new UnsupportedOperationException("this query cannot support boosting; please use childQuery.setBoost instead");
  }

  @Override
  public float getBoost() {
    throw new UnsupportedOperationException("this query cannot support boosting; please use childQuery.getBoost instead");
  }

  @Override
  public boolean equals(Object _other) {
    if (_other instanceof BlockJoinQuery) {
      final BlockJoinQuery other = (BlockJoinQuery) _other;
      return origChildQuery.equals(other.origChildQuery) &&
             parentsFilter.equals(other.parentsFilter) &&
             scoreMode == other.scoreMode;
    } else {
      return false;
    }
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int hash = 1;
    hash = prime * hash + origChildQuery.hashCode();
    hash = prime * hash + scoreMode.hashCode();
    hash = prime * hash + parentsFilter.hashCode();
    return hash;
  }

  @Override
  public Object clone() {
    return new BlockJoinQuery((Query) origChildQuery.clone(),
                              parentsFilter,
                              scoreMode);
  }
}

@ -0,0 +1,32 @@
<html>
<body>

<p>This module supports index-time joins while searching, where joined
documents are indexed as a single document block using
{@link org.apache.lucene.index.IndexWriter#addDocuments}.  This is
useful for any normalized content (XML documents or database tables).
In database terms, all rows for all joined tables matching a single
row of the primary table must be indexed as a single document block,
with the parent document being last in the group.</p>
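
<p>For example (a sketch; the document and writer names below are
illustrative, not part of the API), one parent and its children are
added in a single call:</p>

<pre>
  // childDoc1, childDoc2, parentDoc and writer are assumed given
  List&lt;Document&gt; block = new ArrayList&lt;Document&gt;();
  block.add(childDoc1);        // children first...
  block.add(childDoc2);
  block.add(parentDoc);        // ...parent document last
  writer.addDocuments(block);  // indexed as one contiguous block
</pre>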

<p>When you index in this way, the documents in your index are divided
into parent documents (the last document of each block) and child
documents (all others).  You provide a {@link org.apache.lucene.search.Filter} that identifies the
parent documents, as Lucene does not currently record any information
about doc blocks.</p>

<p>At search time, use {@link org.apache.lucene.search.join.BlockJoinQuery} to remap
matches from any child {@link org.apache.lucene.search.Query} (i.e., a query that matches only
child documents) up to the parent document space.  The resulting
{@link org.apache.lucene.search.join.BlockJoinQuery} can then be used as a clause in any query that
matches parent documents.</p>
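
<p>For example (a sketch; the field names are illustrative), to find
parents whose children match a term, and combine that with a direct
constraint on the parent:</p>

<pre>
  // Illustrative field names; any schema marking parents works:
  Filter parentsFilter = new CachingWrapperFilter(
      new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
  Query childQuery = new TermQuery(new Term("skill", "java"));

  // Remap child matches up to the parent document space:
  BlockJoinQuery childJoinQuery =
      new BlockJoinQuery(childQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);

  // Combine with a query on the parent documents themselves:
  BooleanQuery fullQuery = new BooleanQuery();
  fullQuery.add(childJoinQuery, BooleanClause.Occur.MUST);
  fullQuery.add(new TermQuery(new Term("country", "United Kingdom")),
                BooleanClause.Occur.MUST);
</pre>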

<p>If you only care about the parent documents matching the query, you
can use any collector to collect the parent hits, but if you'd also
like to see which child documents match for each parent document,
use the {@link org.apache.lucene.search.join.BlockJoinCollector} to collect the hits.  Once the
search is done, you retrieve a {@link
org.apache.lucene.search.grouping.TopGroups} instance from the
{@link org.apache.lucene.search.join.BlockJoinCollector#getTopGroups} method.</p>
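
<p>For example (a sketch, continuing from the fullQuery and
childJoinQuery above), collecting the top parents and, for each, the
child hits that produced it:</p>

<pre>
  // searcher, fullQuery and childJoinQuery are assumed given
  BlockJoinCollector c = new BlockJoinCollector(
      Sort.RELEVANCE,  // sort order for the parent hits
      10,              // how many parent hits to keep
      true,            // track parent scores
      false);          // don't track the max child score
  searcher.search(fullQuery, c);

  // One group per parent hit; each group's scoreDocs are its children:
  TopGroups&lt;Integer&gt; groups =
      c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
</pre>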

</body>
</html>

@ -0,0 +1,5 @@
<html>
<body>
Lucene's join module
</body>
</html>

@ -0,0 +1,476 @@
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.search.join.BlockJoinCollector;
import org.apache.lucene.search.join.BlockJoinQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class TestBlockJoin extends LuceneTestCase {

  // One resume...
  private Document makeResume(String name, String country) {
    Document resume = new Document();
    resume.add(newField("docType", "resume", Field.Index.NOT_ANALYZED));
    resume.add(newField("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED));
    resume.add(newField("country", country, Field.Index.NOT_ANALYZED));
    return resume;
  }

  // ... has multiple jobs
  private Document makeJob(String skill, int year) {
    Document job = new Document();
    job.add(newField("skill", skill, Field.Store.YES, Field.Index.NOT_ANALYZED));
    job.add(new NumericField("year").setIntValue(year));
    return job;
  }

  public void testSimple() throws Exception {

    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random, dir);

    final List<Document> docs = new ArrayList<Document>();

    docs.add(makeJob("java", 2007));
    docs.add(makeJob("python", 2010));
    docs.add(makeResume("Lisa", "United Kingdom"));
    w.addDocuments(docs);

    docs.clear();
    docs.add(makeJob("ruby", 2005));
    docs.add(makeJob("java", 2006));
    docs.add(makeResume("Frank", "United States"));
    w.addDocuments(docs);

    IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = new IndexSearcher(r);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Define child document criteria (finds an example of relevant work experience)
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
    childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));

    // Define parent document criteria (find a resident in the UK)
    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

    // Wrap the child document query to 'join' any matches
    // up to corresponding parent:
    BlockJoinQuery childJoinQuery = new BlockJoinQuery(childQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);

    // Combine the parent and nested child queries into a single query for a candidate
    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
    fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));

    BlockJoinCollector c = new BlockJoinCollector(Sort.RELEVANCE, 1, true, false);

    s.search(fullQuery, c);

    TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);

    //assertEquals(1, results.totalHitCount);
    assertEquals(1, results.totalGroupedHitCount);
    assertEquals(1, results.groups.length);

    final GroupDocs<Integer> group = results.groups[0];
    assertEquals(1, group.totalHits);

    Document childDoc = s.doc(group.scoreDocs[0].doc);
    //System.out.println("  doc=" + group.scoreDocs[0].doc);
    assertEquals("java", childDoc.get("skill"));
    assertNotNull(group.groupValue);
    Document parentDoc = s.doc(group.groupValue);
    assertEquals("Lisa", parentDoc.get("name"));

    r.close();
    dir.close();
  }

  private String[][] getRandomFields(int maxUniqueValues) {

    final String[][] fields = new String[_TestUtil.nextInt(random, 2, 4)][];
    for(int fieldID=0;fieldID<fields.length;fieldID++) {
      final int valueCount;
      if (fieldID == 0) {
        valueCount = 2;
      } else {
        valueCount = _TestUtil.nextInt(random, 1, maxUniqueValues);
      }

      final String[] values = fields[fieldID] = new String[valueCount];
      for(int i=0;i<valueCount;i++) {
        values[i] = _TestUtil.randomRealisticUnicodeString(random);
        //values[i] = _TestUtil.randomSimpleString(random);
      }
    }

    return fields;
  }

  private Term randomParentTerm(String[] values) {
    return new Term("parent0", values[random.nextInt(values.length)]);
  }

  private Term randomChildTerm(String[] values) {
    return new Term("child0", values[random.nextInt(values.length)]);
  }

  private Sort getRandomSort(String prefix, int numFields) {
    final List<SortField> sortFields = new ArrayList<SortField>();
    // TODO: sometimes sort by score; problem is scores are
    // not comparable across the two indices
    // sortFields.add(SortField.FIELD_SCORE);
    if (random.nextBoolean()) {
      sortFields.add(new SortField(prefix + random.nextInt(numFields), SortField.Type.STRING, random.nextBoolean()));
    } else if (random.nextBoolean()) {
      sortFields.add(new SortField(prefix + random.nextInt(numFields), SortField.Type.STRING, random.nextBoolean()));
      sortFields.add(new SortField(prefix + random.nextInt(numFields), SortField.Type.STRING, random.nextBoolean()));
    }
    // Break ties:
    sortFields.add(new SortField(prefix + "ID", SortField.Type.INT));
    return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
  }

  public void testRandom() throws Exception {
    // We build two indices at once: one normalized (which
    // BlockJoinQuery/Collector can query) and the other w/
    // same docs just fully denormalized:
    final Directory dir = newDirectory();
    final Directory joinDir = newDirectory();

    final int numParentDocs = _TestUtil.nextInt(random, 100*RANDOM_MULTIPLIER, 300*RANDOM_MULTIPLIER);
    //final int numParentDocs = 30;

    // Values for parent fields:
    final String[][] parentFields = getRandomFields(numParentDocs/2);
    // Values for child fields:
    final String[][] childFields = getRandomFields(numParentDocs);

    // TODO: test star join, nested join cases too!
    final RandomIndexWriter w = new RandomIndexWriter(random, dir);
    final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir);
    for(int parentDocID=0;parentDocID<numParentDocs;parentDocID++) {
      Document parentDoc = new Document();
      Document parentJoinDoc = new Document();
      Field id = newField("parentID", ""+parentDocID, Field.Store.YES, Field.Index.NOT_ANALYZED);
      parentDoc.add(id);
      parentJoinDoc.add(id);
      parentJoinDoc.add(newField("isParent", "x", Field.Index.NOT_ANALYZED));
      for(int field=0;field<parentFields.length;field++) {
        if (random.nextDouble() < 0.9) {
          Field f = newField("parent" + field,
                             parentFields[field][random.nextInt(parentFields[field].length)],
                             Field.Index.NOT_ANALYZED);
          parentDoc.add(f);
          parentJoinDoc.add(f);
        }
      }

      final List<Document> joinDocs = new ArrayList<Document>();

      if (VERBOSE) {
        System.out.println("  " + parentDoc);
      }

      final int numChildDocs = _TestUtil.nextInt(random, 1, 20);
      for(int childDocID=0;childDocID<numChildDocs;childDocID++) {
        // Denormalize: copy all parent fields into child doc:
        Document childDoc = _TestUtil.cloneDocument(parentDoc);
        Document joinChildDoc = new Document();
        joinDocs.add(joinChildDoc);

        Field childID = newField("childID", ""+childDocID, Field.Store.YES, Field.Index.NOT_ANALYZED);
        childDoc.add(childID);
        joinChildDoc.add(childID);

        for(int childFieldID=0;childFieldID<childFields.length;childFieldID++) {
          if (random.nextDouble() < 0.9) {
            Field f = newField("child" + childFieldID,
                               childFields[childFieldID][random.nextInt(childFields[childFieldID].length)],
                               Field.Index.NOT_ANALYZED);
            childDoc.add(f);
            joinChildDoc.add(f);
          }
        }

        if (VERBOSE) {
          System.out.println("    " + joinChildDoc);
        }

        w.addDocument(childDoc);
      }

      // Parent last:
      joinDocs.add(parentJoinDoc);
      joinW.addDocuments(joinDocs);
    }

    final IndexReader r = w.getReader();
    w.close();
    final IndexReader joinR = joinW.getReader();
    joinW.close();

    if (VERBOSE) {
      System.out.println("TEST: reader=" + r);
      System.out.println("TEST: joinReader=" + joinR);

      for(int docIDX=0;docIDX<joinR.maxDoc();docIDX++) {
        System.out.println("  docID=" + docIDX + " doc=" + joinR.document(docIDX));
      }
    }

    final IndexSearcher s = new IndexSearcher(r);
    s.setDefaultFieldSortScoring(true, true);

    final IndexSearcher joinS = new IndexSearcher(joinR);

    final Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));

    final int iters = 200*RANDOM_MULTIPLIER;

    for(int iter=0;iter<iters;iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
      }

      final Query childQuery;
      if (random.nextInt(3) == 2) {
        final int childFieldID = random.nextInt(childFields.length);
        childQuery = new TermQuery(new Term("child" + childFieldID,
                                            childFields[childFieldID][random.nextInt(childFields[childFieldID].length)]));
      } else if (random.nextInt(3) == 2) {
        BooleanQuery bq = new BooleanQuery();
        childQuery = bq;
        final int numClauses = _TestUtil.nextInt(random, 2, 4);
        boolean didMust = false;
        for(int clauseIDX=0;clauseIDX<numClauses;clauseIDX++) {
          Query clause;
          BooleanClause.Occur occur;
          if (!didMust && random.nextBoolean()) {
            occur = random.nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
            clause = new TermQuery(randomChildTerm(childFields[0]));
            didMust = true;
          } else {
            occur = BooleanClause.Occur.SHOULD;
            final int childFieldID = _TestUtil.nextInt(random, 1, childFields.length-1);
            clause = new TermQuery(new Term("child" + childFieldID,
                                            childFields[childFieldID][random.nextInt(childFields[childFieldID].length)]));
          }
          bq.add(clause, occur);
        }
      } else {
        BooleanQuery bq = new BooleanQuery();
        childQuery = bq;

        bq.add(new TermQuery(randomChildTerm(childFields[0])),
               BooleanClause.Occur.MUST);
        final int childFieldID = _TestUtil.nextInt(random, 1, childFields.length-1);
        bq.add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random.nextInt(childFields[childFieldID].length)])),
               random.nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
      }

      final BlockJoinQuery childJoinQuery = new BlockJoinQuery(childQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);

      // To run against the block-join index:
      final Query parentJoinQuery;

      // Same query as parentJoinQuery, but to run against
      // the fully denormalized index (so we can compare
      // results):
      final Query parentQuery;

      if (random.nextBoolean()) {
        parentQuery = childQuery;
        parentJoinQuery = childJoinQuery;
      } else {
        // AND parent field w/ child field
        final BooleanQuery bq = new BooleanQuery();
        parentJoinQuery = bq;
        final Term parentTerm = randomParentTerm(parentFields[0]);
        if (random.nextBoolean()) {
          bq.add(childJoinQuery, BooleanClause.Occur.MUST);
          bq.add(new TermQuery(parentTerm),
                 BooleanClause.Occur.MUST);
        } else {
          bq.add(new TermQuery(parentTerm),
                 BooleanClause.Occur.MUST);
          bq.add(childJoinQuery, BooleanClause.Occur.MUST);
        }

        final BooleanQuery bq2 = new BooleanQuery();
        parentQuery = bq2;
        if (random.nextBoolean()) {
          bq2.add(childQuery, BooleanClause.Occur.MUST);
          bq2.add(new TermQuery(parentTerm),
                  BooleanClause.Occur.MUST);
        } else {
          bq2.add(new TermQuery(parentTerm),
                  BooleanClause.Occur.MUST);
          bq2.add(childQuery, BooleanClause.Occur.MUST);
        }
      }

      final Sort parentSort = getRandomSort("parent", parentFields.length);
      final Sort childSort = getRandomSort("child", childFields.length);

      if (VERBOSE) {
        System.out.println("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
      }

      // Merge both sorts:
      final List<SortField> sortFields = new ArrayList<SortField>(Arrays.asList(parentSort.getSort()));
      sortFields.addAll(Arrays.asList(childSort.getSort()));
      final Sort parentAndChildSort = new Sort(sortFields.toArray(new SortField[sortFields.size()]));

      final TopDocs results = s.search(parentQuery, null, r.numDocs(),
                                       parentAndChildSort);

      if (VERBOSE) {
        System.out.println("\nTEST: normal index gets " + results.totalHits + " hits");
        final ScoreDoc[] hits = results.scoreDocs;
        for(int hitIDX=0;hitIDX<hits.length;hitIDX++) {
          final Document doc = s.doc(hits[hitIDX].doc);
          //System.out.println("  score=" + hits[hitIDX].score + " parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
          System.out.println("  parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
          FieldDoc fd = (FieldDoc) hits[hitIDX];
          if (fd.fields != null) {
            System.out.print("    ");
            for(Object o : fd.fields) {
              if (o instanceof BytesRef) {
                System.out.print(((BytesRef) o).utf8ToString() + " ");
              } else {
                System.out.print(o + " ");
              }
            }
            System.out.println();
          }
        }
      }

      final BlockJoinCollector c = new BlockJoinCollector(parentSort, 10, true, true);

      joinS.search(parentJoinQuery, c);

      final int hitsPerGroup = _TestUtil.nextInt(random, 1, 20);
      //final int hitsPerGroup = 100;
      final TopGroups<Integer> joinResults = c.getTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);

      if (VERBOSE) {
        System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.groups.length) + " groups; hitsPerGroup=" + hitsPerGroup);
        if (joinResults != null) {
          final GroupDocs<Integer>[] groups = joinResults.groups;
          for(int groupIDX=0;groupIDX<groups.length;groupIDX++) {
            final GroupDocs<Integer> group = groups[groupIDX];
            if (group.groupSortValues != null) {
              System.out.print("  ");
              for(Object o : group.groupSortValues) {
                if (o instanceof BytesRef) {
                  System.out.print(((BytesRef) o).utf8ToString() + " ");
                } else {
                  System.out.print(o + " ");
                }
              }
              System.out.println();
            }

            assertNotNull(group.groupValue);
            final Document parentDoc = joinS.doc(group.groupValue);
            System.out.println("  group parentID=" + parentDoc.get("parentID") + " (docID=" + group.groupValue + ")");
            for(int hitIDX=0;hitIDX<group.scoreDocs.length;hitIDX++) {
              final Document doc = joinS.doc(group.scoreDocs[hitIDX].doc);
              //System.out.println("    score=" + group.scoreDocs[hitIDX].score + " childID=" + doc.get("childID") + " (docID=" + group.scoreDocs[hitIDX].doc + ")");
              System.out.println("    childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + group.scoreDocs[hitIDX].doc + ")");
            }
          }
        }
      }

      if (results.totalHits == 0) {
        assertNull(joinResults);
      } else {
        compareHits(r, joinR, results, joinResults);
      }
    }

    r.close();
    joinR.close();
    dir.close();
    joinDir.close();
  }

  private void compareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<Integer> joinResults) throws Exception {
    // results is 'complete'; joinResults is a subset
    int resultUpto = 0;
    int joinGroupUpto = 0;

    final ScoreDoc[] hits = results.scoreDocs;
    final GroupDocs<Integer>[] groupDocs = joinResults.groups;

    while(joinGroupUpto < groupDocs.length) {
      final GroupDocs<Integer> group = groupDocs[joinGroupUpto++];
      final ScoreDoc[] groupHits = group.scoreDocs;
      assertNotNull(group.groupValue);
      final Document parentDoc = joinR.document(group.groupValue);
      final String parentID = parentDoc.get("parentID");
      //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
      assertNotNull(parentID);
      assertTrue(groupHits.length > 0);
      for(int hitIDX=0;hitIDX<groupHits.length;hitIDX++) {
        final Document nonJoinHit = r.document(hits[resultUpto++].doc);
        final Document joinHit = joinR.document(groupHits[hitIDX].doc);
        assertEquals(parentID,
                     nonJoinHit.get("parentID"));
        assertEquals(joinHit.get("childID"),
                     nonJoinHit.get("childID"));
      }

      if (joinGroupUpto < groupDocs.length) {
        // Advance non-join hit to the next parentID:
        //System.out.println("  next joingroupUpto=" + joinGroupUpto + " gd.length=" + groupDocs.length + " parentID=" + parentID);
        while(true) {
          assertTrue(resultUpto < hits.length);
          if (!parentID.equals(r.document(hits[resultUpto].doc).get("parentID"))) {
            break;
          }
          resultUpto++;
        }
      }
    }
  }
}