mirror of https://github.com/apache/lucene.git
LUCENE-5493: make BlockJoinSorter a ComparatorSource taking parent/child Sort
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5493@1574909 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c2fccd80e4
commit
28a2d1dc60
|
@ -0,0 +1,207 @@
|
|||
package org.apache.lucene.index.sorter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.FieldComparator;
|
||||
import org.apache.lucene.search.FieldComparatorSource;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* Helper class to sort readers that contain blocks of documents.
|
||||
*/
|
||||
public class BlockJoinComparatorSource extends FieldComparatorSource {
|
||||
final Filter parentsFilter;
|
||||
final Sort parentSort;
|
||||
final Sort childSort;
|
||||
|
||||
/**
|
||||
* Create a new BlockJoinComparatorSource, sorting only blocks of documents
|
||||
* with {@code parentSort} and not reordering children with a block.
|
||||
*
|
||||
* @param parentsFilter Filter identifying parent documents
|
||||
* @param parentSort Sort for parent documents
|
||||
*/
|
||||
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort) {
|
||||
this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new BlockJoinComparatorSource, specifying the sort order for both
|
||||
* blocks of documents and children within a block.
|
||||
*
|
||||
* @param parentsFilter Filter identifying parent documents
|
||||
* @param parentSort Sort for parent documents
|
||||
* @param childSort Sort for child documents in the same block
|
||||
*/
|
||||
public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort) {
|
||||
this.parentsFilter = parentsFilter;
|
||||
this.parentSort = parentSort;
|
||||
this.childSort = childSort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
|
||||
// we keep parallel slots: the parent ids and the child ids
|
||||
final int parentSlots[] = new int[numHits];
|
||||
final int childSlots[] = new int[numHits];
|
||||
|
||||
SortField parentFields[] = parentSort.getSort();
|
||||
final int parentReverseMul[] = new int[parentFields.length];
|
||||
final FieldComparator<?> parentComparators[] = new FieldComparator[parentFields.length];
|
||||
for (int i = 0; i < parentFields.length; i++) {
|
||||
parentReverseMul[i] = parentFields[i].getReverse() ? -1 : 1;
|
||||
parentComparators[i] = parentFields[i].getComparator(2, i);
|
||||
}
|
||||
|
||||
SortField childFields[] = childSort.getSort();
|
||||
final int childReverseMul[] = new int[childFields.length];
|
||||
final FieldComparator<?> childComparators[] = new FieldComparator[childFields.length];
|
||||
for (int i = 0; i < childFields.length; i++) {
|
||||
childReverseMul[i] = childFields[i].getReverse() ? -1 : 1;
|
||||
childComparators[i] = childFields[i].getComparator(2, i);
|
||||
}
|
||||
|
||||
// NOTE: not quite right i guess, really our sort "value" is more complex...
|
||||
// but at the moment you really should only use this at indexing time.
|
||||
return new FieldComparator<Integer>() {
|
||||
int bottomParent;
|
||||
int bottomChild;
|
||||
FixedBitSet parentBits;
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
try {
|
||||
return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(int slot) {
|
||||
bottomParent = parentSlots[slot];
|
||||
bottomChild = childSlots[slot];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(Integer value) {
|
||||
// we dont have enough information (the docid is needed)
|
||||
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
return compare(bottomChild, bottomParent, doc, parent(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) throws IOException {
|
||||
// we dont have enough information (the docid is needed)
|
||||
throw new UnsupportedOperationException("this comparator cannot be used with deep paging");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
childSlots[slot] = doc;
|
||||
parentSlots[slot] = parent(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(context, null);
|
||||
if (parents == null) {
|
||||
throw new IllegalStateException("AtomicReader " + context.reader() + " contains no parents!");
|
||||
}
|
||||
if (!(parents instanceof FixedBitSet)) {
|
||||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
parentBits = (FixedBitSet) parents;
|
||||
for (int i = 0; i < parentComparators.length; i++) {
|
||||
parentComparators[i] = parentComparators[i].setNextReader(context);
|
||||
}
|
||||
for (int i = 0; i < childComparators.length; i++) {
|
||||
childComparators[i] = childComparators[i].setNextReader(context);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer value(int slot) {
|
||||
// really our sort "value" is more complex...
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) {
|
||||
super.setScorer(scorer);
|
||||
for (FieldComparator<?> comp : parentComparators) {
|
||||
comp.setScorer(scorer);
|
||||
}
|
||||
for (FieldComparator<?> comp : childComparators) {
|
||||
comp.setScorer(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
int parent(int doc) {
|
||||
return parentBits.nextSetBit(doc);
|
||||
}
|
||||
|
||||
int compare(int docID1, int parent1, int docID2, int parent2) throws IOException {
|
||||
if (parent1 == parent2) { // both are in the same block
|
||||
// nocommit: should not be needed?
|
||||
if (docID1 == parent1 || docID2 == parent2) {
|
||||
// keep parents at the end of blocks
|
||||
return docID1 - docID2;
|
||||
} else {
|
||||
return compare(docID1, docID2, childComparators, childReverseMul);
|
||||
}
|
||||
} else {
|
||||
int cmp = compare(parent1, parent2, parentComparators, parentReverseMul);
|
||||
// nocommit: should not be needed?
|
||||
if (cmp == 0) {
|
||||
return parent1 - parent2;
|
||||
} else {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int compare(int docID1, int docID2, FieldComparator<?> comparators[], int reverseMul[]) throws IOException {
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].copy(0, docID1);
|
||||
comparators[i].copy(1, docID2);
|
||||
int comp = reverseMul[i] * comparators[i].compare(0, 1);
|
||||
if (comp != 0) {
|
||||
return comp;
|
||||
}
|
||||
}
|
||||
return 0; // no need to docid tiebreak
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package org.apache.lucene.index.sorter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* Helper class to sort readers that contain blocks of documents.
|
||||
*/
|
||||
public abstract class BlockJoinSorter extends Sorter {
|
||||
|
||||
protected final Filter parentsFilter;
|
||||
|
||||
/** Sole constructor. */
|
||||
public BlockJoinSorter(Filter parentsFilter) {
|
||||
this.parentsFilter = parentsFilter;
|
||||
}
|
||||
|
||||
/** Return a {@link Sorter.DocComparator} instance that will be called on
|
||||
* parent doc IDs. */
|
||||
protected abstract DocComparator getParentComparator(AtomicReader reader);
|
||||
|
||||
/** Return a {@link Sorter.DocComparator} instance that will be called on
|
||||
* children of the same parent. By default, children of the same parent are
|
||||
* not reordered. */
|
||||
protected DocComparator getChildComparator(AtomicReader reader) {
|
||||
return INDEX_ORDER_COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final DocMap sort(AtomicReader reader) throws IOException {
|
||||
final DocIdSet parents = parentsFilter.getDocIdSet(reader.getContext(), null);
|
||||
if (parents == null) {
|
||||
throw new IllegalStateException("AtomicReader " + reader + " contains no parents!");
|
||||
}
|
||||
if (!(parents instanceof FixedBitSet)) {
|
||||
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
|
||||
}
|
||||
final FixedBitSet parentBits = (FixedBitSet) parents;
|
||||
final DocComparator parentComparator = getParentComparator(reader);
|
||||
final DocComparator childComparator = getChildComparator(reader);
|
||||
final DocComparator comparator = new DocComparator() {
|
||||
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
final int parent1 = parentBits.nextSetBit(docID1);
|
||||
final int parent2 = parentBits.nextSetBit(docID2);
|
||||
if (parent1 == parent2) { // both are in the same block
|
||||
if (docID1 == parent1 || docID2 == parent2) {
|
||||
// keep parents at the end of blocks
|
||||
return docID1 - docID2;
|
||||
} else {
|
||||
return childComparator.compare(docID1, docID2);
|
||||
}
|
||||
} else {
|
||||
int cmp = parentComparator.compare(parent1, parent2);
|
||||
if (cmp == 0) {
|
||||
cmp = parent1 - parent2;
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
return sort(reader.maxDoc(), comparator);
|
||||
}
|
||||
|
||||
}
|
|
@ -37,6 +37,8 @@ import org.apache.lucene.search.DocIdSet;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.QueryWrapperFilter;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@ -89,47 +91,14 @@ public class TestBlockJoinSorter extends LuceneTestCase {
|
|||
final AtomicReader reader = getOnlySegmentReader(indexReader);
|
||||
final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
|
||||
final FixedBitSet parentBits = (FixedBitSet) parentsFilter.getDocIdSet(reader.getContext(), null);
|
||||
|
||||
final NumericDocValues parentValues = reader.getNumericDocValues("parent_val");
|
||||
final Sorter.DocComparator parentComparator = new Sorter.DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
assertTrue(parentBits.get(docID1));
|
||||
assertTrue(parentBits.get(docID2));
|
||||
return Long.compare(parentValues.get(docID1), parentValues.get(docID2));
|
||||
}
|
||||
};
|
||||
|
||||
final NumericDocValues childValues = reader.getNumericDocValues("child_val");
|
||||
final Sorter.DocComparator childComparator = new Sorter.DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
assertFalse(parentBits.get(docID1));
|
||||
assertFalse(parentBits.get(docID2));
|
||||
return Long.compare(childValues.get(docID1), childValues.get(docID2));
|
||||
}
|
||||
};
|
||||
|
||||
final Sorter sorter = new BlockJoinSorter(parentsFilter) {
|
||||
|
||||
@Override
|
||||
public String getID() {
|
||||
return "Dummy";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocComparator getParentComparator(AtomicReader r) {
|
||||
assertEquals(reader, r);
|
||||
return parentComparator;
|
||||
}
|
||||
final Sort parentSort = new Sort(new SortField("parent_val", SortField.Type.LONG));
|
||||
final Sort childSort = new Sort(new SortField("child_val", SortField.Type.LONG));
|
||||
|
||||
@Override
|
||||
protected DocComparator getChildComparator(AtomicReader r) {
|
||||
assertEquals(reader, r);
|
||||
return childComparator;
|
||||
}
|
||||
|
||||
};
|
||||
final Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
|
||||
final Sorter sorter = new SortSorter(sort);
|
||||
final Sorter.DocMap docMap = sorter.sort(reader);
|
||||
assertEquals(reader.maxDoc(), docMap.size());
|
||||
|
||||
|
|
Loading…
Reference in New Issue