mirror of https://github.com/apache/lucene.git
SOLR-236: grouping - fix NPE if rows=0, add prototype string grouping speedup
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1035074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a73f5e279c
commit
18c317a1e6
|
@ -19,10 +19,14 @@ package org.apache.solr.search;
|
|||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.schema.StrFieldSource;
|
||||
import org.apache.solr.search.function.DocValues;
|
||||
import org.apache.solr.search.function.StringIndexDocValues;
|
||||
import org.apache.solr.search.function.ValueSource;
|
||||
import org.apache.solr.util.SentinelIntSet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
@ -141,6 +145,9 @@ public class Grouping {
|
|||
Collector createCollector() throws IOException {
|
||||
maxGroupToFind = getMax(offset, numGroups, maxDoc);
|
||||
|
||||
// if we aren't going to return any groups, disregard the offset
|
||||
if (numGroups == 0) maxGroupToFind = 0;
|
||||
|
||||
if (compareSorts(sort, groupSort)) {
|
||||
collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
|
||||
} else {
|
||||
|
@ -151,10 +158,15 @@ public class Grouping {
|
|||
|
||||
@Override
|
||||
Collector createNextCollector() throws IOException {
|
||||
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
|
||||
if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc;
|
||||
if (numGroups == 0) return null;
|
||||
|
||||
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
|
||||
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
|
||||
|
||||
if (false && groupBy instanceof StrFieldSource) {
|
||||
collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
|
||||
} else {
|
||||
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
|
||||
}
|
||||
return collector2;
|
||||
}
|
||||
|
||||
|
@ -162,11 +174,16 @@ public class Grouping {
|
|||
void finish() throws IOException {
|
||||
NamedList groupResult = commonResponse();
|
||||
|
||||
if (collector.orderedGroups == null) collector.buildSet();
|
||||
|
||||
List groupList = new ArrayList();
|
||||
groupResult.add("groups", groupList); // grouped={ key={ groups=[
|
||||
|
||||
// handle case of rows=0
|
||||
if (numGroups == 0) return;
|
||||
|
||||
if (collector.orderedGroups == null) collector.buildSet();
|
||||
|
||||
|
||||
|
||||
int skipCount = offset;
|
||||
for (SearchGroup group : collector.orderedGroups) {
|
||||
if (skipCount > 0) {
|
||||
|
@ -411,7 +428,7 @@ class TopGroupCollector extends GroupCollector {
|
|||
public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
|
||||
this.vs = groupByVS;
|
||||
this.context = vsContext;
|
||||
this.nGroups = nGroups;
|
||||
this.nGroups = nGroups = Math.max(1,nGroups); // we need a minimum of 1 for this collector
|
||||
|
||||
SortField[] sortFields = sort.getSort();
|
||||
this.comparators = new FieldComparator[sortFields.length];
|
||||
|
@ -839,3 +856,52 @@ class SearchGroupDocs {
|
|||
TopDocsCollector collector;
|
||||
}
|
||||
|
||||
|
||||
|
||||
class Phase2StringGroupCollector extends Phase2GroupCollector {
|
||||
FieldCache.DocTermsIndex index;
|
||||
SentinelIntSet ordSet;
|
||||
SearchGroupDocs[] groups;
|
||||
BytesRef spare;
|
||||
|
||||
public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
|
||||
super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset);
|
||||
ordSet = new SentinelIntSet(groupMap.size(), -1);
|
||||
groups = new SearchGroupDocs[ordSet.keys.length];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
this.scorer = scorer;
|
||||
for (SearchGroupDocs group : groupMap.values())
|
||||
group.collector.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
int slot = ordSet.find(index.getOrd(doc));
|
||||
if (slot >= 0) {
|
||||
groups[slot].collector.collect(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
||||
super.setNextReader(reader, docBase);
|
||||
index = ((StringIndexDocValues)docValues).getDocTermsIndex();
|
||||
|
||||
ordSet.clear();
|
||||
for (SearchGroupDocs group : groupMap.values()) {
|
||||
int ord = index.binarySearchLookup(((MutableValueStr)group.groupValue).value, spare);
|
||||
if (ord > 0) {
|
||||
int slot = ordSet.put(ord);
|
||||
groups[slot] = group;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -41,6 +41,10 @@ public abstract class StringIndexDocValues extends DocValues {
|
|||
}
|
||||
this.vs = vs;
|
||||
}
|
||||
|
||||
public FieldCache.DocTermsIndex getDocTermsIndex() {
|
||||
return termsIndex;
|
||||
}
|
||||
|
||||
protected abstract String toTerm(String readableValue);
|
||||
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/** A native int set where one value is reserved to mean "EMPTY" */
|
||||
public class SentinelIntSet {
|
||||
public int[] keys;
|
||||
public int count;
|
||||
public final int emptyVal;
|
||||
public int rehashCount; // the count at which a rehash should be done
|
||||
|
||||
public SentinelIntSet(int size, int emptyVal) {
|
||||
this.emptyVal = emptyVal;
|
||||
int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
|
||||
rehashCount = tsize - (tsize>>2);
|
||||
if (tsize <= rehashCount) {
|
||||
tsize <<= 1;
|
||||
rehashCount = tsize - (tsize>>2);
|
||||
}
|
||||
keys = new int[tsize];
|
||||
if (emptyVal != 0)
|
||||
clear();
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
Arrays.fill(keys, emptyVal);
|
||||
count = 0;
|
||||
}
|
||||
|
||||
public int hash(int key) {
|
||||
return key;
|
||||
}
|
||||
|
||||
public int size() { return count; }
|
||||
|
||||
/** returns the slot for this key */
|
||||
public int getSlot(int key) {
|
||||
assert key != emptyVal;
|
||||
int h = hash(key);
|
||||
int s = h & (keys.length-1);
|
||||
if (keys[s] == key || keys[s]== emptyVal) return s;
|
||||
|
||||
int increment = (h>>7)|1;
|
||||
do {
|
||||
s = (s + increment) & (keys.length-1);
|
||||
} while (keys[s] != key && keys[s] != emptyVal);
|
||||
return s;
|
||||
}
|
||||
|
||||
/** returns the slot for this key, or -slot-1 if not found */
|
||||
public int find(int key) {
|
||||
assert key != emptyVal;
|
||||
int h = hash(key);
|
||||
int s = h & (keys.length-1);
|
||||
if (keys[s] == key) return s;
|
||||
if (keys[s] == emptyVal) return -s-1;
|
||||
|
||||
int increment = (h>>7)|1;
|
||||
for(;;) {
|
||||
s = (s + increment) & (keys.length-1);
|
||||
if (keys[s] == key) return s;
|
||||
if (keys[s] == emptyVal) return -s-1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public boolean exists(int key) {
|
||||
return find(key) >= 0;
|
||||
}
|
||||
|
||||
|
||||
public int put(int key) {
|
||||
int s = find(key);
|
||||
if (s < 0) {
|
||||
if (count >= rehashCount) {
|
||||
rehash();
|
||||
s = getSlot(key);
|
||||
} else {
|
||||
s = -s-1;
|
||||
}
|
||||
count++;
|
||||
keys[s] = key;
|
||||
putKey(key, s);
|
||||
} else {
|
||||
overwriteKey(key, s);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
protected void putKey(int key, int slot) {}
|
||||
protected void overwriteKey(int key, int slot) {}
|
||||
|
||||
protected void startRehash(int newSize) {}
|
||||
protected void moveKey(int key, int oldSlot, int newSlot) {}
|
||||
protected void endRehash() {}
|
||||
|
||||
public void rehash() {
|
||||
int newSize = keys.length << 1;
|
||||
startRehash(newSize);
|
||||
int[] oldKeys = keys;
|
||||
keys = new int[newSize];
|
||||
for (int i=0; i<oldKeys.length; i++) {
|
||||
int key = oldKeys[i];
|
||||
if (key == emptyVal) continue;
|
||||
int newSlot = getSlot(key);
|
||||
keys[newSlot] = key;
|
||||
moveKey(key, i, newSlot);
|
||||
}
|
||||
endRehash();
|
||||
rehashCount = newSize - (newSize>>2);
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue