SOLR-236: grouping - fix NPE if rows=0, add prototype string grouping speedup

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1035074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-11-14 21:34:10 +00:00
parent a73f5e279c
commit 18c317a1e6
3 changed files with 208 additions and 6 deletions

View File

@ -19,10 +19,14 @@ package org.apache.solr.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.StrFieldSource;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.StringIndexDocValues;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.util.SentinelIntSet;
import java.io.IOException;
import java.util.*;
@ -141,6 +145,9 @@ public class Grouping {
Collector createCollector() throws IOException {
maxGroupToFind = getMax(offset, numGroups, maxDoc);
// if we aren't going to return any groups, disregard the offset
if (numGroups == 0) maxGroupToFind = 0;
if (compareSorts(sort, groupSort)) {
collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
} else {
@ -151,10 +158,15 @@ public class Grouping {
@Override
Collector createNextCollector() throws IOException {
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc;
if (numGroups == 0) return null;
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
if (false && groupBy instanceof StrFieldSource) {
collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
} else {
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
}
return collector2;
}
@ -162,11 +174,16 @@ public class Grouping {
void finish() throws IOException {
NamedList groupResult = commonResponse();
if (collector.orderedGroups == null) collector.buildSet();
List groupList = new ArrayList();
groupResult.add("groups", groupList); // grouped={ key={ groups=[
// handle case of rows=0
if (numGroups == 0) return;
if (collector.orderedGroups == null) collector.buildSet();
int skipCount = offset;
for (SearchGroup group : collector.orderedGroups) {
if (skipCount > 0) {
@ -411,7 +428,7 @@ class TopGroupCollector extends GroupCollector {
public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
this.vs = groupByVS;
this.context = vsContext;
this.nGroups = nGroups;
this.nGroups = nGroups = Math.max(1,nGroups); // we need a minimum of 1 for this collector
SortField[] sortFields = sort.getSort();
this.comparators = new FieldComparator[sortFields.length];
@ -839,3 +856,52 @@ class SearchGroupDocs {
TopDocsCollector collector;
}
class Phase2StringGroupCollector extends Phase2GroupCollector {
FieldCache.DocTermsIndex index;
SentinelIntSet ordSet;
SearchGroupDocs[] groups;
BytesRef spare;
public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset);
ordSet = new SentinelIntSet(groupMap.size(), -1);
groups = new SearchGroupDocs[ordSet.keys.length];
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (SearchGroupDocs group : groupMap.values())
group.collector.setScorer(scorer);
}
@Override
public void collect(int doc) throws IOException {
int slot = ordSet.find(index.getOrd(doc));
if (slot >= 0) {
groups[slot].collector.collect(doc);
}
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
super.setNextReader(reader, docBase);
index = ((StringIndexDocValues)docValues).getDocTermsIndex();
ordSet.clear();
for (SearchGroupDocs group : groupMap.values()) {
int ord = index.binarySearchLookup(((MutableValueStr)group.groupValue).value, spare);
if (ord > 0) {
int slot = ordSet.put(ord);
groups[slot] = group;
}
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
}

View File

@ -41,6 +41,10 @@ public abstract class StringIndexDocValues extends DocValues {
}
this.vs = vs;
}
public FieldCache.DocTermsIndex getDocTermsIndex() {
return termsIndex;
}
protected abstract String toTerm(String readableValue);

View File

@ -0,0 +1,132 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.util.Arrays;
/** A native int set where one value is reserved to mean "EMPTY" */
public class SentinelIntSet {
public int[] keys;
public int count;
public final int emptyVal;
public int rehashCount; // the count at which a rehash should be done
public SentinelIntSet(int size, int emptyVal) {
this.emptyVal = emptyVal;
int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
rehashCount = tsize - (tsize>>2);
if (tsize <= rehashCount) {
tsize <<= 1;
rehashCount = tsize - (tsize>>2);
}
keys = new int[tsize];
if (emptyVal != 0)
clear();
}
public void clear() {
Arrays.fill(keys, emptyVal);
count = 0;
}
public int hash(int key) {
return key;
}
public int size() { return count; }
/** returns the slot for this key */
public int getSlot(int key) {
assert key != emptyVal;
int h = hash(key);
int s = h & (keys.length-1);
if (keys[s] == key || keys[s]== emptyVal) return s;
int increment = (h>>7)|1;
do {
s = (s + increment) & (keys.length-1);
} while (keys[s] != key && keys[s] != emptyVal);
return s;
}
/** returns the slot for this key, or -slot-1 if not found */
public int find(int key) {
assert key != emptyVal;
int h = hash(key);
int s = h & (keys.length-1);
if (keys[s] == key) return s;
if (keys[s] == emptyVal) return -s-1;
int increment = (h>>7)|1;
for(;;) {
s = (s + increment) & (keys.length-1);
if (keys[s] == key) return s;
if (keys[s] == emptyVal) return -s-1;
}
}
public boolean exists(int key) {
return find(key) >= 0;
}
public int put(int key) {
int s = find(key);
if (s < 0) {
if (count >= rehashCount) {
rehash();
s = getSlot(key);
} else {
s = -s-1;
}
count++;
keys[s] = key;
putKey(key, s);
} else {
overwriteKey(key, s);
}
return s;
}
protected void putKey(int key, int slot) {}
protected void overwriteKey(int key, int slot) {}
protected void startRehash(int newSize) {}
protected void moveKey(int key, int oldSlot, int newSlot) {}
protected void endRehash() {}
public void rehash() {
int newSize = keys.length << 1;
startRehash(newSize);
int[] oldKeys = keys;
keys = new int[newSize];
for (int i=0; i<oldKeys.length; i++) {
int key = oldKeys[i];
if (key == emptyVal) continue;
int newSlot = getSlot(key);
keys[newSlot] = key;
moveKey(key, i, newSlot);
}
endRehash();
rehashCount = newSize - (newSize>>2);
}
}