mirror of https://github.com/apache/lucene.git
SOLR-236: grouping - fix NPE if rows=0, add prototype string grouping speedup
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1035074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a73f5e279c
commit
18c317a1e6
|
@ -19,10 +19,14 @@ package org.apache.solr.search;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
|
import org.apache.solr.schema.StrFieldSource;
|
||||||
import org.apache.solr.search.function.DocValues;
|
import org.apache.solr.search.function.DocValues;
|
||||||
|
import org.apache.solr.search.function.StringIndexDocValues;
|
||||||
import org.apache.solr.search.function.ValueSource;
|
import org.apache.solr.search.function.ValueSource;
|
||||||
|
import org.apache.solr.util.SentinelIntSet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
@ -141,6 +145,9 @@ public class Grouping {
|
||||||
Collector createCollector() throws IOException {
|
Collector createCollector() throws IOException {
|
||||||
maxGroupToFind = getMax(offset, numGroups, maxDoc);
|
maxGroupToFind = getMax(offset, numGroups, maxDoc);
|
||||||
|
|
||||||
|
// if we aren't going to return any groups, disregard the offset
|
||||||
|
if (numGroups == 0) maxGroupToFind = 0;
|
||||||
|
|
||||||
if (compareSorts(sort, groupSort)) {
|
if (compareSorts(sort, groupSort)) {
|
||||||
collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
|
collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
|
||||||
} else {
|
} else {
|
||||||
|
@ -151,10 +158,15 @@ public class Grouping {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Collector createNextCollector() throws IOException {
|
Collector createNextCollector() throws IOException {
|
||||||
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
|
if (numGroups == 0) return null;
|
||||||
if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc;
|
|
||||||
|
|
||||||
|
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
|
||||||
|
|
||||||
|
if (false && groupBy instanceof StrFieldSource) {
|
||||||
|
collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
|
||||||
|
} else {
|
||||||
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
|
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
|
||||||
|
}
|
||||||
return collector2;
|
return collector2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,11 +174,16 @@ public class Grouping {
|
||||||
void finish() throws IOException {
|
void finish() throws IOException {
|
||||||
NamedList groupResult = commonResponse();
|
NamedList groupResult = commonResponse();
|
||||||
|
|
||||||
if (collector.orderedGroups == null) collector.buildSet();
|
|
||||||
|
|
||||||
List groupList = new ArrayList();
|
List groupList = new ArrayList();
|
||||||
groupResult.add("groups", groupList); // grouped={ key={ groups=[
|
groupResult.add("groups", groupList); // grouped={ key={ groups=[
|
||||||
|
|
||||||
|
// handle case of rows=0
|
||||||
|
if (numGroups == 0) return;
|
||||||
|
|
||||||
|
if (collector.orderedGroups == null) collector.buildSet();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int skipCount = offset;
|
int skipCount = offset;
|
||||||
for (SearchGroup group : collector.orderedGroups) {
|
for (SearchGroup group : collector.orderedGroups) {
|
||||||
if (skipCount > 0) {
|
if (skipCount > 0) {
|
||||||
|
@ -411,7 +428,7 @@ class TopGroupCollector extends GroupCollector {
|
||||||
public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
|
public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
|
||||||
this.vs = groupByVS;
|
this.vs = groupByVS;
|
||||||
this.context = vsContext;
|
this.context = vsContext;
|
||||||
this.nGroups = nGroups;
|
this.nGroups = nGroups = Math.max(1,nGroups); // we need a minimum of 1 for this collector
|
||||||
|
|
||||||
SortField[] sortFields = sort.getSort();
|
SortField[] sortFields = sort.getSort();
|
||||||
this.comparators = new FieldComparator[sortFields.length];
|
this.comparators = new FieldComparator[sortFields.length];
|
||||||
|
@ -839,3 +856,52 @@ class SearchGroupDocs {
|
||||||
TopDocsCollector collector;
|
TopDocsCollector collector;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Phase2StringGroupCollector extends Phase2GroupCollector {
|
||||||
|
FieldCache.DocTermsIndex index;
|
||||||
|
SentinelIntSet ordSet;
|
||||||
|
SearchGroupDocs[] groups;
|
||||||
|
BytesRef spare;
|
||||||
|
|
||||||
|
public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
|
||||||
|
super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset);
|
||||||
|
ordSet = new SentinelIntSet(groupMap.size(), -1);
|
||||||
|
groups = new SearchGroupDocs[ordSet.keys.length];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
for (SearchGroupDocs group : groupMap.values())
|
||||||
|
group.collector.setScorer(scorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
int slot = ordSet.find(index.getOrd(doc));
|
||||||
|
if (slot >= 0) {
|
||||||
|
groups[slot].collector.collect(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
||||||
|
super.setNextReader(reader, docBase);
|
||||||
|
index = ((StringIndexDocValues)docValues).getDocTermsIndex();
|
||||||
|
|
||||||
|
ordSet.clear();
|
||||||
|
for (SearchGroupDocs group : groupMap.values()) {
|
||||||
|
int ord = index.binarySearchLookup(((MutableValueStr)group.groupValue).value, spare);
|
||||||
|
if (ord > 0) {
|
||||||
|
int slot = ordSet.put(ord);
|
||||||
|
groups[slot] = group;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean acceptsDocsOutOfOrder() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -42,6 +42,10 @@ public abstract class StringIndexDocValues extends DocValues {
|
||||||
this.vs = vs;
|
this.vs = vs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public FieldCache.DocTermsIndex getDocTermsIndex() {
|
||||||
|
return termsIndex;
|
||||||
|
}
|
||||||
|
|
||||||
protected abstract String toTerm(String readableValue);
|
protected abstract String toTerm(String readableValue);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.util;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
 * A native int set where one value is reserved to mean "EMPTY".
 *
 * <p>Keys are stored in an open-addressed table ({@link #keys}) whose length is always a power
 * of two; a slot holding {@link #emptyVal} is free, so the sentinel itself can never be a
 * member. Collisions are resolved by stepping through the table with an odd increment derived
 * from the hash. The table is doubled by {@link #rehash()} once {@link #count} reaches
 * {@link #rehashCount} (three quarters of the table), which guarantees that at least one
 * free slot always exists and therefore that every probe sequence terminates.
 *
 * <p>The protected hook methods are no-ops in this class; they are invoked at the points
 * documented on each of them.
 */
public class SentinelIntSet {
  /** Smallest table ever allocated; below this, 3/4 of the length is not smaller than the length. */
  private static final int MIN_TABLE_SIZE = 4;

  /** Largest power of two that is a legal array length. */
  private static final int MAX_TABLE_SIZE = 1 << 30;

  public int[] keys;
  public int count;
  public final int emptyVal;
  public int rehashCount; // the count at which a rehash should be done

  /**
   * @param size number of keys the set should be able to hold without rehashing
   * @param emptyVal the value reserved to mark a free slot; it can never be stored as a key
   */
  public SentinelIntSet(int size, int emptyVal) {
    this.emptyVal = emptyVal;
    int tsize = tableSizeFor(size);
    rehashCount = tsize - (tsize >> 2);
    // must be able to hold "size" keys without re-hashing
    if (size >= rehashCount && tsize < MAX_TABLE_SIZE) {
      tsize <<= 1;
      rehashCount = tsize - (tsize >> 2);
    }
    keys = new int[tsize];
    if (emptyVal != 0) {
      clear();
    }
  }

  /**
   * Returns the smallest power of two that is at least {@code size}, but never less than
   * {@link #MIN_TABLE_SIZE}. Requests too large to round up fall back to the minimum and
   * rely on {@link #rehash()} to grow the table.
   */
  private static int tableSizeFor(int size) {
    if (size <= MIN_TABLE_SIZE) {
      return MIN_TABLE_SIZE;
    }
    int pow2 = Integer.highestOneBit(size - 1) << 1;
    return pow2 > 0 ? pow2 : MIN_TABLE_SIZE;
  }

  /** Removes every key by resetting all slots to {@link #emptyVal}. */
  public void clear() {
    Arrays.fill(keys, emptyVal);
    count = 0;
  }

  /** Hash used to pick the initial slot and probe increment; the identity function here. */
  public int hash(int key) {
    return key;
  }

  /** Returns the number of keys currently in the set. */
  public int size() {
    return count;
  }

  /** returns the slot for this key: either the slot that holds it or the free slot where it belongs */
  public int getSlot(int key) {
    assert key != emptyVal;
    int h = hash(key);
    int s = h & (keys.length - 1);
    if (keys[s] == key || keys[s] == emptyVal) {
      return s;
    }

    int increment = (h >> 7) | 1; // odd, so the probe visits every slot of a power-of-two table
    do {
      s = (s + increment) & (keys.length - 1);
    } while (keys[s] != key && keys[s] != emptyVal);
    return s;
  }

  /** returns the slot for this key, or -slot-1 if not found */
  public int find(int key) {
    assert key != emptyVal;
    int h = hash(key);
    int s = h & (keys.length - 1);
    if (keys[s] == key) {
      return s;
    }
    if (keys[s] == emptyVal) {
      return -s - 1;
    }

    int increment = (h >> 7) | 1;
    for (;;) {
      s = (s + increment) & (keys.length - 1);
      if (keys[s] == key) {
        return s;
      }
      if (keys[s] == emptyVal) {
        return -s - 1;
      }
    }
  }

  /** Returns true if {@code key} is in the set. */
  public boolean exists(int key) {
    return find(key) >= 0;
  }

  /**
   * Adds {@code key} if it is absent, growing the table first when the rehash threshold has
   * been reached, and returns the slot that holds the key afterwards.
   */
  public int put(int key) {
    int s = find(key);
    if (s < 0) {
      if (count >= rehashCount) {
        rehash();
        s = getSlot(key);
      } else {
        s = -s - 1;
      }
      count++;
      keys[s] = key;
      putKey(key, s);
    } else {
      overwriteKey(key, s);
    }
    return s;
  }

  /** Hook called by {@link #put} after a new key has been stored in {@code slot}. */
  protected void putKey(int key, int slot) {}

  /** Hook called by {@link #put} when the key was already present in {@code slot}. */
  protected void overwriteKey(int key, int slot) {}

  /** Hook called by {@link #rehash()} before the new table of {@code newSize} slots is allocated. */
  protected void startRehash(int newSize) {}

  /** Hook called by {@link #rehash()} for each key copied from {@code oldSlot} to {@code newSlot}. */
  protected void moveKey(int key, int oldSlot, int newSlot) {}

  /** Hook called by {@link #rehash()} after all keys have been moved. */
  protected void endRehash() {}

  /** Doubles the table and re-inserts every key; slots returned earlier become invalid. */
  public void rehash() {
    int newSize = keys.length << 1;
    startRehash(newSize);
    int[] oldKeys = keys;
    keys = new int[newSize];
    // A fresh int[] is all zeros; with a non-zero sentinel every slot would look occupied.
    if (emptyVal != 0) {
      Arrays.fill(keys, emptyVal);
    }
    for (int i = 0; i < oldKeys.length; i++) {
      int key = oldKeys[i];
      if (key == emptyVal) {
        continue;
      }
      int newSlot = getSlot(key);
      keys[newSlot] = key;
      moveKey(key, i, newSlot);
    }
    endRehash();
    rehashCount = newSize - (newSize >> 2);
  }
}
|
Loading…
Reference in New Issue