SOLR-236: grouping - fix NPE if rows=0, add prototype string grouping speedup

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1035074 13f79535-47bb-0310-9956-ffa450edef68
2010-11-14 21:34:10 +00:00 · 2010-11-14 21:34:10 +00:00 · 18c317a1e6
parent a73f5e279c
commit 18c317a1e6
3 changed files with 208 additions and 6 deletions
--- a/solr/src/java/org/apache/solr/search/Grouping.java
+++ b/solr/src/java/org/apache/solr/search/Grouping.java
@ -19,10 +19,14 @@ package org.apache.solr.search;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.StrFieldSource;
 import org.apache.solr.search.function.DocValues;
+import org.apache.solr.search.function.StringIndexDocValues;
 import org.apache.solr.search.function.ValueSource;
+import org.apache.solr.util.SentinelIntSet;

 import java.io.IOException;
 import java.util.*;
@ -141,6 +145,9 @@ public class Grouping {
    Collector createCollector() throws IOException {
      maxGroupToFind = getMax(offset, numGroups, maxDoc);

+      // if we aren't going to return any groups, disregard the offset 
+      if (numGroups == 0) maxGroupToFind = 0;
+
      if (compareSorts(sort, groupSort)) {
        collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
      } else {
@ -151,10 +158,15 @@ public class Grouping {

    @Override
    Collector createNextCollector() throws IOException {
-      int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
-      if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc;
+      if (numGroups == 0) return null;

+      int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+
+      if (false && groupBy instanceof StrFieldSource) {
+        collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
+      } else {
        collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset);
+      }
      return collector2;
    }

@ -162,11 +174,16 @@ public class Grouping {
    void finish() throws IOException {
      NamedList groupResult = commonResponse();

-      if (collector.orderedGroups == null) collector.buildSet();
-
      List groupList = new ArrayList();
      groupResult.add("groups", groupList);        // grouped={ key={ groups=[

+      // handle case of rows=0
+      if (numGroups == 0) return;
+
+      if (collector.orderedGroups == null) collector.buildSet();
+
+
+
      int skipCount = offset;
      for (SearchGroup group : collector.orderedGroups) {
        if (skipCount > 0) {
@ -411,7 +428,7 @@ class TopGroupCollector extends GroupCollector {
  public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
    this.vs = groupByVS;
    this.context = vsContext;
-    this.nGroups = nGroups;
+    this.nGroups = nGroups = Math.max(1,nGroups);  // we need a minimum of 1 for this collector

    SortField[] sortFields = sort.getSort();
    this.comparators = new FieldComparator[sortFields.length];
@ -839,3 +856,52 @@ class SearchGroupDocs {
  TopDocsCollector collector;
 }

+
+
+class Phase2StringGroupCollector extends Phase2GroupCollector {
+  FieldCache.DocTermsIndex index;
+  SentinelIntSet ordSet;
+  SearchGroupDocs[] groups;
+  BytesRef spare;
+
+  public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
+    super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset);
+    ordSet = new SentinelIntSet(groupMap.size(), -1);
+    groups = new SearchGroupDocs[ordSet.keys.length];
+  }
+
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+    this.scorer = scorer;
+    for (SearchGroupDocs group : groupMap.values())
+      group.collector.setScorer(scorer);
+  }
+
+  @Override
+  public void collect(int doc) throws IOException {
+    int slot = ordSet.find(index.getOrd(doc));
+    if (slot >= 0) {
+      groups[slot].collector.collect(doc);
+    }
+  }
+
+  @Override
+  public void setNextReader(IndexReader reader, int docBase) throws IOException {
+    super.setNextReader(reader, docBase);
+    index = ((StringIndexDocValues)docValues).getDocTermsIndex();
+
+    ordSet.clear();
+    for (SearchGroupDocs group : groupMap.values()) {
+      int ord = index.binarySearchLookup(((MutableValueStr)group.groupValue).value, spare);
+      if (ord > 0) {
+        int slot = ordSet.put(ord);
+        groups[slot] = group;
+      }
+    }
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return false;
+  }
+}
--- a/solr/src/java/org/apache/solr/search/function/StringIndexDocValues.java
+++ b/solr/src/java/org/apache/solr/search/function/StringIndexDocValues.java
@ -42,6 +42,10 @@ public abstract class StringIndexDocValues extends DocValues {
      this.vs = vs;
    }

+    public FieldCache.DocTermsIndex getDocTermsIndex() {
+      return termsIndex;
+    }
+  
    protected abstract String toTerm(String readableValue);

    @Override
--- a/solr/src/java/org/apache/solr/util/SentinelIntSet.java
+++ b/solr/src/java/org/apache/solr/util/SentinelIntSet.java
@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.util.Arrays;
+
+/** A native int set where one value is reserved to mean "EMPTY" */
+public class SentinelIntSet {
+  public int[] keys;
+  public int count;
+  public final int emptyVal;
+  public int rehashCount;   // the count at which a rehash should be done
+
+  public SentinelIntSet(int size, int emptyVal) {
+    this.emptyVal = emptyVal;
+    int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
+    rehashCount = tsize - (tsize>>2);
+    if (tsize <= rehashCount) {
+      tsize <<= 1;
+      rehashCount = tsize - (tsize>>2);
+    }
+    keys = new int[tsize];
+    if (emptyVal != 0)
+      clear();
+  }
+
+  public void clear() {
+    Arrays.fill(keys, emptyVal);
+    count = 0;
+  }
+
+  public int hash(int key) {
+    return key;
+  }
+
+  public int size() { return count; }
+
+  /** returns the slot for this key */
+  public int getSlot(int key) {
+    assert key != emptyVal;
+    int h = hash(key);
+    int s = h & (keys.length-1);
+    if (keys[s] == key || keys[s]== emptyVal) return s;
+
+    int increment = (h>>7)|1;
+    do {
+      s = (s + increment) & (keys.length-1);
+    } while (keys[s] != key && keys[s] != emptyVal);
+    return s;
+  }
+
+  /** returns the slot for this key, or -slot-1 if not found */
+  public int find(int key) {
+    assert key != emptyVal;
+    int h = hash(key);
+    int s = h & (keys.length-1);
+    if (keys[s] == key) return s;
+    if (keys[s] == emptyVal) return -s-1;
+
+    int increment = (h>>7)|1;
+    for(;;) {
+      s = (s + increment) & (keys.length-1);
+      if (keys[s] == key) return s;
+      if (keys[s] == emptyVal) return -s-1;
+    }
+  }
+
+
+  public boolean exists(int key) {
+    return find(key) >= 0;
+  }
+
+
+  public int put(int key) {
+    int s = find(key);
+    if (s < 0) {
+      if (count >= rehashCount) {
+        rehash();
+        s = getSlot(key);
+      } else {
+        s = -s-1;
+      }
+      count++;
+      keys[s] = key;
+      putKey(key, s);
+    } else {
+      overwriteKey(key, s);
+    }
+    return s;
+  }
+
+
+  protected void putKey(int key, int slot) {}
+  protected void overwriteKey(int key, int slot) {}
+
+  protected void startRehash(int newSize) {}
+  protected void moveKey(int key, int oldSlot, int newSlot) {}
+  protected void endRehash() {}
+
+  public void rehash() {
+    int newSize = keys.length << 1;
+    startRehash(newSize);
+    int[] oldKeys = keys;
+    keys = new int[newSize];
+    for (int i=0; i<oldKeys.length; i++) {
+      int key = oldKeys[i];
+      if (key == emptyVal) continue;
+      int newSlot = getSlot(key);
+      keys[newSlot] = key;
+      moveKey(key, i, newSlot);
+    }
+    endRehash();
+    rehashCount = newSize - (newSize>>2);
+
+  }
+
+}