cut over these collectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4765@1444887 13f79535-47bb-0310-9956-ffa450edef68
2013-02-11 17:16:49 +00:00 · 2013-02-11 17:16:49 +00:00 · f2798476cd
parent d74fda14f2
commit f2798476cd
1 changed files with 39 additions and 79 deletions
--- a/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
@ -21,8 +21,7 @@ import java.io.IOException;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.DocTermOrds;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.Scorer;
@ -181,9 +180,8 @@ abstract class TermsWithScoreCollector extends Collector {
  // impl that works with multiple values per document
  static class MV extends TermsWithScoreCollector {
-    DocTermOrds fromDocTermOrds;
+    SortedSetDocValues fromDocTermOrds;
-    TermsEnum docTermsEnum;
+    final BytesRef scratch = new BytesRef();
    DocTermOrds.TermOrdsIterator reuse;
    MV(String field, ScoreMode scoreMode) {
      super(field, scoreMode);
@ -191,54 +189,33 @@ abstract class TermsWithScoreCollector extends Collector {
    @Override
    public void collect(int doc) throws IOException {
-      reuse = fromDocTermOrds.lookup(doc, reuse);
+      fromDocTermOrds.setDocument(doc);
-      int[] buffer = new int[5];
+      long ord;
-
+      while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-      int chunk;
+        fromDocTermOrds.lookupOrd(ord, scratch);
-      do {
+        
-        chunk = reuse.read(buffer);
+        int termID = collectedTerms.add(scratch);
-        if (chunk == 0) {
+        if (termID < 0) {
-          return;
+          termID = -termID - 1;
-        }
+        } else {
-
+          if (termID >= scoreSums.length) {
-        for (int idx = 0; idx < chunk; idx++) {
+            scoreSums = ArrayUtil.grow(scoreSums);
          int key = buffer[idx];
          docTermsEnum.seekExact((long) key);
          int ord = collectedTerms.add(docTermsEnum.term());
          if (ord < 0) {
            ord = -ord - 1;
          } else {
            if (ord >= scoreSums.length) {
              scoreSums = ArrayUtil.grow(scoreSums);
            }
          }
          final float current = scorer.score();
          final float existing = scoreSums[ord];
          if (Float.compare(existing, 0.0f) == 0) {
            scoreSums[ord] = current;
          } else {
            switch (scoreMode) {
              case Total:
                scoreSums[ord] = existing + current;
                break;
              case Max:
                if (current > existing) {
                  scoreSums[ord] = current;
                }
            }
          }
        }
-      } while (chunk >= buffer.length);
+        
        switch (scoreMode) {
          case Total:
            scoreSums[termID] += scorer.score();
            break;
          case Max:
            scoreSums[termID] = Math.max(scoreSums[termID], scorer.score());
        }
      }
    }
    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
-      // nocommit: cut over
+      fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
      DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
      fromDocTermOrds = iterator.getParent();
      docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
      reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
    }
    static class Avg extends MV {
@ -251,40 +228,23 @@ abstract class TermsWithScoreCollector extends Collector {
      @Override
      public void collect(int doc) throws IOException {
-        reuse = fromDocTermOrds.lookup(doc, reuse);
+        fromDocTermOrds.setDocument(doc);
-        int[] buffer = new int[5];
+        long ord;
-
+        while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-        int chunk;
+          fromDocTermOrds.lookupOrd(ord, scratch);
-        do {
+          
-          chunk = reuse.read(buffer);
+          int termID = collectedTerms.add(scratch);
-          if (chunk == 0) {
+          if (termID < 0) {
-            return;
+            termID = -termID - 1;
-          }
+          } else {
-
+            if (termID >= scoreSums.length) {
-          for (int idx = 0; idx < chunk; idx++) {
+              scoreSums = ArrayUtil.grow(scoreSums);
            int key = buffer[idx];
            docTermsEnum.seekExact((long) key);
            int ord = collectedTerms.add(docTermsEnum.term());
            if (ord < 0) {
              ord = -ord - 1;
            } else {
              if (ord >= scoreSums.length) {
                scoreSums = ArrayUtil.grow(scoreSums);
                scoreCounts = ArrayUtil.grow(scoreCounts);
              }
            }
            float current = scorer.score();
            float existing = scoreSums[ord];
            if (Float.compare(existing, 0.0f) == 0) {
              scoreSums[ord] = current;
              scoreCounts[ord] = 1;
            } else {
              scoreSums[ord] = scoreSums[ord] + current;
              scoreCounts[ord]++;
            }
          }
-        } while (chunk >= buffer.length);
+          
          scoreSums[termID] += scorer.score();
          scoreCounts[termID]++;
        }
      }
      @Override