SOLR-1900: override new methods in SolrIndexReader, convert facet.method=enum to flex API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@940604 13f79535-47bb-0310-9956-ffa450edef68
Author: Yonik Seeley
Date:   2010-05-03 20:33:16 +00:00
parent 89c24fbe37
commit 1a9fab6982
2 changed files with 141 additions and 59 deletions
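
At a glance, the SimpleFacets change below swaps the pre-flex TermEnum/TermDocs walk for the flex API: look up the per-field Terms via MultiFields, then iterate raw BytesRef terms with a TermsEnum. A minimal self-contained sketch of that enumeration pattern, using only calls that appear in the diff (the class and helper names here are illustrative, not part of the commit):

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class FlexTermWalk {
  /** Visits every term of a field in index (byte) order and returns the count. */
  static long countTerms(IndexReader reader, String field) throws IOException {
    Fields fields = MultiFields.getFields(reader);  // merged view over all segments; null if no fields
    Terms terms = fields == null ? null : fields.terms(field);
    if (terms == null) return 0;                    // field absent from the index

    TermsEnum termsEnum = terms.iterator();
    long count = 0;
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      count++;  // the term bytes are valid here, as are termsEnum.docFreq() and termsEnum.docs(...)
    }
    return count;
  }
}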

SimpleFacets.java

@@ -17,12 +17,11 @@
 package org.apache.solr.request;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.*;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.params.RequiredSolrParams;
@@ -498,28 +497,55 @@ public class SimpleFacets {
     IndexReader r = searcher.getReader();
     FieldType ft = schema.getFieldType(field);
 
+    boolean sortByCount = sort.equals("count") || sort.equals("true");
     final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1;
-    final BoundedTreeSet<CountPair<String,Integer>> queue = (sort.equals("count") || sort.equals("true")) ? new BoundedTreeSet<CountPair<String,Integer>>(maxsize) : null;
+    final BoundedTreeSet<CountPair<BytesRef,Integer>> queue = sortByCount ? new BoundedTreeSet<CountPair<BytesRef,Integer>>(maxsize) : null;
     final NamedList res = new NamedList();
 
     int min=mincount-1;  // the smallest value in the top 'N' values
     int off=offset;
     int lim=limit>=0 ? limit : Integer.MAX_VALUE;
 
-    String startTerm = prefix==null ? "" : ft.toInternal(prefix);
-    TermEnum te = r.terms(new Term(field,startTerm));
-    TermDocs td = r.termDocs();
+    BytesRef startTermBytes = null;
+    if (prefix != null) {
+      String indexedPrefix = ft.toInternal(prefix);
+      startTermBytes = new BytesRef(indexedPrefix);
+    }
 
-    if (docs.size() >= mincount) {
-      do {
-        Term t = te.term();
-        if (null == t || ! t.field().equals(field))
+    Fields fields = MultiFields.getFields(r);
+    Terms terms = fields==null ? null : fields.terms(field);
+    TermsEnum termsEnum = null;
+    if (terms != null) {
+      termsEnum = terms.iterator();
+
+      // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
+      // facet.offset when sorting by index order.
+
+      if (startTermBytes != null) {
+        if (termsEnum.seek(startTermBytes, true) == TermsEnum.SeekStatus.END) {
+          termsEnum = null;
+        }
+      } else {
+        // position termsEnum on first term
+        termsEnum.next();
+      }
+    }
+
+    Term template = new Term(field);
+    DocsEnum docsEnum = null;
+
+    if (termsEnum != null && docs.size() >= mincount) {
+      for(;;) {
+        BytesRef term = termsEnum.term();
+        if (term == null)
           break;
 
-        if (prefix!=null && !t.text().startsWith(prefix)) break;
+        if (startTermBytes != null && !term.startsWith(startTermBytes))
+          break;
 
-        int df = te.docFreq();
+        int df = termsEnum.docFreq();
 
         // If we are sorting, we can use df>min (rather than >=) since we
         // are going in index order.  For certain term distributions this can
@@ -529,36 +555,57 @@ public class SimpleFacets {
         if (df >= minDfFilterCache) {
           // use the filter cache
+          // TODO: not a big deal, but there are prob more efficient ways to go from utf8 to string
+          // TODO: need a term query that takes a BytesRef
+          Term t = template.createTerm(new String(term.utf8ToString()));
           c = searcher.numDocs(new TermQuery(t), docs);
         } else {
           // iterate over TermDocs to calculate the intersection
-          td.seek(te);
+          // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
+          // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
+          docsEnum = termsEnum.docs(null, docsEnum);
+          // this should be the same bulk result object if sharing of the docsEnum succeeded
+          DocsEnum.BulkReadResult bulk = docsEnum.getBulkResult();
           c=0;
-          while (td.next()) {
-            if (docs.exists(td.doc())) c++;
+          for (;;) {
+            int nDocs = docsEnum.read();
+            if (nDocs == 0) break;
+            int[] docArr = bulk.docs.ints;  // this might be movable outside the loop, but perhaps not worth the risk.
+            for (int i=0; i<nDocs; i++) {
+              if (docs.exists(docArr[i])) c++;
+            }
           }
         }
 
-        if (sort.equals("count") || sort.equals("true")) {
+        if (sortByCount) {
           if (c>min) {
-            queue.add(new CountPair<String,Integer>(t.text(), c));
+            BytesRef termCopy = new BytesRef(term);
+            queue.add(new CountPair<BytesRef,Integer>(termCopy, c));
             if (queue.size()>=maxsize) min=queue.last().val;
           }
         } else {
           if (c >= mincount && --off<0) {
             if (--lim<0) break;
-            res.add(ft.indexedToReadable(t.text()), c);
+            BytesRef termCopy = new BytesRef(term);
+            String s = term.utf8ToString();
+            res.add(ft.indexedToReadable(s), c);
           }
         }
 
-      } while (te.next());
-    }
+        termsEnum.next();
+      }
+    }
 
-    if (sort.equals("count") || sort.equals("true")) {
-      for (CountPair<String,Integer> p : queue) {
+    if (sortByCount) {
+      for (CountPair<BytesRef,Integer> p : queue) {
         if (--off>=0) continue;
         if (--lim<0) break;
-        res.add(ft.indexedToReadable(p.key), p.val);
+        String s = p.key.utf8ToString();
+        res.add(ft.indexedToReadable(s), p.val);
       }
     }
@@ -566,9 +613,6 @@ public class SimpleFacets {
       res.add(null, getFieldMissingCount(searcher,docs,field));
     }
 
-    te.close();
-    td.close();
-
     return res;
   }
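
The intersection loop above relies on the flex DocsEnum bulk-read interface: instead of one next() call per posting, read() fills a chunk of doc ids that are then tested against the base DocSet. A condensed sketch of just that idiom, assuming a DocsEnum already positioned on a term and a Solr DocSet as the base (the helper class and method names are hypothetical):

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.solr.search.DocSet;

class BulkIntersectionCount {
  /** Counts how many postings of the current term fall inside the base DocSet. */
  static int count(DocsEnum docsEnum, DocSet base) throws IOException {
    DocsEnum.BulkReadResult bulk = docsEnum.getBulkResult();
    int c = 0;
    for (;;) {
      int nDocs = docsEnum.read();        // fills bulk.docs with the next chunk of doc ids
      if (nDocs == 0) break;              // postings exhausted
      int[] docArr = bulk.docs.ints;
      for (int i = 0; i < nDocs; i++) {
        if (base.exists(docArr[i])) c++;  // one set-membership test per buffered doc
      }
    }
    return c;
  }
}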

SolrIndexReader.java

@@ -23,6 +23,8 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -222,6 +224,11 @@ public class SolrIndexReader extends FilterIndexReader {
     return in.directory();
   }
 
+  @Override
+  public Bits getDeletedDocs() throws IOException {
+    return in.getDeletedDocs();
+  }
+
   @Override
   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
     return in.getTermFreqVectors(docNumber);
@@ -297,6 +304,11 @@ public class SolrIndexReader extends FilterIndexReader {
     return in.terms();
   }
 
+  @Override
+  public Fields fields() throws IOException {
+    return in.fields();
+  }
+
   @Override
   public TermEnum terms(Term t) throws IOException {
     return in.terms(t);
@@ -308,6 +320,11 @@ public class SolrIndexReader extends FilterIndexReader {
     return in.docFreq(t);
   }
 
+  @Override
+  public int docFreq(String field, BytesRef t) throws IOException {
+    return in.docFreq(field, t);
+  }
+
   @Override
   public TermDocs termDocs() throws IOException {
     ensureOpen();
@@ -320,6 +337,21 @@ public class SolrIndexReader extends FilterIndexReader {
     return in.termDocs(term);
   }
 
+  @Override
+  public Terms terms(String field) throws IOException {
+    return in.terms(field);
+  }
+
+  @Override
+  public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term) throws IOException {
+    return in.termDocsEnum(skipDocs, field, term);
+  }
+
+  @Override
+  public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term) throws IOException {
+    return in.termPositionsEnum(skipDocs, field, term);
+  }
+
   @Override
   public TermPositions termPositions() throws IOException {
     ensureOpen();
@@ -329,6 +361,7 @@ public class SolrIndexReader extends FilterIndexReader {
   @Override
   protected void doDelete(int n) throws CorruptIndexException, IOException { in.deleteDocument(n); }
 
+  // Let FilterIndexReader handle commit()... we cannot override commit()
   // or call in.commit() ourselves.
   // protected void doCommit() throws IOException { in.commit(); }
@@ -363,6 +396,11 @@ public class SolrIndexReader extends FilterIndexReader {
     return subReaders;
   }
 
+  @Override
+  public int getSubReaderDocBase(IndexReader subReader) {
+    return in.getSubReaderDocBase(subReader);
+  }
+
   @Override
   public int hashCode() {
     return in.hashCode();
@@ -405,7 +443,7 @@ public class SolrIndexReader extends FilterIndexReader {
   @Override
   public long getUniqueTermCount() throws IOException {
-    return super.getUniqueTermCount();
+    return in.getUniqueTermCount();
   }
 
   @Override
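
The SolrIndexReader half of the commit exists because the class wraps another reader as a FilterIndexReader: any new IndexReader method it does not override resolves to the superclass default instead of reaching the wrapped reader, so each flex entry point must be forwarded to `in` explicitly. A stripped-down sketch of that delegation pattern, using only methods that appear in the diff (the wrapper class name is hypothetical):

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.Bits;

// Hypothetical wrapper illustrating the rule the commit applies to SolrIndexReader:
// every new flex accessor is forwarded to the wrapped reader `in`.
class DelegatingReader extends FilterIndexReader {
  DelegatingReader(IndexReader in) {
    super(in);
  }

  @Override
  public Fields fields() throws IOException {
    return in.fields();          // forward to the wrapped reader, never inherit the default
  }

  @Override
  public Bits getDeletedDocs() throws IOException {
    return in.getDeletedDocs();  // same pattern for the deleted-docs bitset
  }
}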