mirror of https://github.com/apache/lucene.git
LUCENE-1586: add IndexReader.getUniqueTermCount
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@762647 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent bddac65336
commit c7fe3c5cb0
@@ -207,6 +207,9 @@ New features
     a specific fields to set the score for a document.  (Karl Wettin
     via Mike McCandless)
 
+19. LUCENE-1586: Add IndexReader.getUniqueTermCount().  (Mike
+    McCandless via Derek)
+
 Optimizations
 
  1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

@@ -1254,4 +1254,20 @@ public abstract class IndexReader implements Cloneable {
   public Object getFieldCacheKey() {
     return this;
   }
+
+  /** Returns the number of unique terms (across all fields)
+   *  in this reader.
+   *
+   *  This method returns long, even though internally
+   *  Lucene cannot handle more than 2^31 unique terms, for
+   *  a possible future when this limitation is removed.
+   *
+   *  @throws UnsupportedOperationException if this count
+   *  cannot be easily determined (eg Multi*Readers).
+   *  Instead, you should call {@link
+   *  #getSequentialSubReaders} and ask each sub reader for
+   *  its unique term count. */
+  public long getUniqueTermCount() throws IOException {
+    throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
+  }
 }
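
As the javadoc notes, composite readers (Multi*Readers) do not answer this call themselves; the caller is expected to walk getSequentialSubReaders(). A minimal sketch of that pattern, as a hypothetical helper that is not part of this patch (and assuming each sub-reader is a segment-level reader, as in the test further down), might look like:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;

// Hypothetical helper, not part of this commit.
public class UniqueTermCountUtil {

  /** Try the direct call first; fall back to summing over the
   *  sequential sub-readers when the reader is composite. */
  public static long sumUniqueTermCounts(IndexReader reader) throws IOException {
    try {
      return reader.getUniqueTermCount();
    } catch (UnsupportedOperationException uoe) {
      IndexReader[] subs = reader.getSequentialSubReaders();
      long total = 0;
      for (int i = 0; i < subs.length; i++) {
        // Assumes each sub-reader is a segment-level reader that
        // implements getUniqueTermCount() (e.g. SegmentReader).
        total += subs[i].getUniqueTermCount();
      }
      return total;
    }
  }
}

Because a term that occurs in several segments is counted once per segment, the summed value is an upper bound on the index-wide number of unique terms, not an exact count.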
@@ -1257,4 +1257,8 @@ class SegmentReader extends DirectoryIndexReader {
   public final Object getFieldCacheKey() {
     return freqStream;
   }
+
+  public long getUniqueTermCount() {
+    return tis.size();
+  }
 }
@@ -1794,4 +1794,36 @@ public class TestIndexReader extends LuceneTestCase
 
     dir.close();
   }
+
+  // LUCENE-1586: getUniqueTermCount
+  public void testUniqueTermCount() throws Exception {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+    Document doc = new Document();
+    doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.addDocument(doc);
+    writer.commit();
+
+    IndexReader r = IndexReader.open(dir);
+    assertEquals(36, r.getUniqueTermCount());
+    writer.addDocument(doc);
+    writer.commit();
+    IndexReader r2 = r.reopen();
+    r.close();
+    try {
+      r2.getUniqueTermCount();
+      fail("expected exception");
+    } catch (UnsupportedOperationException uoe) {
+      // expected
+    }
+    IndexReader[] subs = r2.getSequentialSubReaders();
+    for(int i=0;i<subs.length;i++) {
+      assertEquals(36, subs[i].getUniqueTermCount());
+    }
+    r2.close();
+    writer.close();
+    dir.close();
+  }
 }