mirror of https://github.com/apache/lucene.git
LUCENE-1586: add IndexReader.getUniqueTermCount
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@762647 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent bddac65336
commit c7fe3c5cb0
@@ -207,6 +207,9 @@ New features
     a specific fields to set the score for a document.  (Karl Wettin
     via Mike McCandless)
 
+19. LUCENE-1586: Add IndexReader.getUniqueTermCount().  (Mike
+    McCandless via Derek)
+
 Optimizations
 
  1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

@@ -1254,4 +1254,20 @@ public abstract class IndexReader implements Cloneable {
   public Object getFieldCacheKey() {
     return this;
   }
+
+  /** Returns the number of unique terms (across all fields)
+   *  in this reader.
+   *
+   *  This method returns long, even though internally
+   *  Lucene cannot handle more than 2^31 unique terms, for
+   *  a possible future when this limitation is removed.
+   *
+   *  @throws UnsupportedOperationException if this count
+   *  cannot be easily determined (eg Multi*Readers).
+   *  Instead, you should call {@link
+   *  #getSequentialSubReaders} and ask each sub reader for
+   *  its unique term count. */
+  public long getUniqueTermCount() throws IOException {
+    throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
+  }
 }
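
As the javadoc notes, composite readers (Multi*Readers) do not answer this call themselves; the caller is expected to walk getSequentialSubReaders(). A minimal sketch of that pattern, as a hypothetical helper that is not part of this patch (and assuming each sub-reader is a segment-level reader, as in the test further down), might look like:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;

// Hypothetical helper, not part of this commit.
public class UniqueTermCountUtil {

  /** Try the direct call first; fall back to summing over the
   *  sequential sub-readers when the reader is composite. */
  public static long sumUniqueTermCounts(IndexReader reader) throws IOException {
    try {
      return reader.getUniqueTermCount();
    } catch (UnsupportedOperationException uoe) {
      IndexReader[] subs = reader.getSequentialSubReaders();
      long total = 0;
      for (int i = 0; i < subs.length; i++) {
        // Assumes each sub-reader is a segment-level reader that
        // implements getUniqueTermCount() (e.g. SegmentReader).
        total += subs[i].getUniqueTermCount();
      }
      return total;
    }
  }
}

Because a term that occurs in several segments is counted once per segment, the summed value is an upper bound on the index-wide number of unique terms, not an exact count.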
@@ -1257,4 +1257,8 @@ class SegmentReader extends DirectoryIndexReader {
   public final Object getFieldCacheKey() {
     return freqStream;
   }
+
+  public long getUniqueTermCount() {
+    return tis.size();
+  }
 }
@@ -1794,4 +1794,36 @@ public class TestIndexReader extends LuceneTestCase
 
     dir.close();
   }
+
+  // LUCENE-1586: getUniqueTermCount
+  public void testUniqueTermCount() throws Exception {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+    Document doc = new Document();
+    doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.addDocument(doc);
+    writer.commit();
+
+    IndexReader r = IndexReader.open(dir);
+    assertEquals(36, r.getUniqueTermCount());
+    writer.addDocument(doc);
+    writer.commit();
+    IndexReader r2 = r.reopen();
+    r.close();
+    try {
+      r2.getUniqueTermCount();
+      fail("expected exception");
+    } catch (UnsupportedOperationException uoe) {
+      // expected
+    }
+    IndexReader[] subs = r2.getSequentialSubReaders();
+    for(int i=0;i<subs.length;i++) {
+      assertEquals(36, subs[i].getUniqueTermCount());
+    }
+    r2.close();
+    writer.close();
+    dir.close();
+  }
 }