diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fc3e37513c7..15ad31fbbf1 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -55,6 +55,12 @@ API Changes * LUCENE-4307: Renamed IndexReader.getTopReaderContext to IndexReader.getContext. (Robert Muir) +* LUCENE-4316: Deprecate Fields.getUniqueTermCount and remove it from + AtomicReader. If you really want the unique term count across all + fields, just sum up Terms.size() across those fields. This method + only exists so that this statistic can be accessed for Lucene 3.x + segments, which don't support Terms.size(). (Uwe Schindler, Robert Muir) + Bug Fixes * LUCENE-4297: BooleanScorer2 would multiply the coord() factor diff --git a/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java index 22d78b809fb..35d4a8ee501 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java @@ -211,10 +211,6 @@ public class BloomFilteringPostingsFormat extends PostingsFormat { return delegateFieldsProducer.size(); } - public long getUniqueTermCount() throws IOException { - return delegateFieldsProducer.getUniqueTermCount(); - } - class BloomFilteredTerms extends Terms { private Terms delegateTerms; private FuzzySet filter; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index fd17b10d68a..98e1a36ae85 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -144,15 +144,6 @@ public class DirectPostingsFormat extends PostingsFormat { return fields.size(); } - @Override - public long getUniqueTermCount() { - long numTerms = 0; - for(DirectField field : fields.values()) { - numTerms += field.terms.length; - } - return numTerms; - } - @Override public void close() { } diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java index a2ef5efbb73..2192447ba48 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java @@ -175,17 +175,6 @@ public abstract class AtomicReader extends IndexReader { } return null; } - - /** Returns the number of unique terms (across all fields) - * in this reader. - */ - public final long getUniqueTermCount() throws IOException { - final Fields fields = fields(); - if (fields == null) { - return 0; - } - return fields.getUniqueTermCount(); - } /** * Returns {@link DocValues} for this field. diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 4c8c35071ea..0daf83d1f07 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1112,21 +1112,6 @@ public class CheckIndex { throw new RuntimeException("fieldCount mismatch " + fieldCount + " vs recomputed field count " + computedFieldCount); } } - - // for most implementations, this is boring (just the sum across all fields) - // but codecs that don't work per-field like preflex actually implement this, - // but don't implement it on Terms, so the check isn't redundant. - long uniqueTermCountAllFields = fields.getUniqueTermCount(); - - // this means something is seriously screwed, e.g. we are somehow getting enclosed in PFCW!!!!!! - - if (uniqueTermCountAllFields == -1) { - throw new RuntimeException("invalid termCount: -1"); - } - - if (status.termCount != uniqueTermCountAllFields) { - throw new RuntimeException("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.termCount)); - } if (doPrint) { msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); diff --git a/lucene/core/src/java/org/apache/lucene/index/Fields.java b/lucene/core/src/java/org/apache/lucene/index/Fields.java index 76af3cf9746..9af477d152b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Fields.java +++ b/lucene/core/src/java/org/apache/lucene/index/Fields.java @@ -38,26 +38,5 @@ public abstract class Fields implements Iterable { * {@link #iterator} will return as many field names. */ public abstract int size(); - /** Returns the number of terms for all fields, or -1 if this - * measure isn't stored by the codec. Note that, just like - * other term measures, this measure does not take deleted - * documents into account. */ - // TODO: deprecate? - public long getUniqueTermCount() throws IOException { - long numTerms = 0; - for (String field : this) { - Terms terms = terms(field); - if (terms != null) { - final long termCount = terms.size(); - if (termCount == -1) { - return -1; - } - - numTerms += termCount; - } - } - return numTerms; - } - public final static Fields[] EMPTY_ARRAY = new Fields[0]; } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 9608a6542fd..c2cb3a6eb9b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -60,11 +60,6 @@ public class FilterAtomicReader extends AtomicReader { public int size() { return in.size(); } - - @Override - public long getUniqueTermCount() throws IOException { - return in.getUniqueTermCount(); - } } /** Base class for filtering {@link Terms} diff --git a/lucene/core/src/java/org/apache/lucene/index/package.html b/lucene/core/src/java/org/apache/lucene/index/package.html index a1d97c3859f..d4db08459f8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/package.html +++ b/lucene/core/src/java/org/apache/lucene/index/package.html @@ -212,9 +212,6 @@ while ((docid = docsAndPositionsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS number of deleted documents in the index.
  • {@link org.apache.lucene.index.Fields#size}: Returns the number of indexed fields. -
  • {@link org.apache.lucene.index.Fields#getUniqueTermCount}: Returns the number - of indexed terms, the sum of {@link org.apache.lucene.index.Terms#size} - across all fields.

    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java index bb4247fbe30..1954ff32d9d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java @@ -807,7 +807,8 @@ public void testFilesOpenClose() throws IOException { DirectoryReader r = DirectoryReader.open(dir); AtomicReader r1 = getOnlySegmentReader(r); - assertEquals(36, r1.getUniqueTermCount()); + assertEquals(26, r1.terms("field").size()); + assertEquals(10, r1.terms("number").size()); writer.addDocument(doc); writer.commit(); DirectoryReader r2 = DirectoryReader.openIfChanged(r); @@ -815,7 +816,8 @@ public void testFilesOpenClose() throws IOException { r.close(); for(AtomicReaderContext s : r2.leaves()) { - assertEquals(36, s.reader().getUniqueTermCount()); + assertEquals(26, s.reader().terms("field").size()); + assertEquals(10, s.reader().terms("number").size()); } r2.close(); writer.close(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java index d3925053c18..56f9601424e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -191,10 +191,6 @@ public class TestDuelingCodecs extends LuceneTestCase { if (leftFields.size() != -1 && rightFields.size() != -1) { assertEquals(info, leftFields.size(), rightFields.size()); } - - if (leftFields.getUniqueTermCount() != -1 && rightFields.getUniqueTermCount() != -1) { - assertEquals(info, leftFields.getUniqueTermCount(), rightFields.getUniqueTermCount()); - } } /** diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index a25c0020a58..682c0c6252a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -86,11 +86,6 @@ public class AssertingPostingsFormat extends PostingsFormat { public int size() { return in.size(); } - - @Override - public long getUniqueTermCount() throws IOException { - return in.getUniqueTermCount(); - } } static class AssertingFieldsConsumer extends FieldsConsumer {