mirror of https://github.com/apache/lucene.git

commit 5186ce4e23
parent 264767d47d

    LUCENE-6715: Take value tables into account for memory usage of doc values.

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1695027 13f79535-47bb-0310-9956-ffa450edef68
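In short: table-dictionary doc-values entries keep their decoded long[] dictionaries on the heap, but Lucene50DocValuesProducer.ramBytesUsed() did not count them. The hunks below add RamUsageEstimator.sizeOf(...) for each table as it is read. A minimal, self-contained sketch of that accounting pattern (not code from this commit; the class name is illustrative, assuming lucene-core on the classpath):

    // Illustrative only: mirrors the accounting pattern added by this commit.
    import java.util.concurrent.atomic.AtomicLong;
    import org.apache.lucene.util.RamUsageEstimator;

    class TableAccountingSketch {
      public static void main(String[] args) {
        // Start from the shallow size of the producer-like object itself ...
        AtomicLong ramBytesUsed = new AtomicLong(
            RamUsageEstimator.shallowSizeOfInstance(TableAccountingSketch.class));

        // ... then, as each table-compressed entry is read, add the deep size
        // of its decoded dictionary (array header + 8 bytes per long).
        long[] table = new long[256]; // e.g. a 256-entry value dictionary
        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(table));

        System.out.println("reported bytes: " + ramBytesUsed.get());
      }
    }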
lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesProducer.java

@@ -104,6 +104,7 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
     this.maxDoc = state.segmentInfo.maxDoc();
     merging = false;
+    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
 
     int version = -1;
     int numFields = -1;
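The accumulator now exists before the metadata stream is parsed, so the entry-reading code further down can call ramBytesUsed.addAndGet(...) during construction; the old initialization at the end of the constructor is removed in the next hunk.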
@@ -151,8 +152,6 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
         IOUtils.closeWhileHandlingException(this.data);
       }
     }
-
-    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
   }
 
   private void readSortedField(FieldInfo info, IndexInput meta) throws IOException {
@@ -339,6 +338,7 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
         for (int i = 0; i < uniqueValues; ++i) {
           entry.table[i] = meta.readLong();
         }
+        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.table));
         entry.bitsPerValue = meta.readVInt();
         break;
       case DELTA_COMPRESSED:
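For scale (a hedged figure, since object header sizes are JVM-dependent): on a typical 64-bit HotSpot JVM, RamUsageEstimator.sizeOf(new long[256]) is roughly a 16-byte array header plus 256 * 8 = 2,048 bytes of payload, i.e. about 2 KB per decoded table that ramBytesUsed() previously omitted.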
@@ -396,6 +396,7 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
       for (int i = 0; i < totalTableLength; ++i) {
         entry.table[i] = meta.readLong();
       }
+      ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.table));
       final int tableSize = meta.readInt();
       if (tableSize > totalTableLength + 1) { // +1 because of the empty set
         throw new CorruptIndexException("SORTED_SET_TABLE cannot have more set ids than ords in its dictionary, got " + totalTableLength + " ords and " + tableSize + " sets", meta);
@@ -404,6 +405,7 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
       for (int i = 1; i < entry.tableOffsets.length; ++i) {
         entry.tableOffsets[i] = entry.tableOffsets[i - 1] + meta.readInt();
       }
+      ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.tableOffsets));
     } else if (entry.format != SORTED_SINGLE_VALUED && entry.format != SORTED_WITH_ADDRESSES) {
       throw new CorruptIndexException("Unknown format: " + entry.format, meta);
     }
lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java

@@ -118,17 +118,19 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
       // something like a Collections.newSetFromMap(new HashMap<>()) uses quite
       // some memory... So for now the test ignores the overhead of such
       // collections but can we do better?
+      long v;
       if (o instanceof Collection) {
         Collection<?> coll = (Collection<?>) o;
         queue.addAll((Collection<?>) o);
-        return (long) coll.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+        v = (long) coll.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
       } else if (o instanceof Map) {
         final Map<?, ?> map = (Map<?,?>) o;
         queue.addAll(map.keySet());
         queue.addAll(map.values());
-        return 2L * map.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+        v = 2L * map.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+      } else {
+        v = super.accumulateObject(o, shallowSize, fieldValues, queue);
       }
-      long v = super.accumulateObject(o, shallowSize, fieldValues, queue);
       // System.out.println(o.getClass() + "=" + v);
       return v;
     }
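The test-side change restructures the RamUsageTester accumulator from early returns to a single exit, routing non-Collection, non-Map objects through super.accumulateObject(...) explicitly; previously the Collection and Map branches returned before reaching the (commented-out) per-object debug print. A self-contained sketch of the resulting control flow (the wrapper class name is illustrative; the method body mirrors the diff above, assuming lucene-test-framework on the classpath):

    // Sketch: single-exit accumulator that charges collections and maps only
    // for the references they hold, deferring everything else to the default
    // RamUsageTester accounting.
    import java.lang.reflect.Field;
    import java.util.Collection;
    import java.util.Map;
    import org.apache.lucene.util.RamUsageEstimator;
    import org.apache.lucene.util.RamUsageTester;

    class SingleExitAccumulator extends RamUsageTester.Accumulator {
      @Override
      public long accumulateObject(Object o, long shallowSize,
                                   Map<Field, Object> fieldValues, Collection<Object> queue) {
        long v;
        if (o instanceof Collection) {
          Collection<?> coll = (Collection<?>) o;
          queue.addAll(coll); // recurse into the elements
          // Count only the references the collection holds, not its impl overhead.
          v = (long) coll.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
        } else if (o instanceof Map) {
          Map<?, ?> map = (Map<?, ?>) o;
          queue.addAll(map.keySet());
          queue.addAll(map.values());
          v = 2L * map.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
        } else {
          v = super.accumulateObject(o, shallowSize, fieldValues, queue);
        }
        // Single exit: v can be logged here for any object type.
        return v;
      }
    }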