From 42fdbbeb95b324addcf513d9541200648710d9f8 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 26 Jun 2015 15:54:03 +0000 Subject: [PATCH] LUCENE-6325: use array for number -> FieldInfo lookup, except in very sparse cases git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1687789 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 +++ .../org/apache/lucene/index/FieldInfos.java | 36 ++++++++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e83264858e5..73d2ac60628 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -225,6 +225,10 @@ Optimizations with a filter of "baz" will internally leapfrog foo,bar,baz as one conjunction. (Ryan Ernst, Robert Muir, Adrien Grande) +* LUCENE-6325: Reduce RAM usage of FieldInfos, and speed up lookup by + number, by using an array instead of TreeMap except in very sparse + cases (Robert Muir, Mike McCandless) + Build * LUCENE-6518: Don't report false thread leaks from IBM J9 diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java index 1199642fb3a..0023f28129d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java @@ -25,6 +25,8 @@ import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; +import org.apache.lucene.util.ArrayUtil; + /** * Collection of {@link FieldInfo}s (accessible by number or by name). 
* @lucene.experimental @@ -38,7 +40,10 @@ public class FieldInfos implements Iterable<FieldInfo> { private final boolean hasNorms; private final boolean hasDocValues; - private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<>(); + // used only by fieldInfo(int) + private final FieldInfo[] byNumberTable; // contiguous + private final SortedMap<Integer,FieldInfo> byNumberMap; // sparse + private final HashMap<String,FieldInfo> byName = new HashMap<>(); private final Collection<FieldInfo> values; // for an unmodifiable iterator @@ -54,6 +59,7 @@ public class FieldInfos implements Iterable<FieldInfo> { boolean hasNorms = false; boolean hasDocValues = false; + TreeMap<Integer,FieldInfo> byNumber = new TreeMap<>(); for (FieldInfo info : infos) { if (info.number < 0) { throw new IllegalArgumentException("illegal field number: " + info.number + " for field " + info.name); @@ -84,6 +90,22 @@ public class FieldInfos implements Iterable<FieldInfo> { this.hasNorms = hasNorms; this.hasDocValues = hasDocValues; this.values = Collections.unmodifiableCollection(byNumber.values()); + Integer max = byNumber.isEmpty() ?
null : Collections.max(byNumber.keySet()); + + // Only use TreeMap in the very sparse case (< 1/16th of the numbers are used), + // because TreeMap uses ~ 64 (32 bit JVM) or 120 (64 bit JVM w/o compressed oops) + // overall bytes per entry, but array uses 4 (32 bit JVM) or 8 + // (64 bit JVM w/o compressed oops): + if (max != null && max < ArrayUtil.MAX_ARRAY_LENGTH && max < 16L*byNumber.size()) { + byNumberMap = null; + byNumberTable = new FieldInfo[max+1]; + for (Map.Entry<Integer,FieldInfo> entry : byNumber.entrySet()) { + byNumberTable[entry.getKey()] = entry.getValue(); + } + } else { + byNumberMap = byNumber; + byNumberTable = null; + } } /** Returns true if any fields have freqs */ @@ -123,8 +145,7 @@ public class FieldInfos implements Iterable<FieldInfo> { /** Returns the number of fields */ public int size() { - assert byNumber.size() == byName.size(); - return byNumber.size(); + return byName.size(); } /** @@ -157,7 +178,14 @@ public class FieldInfos implements Iterable<FieldInfo> { if (fieldNumber < 0) { throw new IllegalArgumentException("Illegal field number: " + fieldNumber); } - return byNumber.get(fieldNumber); + if (byNumberTable != null) { + if (fieldNumber >= byNumberTable.length) { + return null; + } + return byNumberTable[fieldNumber]; + } else { + return byNumberMap.get(fieldNumber); + } } static final class FieldNumbers {