diff --git a/CHANGES.txt b/CHANGES.txt index b87a481cf01..4116d2e94cd 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -159,6 +159,13 @@ Optimizations 7. Use uncached access to norms when merging to reduce RAM usage. (Bug #32847). (Doug Cutting) + 8. Don't read term index when random-access is not required. This + reduces time to open IndexReaders and they use less memory when + random access is not required, e.g., when merging segments. The + term index is now read into memory lazily at the first + random-access. (Doug Cutting) + + Infrastructure 1. Lucene's source code repository has converted from CVS to diff --git a/src/java/org/apache/lucene/index/TermInfosReader.java b/src/java/org/apache/lucene/index/TermInfosReader.java index 3f394132b49..13c327b2083 100644 --- a/src/java/org/apache/lucene/index/TermInfosReader.java +++ b/src/java/org/apache/lucene/index/TermInfosReader.java @@ -33,6 +33,12 @@ final class TermInfosReader { private SegmentTermEnum origEnum; private long size; + private Term[] indexTerms = null; + private TermInfo[] indexInfos; + private long[] indexPointers; + + private SegmentTermEnum indexEnum; + TermInfosReader(Directory dir, String seg, FieldInfos fis) throws IOException { directory = dir; @@ -42,7 +48,10 @@ final class TermInfosReader { origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"), fieldInfos, false); size = origEnum.size; - readIndex(); + + indexEnum = + new SegmentTermEnum(directory.openInput(segment + ".tii"), + fieldInfos, true); } protected void finalize() { @@ -73,28 +82,24 @@ final class TermInfosReader { return termEnum; } - Term[] indexTerms = null; - TermInfo[] indexInfos; - long[] indexPointers; - - private final void readIndex() throws IOException { - SegmentTermEnum indexEnum = - new SegmentTermEnum(directory.openInput(segment + ".tii"), - fieldInfos, true); + private final void ensureIndexIsRead() throws IOException { + if (indexTerms != null) // index already read + return; // do nothing try { - int indexSize = (int)indexEnum.size; + int indexSize = (int)indexEnum.size; // otherwise read index indexTerms = new Term[indexSize]; indexInfos = new TermInfo[indexSize]; indexPointers = new long[indexSize]; - + for (int i = 0; indexEnum.next(); i++) { - indexTerms[i] = indexEnum.term(); - indexInfos[i] = indexEnum.termInfo(); - indexPointers[i] = indexEnum.indexPointer; + indexTerms[i] = indexEnum.term(); + indexInfos[i] = indexEnum.termInfo(); + indexPointers[i] = indexEnum.indexPointer; } } finally { - indexEnum.close(); + indexEnum.close(); + indexEnum = null; } } @@ -126,6 +131,8 @@ final class TermInfosReader { TermInfo get(Term term) throws IOException { if (size == 0) return null; + ensureIndexIsRead(); + // optimize sequential access: first try scanning cached enum w/o seeking SegmentTermEnum enumerator = getEnum(); if (enumerator.term() != null // term is at or past current @@ -179,6 +186,7 @@ final class TermInfosReader { final long getPosition(Term term) throws IOException { if (size == 0) return -1; + ensureIndexIsRead(); int indexOffset = getIndexOffset(term); seekEnum(indexOffset);