mirror of https://github.com/apache/lucene.git
Read term index lazily at first random-access to terms.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@156444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4ed95e7f75
commit
be8ae58254
|
@ -159,6 +159,13 @@ Optimizations
|
|||
7. Use uncached access to norms when merging to reduce RAM usage.
|
||||
(Bug #32847). (Doug Cutting)
|
||||
|
||||
8. Don't read the term index when random access is not required. This
|
||||
reduces the time to open IndexReaders, and they use less memory when
|
||||
random access is not required, e.g., when merging segments. The
|
||||
term index is now read into memory lazily at the first
|
||||
random access. (Doug Cutting)
|
||||
|
||||
|
||||
Infrastructure
|
||||
|
||||
1. Lucene's source code repository has converted from CVS to
|
||||
|
|
|
@ -33,6 +33,12 @@ final class TermInfosReader {
|
|||
private SegmentTermEnum origEnum;
|
||||
private long size;
|
||||
|
||||
private Term[] indexTerms = null;
|
||||
private TermInfo[] indexInfos;
|
||||
private long[] indexPointers;
|
||||
|
||||
private SegmentTermEnum indexEnum;
|
||||
|
||||
TermInfosReader(Directory dir, String seg, FieldInfos fis)
|
||||
throws IOException {
|
||||
directory = dir;
|
||||
|
@ -42,7 +48,10 @@ final class TermInfosReader {
|
|||
origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
|
||||
fieldInfos, false);
|
||||
size = origEnum.size;
|
||||
readIndex();
|
||||
|
||||
indexEnum =
|
||||
new SegmentTermEnum(directory.openInput(segment + ".tii"),
|
||||
fieldInfos, true);
|
||||
}
|
||||
|
||||
protected void finalize() {
|
||||
|
@ -73,28 +82,24 @@ final class TermInfosReader {
|
|||
return termEnum;
|
||||
}
|
||||
|
||||
Term[] indexTerms = null;
|
||||
TermInfo[] indexInfos;
|
||||
long[] indexPointers;
|
||||
|
||||
private final void readIndex() throws IOException {
|
||||
SegmentTermEnum indexEnum =
|
||||
new SegmentTermEnum(directory.openInput(segment + ".tii"),
|
||||
fieldInfos, true);
|
||||
private final void ensureIndexIsRead() throws IOException {
|
||||
if (indexTerms != null) // index already read
|
||||
return; // do nothing
|
||||
try {
|
||||
int indexSize = (int)indexEnum.size;
|
||||
int indexSize = (int)indexEnum.size; // otherwise read index
|
||||
|
||||
indexTerms = new Term[indexSize];
|
||||
indexInfos = new TermInfo[indexSize];
|
||||
indexPointers = new long[indexSize];
|
||||
|
||||
for (int i = 0; indexEnum.next(); i++) {
|
||||
indexTerms[i] = indexEnum.term();
|
||||
indexInfos[i] = indexEnum.termInfo();
|
||||
indexPointers[i] = indexEnum.indexPointer;
|
||||
indexTerms[i] = indexEnum.term();
|
||||
indexInfos[i] = indexEnum.termInfo();
|
||||
indexPointers[i] = indexEnum.indexPointer;
|
||||
}
|
||||
} finally {
|
||||
indexEnum.close();
|
||||
indexEnum.close();
|
||||
indexEnum = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -126,6 +131,8 @@ final class TermInfosReader {
|
|||
TermInfo get(Term term) throws IOException {
|
||||
if (size == 0) return null;
|
||||
|
||||
ensureIndexIsRead();
|
||||
|
||||
// optimize sequential access: first try scanning cached enum w/o seeking
|
||||
SegmentTermEnum enumerator = getEnum();
|
||||
if (enumerator.term() != null // term is at or past current
|
||||
|
@ -179,6 +186,7 @@ final class TermInfosReader {
|
|||
final long getPosition(Term term) throws IOException {
|
||||
if (size == 0) return -1;
|
||||
|
||||
ensureIndexIsRead();
|
||||
int indexOffset = getIndexOffset(term);
|
||||
seekEnum(indexOffset);
|
||||
|
||||
|
|
Loading…
Reference in New Issue