use less ram for SimpleText stored fields and vectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1395736 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-10-08 19:31:33 +00:00
parent 0933faf06b
commit 42ddef3b9e
2 changed files with 21 additions and 17 deletions

View File

@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
*/
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.FieldInfo;
@ -46,7 +45,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.*
* @lucene.experimental
*/
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
private ArrayList<Long> offsets; /* docid -> offset in .fld file */
private long offsets[]; /* docid -> offset in .fld file */
private IndexInput in;
private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef();
@ -65,11 +64,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
} catch (Throwable t) {} // ensure we throw our original exception
}
}
readIndex();
readIndex(si.getDocCount());
}
// used by clone
SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos) {
SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
this.offsets = offsets;
this.in = in;
this.fieldInfos = fieldInfos;
@ -78,19 +77,22 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
// we don't actually write a .fdx-like index, instead we read the
// stored fields file in entirety up-front and save the offsets
// so we can seek to the documents later.
private void readIndex() throws IOException {
offsets = new ArrayList<Long>();
private void readIndex(int size) throws IOException {
offsets = new long[size];
int upto = 0;
while (!scratch.equals(END)) {
readLine();
if (StringHelper.startsWith(scratch, DOC)) {
offsets.add(in.getFilePointer());
offsets[upto] = in.getFilePointer();
upto++;
}
}
assert upto == offsets.length;
}
@Override
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
in.seek(offsets.get(n));
in.seek(offsets[n]);
readLine();
assert StringHelper.startsWith(scratch, NUM);
int numFields = parseIntAt(NUM.length);

View File

@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@ -54,7 +53,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
* @lucene.experimental
*/
public class SimpleTextTermVectorsReader extends TermVectorsReader {
private ArrayList<Long> offsets; /* docid -> offset in .vec file */
private long offsets[]; /* docid -> offset in .vec file */
private IndexInput in;
private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef();
@ -71,11 +70,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
} catch (Throwable t) {} // ensure we throw our original exception
}
}
readIndex();
readIndex(si.getDocCount());
}
// used by clone
SimpleTextTermVectorsReader(ArrayList<Long> offsets, IndexInput in) {
SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
this.offsets = offsets;
this.in = in;
}
@ -83,26 +82,29 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
// we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
private void readIndex() throws IOException {
offsets = new ArrayList<Long>();
private void readIndex(int maxDoc) throws IOException {
offsets = new long[maxDoc];
int upto = 0;
while (!scratch.equals(END)) {
readLine();
if (StringHelper.startsWith(scratch, DOC)) {
offsets.add(in.getFilePointer());
offsets[upto] = in.getFilePointer();
upto++;
}
}
assert upto == offsets.length;
}
@Override
public Fields get(int doc) throws IOException {
// TestTV tests for this in testBadParams... but is this
// really guaranteed by the API?
if (doc < 0 || doc >= offsets.size()) {
if (doc < 0 || doc >= offsets.length) {
throw new IllegalArgumentException("doc id out of range");
}
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
in.seek(offsets.get(doc));
in.seek(offsets[doc]);
readLine();
assert StringHelper.startsWith(scratch, NUMFIELDS);
int numFields = parseIntAt(NUMFIELDS.length);