Use less RAM for SimpleText stored fields and vectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1395736 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-10-08 19:31:33 +00:00
parent 0933faf06b
commit 42ddef3b9e
2 changed files with 21 additions and 17 deletions

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
@@ -46,7 +45,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.*
* @lucene.experimental * @lucene.experimental
*/ */
public class SimpleTextStoredFieldsReader extends StoredFieldsReader { public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
private ArrayList<Long> offsets; /* docid -> offset in .fld file */ private long offsets[]; /* docid -> offset in .fld file */
private IndexInput in; private IndexInput in;
private BytesRef scratch = new BytesRef(); private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef(); private CharsRef scratchUTF16 = new CharsRef();
@@ -65,11 +64,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
} catch (Throwable t) {} // ensure we throw our original exception } catch (Throwable t) {} // ensure we throw our original exception
} }
} }
readIndex(); readIndex(si.getDocCount());
} }
// used by clone // used by clone
SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos) { SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
this.offsets = offsets; this.offsets = offsets;
this.in = in; this.in = in;
this.fieldInfos = fieldInfos; this.fieldInfos = fieldInfos;
@@ -78,19 +77,22 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
// we don't actually write a .fdx-like index, instead we read the // we don't actually write a .fdx-like index, instead we read the
// stored fields file in entirety up-front and save the offsets // stored fields file in entirety up-front and save the offsets
// so we can seek to the documents later. // so we can seek to the documents later.
private void readIndex() throws IOException { private void readIndex(int size) throws IOException {
offsets = new ArrayList<Long>(); offsets = new long[size];
int upto = 0;
while (!scratch.equals(END)) { while (!scratch.equals(END)) {
readLine(); readLine();
if (StringHelper.startsWith(scratch, DOC)) { if (StringHelper.startsWith(scratch, DOC)) {
offsets.add(in.getFilePointer()); offsets[upto] = in.getFilePointer();
upto++;
} }
} }
assert upto == offsets.length;
} }
@Override @Override
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException { public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
in.seek(offsets.get(n)); in.seek(offsets[n]);
readLine(); readLine();
assert StringHelper.startsWith(scratch, NUM); assert StringHelper.startsWith(scratch, NUM);
int numFields = parseIntAt(NUM.length); int numFields = parseIntAt(NUM.length);

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
@@ -54,7 +53,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
* @lucene.experimental * @lucene.experimental
*/ */
public class SimpleTextTermVectorsReader extends TermVectorsReader { public class SimpleTextTermVectorsReader extends TermVectorsReader {
private ArrayList<Long> offsets; /* docid -> offset in .vec file */ private long offsets[]; /* docid -> offset in .vec file */
private IndexInput in; private IndexInput in;
private BytesRef scratch = new BytesRef(); private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef(); private CharsRef scratchUTF16 = new CharsRef();
@@ -71,11 +70,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
} catch (Throwable t) {} // ensure we throw our original exception } catch (Throwable t) {} // ensure we throw our original exception
} }
} }
readIndex(); readIndex(si.getDocCount());
} }
// used by clone // used by clone
SimpleTextTermVectorsReader(ArrayList<Long> offsets, IndexInput in) { SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
this.offsets = offsets; this.offsets = offsets;
this.in = in; this.in = in;
} }
@@ -83,26 +82,29 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
// we don't actually write a .tvx-like index, instead we read the // we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets // vectors file in entirety up-front and save the offsets
// so we can seek to the data later. // so we can seek to the data later.
private void readIndex() throws IOException { private void readIndex(int maxDoc) throws IOException {
offsets = new ArrayList<Long>(); offsets = new long[maxDoc];
int upto = 0;
while (!scratch.equals(END)) { while (!scratch.equals(END)) {
readLine(); readLine();
if (StringHelper.startsWith(scratch, DOC)) { if (StringHelper.startsWith(scratch, DOC)) {
offsets.add(in.getFilePointer()); offsets[upto] = in.getFilePointer();
upto++;
} }
} }
assert upto == offsets.length;
} }
@Override @Override
public Fields get(int doc) throws IOException { public Fields get(int doc) throws IOException {
// TestTV tests for this in testBadParams... but is this // TestTV tests for this in testBadParams... but is this
// really guaranteed by the API? // really guaranteed by the API?
if (doc < 0 || doc >= offsets.size()) { if (doc < 0 || doc >= offsets.length) {
throw new IllegalArgumentException("doc id out of range"); throw new IllegalArgumentException("doc id out of range");
} }
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>(); SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
in.seek(offsets.get(doc)); in.seek(offsets[doc]);
readLine(); readLine();
assert StringHelper.startsWith(scratch, NUMFIELDS); assert StringHelper.startsWith(scratch, NUMFIELDS);
int numFields = parseIntAt(NUMFIELDS.length); int numFields = parseIntAt(NUMFIELDS.length);