mirror of https://github.com/apache/lucene.git
use less ram for SimpleText stored fields and vectors
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1395736 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0933faf06b
commit
42ddef3b9e
|
@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -46,7 +45,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.*
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
||||
private ArrayList<Long> offsets; /* docid -> offset in .fld file */
|
||||
private long offsets[]; /* docid -> offset in .fld file */
|
||||
private IndexInput in;
|
||||
private BytesRef scratch = new BytesRef();
|
||||
private CharsRef scratchUTF16 = new CharsRef();
|
||||
|
@ -65,11 +64,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
|||
} catch (Throwable t) {} // ensure we throw our original exception
|
||||
}
|
||||
}
|
||||
readIndex();
|
||||
readIndex(si.getDocCount());
|
||||
}
|
||||
|
||||
// used by clone
|
||||
SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos) {
|
||||
SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
|
||||
this.offsets = offsets;
|
||||
this.in = in;
|
||||
this.fieldInfos = fieldInfos;
|
||||
|
@ -78,19 +77,22 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
|||
// we don't actually write a .fdx-like index, instead we read the
|
||||
// stored fields file in entirety up-front and save the offsets
|
||||
// so we can seek to the documents later.
|
||||
private void readIndex() throws IOException {
|
||||
offsets = new ArrayList<Long>();
|
||||
private void readIndex(int size) throws IOException {
|
||||
offsets = new long[size];
|
||||
int upto = 0;
|
||||
while (!scratch.equals(END)) {
|
||||
readLine();
|
||||
if (StringHelper.startsWith(scratch, DOC)) {
|
||||
offsets.add(in.getFilePointer());
|
||||
offsets[upto] = in.getFilePointer();
|
||||
upto++;
|
||||
}
|
||||
}
|
||||
assert upto == offsets.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
|
||||
in.seek(offsets.get(n));
|
||||
in.seek(offsets[n]);
|
||||
readLine();
|
||||
assert StringHelper.startsWith(scratch, NUM);
|
||||
int numFields = parseIntAt(NUM.length);
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
@ -54,7 +53,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||
private ArrayList<Long> offsets; /* docid -> offset in .vec file */
|
||||
private long offsets[]; /* docid -> offset in .vec file */
|
||||
private IndexInput in;
|
||||
private BytesRef scratch = new BytesRef();
|
||||
private CharsRef scratchUTF16 = new CharsRef();
|
||||
|
@ -71,11 +70,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
} catch (Throwable t) {} // ensure we throw our original exception
|
||||
}
|
||||
}
|
||||
readIndex();
|
||||
readIndex(si.getDocCount());
|
||||
}
|
||||
|
||||
// used by clone
|
||||
SimpleTextTermVectorsReader(ArrayList<Long> offsets, IndexInput in) {
|
||||
SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
|
||||
this.offsets = offsets;
|
||||
this.in = in;
|
||||
}
|
||||
|
@ -83,26 +82,29 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
// we don't actually write a .tvx-like index, instead we read the
|
||||
// vectors file in entirety up-front and save the offsets
|
||||
// so we can seek to the data later.
|
||||
private void readIndex() throws IOException {
|
||||
offsets = new ArrayList<Long>();
|
||||
private void readIndex(int maxDoc) throws IOException {
|
||||
offsets = new long[maxDoc];
|
||||
int upto = 0;
|
||||
while (!scratch.equals(END)) {
|
||||
readLine();
|
||||
if (StringHelper.startsWith(scratch, DOC)) {
|
||||
offsets.add(in.getFilePointer());
|
||||
offsets[upto] = in.getFilePointer();
|
||||
upto++;
|
||||
}
|
||||
}
|
||||
assert upto == offsets.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields get(int doc) throws IOException {
|
||||
// TestTV tests for this in testBadParams... but is this
|
||||
// really guaranteed by the API?
|
||||
if (doc < 0 || doc >= offsets.size()) {
|
||||
if (doc < 0 || doc >= offsets.length) {
|
||||
throw new IllegalArgumentException("doc id out of range");
|
||||
}
|
||||
|
||||
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
|
||||
in.seek(offsets.get(doc));
|
||||
in.seek(offsets[doc]);
|
||||
readLine();
|
||||
assert StringHelper.startsWith(scratch, NUMFIELDS);
|
||||
int numFields = parseIntAt(NUMFIELDS.length);
|
||||
|
|
Loading…
Reference in New Issue