mirror of https://github.com/apache/lucene.git
use less RAM for SimpleText stored fields and vectors
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1395736 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0933faf06b
commit
42ddef3b9e
|
@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
@ -46,7 +45,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
||||||
private ArrayList<Long> offsets; /* docid -> offset in .fld file */
|
private long offsets[]; /* docid -> offset in .fld file */
|
||||||
private IndexInput in;
|
private IndexInput in;
|
||||||
private BytesRef scratch = new BytesRef();
|
private BytesRef scratch = new BytesRef();
|
||||||
private CharsRef scratchUTF16 = new CharsRef();
|
private CharsRef scratchUTF16 = new CharsRef();
|
||||||
|
@ -65,11 +64,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
||||||
} catch (Throwable t) {} // ensure we throw our original exception
|
} catch (Throwable t) {} // ensure we throw our original exception
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
readIndex();
|
readIndex(si.getDocCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
// used by clone
|
// used by clone
|
||||||
SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos) {
|
SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
|
||||||
this.offsets = offsets;
|
this.offsets = offsets;
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.fieldInfos = fieldInfos;
|
this.fieldInfos = fieldInfos;
|
||||||
|
@ -78,19 +77,22 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
||||||
// we don't actually write a .fdx-like index, instead we read the
|
// we don't actually write a .fdx-like index, instead we read the
|
||||||
// stored fields file in entirety up-front and save the offsets
|
// stored fields file in entirety up-front and save the offsets
|
||||||
// so we can seek to the documents later.
|
// so we can seek to the documents later.
|
||||||
private void readIndex() throws IOException {
|
private void readIndex(int size) throws IOException {
|
||||||
offsets = new ArrayList<Long>();
|
offsets = new long[size];
|
||||||
|
int upto = 0;
|
||||||
while (!scratch.equals(END)) {
|
while (!scratch.equals(END)) {
|
||||||
readLine();
|
readLine();
|
||||||
if (StringHelper.startsWith(scratch, DOC)) {
|
if (StringHelper.startsWith(scratch, DOC)) {
|
||||||
offsets.add(in.getFilePointer());
|
offsets[upto] = in.getFilePointer();
|
||||||
|
upto++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
assert upto == offsets.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
|
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
|
||||||
in.seek(offsets.get(n));
|
in.seek(offsets[n]);
|
||||||
readLine();
|
readLine();
|
||||||
assert StringHelper.startsWith(scratch, NUM);
|
assert StringHelper.startsWith(scratch, NUM);
|
||||||
int numFields = parseIntAt(NUM.length);
|
int numFields = parseIntAt(NUM.length);
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
@ -54,7 +53,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
private ArrayList<Long> offsets; /* docid -> offset in .vec file */
|
private long offsets[]; /* docid -> offset in .vec file */
|
||||||
private IndexInput in;
|
private IndexInput in;
|
||||||
private BytesRef scratch = new BytesRef();
|
private BytesRef scratch = new BytesRef();
|
||||||
private CharsRef scratchUTF16 = new CharsRef();
|
private CharsRef scratchUTF16 = new CharsRef();
|
||||||
|
@ -71,11 +70,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
} catch (Throwable t) {} // ensure we throw our original exception
|
} catch (Throwable t) {} // ensure we throw our original exception
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
readIndex();
|
readIndex(si.getDocCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
// used by clone
|
// used by clone
|
||||||
SimpleTextTermVectorsReader(ArrayList<Long> offsets, IndexInput in) {
|
SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
|
||||||
this.offsets = offsets;
|
this.offsets = offsets;
|
||||||
this.in = in;
|
this.in = in;
|
||||||
}
|
}
|
||||||
|
@ -83,26 +82,29 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
// we don't actually write a .tvx-like index, instead we read the
|
// we don't actually write a .tvx-like index, instead we read the
|
||||||
// vectors file in entirety up-front and save the offsets
|
// vectors file in entirety up-front and save the offsets
|
||||||
// so we can seek to the data later.
|
// so we can seek to the data later.
|
||||||
private void readIndex() throws IOException {
|
private void readIndex(int maxDoc) throws IOException {
|
||||||
offsets = new ArrayList<Long>();
|
offsets = new long[maxDoc];
|
||||||
|
int upto = 0;
|
||||||
while (!scratch.equals(END)) {
|
while (!scratch.equals(END)) {
|
||||||
readLine();
|
readLine();
|
||||||
if (StringHelper.startsWith(scratch, DOC)) {
|
if (StringHelper.startsWith(scratch, DOC)) {
|
||||||
offsets.add(in.getFilePointer());
|
offsets[upto] = in.getFilePointer();
|
||||||
|
upto++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
assert upto == offsets.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields get(int doc) throws IOException {
|
public Fields get(int doc) throws IOException {
|
||||||
// TestTV tests for this in testBadParams... but is this
|
// TestTV tests for this in testBadParams... but is this
|
||||||
// really guaranteed by the API?
|
// really guaranteed by the API?
|
||||||
if (doc < 0 || doc >= offsets.size()) {
|
if (doc < 0 || doc >= offsets.length) {
|
||||||
throw new IllegalArgumentException("doc id out of range");
|
throw new IllegalArgumentException("doc id out of range");
|
||||||
}
|
}
|
||||||
|
|
||||||
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
|
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
|
||||||
in.seek(offsets.get(doc));
|
in.seek(offsets[doc]);
|
||||||
readLine();
|
readLine();
|
||||||
assert StringHelper.startsWith(scratch, NUMFIELDS);
|
assert StringHelper.startsWith(scratch, NUMFIELDS);
|
||||||
int numFields = parseIntAt(NUMFIELDS.length);
|
int numFields = parseIntAt(NUMFIELDS.length);
|
||||||
|
|
Loading…
Reference in New Issue