mirror of https://github.com/apache/lucene.git
reduce RAM cost per unique field while writing postings
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dd93b85321
commit
33b30097aa
|
@ -69,7 +69,27 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
final FieldInfos fieldInfos;
|
final FieldInfos fieldInfos;
|
||||||
FieldInfo currentField;
|
FieldInfo currentField;
|
||||||
private final TermsIndexWriterBase termsIndexWriter;
|
private final TermsIndexWriterBase termsIndexWriter;
|
||||||
private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
|
|
||||||
|
private static class FieldMetaData {
|
||||||
|
public final FieldInfo fieldInfo;
|
||||||
|
public final long numTerms;
|
||||||
|
public final long termsStartPointer;
|
||||||
|
public final long sumTotalTermFreq;
|
||||||
|
public final long sumDocFreq;
|
||||||
|
public final int docCount;
|
||||||
|
|
||||||
|
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
|
||||||
|
assert numTerms > 0;
|
||||||
|
this.fieldInfo = fieldInfo;
|
||||||
|
this.termsStartPointer = termsStartPointer;
|
||||||
|
this.numTerms = numTerms;
|
||||||
|
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||||
|
this.sumDocFreq = sumDocFreq;
|
||||||
|
this.docCount = docCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
|
||||||
|
|
||||||
// private final String segment;
|
// private final String segment;
|
||||||
|
|
||||||
|
@ -108,9 +128,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
assert currentField == null || currentField.name.compareTo(field.name) < 0;
|
assert currentField == null || currentField.name.compareTo(field.name) < 0;
|
||||||
currentField = field;
|
currentField = field;
|
||||||
TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
|
TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
|
||||||
final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
|
return new TermsWriter(fieldIndexWriter, field, postingsWriter);
|
||||||
fields.add(terms);
|
|
||||||
return terms;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -118,27 +136,18 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
int nonZeroCount = 0;
|
|
||||||
for(TermsWriter field : fields) {
|
|
||||||
if (field.numTerms > 0) {
|
|
||||||
nonZeroCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final long dirStart = out.getFilePointer();
|
final long dirStart = out.getFilePointer();
|
||||||
|
|
||||||
out.writeVInt(nonZeroCount);
|
out.writeVInt(fields.size());
|
||||||
for(TermsWriter field : fields) {
|
for(FieldMetaData field : fields) {
|
||||||
if (field.numTerms > 0) {
|
out.writeVInt(field.fieldInfo.number);
|
||||||
out.writeVInt(field.fieldInfo.number);
|
out.writeVLong(field.numTerms);
|
||||||
out.writeVLong(field.numTerms);
|
out.writeVLong(field.termsStartPointer);
|
||||||
out.writeVLong(field.termsStartPointer);
|
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
|
||||||
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
|
out.writeVLong(field.sumTotalTermFreq);
|
||||||
out.writeVLong(field.sumTotalTermFreq);
|
|
||||||
}
|
|
||||||
out.writeVLong(field.sumDocFreq);
|
|
||||||
out.writeVInt(field.docCount);
|
|
||||||
}
|
}
|
||||||
|
out.writeVLong(field.sumDocFreq);
|
||||||
|
out.writeVInt(field.docCount);
|
||||||
}
|
}
|
||||||
writeTrailer(dirStart);
|
writeTrailer(dirStart);
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -249,6 +258,14 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
this.sumDocFreq = sumDocFreq;
|
this.sumDocFreq = sumDocFreq;
|
||||||
this.docCount = docCount;
|
this.docCount = docCount;
|
||||||
fieldIndexWriter.finish(out.getFilePointer());
|
fieldIndexWriter.finish(out.getFilePointer());
|
||||||
|
if (numTerms > 0) {
|
||||||
|
fields.add(new FieldMetaData(fieldInfo,
|
||||||
|
numTerms,
|
||||||
|
termsStartPointer,
|
||||||
|
sumTotalTermFreq,
|
||||||
|
sumDocFreq,
|
||||||
|
docCount));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int sharedPrefix(BytesRef term1, BytesRef term2) {
|
private int sharedPrefix(BytesRef term1, BytesRef term2) {
|
||||||
|
|
|
@ -228,7 +228,30 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
final PostingsWriterBase postingsWriter;
|
final PostingsWriterBase postingsWriter;
|
||||||
final FieldInfos fieldInfos;
|
final FieldInfos fieldInfos;
|
||||||
FieldInfo currentField;
|
FieldInfo currentField;
|
||||||
private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
|
|
||||||
|
private static class FieldMetaData {
|
||||||
|
public final FieldInfo fieldInfo;
|
||||||
|
public final BytesRef rootCode;
|
||||||
|
public final long numTerms;
|
||||||
|
public final long indexStartFP;
|
||||||
|
public final long sumTotalTermFreq;
|
||||||
|
public final long sumDocFreq;
|
||||||
|
public final int docCount;
|
||||||
|
|
||||||
|
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount) {
|
||||||
|
assert numTerms > 0;
|
||||||
|
this.fieldInfo = fieldInfo;
|
||||||
|
assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
|
||||||
|
this.rootCode = rootCode;
|
||||||
|
this.indexStartFP = indexStartFP;
|
||||||
|
this.numTerms = numTerms;
|
||||||
|
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||||
|
this.sumDocFreq = sumDocFreq;
|
||||||
|
this.docCount = docCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
|
||||||
// private final String segment;
|
// private final String segment;
|
||||||
|
|
||||||
/** Create a new writer. The number of items (terms or
|
/** Create a new writer. The number of items (terms or
|
||||||
|
@ -313,9 +336,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
//if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name);
|
//if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name);
|
||||||
assert currentField == null || currentField.name.compareTo(field.name) < 0;
|
assert currentField == null || currentField.name.compareTo(field.name) < 0;
|
||||||
currentField = field;
|
currentField = field;
|
||||||
final TermsWriter terms = new TermsWriter(field);
|
return new TermsWriter(field);
|
||||||
fields.add(terms);
|
|
||||||
return terms;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
|
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
|
||||||
|
@ -1007,6 +1028,14 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
// System.out.println("SAVED to " + dotFileName);
|
// System.out.println("SAVED to " + dotFileName);
|
||||||
// w.close();
|
// w.close();
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
fields.add(new FieldMetaData(fieldInfo,
|
||||||
|
((PendingBlock) pending.get(0)).index.getEmptyOutput(),
|
||||||
|
numTerms,
|
||||||
|
indexStartFP,
|
||||||
|
sumTotalTermFreq,
|
||||||
|
sumDocFreq,
|
||||||
|
docCount));
|
||||||
} else {
|
} else {
|
||||||
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
|
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
|
||||||
assert sumDocFreq == 0;
|
assert sumDocFreq == 0;
|
||||||
|
@ -1024,34 +1053,23 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
||||||
IOException ioe = null;
|
IOException ioe = null;
|
||||||
try {
|
try {
|
||||||
|
|
||||||
int nonZeroCount = 0;
|
|
||||||
for(TermsWriter field : fields) {
|
|
||||||
if (field.numTerms > 0) {
|
|
||||||
nonZeroCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final long dirStart = out.getFilePointer();
|
final long dirStart = out.getFilePointer();
|
||||||
final long indexDirStart = indexOut.getFilePointer();
|
final long indexDirStart = indexOut.getFilePointer();
|
||||||
|
|
||||||
out.writeVInt(nonZeroCount);
|
out.writeVInt(fields.size());
|
||||||
|
|
||||||
for(TermsWriter field : fields) {
|
for(FieldMetaData field : fields) {
|
||||||
if (field.numTerms > 0) {
|
//System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
|
||||||
//System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
|
out.writeVInt(field.fieldInfo.number);
|
||||||
out.writeVInt(field.fieldInfo.number);
|
out.writeVLong(field.numTerms);
|
||||||
out.writeVLong(field.numTerms);
|
out.writeVInt(field.rootCode.length);
|
||||||
final BytesRef rootCode = ((PendingBlock) field.pending.get(0)).index.getEmptyOutput();
|
out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
|
||||||
assert rootCode != null: "field=" + field.fieldInfo.name + " numTerms=" + field.numTerms;
|
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
|
||||||
out.writeVInt(rootCode.length);
|
out.writeVLong(field.sumTotalTermFreq);
|
||||||
out.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length);
|
|
||||||
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
|
|
||||||
out.writeVLong(field.sumTotalTermFreq);
|
|
||||||
}
|
|
||||||
out.writeVLong(field.sumDocFreq);
|
|
||||||
out.writeVInt(field.docCount);
|
|
||||||
indexOut.writeVLong(field.indexStartFP);
|
|
||||||
}
|
}
|
||||||
|
out.writeVLong(field.sumDocFreq);
|
||||||
|
out.writeVInt(field.docCount);
|
||||||
|
indexOut.writeVLong(field.indexStartFP);
|
||||||
}
|
}
|
||||||
writeTrailer(out, dirStart);
|
writeTrailer(out, dirStart);
|
||||||
writeIndexTrailer(indexOut, indexDirStart);
|
writeIndexTrailer(indexOut, indexDirStart);
|
||||||
|
|
Loading…
Reference in New Issue