mirror of https://github.com/apache/lucene.git
LUCENE-4670: Add finish* callbacks to StoredFieldsWriter and TermVectorsWriter.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1431283 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de8bfe6dd4
commit
e38d91a8f5
|
@ -107,6 +107,10 @@ Changes in backwards compatibility policy
|
||||||
* LUCENE-4659: Massive cleanup to CategoryPath API. Additionally, CategoryPath is
|
* LUCENE-4659: Massive cleanup to CategoryPath API. Additionally, CategoryPath is
|
||||||
now immutable, so you don't need to clone() it. (Shai Erera)
|
now immutable, so you don't need to clone() it. (Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-4670: StoredFieldsWriter and TermVectorsWriter have new finish* callbacks
|
||||||
|
which are called after a doc/field/term has been completely added.
|
||||||
|
(Adrien Grand, Robert Muir)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
|
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
|
||||||
|
|
|
@ -56,6 +56,9 @@ public abstract class StoredFieldsWriter implements Closeable {
|
||||||
* this case <code>numStoredFields</code> will be zero. */
|
* this case <code>numStoredFields</code> will be zero. */
|
||||||
public abstract void startDocument(int numStoredFields) throws IOException;
|
public abstract void startDocument(int numStoredFields) throws IOException;
|
||||||
|
|
||||||
|
/** Called when a document and all its fields have been added. */
|
||||||
|
public void finishDocument() throws IOException {}
|
||||||
|
|
||||||
/** Writes a single stored field. */
|
/** Writes a single stored field. */
|
||||||
public abstract void writeField(FieldInfo info, StorableField field) throws IOException;
|
public abstract void writeField(FieldInfo info, StorableField field) throws IOException;
|
||||||
|
|
||||||
|
@ -116,6 +119,8 @@ public abstract class StoredFieldsWriter implements Closeable {
|
||||||
for (StorableField field : doc) {
|
for (StorableField field : doc) {
|
||||||
writeField(fieldInfos.fieldInfo(field.name()), field);
|
writeField(fieldInfos.fieldInfo(field.name()), field);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finishDocument();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -72,10 +72,16 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
* will be zero. */
|
* will be zero. */
|
||||||
public abstract void startDocument(int numVectorFields) throws IOException;
|
public abstract void startDocument(int numVectorFields) throws IOException;
|
||||||
|
|
||||||
|
/** Called after a doc and all its fields have been added. */
|
||||||
|
public void finishDocument() throws IOException {};
|
||||||
|
|
||||||
/** Called before writing the terms of the field.
|
/** Called before writing the terms of the field.
|
||||||
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
|
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
|
||||||
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
|
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
|
||||||
|
|
||||||
|
/** Called after a field and all its terms have been added. */
|
||||||
|
public void finishField() throws IOException {};
|
||||||
|
|
||||||
/** Adds a term and its term frequency <code>freq</code>.
|
/** Adds a term and its term frequency <code>freq</code>.
|
||||||
* If this field has positions and/or offsets enabled, then
|
* If this field has positions and/or offsets enabled, then
|
||||||
* {@link #addPosition(int, int, int, BytesRef)} will be called
|
* {@link #addPosition(int, int, int, BytesRef)} will be called
|
||||||
|
@ -83,6 +89,9 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
*/
|
*/
|
||||||
public abstract void startTerm(BytesRef term, int freq) throws IOException;
|
public abstract void startTerm(BytesRef term, int freq) throws IOException;
|
||||||
|
|
||||||
|
/** Called after a term and all its positions have been added. */
|
||||||
|
public void finishTerm() throws IOException {}
|
||||||
|
|
||||||
/** Adds a term position and offsets */
|
/** Adds a term position and offsets */
|
||||||
public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
|
public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
|
||||||
|
|
||||||
|
@ -197,6 +206,7 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
|
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
|
||||||
if (vectors == null) {
|
if (vectors == null) {
|
||||||
startDocument(0);
|
startDocument(0);
|
||||||
|
finishDocument();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -275,10 +285,13 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
addPosition(pos, startOffset, endOffset, payload);
|
addPosition(pos, startOffset, endOffset, payload);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
finishTerm();
|
||||||
}
|
}
|
||||||
assert termCount == numTerms;
|
assert termCount == numTerms;
|
||||||
|
finishField();
|
||||||
}
|
}
|
||||||
assert fieldCount == numFields;
|
assert fieldCount == numFields;
|
||||||
|
finishDocument();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return the BytesRef Comparator used to sort terms
|
/** Return the BytesRef Comparator used to sort terms
|
||||||
|
|
|
@ -136,19 +136,8 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void endWithPreviousDocument() throws IOException {
|
|
||||||
if (numBufferedDocs > 0) {
|
|
||||||
endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startDocument(int numStoredFields) throws IOException {
|
public void startDocument(int numStoredFields) throws IOException {
|
||||||
endWithPreviousDocument();
|
|
||||||
if (triggerFlush()) {
|
|
||||||
flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numBufferedDocs == this.numStoredFields.length) {
|
if (numBufferedDocs == this.numStoredFields.length) {
|
||||||
final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
|
final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
|
||||||
this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength);
|
this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength);
|
||||||
|
@ -158,6 +147,14 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
++numBufferedDocs;
|
++numBufferedDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishDocument() throws IOException {
|
||||||
|
endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
|
||||||
|
if (triggerFlush()) {
|
||||||
|
flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void saveInts(int[] values, int length, DataOutput out) throws IOException {
|
private static void saveInts(int[] values, int length, DataOutput out) throws IOException {
|
||||||
assert length > 0;
|
assert length > 0;
|
||||||
if (length == 1) {
|
if (length == 1) {
|
||||||
|
@ -295,9 +292,10 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
||||||
endWithPreviousDocument();
|
|
||||||
if (numBufferedDocs > 0) {
|
if (numBufferedDocs > 0) {
|
||||||
flush();
|
flush();
|
||||||
|
} else {
|
||||||
|
assert bufferedDocs.length == 0;
|
||||||
}
|
}
|
||||||
if (docBase != numDocs) {
|
if (docBase != numDocs) {
|
||||||
throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
|
throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
|
||||||
|
@ -351,17 +349,13 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
|
if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
|
||||||
&& (numBufferedDocs == 0 || triggerFlush()) // starting a new chunk
|
&& numBufferedDocs == 0 // starting a new chunk
|
||||||
&& startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
|
&& startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
|
||||||
&& startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
|
&& startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
|
||||||
&& nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
|
&& nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
|
||||||
assert docID == it.docBase;
|
assert docID == it.docBase;
|
||||||
|
|
||||||
// no need to decompress, just copy data
|
// no need to decompress, just copy data
|
||||||
endWithPreviousDocument();
|
|
||||||
if (triggerFlush()) {
|
|
||||||
flush();
|
|
||||||
}
|
|
||||||
indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
|
indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
|
||||||
writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
|
writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
|
||||||
it.copyCompressedData(fieldsStream);
|
it.copyCompressedData(fieldsStream);
|
||||||
|
@ -380,6 +374,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
final int diff = docID - it.docBase;
|
final int diff = docID - it.docBase;
|
||||||
startDocument(it.numStoredFields[diff]);
|
startDocument(it.numStoredFields[diff]);
|
||||||
bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
|
bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
|
||||||
|
finishDocument();
|
||||||
++docCount;
|
++docCount;
|
||||||
mergeState.checkAbort.work(300);
|
mergeState.checkAbort.work(300);
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,14 +124,13 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
if (payloads)
|
if (payloads)
|
||||||
bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
|
bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
|
||||||
tvf.writeByte(bits);
|
tvf.writeByte(bits);
|
||||||
|
}
|
||||||
|
|
||||||
assert fieldCount <= numVectorFields;
|
@Override
|
||||||
if (fieldCount == numVectorFields) {
|
public void finishDocument() throws IOException {
|
||||||
// last field of the document
|
assert fieldCount == numVectorFields;
|
||||||
// this is crazy because the file format is crazy!
|
for (int i = 1; i < fieldCount; i++) {
|
||||||
for (int i = 1; i < fieldCount; i++) {
|
tvd.writeVLong(fps[i] - fps[i-1]);
|
||||||
tvd.writeVLong(fps[i] - fps[i-1]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,20 +221,6 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
bufferedIndex++;
|
bufferedIndex++;
|
||||||
|
|
||||||
// dump buffer if we are done
|
|
||||||
if (bufferedIndex == bufferedFreq) {
|
|
||||||
if (payloads) {
|
|
||||||
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < bufferedIndex; i++) {
|
|
||||||
if (offsets) {
|
|
||||||
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
|
|
||||||
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
|
|
||||||
lastOffset = offsetEndBuffer[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (positions) {
|
} else if (positions) {
|
||||||
// write position delta
|
// write position delta
|
||||||
writePosition(position - lastPosition, payload);
|
writePosition(position - lastPosition, payload);
|
||||||
|
@ -248,6 +233,25 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishTerm() throws IOException {
|
||||||
|
if (bufferedIndex > 0) {
|
||||||
|
// dump buffer
|
||||||
|
assert positions && (offsets || payloads);
|
||||||
|
assert bufferedIndex == bufferedFreq;
|
||||||
|
if (payloads) {
|
||||||
|
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < bufferedIndex; i++) {
|
||||||
|
if (offsets) {
|
||||||
|
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
|
||||||
|
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
|
||||||
|
lastOffset = offsetEndBuffer[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void writePosition(int delta, BytesRef payload) throws IOException {
|
private void writePosition(int delta, BytesRef payload) throws IOException {
|
||||||
if (payloads) {
|
if (payloads) {
|
||||||
int payloadLength = payload == null ? 0 : payload.length;
|
int payloadLength = payload == null ? 0 : payload.length;
|
||||||
|
|
|
@ -105,6 +105,7 @@ final class StoredFieldsConsumer {
|
||||||
while(lastDocID < docID) {
|
while(lastDocID < docID) {
|
||||||
fieldsWriter.startDocument(0);
|
fieldsWriter.startDocument(0);
|
||||||
lastDocID++;
|
lastDocID++;
|
||||||
|
fieldsWriter.finishDocument();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,6 +120,7 @@ final class StoredFieldsConsumer {
|
||||||
for (int i = 0; i < numStoredFields; i++) {
|
for (int i = 0; i < numStoredFields; i++) {
|
||||||
fieldsWriter.writeField(fieldInfos[i], storedFields[i]);
|
fieldsWriter.writeField(fieldInfos[i], storedFields[i]);
|
||||||
}
|
}
|
||||||
|
fieldsWriter.finishDocument();
|
||||||
lastDocID++;
|
lastDocID++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,7 @@ final class TermVectorsConsumer extends TermsHashConsumer {
|
||||||
void fill(int docID) throws IOException {
|
void fill(int docID) throws IOException {
|
||||||
while(lastDocID < docID) {
|
while(lastDocID < docID) {
|
||||||
writer.startDocument(0);
|
writer.startDocument(0);
|
||||||
|
writer.finishDocument();
|
||||||
lastDocID++;
|
lastDocID++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -108,6 +109,7 @@ final class TermVectorsConsumer extends TermsHashConsumer {
|
||||||
for (int i = 0; i < numVectorFields; i++) {
|
for (int i = 0; i < numVectorFields; i++) {
|
||||||
perFields[i].finishDocument();
|
perFields[i].finishDocument();
|
||||||
}
|
}
|
||||||
|
writer.finishDocument();
|
||||||
|
|
||||||
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
|
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
|
||||||
|
|
||||||
|
|
|
@ -182,7 +182,9 @@ final class TermVectorsConsumerPerField extends TermsHashConsumerPerField {
|
||||||
}
|
}
|
||||||
tv.addProx(freq, posReader, offReader);
|
tv.addProx(freq, posReader, offReader);
|
||||||
}
|
}
|
||||||
|
tv.finishTerm();
|
||||||
}
|
}
|
||||||
|
tv.finishField();
|
||||||
|
|
||||||
termsHashPerField.reset();
|
termsHashPerField.reset();
|
||||||
|
|
||||||
|
|
|
@ -73,25 +73,42 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * State of the document currently going through the asserting writer:
 * nothing seen yet, started, or finished.
 */
enum Status {
  UNDEFINED,
  STARTED,
  FINISHED
}
|
||||||
|
|
||||||
static class AssertingStoredFieldsWriter extends StoredFieldsWriter {
|
static class AssertingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
private final StoredFieldsWriter in;
|
private final StoredFieldsWriter in;
|
||||||
private int numWritten;
|
private int numWritten;
|
||||||
private int fieldCount;
|
private int fieldCount;
|
||||||
|
private Status docStatus;
|
||||||
|
|
||||||
AssertingStoredFieldsWriter(StoredFieldsWriter in) {
|
AssertingStoredFieldsWriter(StoredFieldsWriter in) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
|
this.docStatus = Status.UNDEFINED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startDocument(int numStoredFields) throws IOException {
|
public void startDocument(int numStoredFields) throws IOException {
|
||||||
|
assert docStatus != Status.STARTED;
|
||||||
in.startDocument(numStoredFields);
|
in.startDocument(numStoredFields);
|
||||||
assert fieldCount == 0;
|
assert fieldCount == 0;
|
||||||
fieldCount = numStoredFields;
|
fieldCount = numStoredFields;
|
||||||
numWritten++;
|
numWritten++;
|
||||||
|
docStatus = Status.STARTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishDocument() throws IOException {
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
assert fieldCount == 0;
|
||||||
|
in.finishDocument();
|
||||||
|
docStatus = Status.FINISHED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeField(FieldInfo info, StorableField field) throws IOException {
|
public void writeField(FieldInfo info, StorableField field) throws IOException {
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
in.writeField(info, field);
|
in.writeField(info, field);
|
||||||
assert fieldCount > 0;
|
assert fieldCount > 0;
|
||||||
fieldCount--;
|
fieldCount--;
|
||||||
|
@ -104,6 +121,7 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
||||||
|
assert docStatus == (numDocs > 0 ? Status.FINISHED : Status.UNDEFINED);
|
||||||
in.finish(fis, numDocs);
|
in.finish(fis, numDocs);
|
||||||
assert fieldCount == 0;
|
assert fieldCount == 0;
|
||||||
assert numDocs == numWritten;
|
assert numDocs == numWritten;
|
||||||
|
@ -112,6 +130,7 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
in.close();
|
in.close();
|
||||||
|
assert docStatus != Status.STARTED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,17 +18,20 @@ package org.apache.lucene.codecs.asserting;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
|
||||||
import org.apache.lucene.index.AssertingAtomicReader;
|
import org.apache.lucene.index.AssertingAtomicReader;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Just like {@link Lucene40TermVectorsFormat} but with additional asserts.
|
* Just like {@link Lucene40TermVectorsFormat} but with additional asserts.
|
||||||
|
@ -43,7 +46,7 @@ public class AssertingTermVectorsFormat extends TermVectorsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
|
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
|
||||||
return in.vectorsWriter(directory, segmentInfo, context);
|
return new AssertingTermVectorsWriter(in.vectorsWriter(directory, segmentInfo, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
static class AssertingTermVectorsReader extends TermVectorsReader {
|
static class AssertingTermVectorsReader extends TermVectorsReader {
|
||||||
|
@ -69,4 +72,119 @@ public class AssertingTermVectorsFormat extends TermVectorsFormat {
|
||||||
return new AssertingTermVectorsReader(in.clone());
|
return new AssertingTermVectorsReader(in.clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * State of a document, field or term as seen by the asserting writer:
 * nothing seen yet, started, or finished.
 */
enum Status {
  UNDEFINED,
  STARTED,
  FINISHED
}
|
||||||
|
|
||||||
|
static class AssertingTermVectorsWriter extends TermVectorsWriter {
|
||||||
|
private final TermVectorsWriter in;
|
||||||
|
private Status docStatus, fieldStatus, termStatus;
|
||||||
|
private int fieldCount, termCount, positionCount;
|
||||||
|
boolean hasPositions;
|
||||||
|
|
||||||
|
AssertingTermVectorsWriter(TermVectorsWriter in) {
|
||||||
|
this.in = in;
|
||||||
|
docStatus = Status.UNDEFINED;
|
||||||
|
fieldStatus = Status.UNDEFINED;
|
||||||
|
termStatus = Status.UNDEFINED;
|
||||||
|
fieldCount = termCount = positionCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startDocument(int numVectorFields) throws IOException {
|
||||||
|
assert fieldCount == 0;
|
||||||
|
assert docStatus != Status.STARTED;
|
||||||
|
in.startDocument(numVectorFields);
|
||||||
|
docStatus = Status.STARTED;
|
||||||
|
fieldCount = numVectorFields;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishDocument() throws IOException {
|
||||||
|
assert fieldCount == 0;
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
in.finishDocument();
|
||||||
|
docStatus = Status.FINISHED;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startField(FieldInfo info, int numTerms, boolean positions,
|
||||||
|
boolean offsets, boolean payloads) throws IOException {
|
||||||
|
assert termCount == 0;
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
assert fieldStatus != Status.STARTED;
|
||||||
|
in.startField(info, numTerms, positions, offsets, payloads);
|
||||||
|
fieldStatus = Status.STARTED;
|
||||||
|
termCount = numTerms;
|
||||||
|
hasPositions = positions || offsets || payloads;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishField() throws IOException {
|
||||||
|
assert termCount == 0;
|
||||||
|
assert fieldStatus == Status.STARTED;
|
||||||
|
in.finishField();
|
||||||
|
fieldStatus = Status.FINISHED;
|
||||||
|
--fieldCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startTerm(BytesRef term, int freq) throws IOException {
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
assert fieldStatus == Status.STARTED;
|
||||||
|
assert termStatus != Status.STARTED;
|
||||||
|
in.startTerm(term, freq);
|
||||||
|
termStatus = Status.STARTED;
|
||||||
|
positionCount = hasPositions ? freq : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finishTerm() throws IOException {
|
||||||
|
assert positionCount == 0;
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
assert fieldStatus == Status.STARTED;
|
||||||
|
assert termStatus == Status.STARTED;
|
||||||
|
in.finishTerm();
|
||||||
|
termStatus = Status.FINISHED;
|
||||||
|
--termCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addPosition(int position, int startOffset, int endOffset,
|
||||||
|
BytesRef payload) throws IOException {
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
assert fieldStatus == Status.STARTED;
|
||||||
|
assert termStatus == Status.STARTED;
|
||||||
|
in.addPosition(position, startOffset, endOffset, payload);
|
||||||
|
--positionCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void abort() {
|
||||||
|
in.abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
||||||
|
assert docStatus == (numDocs > 0 ? Status.FINISHED : Status.UNDEFINED);
|
||||||
|
assert fieldStatus != Status.STARTED;
|
||||||
|
assert termStatus != Status.STARTED;
|
||||||
|
in.finish(fis, numDocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return in.getComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
in.close();
|
||||||
|
assert docStatus != Status.STARTED;
|
||||||
|
assert fieldStatus != Status.STARTED;
|
||||||
|
assert termStatus != Status.STARTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue