LUCENE-5342: Fixed bulk-merge issue in CompressingStoredFieldsFormat

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1542311 13f79535-47bb-0310-9956-ffa450edef68
commit 678b1fa018
parent 2360b51bd6
Author: Adrien Grand
Date:   2013-11-15 16:21:24 +00:00
3 changed files with 12 additions and 3 deletions

lucene/CHANGES.txt

```diff
@@ -216,6 +216,10 @@ Bug Fixes
   deleted at a later point in time. This could cause short-term disk
   pollution or OOM if in-memory directories are used.  (Simon Willnauer)
 
+* LUCENE-5342: Fixed bulk-merge issue in CompressingStoredFieldsFormat which
+  created corrupted segments when mixing chunk sizes.
+  Lucene41StoredFieldsFormat is not impacted. (Adrien Grand, Robert Muir)
+
 API Changes
 
 * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible
```
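
To make the "mixing chunk sizes" scenario concrete, here is a minimal, hypothetical sketch (not part of this commit) of how segments with differing chunk sizes come about: a custom codec that overrides only the stored-fields format. `FilterCodec`, `CompressingStoredFieldsFormat`, and `CompressionMode.FAST` are real 4.x APIs; the class name, format names, and the 1 KB chunk size are invented for illustration.

```java
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;

// Hypothetical codec differing from the default only in stored-fields chunk size
// (1 KB here vs. the 16 KB used by Lucene41StoredFieldsFormat). Merging segments
// written with this codec into an index whose writer uses the default chunk size
// is exactly the "mixing chunk sizes" case the fix guards against.
public class SmallChunkCodec extends FilterCodec {

  private final StoredFieldsFormat storedFields =
      new CompressingStoredFieldsFormat("SmallChunkStoredFields", CompressionMode.FAST, 1 << 10);

  public SmallChunkCodec() {
    super("SmallChunkCodec", Codec.getDefault());
  }

  @Override
  public StoredFieldsFormat storedFieldsFormat() {
    return storedFields;
  }
}
```

(Reading such segments back additionally requires registering the codec with Java's SPI under `META-INF/services/org.apache.lucene.codecs.Codec`.)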

lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java

```diff
@@ -373,6 +373,10 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     return compressionMode;
   }
 
+  int getChunkSize() {
+    return chunkSize;
+  }
+
   ChunkIterator chunkIterator(int startDocID) throws IOException {
     ensureOpen();
     fieldsStream.seek(indexReader.getStartPointer(startDocID));
```
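
This new package-private accessor exists so that `CompressingStoredFieldsWriter.merge` (next file) can compare the incoming segment's chunk size against the writer's own before attempting a bulk copy of compressed chunks.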

lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java

```diff
@@ -337,7 +337,9 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       final Bits liveDocs = reader.getLiveDocs();
 
       if (matchingFieldsReader == null
-          || matchingFieldsReader.getVersion() != VERSION_CURRENT) { // means reader version is not the same as the writer version
+          || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version
+          || matchingFieldsReader.getCompressionMode() != compressionMode
+          || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size
         // naive merge...
         for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
           StoredDocument doc = reader.document(i);
@@ -362,8 +364,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
             startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
           }
 
-          if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
-              && numBufferedDocs == 0 // starting a new chunk
+          if (numBufferedDocs == 0 // starting a new chunk
               && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
               && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
               && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
```
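
For intuition about the "small enough" / "large enough" conditions, here is a standalone sketch with invented numbers (none of this is from the patch): it rebuilds the start offsets the same way the loop above does and evaluates the two bounds against a 16 KB chunk size.

```java
public class ChunkPredicateDemo {
  public static void main(String[] args) {
    final int chunkSize = 1 << 14;               // the merging writer's chunk size (16384 bytes)
    final int[] lengths = {4000, 6000, 7000};    // invented per-document byte lengths of a source chunk
    final int chunkDocs = lengths.length;

    // Cumulative start offsets, rebuilt as in the loop preceding the if statement.
    final int[] startOffsets = new int[chunkDocs];
    for (int i = 1; i < chunkDocs; ++i) {
      startOffsets[i] = startOffsets[i - 1] + lengths[i - 1];
    }

    // "small enough": the last document starts below the threshold, so a writer
    // with this chunk size would still have been buffering when it was added.
    boolean smallEnough = startOffsets[chunkDocs - 1] < chunkSize;                 // 10000 < 16384

    // "large enough": the chunk ends at or past the threshold, i.e. the source
    // writer flushed it because it filled up, not early (e.g. on commit).
    boolean largeEnough =
        startOffsets[chunkDocs - 1] + lengths[chunkDocs - 1] >= chunkSize;         // 17000 >= 16384

    System.out.println(smallEnough && largeEnough);                                // prints: true
  }
}
```

When these bounds hold, together with `numBufferedDocs == 0` and no deletions in the chunk, the merge can copy the chunk's compressed bytes to the output without decompressing and recompressing them; the version, compression-mode, and chunk-size checks added in the first hunk are what make that copy safe.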