LUCENE-5342: Fixed bulk-merge issue in CompressingStoredFieldsFormat

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1542311 13f79535-47bb-0310-9956-ffa450edef68
Adrien Grand 2013-11-15 16:21:24 +00:00
parent 2360b51bd6
commit 678b1fa018
3 changed files with 12 additions and 3 deletions

lucene/CHANGES.txt

@@ -216,6 +216,10 @@ Bug Fixes
   deleted at a later point in time. This could cause short-term disk
   pollution or OOM if in-memory directories are used. (Simon Willnauer)
 
+* LUCENE-5342: Fixed bulk-merge issue in CompressingStoredFieldsFormat which
+  created corrupted segments when mixing chunk sizes.
+  Lucene41StoredFieldsFormat is not impacted. (Adrien Grand, Robert Muir)
+
 API Changes
 
 * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible

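For context on the CHANGES entry above: "mixing chunk sizes" refers to merging segments whose stored fields were written by CompressingStoredFieldsFormat instances configured with different chunkSize values, for example segments pulled in via IndexWriter.addIndexes or written under a reconfigured custom codec. The sketch below is not part of this commit; ChunkCodec and every name in it are hypothetical, and a real setup would also need the codec registered via SPI so that its segments can be read back.

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;

// Hypothetical codec: identical to the default codec except that stored
// fields are written by CompressingStoredFieldsFormat with a caller-chosen
// chunk size. Two instances with different chunk sizes produce segments
// whose stored-fields chunks are laid out differently; before this fix,
// bulk-merging such segments could raw-copy chunks whose layout does not
// match the merging writer's chunk size, yielding corrupted segments.
public class ChunkCodec extends FilterCodec {

  private final int chunkSize;

  public ChunkCodec(String name, int chunkSize) {
    super(name, Codec.getDefault()); // delegate everything else to the default codec
    this.chunkSize = chunkSize;
  }

  @Override
  public StoredFieldsFormat storedFieldsFormat() {
    return new CompressingStoredFieldsFormat("IllustrativeStoredFields",
        CompressionMode.FAST, chunkSize);
  }
}

With such a setup, one writer session might use new ChunkCodec("SmallChunks", 1 << 14) while another session or an addIndexes source uses new ChunkCodec("BigChunks", 1 << 18); merging the resulting segments exercises the writer code path changed below. Lucene41StoredFieldsFormat hard-wires a single chunk size, which is why the entry notes it is not impacted.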
CompressingStoredFieldsReader.java

@@ -373,6 +373,10 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     return compressionMode;
   }
 
+  int getChunkSize() {
+    return chunkSize;
+  }
+
   ChunkIterator chunkIterator(int startDocID) throws IOException {
     ensureOpen();
     fieldsStream.seek(indexReader.getStartPointer(startDocID));

CompressingStoredFieldsWriter.java

@@ -337,7 +337,9 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       final Bits liveDocs = reader.getLiveDocs();
 
       if (matchingFieldsReader == null
-          || matchingFieldsReader.getVersion() != VERSION_CURRENT) { // means reader version is not the same as the writer version
+          || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version
+          || matchingFieldsReader.getCompressionMode() != compressionMode
+          || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size
         // naive merge...
         for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
           StoredDocument doc = reader.document(i);
@@ -362,8 +364,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
           startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
         }
 
-        if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
-            && numBufferedDocs == 0 // starting a new chunk
+        if (numBufferedDocs == 0 // starting a new chunk
             && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
             && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
             && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
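Taken together, the two hunks above move the chunk-size (and compression-mode) comparison out of the per-chunk bulk-copy test and into the up-front check that chooses between the naive and the optimized merge: if the incoming reader uses a different chunk size, none of its chunks can ever be copied as raw bytes, because the way data is decompressed depends on the chunk size. The package-private getChunkSize() accessor added to CompressingStoredFieldsReader above is what makes that writer-side comparison possible. Below is a standalone, simplified paraphrase of the resulting decision logic; the class and all names in it are illustrative, not Lucene API, and the authoritative version is CompressingStoredFieldsWriter.merge as patched above.

// Illustrative paraphrase of the merge decisions after this change.
final class BulkMergeDecisionSketch {

  /** Simplified stand-in for the state read from a matching segment's stored fields reader. */
  static final class MatchingReaderState {
    int version;
    Object compressionMode; // compression modes are compared by reference, as in the patch
    int chunkSize;
  }

  /** Must we fall back to the naive document-by-document merge? */
  static boolean needsNaiveMerge(MatchingReaderState matching, int writerVersion,
                                 Object writerCompressionMode, int writerChunkSize) {
    return matching == null
        || matching.version != writerVersion
        || matching.compressionMode != writerCompressionMode
        || matching.chunkSize != writerChunkSize; // decompression depends on the chunk size
  }

  /** On the optimized path, may this whole compressed chunk be copied as raw bytes? */
  static boolean canBulkCopyChunk(int numBufferedDocs, int[] startOffsets, int[] lengths,
                                  int chunkDocs, int chunkSize, boolean chunkHasDeletions) {
    final int last = chunkDocs - 1;
    return numBufferedDocs == 0                            // writer sits at a chunk boundary
        && startOffsets[last] < chunkSize                  // chunk is small enough
        && startOffsets[last] + lengths[last] >= chunkSize // ...yet filled up to the chunk size
        && !chunkHasDeletions;                             // every document in the chunk is live
  }
}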