From 678b1fa018c85491bcf543a0b5b524dfc73d13cf Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Fri, 15 Nov 2013 16:21:24 +0000
Subject: [PATCH] LUCENE-5342: Fixed bulk-merge issue in
 CompressingStoredFieldsFormat

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1542311 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                                         | 4 ++++
 .../codecs/compressing/CompressingStoredFieldsReader.java  | 4 ++++
 .../codecs/compressing/CompressingStoredFieldsWriter.java  | 7 ++++---
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1aafbf7050a..ce7c0741b16 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -216,6 +216,10 @@ Bug Fixes
   deleted at a later point in time. This could cause short-term disk
   pollution or OOM if in-memory directories are used. (Simon Willnauer)
 
+* LUCENE-5342: Fixed bulk-merge issue in CompressingStoredFieldsFormat which
+  created corrupted segments when mixing chunk sizes.
+  Lucene41StoredFieldsFormat is not impacted. (Adrien Grand, Robert Muir)
+
 API Changes
 
 * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
index e721efd657d..588c0f5455e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
@@ -373,6 +373,10 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     return compressionMode;
   }
 
+  int getChunkSize() {
+    return chunkSize;
+  }
+
   ChunkIterator chunkIterator(int startDocID) throws IOException {
     ensureOpen();
     fieldsStream.seek(indexReader.getStartPointer(startDocID));

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
index f01605065d1..35f829daa26 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
@@ -337,7 +337,9 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       final Bits liveDocs = reader.getLiveDocs();
 
       if (matchingFieldsReader == null
-          || matchingFieldsReader.getVersion() != VERSION_CURRENT) { // means reader version is not the same as the writer version
+          || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version
+          || matchingFieldsReader.getCompressionMode() != compressionMode
+          || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size
         // naive merge...
         for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
           StoredDocument doc = reader.document(i);
@@ -362,8 +364,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
           startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
         }
 
-        if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
-            && numBufferedDocs == 0 // starting a new chunk
+        if (numBufferedDocs == 0 // starting a new chunk
             && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
             && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
             && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
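
Illustration (not part of the patch): the sketch below shows one way segments with
different chunk sizes can end up in the same index, which is the situation the
patched guard above now detects. It wraps the default codec twice, exposing
CompressingStoredFieldsFormat with two different chunk sizes, then force-merges.
The class names, format names, analyzer, and chunk sizes are illustrative, not
part of this change; for the merged index to be readable back by name, each demo
codec would also have to be registered through Codec's SPI
(META-INF/services/org.apache.lucene.codecs.Codec).

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class MixedChunkSizeDemo {

  // A codec whose stored fields are compressed with the given chunk size.
  // Codec/format names here are hypothetical demo names.
  static Codec codecWithChunkSize(final String name, final int chunkSize) {
    return new FilterCodec(name, new Lucene46Codec()) {
      @Override
      public StoredFieldsFormat storedFieldsFormat() {
        return new CompressingStoredFieldsFormat(name, CompressionMode.FAST, chunkSize);
      }
    };
  }

  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // First writer: 1 KB chunks.
    IndexWriterConfig smallConf = new IndexWriterConfig(Version.LUCENE_46,
        new WhitespaceAnalyzer(Version.LUCENE_46));
    smallConf.setCodec(codecWithChunkSize("SmallChunks", 1 << 10));
    IndexWriter w = new IndexWriter(dir, smallConf);
    for (int i = 0; i < 1000; ++i) {
      Document doc = new Document();
      doc.add(new StoredField("body", "document " + i));
      w.addDocument(doc);
    }
    w.close(); // leaves segments whose compressed chunks are 1 KB

    // Second writer: 16 KB chunks, same compression mode.
    IndexWriterConfig largeConf = new IndexWriterConfig(Version.LUCENE_46,
        new WhitespaceAnalyzer(Version.LUCENE_46));
    largeConf.setCodec(codecWithChunkSize("LargeChunks", 1 << 14));
    w = new IndexWriter(dir, largeConf);
    // The merge now combines chunks written with two different chunk sizes.
    // Before this fix, the bulk path could copy the 1 KB chunks verbatim into
    // the 16 KB writer and corrupt the merged segment; with the fix, the
    // writer falls back to the naive document-by-document merge instead.
    w.forceMerge(1);
    w.close();
  }
}

Lucene41StoredFieldsFormat always configures CompressingStoredFieldsFormat with
the same compression mode and a fixed chunk size, so a matching reader can never
disagree with the writer's configuration; that is why the CHANGES entry notes it
is not impacted.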