diff --git a/test/framework/src/main/java/org/elasticsearch/test/CorruptionUtils.java b/test/framework/src/main/java/org/elasticsearch/test/CorruptionUtils.java index e33babb5eb6..c8b8de13d6c 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/CorruptionUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/test/CorruptionUtils.java @@ -78,24 +78,19 @@ public final class CorruptionUtils { checksumBeforeCorruption = CodecUtil.retrieveChecksum(input); } try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) { - // read - raf.position(random.nextInt((int) Math.min(Integer.MAX_VALUE, raf.size()))); - long filePointer = raf.position(); - ByteBuffer bb = ByteBuffer.wrap(new byte[1]); - raf.read(bb); - bb.flip(); + long maxPosition = raf.size(); - // corrupt - byte oldValue = bb.get(0); - byte newValue = (byte) (oldValue + 1); - bb.put(0, newValue); - - // rewrite - raf.position(filePointer); - raf.write(bb); - logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer, - Integer.toHexString(oldValue), Integer.toHexString(newValue), fileToCorrupt.getFileName()); + if (fileToCorrupt.getFileName().toString().endsWith(".cfs") && maxPosition > 4) { + // TODO: it is known that Lucene does not check the checksum of CFS file (CompoundFileS, like an archive) + // see note at https://github.com/elastic/elasticsearch/pull/33911 + // so far, don't corrupt crc32 part of checksum (last 4 bytes) of cfs file + // checksum is 8 bytes: first 4 bytes have to be zeros, while crc32 value is not verified + maxPosition -= 4; + } + final int position = random.nextInt((int) Math.min(Integer.MAX_VALUE, maxPosition)); + corruptAt(fileToCorrupt, raf, position); } + long checksumAfterCorruption; long actualChecksumAfterCorruption; try (ChecksumIndexInput input = dir.openChecksumInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) { @@ -120,5 +115,25 @@ public final class 
CorruptionUtils { } } + static void corruptAt(Path path, FileChannel channel, int position) throws IOException { + // read + channel.position(position); + long filePointer = channel.position(); + ByteBuffer bb = ByteBuffer.wrap(new byte[1]); + channel.read(bb); + bb.flip(); + + // corrupt + byte oldValue = bb.get(0); + byte newValue = (byte) (oldValue + 1); + bb.put(0, newValue); + + // rewrite + channel.position(filePointer); + channel.write(bb); + logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer, + Integer.toHexString(oldValue), Integer.toHexString(newValue), path.getFileName()); + } + } diff --git a/test/framework/src/test/java/org/elasticsearch/test/CorruptionUtilsTests.java b/test/framework/src/test/java/org/elasticsearch/test/CorruptionUtilsTests.java new file mode 100644 index 00000000000..2624858c3aa --- /dev/null +++ b/test/framework/src/test/java/org/elasticsearch/test/CorruptionUtilsTests.java @@ -0,0 +1,81 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.test; + +import org.apache.lucene.index.CheckIndex; +import org.apache.lucene.store.SimpleFSDirectory; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.index.shard.IndexShardTestCase; +import org.elasticsearch.index.shard.ShardPath; + +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.stream.Stream; + +import static org.elasticsearch.test.CorruptionUtils.corruptAt; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThan; + +public class CorruptionUtilsTests extends IndexShardTestCase { + + /** + * There is a dependency on Lucene bug fix + * https://github.com/elastic/elasticsearch/pull/33911 + */ + public void testLuceneCheckIndexIgnoresLast4Bytes() throws Exception { + final IndexShard indexShard = newStartedShard(true); + + final long numDocs = between(10, 100); + for (long i = 0; i < numDocs; i++) { + indexDoc(indexShard, "_doc", Long.toString(i), "{}"); + } + indexShard.flush(new FlushRequest()); + closeShards(indexShard); + + final ShardPath shardPath = indexShard.shardPath(); + + final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); + + final Path cfsFile; + try (Stream<Path> paths = Files.walk(indexPath)) { + cfsFile = paths.filter(p -> p.getFileName().toString().endsWith(".cfs")).findFirst() + .orElseThrow(() -> new IllegalStateException("CFS file has to be there")); + } + + try (FileChannel raf = FileChannel.open(cfsFile, StandardOpenOption.READ, StandardOpenOption.WRITE)) { + assertThat(raf.size(), lessThan(Integer.MAX_VALUE * 1L)); + final int maxPosition = (int) raf.size(); + // corrupt only last 4 bytes! 
+ final int position = randomIntBetween(maxPosition - 4, maxPosition - 1); + corruptAt(cfsFile, raf, position); + } + + final CheckIndex.Status status; + try (CheckIndex checkIndex = new CheckIndex(new SimpleFSDirectory(indexPath))) { + status = checkIndex.checkIndex(); + } + + assertThat("That's a good news! " + + "Lucene now validates CRC32 of CFS file: time to drop workaround at CorruptionUtils (and this test)", + status.clean, equalTo(true)); + } +}