Internal: Improve checksum process by bulk writing them into a single file instead of checksum file per index file, closes #747.

This commit is contained in:
kimchy 2011-03-04 04:22:47 +02:00
parent b629d36d8b
commit 4b92928c77
3 changed files with 107 additions and 40 deletions

View File

@ -25,6 +25,7 @@
<w>charfilter</w> <w>charfilter</w>
<w>charsets</w> <w>charsets</w>
<w>checksum</w> <w>checksum</w>
<w>checksums</w>
<w>chunking</w> <w>chunking</w>
<w>closeable</w> <w>closeable</w>
<w>cloudfiles</w> <w>cloudfiles</w>

View File

@ -20,8 +20,6 @@
package org.elasticsearch.index.store.support; package org.elasticsearch.index.store.support;
import org.apache.lucene.store.*; import org.apache.lucene.store.*;
import org.elasticsearch.common.Digest;
import org.elasticsearch.common.Hex;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.Unicode; import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.collect.ImmutableMap; import org.elasticsearch.common.collect.ImmutableMap;
@ -38,8 +36,10 @@ import org.elasticsearch.index.store.StoreFileMetaData;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.security.MessageDigest; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
/** /**
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
@ -101,6 +101,59 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
return Directories.estimateSize(directory()); return Directories.estimateSize(directory());
} }
public static Map<String, String> readChecksums(Directory dir) throws IOException {
long lastFound = -1;
for (String name : dir.listAll()) {
if (!name.startsWith("_checksums-")) {
continue;
}
long current = Long.parseLong(name.substring("_checksums-".length()));
if (current > lastFound) {
lastFound = current;
}
}
if (lastFound == -1) {
return ImmutableMap.of();
}
IndexInput indexInput = dir.openInput("_checksums-" + lastFound);
try {
indexInput.readInt(); // version
return indexInput.readStringStringMap();
} finally {
indexInput.close();
}
}
public void writeChecksums() throws IOException {
writeChecksums((StoreDirectory) directory());
}
private void writeChecksums(StoreDirectory dir) throws IOException {
String checksumName = "_checksums-" + System.currentTimeMillis();
ImmutableMap<String, StoreFileMetaData> files = list();
synchronized (mutex) {
Map<String, String> checksums = new HashMap<String, String>();
for (StoreFileMetaData metaData : files.values()) {
if (metaData.checksum() != null) {
checksums.put(metaData.name(), metaData.checksum());
}
}
IndexOutput output = dir.createOutput(checksumName, false);
output.writeInt(0); // version
output.writeStringStringMap(checksums);
output.close();
}
for (StoreFileMetaData metaData : files.values()) {
if (metaData.name().startsWith("_checksums") && !checksumName.equals(metaData.name())) {
try {
directory().deleteFile(metaData.name());
} catch (Exception e) {
// ignore
}
}
}
}
/** /**
* Returns <tt>true</tt> by default. * Returns <tt>true</tt> by default.
*/ */
@ -117,16 +170,12 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
} }
@Override public void writeChecksum(String name, String checksum) throws IOException { @Override public void writeChecksum(String name, String checksum) throws IOException {
// write the checksum (using the delegate, so we won't checksum this one as well...) // update the metadata to include the checksum and write a new checksums file
IndexOutput checkSumOutput = ((StoreDirectory) directory()).delegate().createOutput(name + ".cks");
byte[] checksumBytes = Unicode.fromStringAsBytes(checksum);
checkSumOutput.writeBytes(checksumBytes, checksumBytes.length);
checkSumOutput.close();
// update the metadata to include the checksum
synchronized (mutex) { synchronized (mutex) {
StoreFileMetaData metaData = filesMetadata.get(name); StoreFileMetaData metaData = filesMetadata.get(name);
metaData = new StoreFileMetaData(metaData.name(), metaData.length(), metaData.lastModified(), checksum); metaData = new StoreFileMetaData(metaData.name(), metaData.length(), metaData.lastModified(), checksum);
filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap(); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap();
writeChecksums();
} }
} }
@ -140,23 +189,28 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
StoreDirectory(Directory delegate) throws IOException { StoreDirectory(Directory delegate) throws IOException {
this.delegate = delegate; this.delegate = delegate;
synchronized (mutex) { synchronized (mutex) {
Map<String, String> checksums = readChecksums(delegate);
MapBuilder<String, StoreFileMetaData> builder = MapBuilder.newMapBuilder(); MapBuilder<String, StoreFileMetaData> builder = MapBuilder.newMapBuilder();
for (String file : delegate.listAll()) { for (String file : delegate.listAll()) {
// BACKWARD CKS SUPPORT
if (file.endsWith(".cks")) { // ignore checksum files here if (file.endsWith(".cks")) { // ignore checksum files here
continue; continue;
} }
// try and load the checksum for the file String checksum = checksums.get(file);
String checksum = null;
if (delegate.fileExists(file + ".cks")) { // BACKWARD CKS SUPPORT
IndexInput indexInput = delegate.openInput(file + ".cks"); if (checksum == null) {
try { if (delegate.fileExists(file + ".cks")) {
if (indexInput.length() > 0) { IndexInput indexInput = delegate.openInput(file + ".cks");
byte[] checksumBytes = new byte[(int) indexInput.length()]; try {
indexInput.readBytes(checksumBytes, 0, checksumBytes.length, false); if (indexInput.length() > 0) {
checksum = Unicode.fromBytes(checksumBytes); byte[] checksumBytes = new byte[(int) indexInput.length()];
indexInput.readBytes(checksumBytes, 0, checksumBytes.length, false);
checksum = Unicode.fromBytes(checksumBytes);
}
} finally {
indexInput.close();
} }
} finally {
indexInput.close();
} }
} }
builder.put(file, new StoreFileMetaData(file, delegate.fileLength(file), delegate.fileModified(file), checksum)); builder.put(file, new StoreFileMetaData(file, delegate.fileLength(file), delegate.fileModified(file), checksum));
@ -203,11 +257,6 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
@Override public void deleteFile(String name) throws IOException { @Override public void deleteFile(String name) throws IOException {
delegate.deleteFile(name); delegate.deleteFile(name);
try {
delegate.deleteFile(name + ".cks");
} catch (Exception e) {
// ignore
}
synchronized (mutex) { synchronized (mutex) {
filesMetadata = MapBuilder.newMapBuilder(filesMetadata).remove(name).immutableMap(); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).remove(name).immutableMap();
files = filesMetadata.keySet().toArray(new String[filesMetadata.size()]); files = filesMetadata.keySet().toArray(new String[filesMetadata.size()]);
@ -232,14 +281,6 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
public IndexOutput createOutput(String name, boolean computeChecksum) throws IOException { public IndexOutput createOutput(String name, boolean computeChecksum) throws IOException {
IndexOutput out = delegate.createOutput(name); IndexOutput out = delegate.createOutput(name);
// delete the relevant cks file for an existing file, if exists
if (filesMetadata.containsKey(name)) {
try {
delegate.deleteFile(name + ".cks");
} catch (Exception e) {
// ignore
}
}
synchronized (mutex) { synchronized (mutex) {
StoreFileMetaData metaData = new StoreFileMetaData(name, -1, -1, null); StoreFileMetaData metaData = new StoreFileMetaData(name, -1, -1, null);
filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap(); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap();
@ -288,6 +329,10 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
if (sync) { if (sync) {
delegate.sync(name); delegate.sync(name);
} }
// write the checksums file when we sync on the segments file (committed)
if (!name.equals("segments.gen") && name.startsWith("segments")) {
writeChecksums();
}
} }
@Override public void forceSync(String name) throws IOException { @Override public void forceSync(String name) throws IOException {
@ -301,7 +346,7 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
private final String name; private final String name;
private final MessageDigest digest; private final Checksum digest;
StoreIndexOutput(IndexOutput delegate, String name, boolean computeChecksum) { StoreIndexOutput(IndexOutput delegate, String name, boolean computeChecksum) {
this.delegate = delegate; this.delegate = delegate;
@ -315,7 +360,7 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
// and since we, in any case, always recover the segments files // and since we, in any case, always recover the segments files
this.digest = null; this.digest = null;
} else { } else {
this.digest = Digest.getMd5Digest(); this.digest = new CRC32();
} }
} else { } else {
this.digest = null; this.digest = null;
@ -326,11 +371,7 @@ public abstract class AbstractStore extends AbstractIndexShardComponent implemen
delegate.close(); delegate.close();
String checksum = null; String checksum = null;
if (digest != null) { if (digest != null) {
checksum = Hex.encodeHexString(digest.digest()); checksum = Long.toString(digest.getValue(), Character.MAX_RADIX);
IndexOutput checkSumOutput = ((StoreDirectory) directory()).delegate().createOutput(name + ".cks");
byte[] checksumBytes = Unicode.fromStringAsBytes(checksum);
checkSumOutput.writeBytes(checksumBytes, checksumBytes.length);
checkSumOutput.close();
} }
synchronized (mutex) { synchronized (mutex) {
StoreFileMetaData md = new StoreFileMetaData(name, directory().fileLength(name), directory().fileModified(name), checksum); StoreFileMetaData md = new StoreFileMetaData(name, directory().fileLength(name), directory().fileModified(name), checksum);

View File

@ -19,6 +19,7 @@
package org.elasticsearch.indices.store; package org.elasticsearch.indices.store;
import org.apache.lucene.store.FSDirectory;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.FailedNodeException;
@ -44,6 +45,7 @@ import org.elasticsearch.index.service.IndexService;
import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.service.InternalIndexShard; import org.elasticsearch.index.shard.service.InternalIndexShard;
import org.elasticsearch.index.store.StoreFileMetaData; import org.elasticsearch.index.store.StoreFileMetaData;
import org.elasticsearch.index.store.support.AbstractStore;
import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.TransportService;
@ -162,10 +164,32 @@ public class TransportNodesListShardStoreMetaData extends TransportNodesOperatio
return new StoreFilesMetaData(false, shardId, ImmutableMap.<String, StoreFileMetaData>of()); return new StoreFilesMetaData(false, shardId, ImmutableMap.<String, StoreFileMetaData>of());
} }
Map<String, StoreFileMetaData> files = Maps.newHashMap(); Map<String, StoreFileMetaData> files = Maps.newHashMap();
// read the checksums file
FSDirectory directory = FSDirectory.open(indexFile);
try {
Map<String, String> checksums = AbstractStore.readChecksums(directory);
for (File file : indexFile.listFiles()) {
// BACKWARD CKS SUPPORT
if (file.getName().endsWith(".cks")) {
continue;
}
if (file.getName().startsWith("_checksums")) {
continue;
}
files.put(file.getName(), new StoreFileMetaData(file.getName(), file.length(), file.lastModified(), checksums.get(file.getName())));
}
} finally {
directory.close();
}
// BACKWARD CKS SUPPORT
for (File file : indexFile.listFiles()) { for (File file : indexFile.listFiles()) {
if (file.getName().endsWith(".cks")) { if (file.getName().endsWith(".cks")) {
continue; continue;
} }
if (file.getName().startsWith("_checksums")) {
continue;
}
// try and load the checksum // try and load the checksum
String checksum = null; String checksum = null;
File checksumFile = new File(file.getParentFile(), file.getName() + ".cks"); File checksumFile = new File(file.getParentFile(), file.getName() + ".cks");
@ -177,6 +201,7 @@ public class TransportNodesListShardStoreMetaData extends TransportNodesOperatio
} }
files.put(file.getName(), new StoreFileMetaData(file.getName(), file.length(), file.lastModified(), checksum)); files.put(file.getName(), new StoreFileMetaData(file.getName(), file.length(), file.lastModified(), checksum));
} }
return new StoreFilesMetaData(false, shardId, files); return new StoreFilesMetaData(false, shardId, files);
} }