Store Compression: Term Vector, closes #2049.

This commit is contained in:
Shay Banon 2012-06-23 23:11:00 +02:00
parent ded5b773da
commit 2fb867b467
5 changed files with 44 additions and 16 deletions

View File

@ -206,9 +206,10 @@ public abstract class CompressedIndexInput extends IndexInput {
@Override @Override
public Object clone() { public Object clone() {
// we clone and we need to make sure we keep the same positions!
CompressedIndexInput cloned = (CompressedIndexInput) super.clone(); CompressedIndexInput cloned = (CompressedIndexInput) super.clone();
cloned.position = 0; cloned.uncompressed = new byte[uncompressed.length];
cloned.valid = 0; System.arraycopy(uncompressed, 0, cloned.uncompressed, 0, uncompressed.length);
cloned.in = (IndexInput) cloned.in.clone(); cloned.in = (IndexInput) cloned.in.clone();
return cloned; return cloned;
} }

View File

@ -66,8 +66,6 @@ public class LZFCompressedIndexInput extends CompressedIndexInput {
@Override @Override
public Object clone() { public Object clone() {
LZFCompressedIndexInput cloned = (LZFCompressedIndexInput) super.clone(); LZFCompressedIndexInput cloned = (LZFCompressedIndexInput) super.clone();
cloned.uncompressed = new byte[LZFChunk.MAX_CHUNK_LEN];
System.arraycopy(uncompressed, 0, cloned.uncompressed, 0, uncompressed.length);
cloned.inputBuffer = new byte[LZFChunk.MAX_CHUNK_LEN]; cloned.inputBuffer = new byte[LZFChunk.MAX_CHUNK_LEN];
return cloned; return cloned;
} }

View File

@ -57,17 +57,23 @@ public class Store extends AbstractIndexShardComponent {
static { static {
IndexMetaData.addDynamicSettings( IndexMetaData.addDynamicSettings(
"index.store.compress.stored" "index.store.compress.stored",
"index.store.compress.tv"
); );
} }
class ApplySettings implements IndexSettingsService.Listener { class ApplySettings implements IndexSettingsService.Listener {
@Override @Override
public void onRefreshSettings(Settings settings) { public void onRefreshSettings(Settings settings) {
boolean compressedStoredFields = settings.getAsBoolean("index.store.compress.stored", Store.this.compressedStoredFields); boolean compressStored = settings.getAsBoolean("index.store.compress.stored", Store.this.compressStored);
if (compressedStoredFields != Store.this.compressedStoredFields) { if (compressStored != Store.this.compressStored) {
logger.info("updating [index.store.compress.stored] from [{}] to [{}]", Store.this.compressedStoredFields, compressedStoredFields); logger.info("updating [index.store.compress.stored] from [{}] to [{}]", Store.this.compressStored, compressStored);
Store.this.compressedStoredFields = compressedStoredFields; Store.this.compressStored = compressStored;
}
boolean compressTv = settings.getAsBoolean("index.store.compress.tv", Store.this.compressTv);
if (compressTv != Store.this.compressTv) {
logger.info("updating [index.store.compress.tv] from [{}] to [{}]", Store.this.compressTv, compressTv);
Store.this.compressTv = compressTv;
} }
} }
} }
@ -95,7 +101,8 @@ public class Store extends AbstractIndexShardComponent {
private final boolean sync; private final boolean sync;
private volatile boolean compressedStoredFields; private volatile boolean compressStored;
private volatile boolean compressTv;
private final ApplySettings applySettings = new ApplySettings(); private final ApplySettings applySettings = new ApplySettings();
@ -109,9 +116,10 @@ public class Store extends AbstractIndexShardComponent {
this.sync = componentSettings.getAsBoolean("sync", true); // TODO we don't really need to fsync when using shared gateway... this.sync = componentSettings.getAsBoolean("sync", true); // TODO we don't really need to fsync when using shared gateway...
this.directory = new StoreDirectory(directoryService.build()); this.directory = new StoreDirectory(directoryService.build());
this.compressedStoredFields = componentSettings.getAsBoolean("compress.stored", false); this.compressStored = componentSettings.getAsBoolean("compress.stored", false);
this.compressTv = componentSettings.getAsBoolean("compress.tv", false);
logger.debug("using compress.stored [{}]", compressedStoredFields); logger.debug("using compress.stored [{}], compress.tv [{}]", compressStored, compressTv);
indexSettingsService.addListener(applySettings); indexSettingsService.addListener(applySettings);
} }
@ -480,7 +488,7 @@ public class Store extends AbstractIndexShardComponent {
computeChecksum = false; computeChecksum = false;
} }
} }
if (!raw && compressedStoredFields && name.endsWith(".fdt")) { if (!raw && ((compressStored && name.endsWith(".fdt")) || (compressTv && name.endsWith(".tvf")))) {
if (computeChecksum) { if (computeChecksum) {
// with compression, there is no need for buffering when doing checksums // with compression, there is no need for buffering when doing checksums
// since we have buffering on the compressed index output // since we have buffering on the compressed index output
@ -503,7 +511,7 @@ public class Store extends AbstractIndexShardComponent {
throw new FileNotFoundException(name); throw new FileNotFoundException(name);
} }
IndexInput in = metaData.directory().openInput(name); IndexInput in = metaData.directory().openInput(name);
if (name.endsWith(".fdt")) { if (name.endsWith(".fdt") || name.endsWith(".tvf")) {
Compressor compressor = CompressorFactory.compressor(in); Compressor compressor = CompressorFactory.compressor(in);
if (compressor != null) { if (compressor != null) {
in = compressor.indexInput(in); in = compressor.indexInput(in);
@ -519,7 +527,7 @@ public class Store extends AbstractIndexShardComponent {
throw new FileNotFoundException(name); throw new FileNotFoundException(name);
} }
IndexInput in = metaData.directory().openInput(name, bufferSize); IndexInput in = metaData.directory().openInput(name, bufferSize);
if (name.endsWith(".fdt")) { if (name.endsWith(".fdt") || name.endsWith(".tvf")) {
Compressor compressor = CompressorFactory.compressor(in); Compressor compressor = CompressorFactory.compressor(in);
if (compressor != null) { if (compressor != null) {
in = compressor.indexInput(in); in = compressor.indexInput(in);

View File

@ -45,6 +45,7 @@ public class LuceneCompressionBenchmark {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final long MAX_SIZE = ByteSizeValue.parseBytesSizeValue("50mb").bytes(); final long MAX_SIZE = ByteSizeValue.parseBytesSizeValue("50mb").bytes();
final boolean WITH_TV = true;
final Compressor compressor = CompressorFactory.defaultCompressor(); final Compressor compressor = CompressorFactory.defaultCompressor();
@ -61,6 +62,9 @@ public class LuceneCompressionBenchmark {
if (name.endsWith(".fdt")) { if (name.endsWith(".fdt")) {
return compressor.indexOutput(super.createOutput(name)); return compressor.indexOutput(super.createOutput(name));
} }
if (WITH_TV && name.endsWith(".tvf")) {
return compressor.indexOutput(super.createOutput(name));
}
return super.createOutput(name); return super.createOutput(name);
} }
@ -75,12 +79,21 @@ public class LuceneCompressionBenchmark {
return in; return in;
} }
} }
if (WITH_TV && name.endsWith(".tvf")) {
IndexInput in = super.openInput(name);
Compressor compressor1 = CompressorFactory.compressor(in);
if (compressor1 != null) {
return compressor1.indexInput(in);
} else {
return in;
}
}
return super.openInput(name); return super.openInput(name);
} }
@Override @Override
public IndexInput openInput(String name, int bufferSize) throws IOException { public IndexInput openInput(String name, int bufferSize) throws IOException {
if (name.endsWith(".fdt")) { if (name.endsWith(".fdt") || name.endsWith(".tvf")) {
IndexInput in = super.openInput(name, bufferSize); IndexInput in = super.openInput(name, bufferSize);
// in case the override called openInput(String) // in case the override called openInput(String)
if (in instanceof CompressedIndexInput) { if (in instanceof CompressedIndexInput) {
@ -108,6 +121,10 @@ public class LuceneCompressionBenchmark {
builder.close(); builder.close();
Document doc = new Document(); Document doc = new Document();
doc.add(new Field("_source", builder.underlyingBytes(), 0, builder.underlyingBytesLength())); doc.add(new Field("_source", builder.underlyingBytes(), 0, builder.underlyingBytesLength()));
if (WITH_TV) {
Field field = new Field("text", builder.string(), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(field);
}
uncompressedWriter.addDocument(doc); uncompressedWriter.addDocument(doc);
compressedWriter.addDocument(doc); compressedWriter.addDocument(doc);
} }

View File

@ -69,6 +69,10 @@ public class TestData {
return true; return true;
} }
public String currentText() {
return text;
}
/** /**
*/ */
public XContentBuilder current(XContentBuilder builder) throws Exception { public XContentBuilder current(XContentBuilder builder) throws Exception {