From fbf4c70af9f021d4733123a4081e71b5ae860234 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 19 Jun 2012 13:15:44 +0200 Subject: [PATCH] add simple compression bench --- .../io/stream/OutputStreamStreamOutput.java | 59 ++++++++++++ .../compress/PureCompressionBenchmark.java | 93 +++++++++++++++++++ .../benchmark/compress/TestData.java | 83 +++++++++++++++++ 3 files changed, 235 insertions(+) create mode 100644 src/main/java/org/elasticsearch/common/io/stream/OutputStreamStreamOutput.java create mode 100644 src/test/java/org/elasticsearch/benchmark/compress/PureCompressionBenchmark.java create mode 100644 src/test/java/org/elasticsearch/benchmark/compress/TestData.java diff --git a/src/main/java/org/elasticsearch/common/io/stream/OutputStreamStreamOutput.java b/src/main/java/org/elasticsearch/common/io/stream/OutputStreamStreamOutput.java new file mode 100644 index 00000000000..26240bb76c5 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/io/stream/OutputStreamStreamOutput.java @@ -0,0 +1,59 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.common.io.stream;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * A {@link StreamOutput} that forwards everything written to it to a wrapped
+ * {@link OutputStream}.
+ */
+public class OutputStreamStreamOutput extends StreamOutput {
+
+    /** Destination of every forwarded call. */
+    private final OutputStream delegate;
+
+    /**
+     * @param out the stream that receives all bytes written to this output
+     */
+    public OutputStreamStreamOutput(OutputStream out) {
+        delegate = out;
+    }
+
+    /** Passes a single byte through to the wrapped stream. */
+    @Override
+    public void writeByte(byte b) throws IOException {
+        delegate.write(b);
+    }
+
+    /** Passes {@code length} bytes of {@code b}, starting at {@code offset}, through to the wrapped stream. */
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) throws IOException {
+        delegate.write(b, offset, length);
+    }
+
+    /** Flushes the wrapped stream. */
+    @Override
+    public void flush() throws IOException {
+        delegate.flush();
+    }
+
+    /** Closes the wrapped stream. */
+    @Override
+    public void close() throws IOException {
+        delegate.close();
+    }
+
+    /**
+     * Not supported by this implementation.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void reset() throws IOException {
+        throw new UnsupportedOperationException();
+    }
+}
diff --git a/src/test/java/org/elasticsearch/benchmark/compress/PureCompressionBenchmark.java b/src/test/java/org/elasticsearch/benchmark/compress/PureCompressionBenchmark.java
new file mode 100644
index 00000000000..d2b32c0bc66
--- /dev/null
+++ b/src/test/java/org/elasticsearch/benchmark/compress/PureCompressionBenchmark.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.benchmark.compress;
+
+import org.elasticsearch.common.compress.Compressor;
+import org.elasticsearch.common.compress.CompressorFactory;
+import org.elasticsearch.common.io.FileSystemUtils;
+import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+
+/**
+ * Writes every {@link TestData} record, serialized once as JSON and once as
+ * SMILE, into six files under <code>target/test/compress/pure</code>:
+ * <ul>
+ * <li><code>raw_json</code> / <code>raw_smile</code>: the serialized bytes as they are</li>
+ * <li><code>compressed_json</code> / <code>compressed_smile</code>: all documents
+ * written through one compressing stream per format</li>
+ * <li><code>compressed_by_doc_json</code> / <code>compressed_by_doc_smile</code>:
+ * each document compressed on its own and the results appended</li>
+ * </ul>
+ * The run stops when the data is exhausted or {@link TestData#getTotalSize()}
+ * reaches 50mb. Nothing is measured or printed here; presumably the resulting
+ * file sizes are compared by hand afterwards.
+ */
+public class PureCompressionBenchmark {
+
+    /**
+     * Runs the benchmark. The output directory is deleted and recreated first.
+     *
+     * @param args ignored
+     * @throws IllegalStateException if the output directory cannot be created
+     * @throws Exception             if reading the test data or writing any file fails
+     */
+    public static void main(String[] args) throws Exception {
+
+        final long maxSize = ByteSizeValue.parseBytesSizeValue("50mb").bytes();
+
+        File testFile = new File("target/test/compress/pure");
+        FileSystemUtils.deleteRecursively(testFile);
+        // Fail with a clear message instead of a FileNotFoundException on the first file below.
+        if (!testFile.mkdirs() && !testFile.isDirectory()) {
+            throw new IllegalStateException("failed to create output directory [" + testFile.getAbsolutePath() + "]");
+        }
+
+        Compressor compressor = CompressorFactory.defaultCompressor();
+
+        FileOutputStream rawJson = null;
+        FileOutputStream rawSmile = null;
+        FileOutputStream compressedByDocJson = null;
+        FileOutputStream compressedByDocSmile = null;
+        StreamOutput compressedJson = null;
+        StreamOutput compressedSmile = null;
+
+        boolean success = false;
+        try {
+            rawJson = new FileOutputStream(new File(testFile, "raw_json"));
+            rawSmile = new FileOutputStream(new File(testFile, "raw_smile"));
+
+            compressedByDocJson = new FileOutputStream(new File(testFile, "compressed_by_doc_json"));
+            compressedByDocSmile = new FileOutputStream(new File(testFile, "compressed_by_doc_smile"));
+
+            compressedJson = compressor.streamOutput(new OutputStreamStreamOutput(new FileOutputStream(new File(testFile, "compressed_json"))));
+            compressedSmile = compressor.streamOutput(new OutputStreamStreamOutput(new FileOutputStream(new File(testFile, "compressed_smile"))));
+
+            TestData testData = new TestData();
+            while (testData.next() && testData.getTotalSize() < maxSize) {
+                writeDocument(testData, XContentFactory.jsonBuilder(), compressor, rawJson, compressedJson, compressedByDocJson);
+                writeDocument(testData, XContentFactory.smileBuilder(), compressor, rawSmile, compressedSmile, compressedByDocSmile);
+            }
+
+            // On the normal path close without swallowing anything, so that a
+            // failure while finishing an output file is reported.
+            rawJson.close();
+            rawSmile.close();
+            compressedJson.close();
+            compressedSmile.close();
+            compressedByDocJson.close();
+            compressedByDocSmile.close();
+            success = true;
+        } finally {
+            if (!success) {
+                // Something already failed and is propagating: release the file
+                // handles without letting a secondary close error replace it.
+                closeQuietly(rawJson);
+                closeQuietly(rawSmile);
+                closeQuietly(compressedJson);
+                closeQuietly(compressedSmile);
+                closeQuietly(compressedByDocJson);
+                closeQuietly(compressedByDocSmile);
+            }
+        }
+    }
+
+    /**
+     * Serializes the current record of {@code testData} with {@code builder} and
+     * appends the result to the three outputs of one format.
+     */
+    private static void writeDocument(TestData testData, XContentBuilder builder, Compressor compressor,
+                                      FileOutputStream raw, StreamOutput compressedStream,
+                                      FileOutputStream compressedByDoc) throws Exception {
+        try {
+            testData.current(builder);
+
+            byte[] bytes = builder.underlyingBytes();
+            int length = builder.underlyingBytesLength();
+
+            raw.write(bytes, 0, length);
+            compressedStream.write(bytes, 0, length);
+            compressedByDoc.write(compressor.compress(bytes, 0, length));
+        } finally {
+            builder.close();
+        }
+    }
+
+    /** Closes {@code out} if it was opened, ignoring any failure. Only for the error path. */
+    private static void closeQuietly(FileOutputStream out) {
+        if (out == null) {
+            return;
+        }
+        try {
+            out.close();
+        } catch (Exception ignored) {
+            // best effort; the exception that brought us here is the one to report
+        }
+    }
+
+    /** Closes {@code out} if it was opened, ignoring any failure. Only for the error path. */
+    private static void closeQuietly(StreamOutput out) {
+        if (out == null) {
+            return;
+        }
+        try {
+            out.close();
+        } catch (Exception ignored) {
+            // best effort; the exception that brought us here is the one to report
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/benchmark/compress/TestData.java b/src/test/java/org/elasticsearch/benchmark/compress/TestData.java
new file mode 100644
index 00000000000..d4749ffe8ab
--- /dev/null
+++ b/src/test/java/org/elasticsearch/benchmark/compress/TestData.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.benchmark.compress;
+
+import org.elasticsearch.common.compress.bzip2.CBZip2InputStream;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.util.Date;
+
+/**
+ * Cursor over the lines of a bzip2-compressed dump that is downloaded from
+ * <code>downloads.dbpedia.org</code> when the object is created.
+ * <p>
+ * Each line is cut at its first two spaces into an id, a type and the
+ * remaining text. Call {@link #next()} to advance, then
+ * {@link #current(XContentBuilder)} to render the record that was read.
+ */
+public class TestData implements Closeable {
+
+    private final BufferedReader reader;
+
+    /** The line most recently returned by the reader; {@code null} once the data is exhausted. */
+    private String line;
+    private String id;
+    private String type;
+    private String text;
+
+    /** Sum of {@link String#length()} over all records handed out so far (characters, not bytes). */
+    private long totalSize;
+
+    /**
+     * Opens the remote dump and prepares it for line-wise reading.
+     *
+     * @throws IOException if the download cannot be opened or does not start
+     *                     with the bzip2 magic bytes
+     */
+    public TestData() throws IOException {
+        URL url = new URL("http://downloads.dbpedia.org/3.0/en/longabstract_en.nt.bz2");
+        BufferedInputStream stream = new BufferedInputStream(url.openStream());
+        BufferedReader decoded = null;
+        try {
+            // The two header bytes have to be consumed before the stream is handed
+            // to CBZip2InputStream. Check them while doing so, so that e.g. an HTML
+            // error page fails here with a readable message.
+            if (stream.read() != 'B' || stream.read() != 'Z') {
+                throw new IOException("expected a bzip2 stream (magic 'BZ') from [" + url + "]");
+            }
+            // Decode with a fixed charset so results do not depend on the platform
+            // default. NOTE(review): assumes the dump is ASCII/UTF-8 N-Triples - confirm.
+            decoded = new BufferedReader(new InputStreamReader(new CBZip2InputStream(stream), "UTF-8"));
+        } finally {
+            if (decoded == null) {
+                // Setup failed: do not leave the HTTP connection open.
+                try {
+                    stream.close();
+                } catch (IOException ignored) {
+                    // the setup failure is the exception worth reporting
+                }
+            }
+        }
+        reader = decoded;
+    }
+
+    /**
+     * @return the number of characters in all records returned by {@link #next()} so far
+     */
+    public long getTotalSize() {
+        return totalSize;
+    }
+
+    /**
+     * Advances to the next record. Lines that do not contain at least two
+     * spaces (blank or malformed lines) cannot be split and are skipped.
+     *
+     * @return {@code true} if a record was read, {@code false} at end of data
+     */
+    public boolean next() throws Exception {
+        while ((line = reader.readLine()) != null) {
+            int endId = line.indexOf(' ');
+            int endType = endId < 0 ? -1 : line.indexOf(' ', endId + 1);
+            if (endType < 0) {
+                continue;
+            }
+            totalSize += line.length();
+            id = line.substring(0, endId);
+            type = line.substring(endId + 1, endType);
+            text = line.substring(endType + 1);
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Writes the current record into {@code builder} as one object with the
+     * fields <code>id</code>, <code>type</code>, <code>text</code> and
+     * <code>time</code> (the moment of this call).
+     *
+     * @param builder the builder to write into
+     * @return the same {@code builder}
+     */
+    public XContentBuilder current(XContentBuilder builder) throws Exception {
+        builder.startObject();
+        builder.field("id", id);
+        builder.field("type", type);
+        builder.field("text", text);
+        builder.field("time", new Date());
+        builder.endObject();
+        return builder;
+    }
+
+    /**
+     * Closes the reader and, through it, the underlying download.
+     */
+    public void close() throws IOException {
+        reader.close();
+    }
+}