OpenSearch/src/test/java/org/elasticsearch/common/compress/CompressedXContentTests.java
Adrien Grand 08ee4a87b3 Internal: tighten up our compression framework.
We have a compression framework that we use internally, mainly to compress some
xcontent bytes. However it is quite lenient: for instance it relies on the
assumption that detection of the compression format can only be called on either
some compressed xcontent bytes or some raw xcontent bytes, but nothing checks
this. By the way, we are misusing it in BinaryFieldMapper so that if someone
indexes a binary field which happens to have the same header as a LZF stream,
then at read time, we will try to decompress it.

It also simplifies the API by removing block compression (only streaming) and
some code duplication caused by some methods accepting a byte[] and other
methods a BytesReference.
2015-05-29 12:13:18 +02:00

116 lines
4.5 KiB
Java

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.compress;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Assert;
import org.junit.Test;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Random;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.not;
/**
 * Tests for {@link CompressedXContent}: round-tripping content through
 * compression, and checking that equality and hash codes do not depend on the
 * particular compressed representation that an instance was built from.
 */
public class CompressedXContentTests extends ElasticsearchTestCase {

    /** Runs {@link #simpleTests(String)} with the {@code lzf} compressor. */
    @Test
    public void simpleTestsLZF() throws IOException {
        simpleTests("lzf");
    }

    /**
     * Asserts that two instances are equal, that their {@code uncompressed()}
     * bytes match and that their hash codes agree.
     */
    private void assertEquals(CompressedXContent s1, CompressedXContent s2) {
        // Qualified on purpose: an unqualified assertEquals(s1, s2) would
        // resolve to this very overload and recurse forever.
        Assert.assertEquals(s1, s2);
        assertArrayEquals(s1.uncompressed(), s2.uncompressed());
        assertEquals(s1.hashCode(), s2.hashCode());
    }

    /**
     * Basic round-trip and equality checks using the given compressor type.
     *
     * @param compressor value to use for the {@code compress.default.type} setting
     * @throws IOException if building a {@link CompressedXContent} fails
     */
    public void simpleTests(String compressor) throws IOException {
        CompressorFactory.configure(Settings.settingsBuilder().put("compress.default.type", compressor).build());

        // The "---\n" prefix is presumably there so that the content is
        // detected as YAML, as in testRandom below.
        String str = "---\nf:this is a simple string";
        CompressedXContent cstr = new CompressedXContent(str);
        assertThat(cstr.string(), equalTo(str));
        assertThat(new CompressedXContent(str), equalTo(cstr));

        String str2 = "---\nf:this is a simple string 2";
        CompressedXContent cstr2 = new CompressedXContent(str2);
        assertThat(cstr2.string(), not(equalTo(str)));
        assertThat(new CompressedXContent(str2), not(equalTo(cstr)));
        assertEquals(new CompressedXContent(str2), cstr2);
    }

    /** Round-trips 1000 random unicode strings through {@link CompressedXContent}. */
    public void testRandom() throws IOException {
        String compressor = "lzf";
        CompressorFactory.configure(Settings.settingsBuilder().put("compress.default.type", compressor).build());
        Random r = getRandom();
        for (int i = 0; i < 1000; i++) {
            String string = TestUtil.randomUnicodeString(r, 10000);
            // hack to make it detected as YAML
            string = "---\n" + string;
            CompressedXContent compressedXContent = new CompressedXContent(string);
            assertThat(compressedXContent.string(), equalTo(string));
        }
    }

    /**
     * Builds two different compressed representations of the same content and
     * checks that the resulting {@link CompressedXContent} instances keep the
     * bytes they were given while still comparing equal.
     */
    public void testDifferentCompressedRepresentation() throws Exception {
        byte[] b = "---\nf:abcdefghijabcdefghij".getBytes(StandardCharsets.UTF_8);
        Compressor compressor = CompressorFactory.defaultCompressor();

        // First representation: same payload written twice, with a flush in between.
        BytesStreamOutput bout = new BytesStreamOutput();
        StreamOutput out = compressor.streamOutput(bout);
        out.writeBytes(b);
        out.flush();
        out.writeBytes(b);
        out.close();
        final BytesReference b1 = bout.bytes();

        // Second representation: same payload written twice, without the flush.
        bout = new BytesStreamOutput();
        out = compressor.streamOutput(bout);
        out.writeBytes(b);
        out.writeBytes(b);
        out.close();
        final BytesReference b2 = bout.bytes();

        // because of the intermediate flush, the two compressed representations
        // are different. It can also happen for other reasons like if hash tables
        // of different size are being used
        assertFalse(b1.equals(b2));
        // we used the compressed representation directly and did not recompress
        assertArrayEquals(b1.toBytes(), new CompressedXContent(b1).compressed());
        assertArrayEquals(b2.toBytes(), new CompressedXContent(b2).compressed());
        // but CompressedXContent instances are still equal
        assertEquals(new CompressedXContent(b1), new CompressedXContent(b2));
    }

    /** Checks that two instances with different content have different hash codes. */
    public void testHashCode() throws IOException {
        int first = new CompressedXContent("{\"a\":\"b\"}").hashCode();
        int second = new CompressedXContent("{\"a\":\"c\"}").hashCode();
        assertFalse("hash codes of different contents should differ, both were " + first, first == second);
    }
}