mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-30 11:58:36 +00:00
We have a compression framework that we use internally, mainly to compress some xcontent bytes. However it is quite lenient: for instance it relies on the assumption that detection of the compression format can only be called on either some compressed xcontent bytes or some raw xcontent bytes, but nothing checks this. By the way, we are misusing it in BinaryFieldMapper so that if someone indexes a binary field which happens to have the same header as a LZF stream, then at read time, we will try to decompress it. It also simplifies the API by removing block compression (only streaming) and some code duplication caused by some methods accepting a byte[] and other methods a BytesReference.
116 lines
4.5 KiB
Java
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
|
|
|
|
package org.elasticsearch.common.compress;
|
|
|
|
import org.apache.lucene.util.TestUtil;
|
|
import org.elasticsearch.common.bytes.BytesReference;
|
|
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
|
import org.elasticsearch.common.settings.Settings;
|
|
import org.elasticsearch.test.ElasticsearchTestCase;
|
|
import org.junit.Assert;
|
|
import org.junit.Test;
|
|
|
|
import java.io.IOException;
|
|
import java.util.Random;
|
|
|
|
import static org.hamcrest.Matchers.equalTo;
|
|
import static org.hamcrest.Matchers.not;
|
|
|
|
/**
|
|
*
|
|
*/
|
|
public class CompressedXContentTests extends ElasticsearchTestCase {
|
|
|
|
@Test
|
|
public void simpleTestsLZF() throws IOException {
|
|
simpleTests("lzf");
|
|
}
|
|
|
|
private void assertEquals(CompressedXContent s1, CompressedXContent s2) {
|
|
Assert.assertEquals(s1, s2);
|
|
assertArrayEquals(s1.uncompressed(), s2.uncompressed());
|
|
assertEquals(s1.hashCode(), s2.hashCode());
|
|
}
|
|
|
|
public void simpleTests(String compressor) throws IOException {
|
|
CompressorFactory.configure(Settings.settingsBuilder().put("compress.default.type", compressor).build());
|
|
String str = "---\nf:this is a simple string";
|
|
CompressedXContent cstr = new CompressedXContent(str);
|
|
assertThat(cstr.string(), equalTo(str));
|
|
assertThat(new CompressedXContent(str), equalTo(cstr));
|
|
|
|
String str2 = "---\nf:this is a simple string 2";
|
|
CompressedXContent cstr2 = new CompressedXContent(str2);
|
|
assertThat(cstr2.string(), not(equalTo(str)));
|
|
assertThat(new CompressedXContent(str2), not(equalTo(cstr)));
|
|
assertEquals(new CompressedXContent(str2), cstr2);
|
|
}
|
|
|
|
public void testRandom() throws IOException {
|
|
String compressor = "lzf";
|
|
CompressorFactory.configure(Settings.settingsBuilder().put("compress.default.type", compressor).build());
|
|
Random r = getRandom();
|
|
for (int i = 0; i < 1000; i++) {
|
|
String string = TestUtil.randomUnicodeString(r, 10000);
|
|
// hack to make it detected as YAML
|
|
string = "---\n" + string;
|
|
CompressedXContent compressedXContent = new CompressedXContent(string);
|
|
assertThat(compressedXContent.string(), equalTo(string));
|
|
}
|
|
}
|
|
|
|
public void testDifferentCompressedRepresentation() throws Exception {
|
|
byte[] b = "---\nf:abcdefghijabcdefghij".getBytes("UTF-8");
|
|
CompressorFactory.defaultCompressor();
|
|
|
|
Compressor compressor = CompressorFactory.defaultCompressor();
|
|
BytesStreamOutput bout = new BytesStreamOutput();
|
|
StreamOutput out = compressor.streamOutput(bout);
|
|
out.writeBytes(b);
|
|
out.flush();
|
|
out.writeBytes(b);
|
|
out.close();
|
|
final BytesReference b1 = bout.bytes();
|
|
|
|
bout = new BytesStreamOutput();
|
|
out = compressor.streamOutput(bout);
|
|
out.writeBytes(b);
|
|
out.writeBytes(b);
|
|
out.close();
|
|
final BytesReference b2 = bout.bytes();
|
|
|
|
// because of the intermediate flush, the two compressed representations
|
|
// are different. It can also happen for other reasons like if hash tables
|
|
// of different size are being used
|
|
assertFalse(b1.equals(b2));
|
|
// we used the compressed representation directly and did not recompress
|
|
assertArrayEquals(b1.toBytes(), new CompressedXContent(b1).compressed());
|
|
assertArrayEquals(b2.toBytes(), new CompressedXContent(b2).compressed());
|
|
// but compressedstring instances are still equal
|
|
assertEquals(new CompressedXContent(b1), new CompressedXContent(b2));
|
|
}
|
|
|
|
public void testHashCode() throws IOException {
|
|
assertFalse(new CompressedXContent("{\"a\":\"b\"}").hashCode() == new CompressedXContent("{\"a\":\"c\"}").hashCode());
|
|
}
|
|
|
|
}
|