From 9194d36a64b3a00e1408c340c0305b201d05331c Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 30 Jan 2012 21:23:15 +0200 Subject: [PATCH] improve compressed string construction, allow to construct it from utf8 bytes and use it where applicable --- .../cluster/metadata/AliasMetaData.java | 3 ++- .../cluster/metadata/MappingMetaData.java | 4 +++- .../common/compress/CompressedString.java | 21 +++++++++++++++++++ .../index/mapper/DocumentMapper.java | 2 +- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/cluster/metadata/AliasMetaData.java b/src/main/java/org/elasticsearch/cluster/metadata/AliasMetaData.java index f0f890b6813..1ac5f56e740 100644 --- a/src/main/java/org/elasticsearch/cluster/metadata/AliasMetaData.java +++ b/src/main/java/org/elasticsearch/cluster/metadata/AliasMetaData.java @@ -168,7 +168,8 @@ public class AliasMetaData { return this; } try { - this.filter = new CompressedString(XContentFactory.jsonBuilder().map(filter).string()); + XContentBuilder builder = XContentFactory.jsonBuilder().map(filter); + this.filter = new CompressedString(builder.underlyingBytes(), 0, builder.underlyingBytesLength()); return this; } catch (IOException e) { throw new ElasticSearchGenerationException("Failed to build json for alias request", e); diff --git a/src/main/java/org/elasticsearch/cluster/metadata/MappingMetaData.java b/src/main/java/org/elasticsearch/cluster/metadata/MappingMetaData.java index 15f9aeeeeb4..77ee0555581 100644 --- a/src/main/java/org/elasticsearch/cluster/metadata/MappingMetaData.java +++ b/src/main/java/org/elasticsearch/cluster/metadata/MappingMetaData.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.joda.FormatDateTimeFormatter; import org.elasticsearch.common.joda.Joda; +import org.elasticsearch.common.xcontent.XContentBuilder; import 
org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; @@ -279,7 +280,8 @@ public class MappingMetaData { public MappingMetaData(String type, Map mapping) throws IOException { this.type = type; - this.source = new CompressedString(XContentFactory.jsonBuilder().map(mapping).string()); + XContentBuilder mappingBuilder = XContentFactory.jsonBuilder().map(mapping); + this.source = new CompressedString(mappingBuilder.underlyingBytes(), 0, mappingBuilder.underlyingBytesLength()); Map withoutType = mapping; if (mapping.size() == 1 && mapping.containsKey(type)) { withoutType = (Map) mapping.get(type); diff --git a/src/main/java/org/elasticsearch/common/compress/CompressedString.java b/src/main/java/org/elasticsearch/common/compress/CompressedString.java index b6230ade2ba..0fd564cd09b 100644 --- a/src/main/java/org/elasticsearch/common/compress/CompressedString.java +++ b/src/main/java/org/elasticsearch/common/compress/CompressedString.java @@ -21,6 +21,7 @@ package org.elasticsearch.common.compress; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.common.Unicode; +import org.elasticsearch.common.compress.lzf.LZF; import org.elasticsearch.common.compress.lzf.LZFDecoder; import org.elasticsearch.common.compress.lzf.LZFEncoder; import org.elasticsearch.common.io.stream.StreamInput; @@ -40,10 +41,30 @@ public class CompressedString implements Streamable { CompressedString() { } + /** + * Wraps data that is assumed to be already compressed (UTF8). The provided array is + * stored as is, without copying it. + */ public CompressedString(byte[] compressed) { this.bytes = compressed; } + /** + * Constructs a new compressed string from UTF8 bytes: data for which LZF.isCompressed holds is copied over as is, anything else is passed through LZFEncoder.encode. 
+ * + * @param data The byte array holding the bytes + * @param offset Offset into the byte array where the data starts + * @param length The length of the data, in bytes + * @throws IOException presumably when encoding the data fails (declared by the signature) + */ + public CompressedString(byte[] data, int offset, int length) throws IOException { + if (LZF.isCompressed(data, offset, length)) { + this.bytes = Arrays.copyOfRange(data, offset, offset + length); + } else { + this.bytes = LZFEncoder.encode(data, offset, length); + } + } + public CompressedString(String str) throws IOException { UnicodeUtil.UTF8Result result = Unicode.unsafeFromStringAsUtf8(str); this.bytes = LZFEncoder.encode(result.result, result.length); diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index c06867044bc..41791d6dc78 100644 --- a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -573,7 +573,7 @@ public class DocumentMapper implements ToXContent { builder.startObject(); toXContent(builder, ToXContent.EMPTY_PARAMS); builder.endObject(); - this.mappingSource = new CompressedString(builder.string()); + this.mappingSource = new CompressedString(builder.underlyingBytes(), 0, builder.underlyingBytesLength()); } catch (Exception e) { throw new FailedToGenerateSourceMapperException(e.getMessage(), e); }