Optimize search hit to use Text for type and id

This reduces string serialization overhead and speeds up XContent (JSON) generation.
This commit is contained in:
Shay Banon 2012-12-31 00:13:15 -08:00
parent 120b766f0a
commit 720feca3c5
14 changed files with 144 additions and 37 deletions

View File

@ -113,6 +113,11 @@ public abstract class AdapterStreamInput extends StreamInput {
return in.readText();
}
/**
 * Reads a shared {@link Text} by delegating to {@code in.readSharedText()}.
 *
 * @return the text returned by the delegate
 * @throws IOException if the delegate fails to read
 */
@Override
public Text readSharedText() throws IOException {
return in.readSharedText();
}
@Override
public int read(byte[] b) throws IOException {
return in.read(b);

View File

@ -141,6 +141,11 @@ public class AdapterStreamOutput extends StreamOutput {
out.writeText(text);
}
/**
 * Writes a shared {@link Text} by delegating to {@code out.writeSharedText(text)}.
 *
 * @param text the text to write
 * @throws IOException if the delegate fails to write
 */
@Override
public void writeSharedText(Text text) throws IOException {
out.writeSharedText(text);
}
@Override
public void writeFloat(float v) throws IOException {
out.writeFloat(v);

View File

@ -20,6 +20,7 @@
package org.elasticsearch.common.io.stream;
import gnu.trove.map.hash.TIntObjectHashMap;
import org.elasticsearch.common.text.Text;
import java.io.IOException;
@ -29,9 +30,10 @@ import java.io.IOException;
public class HandlesStreamInput extends AdapterStreamInput {
private final TIntObjectHashMap<String> handles = new TIntObjectHashMap<String>();
private final TIntObjectHashMap<String> identityHandles = new TIntObjectHashMap<String>();
private final TIntObjectHashMap<Text> handlesText = new TIntObjectHashMap<Text>();
HandlesStreamInput() {
super();
}
@ -89,21 +91,41 @@ public class HandlesStreamInput extends AdapterStreamInput {
}
}
/**
 * Reads a {@link Text} value that may be encoded through a handle.
 * <p>
 * The value starts with a one-byte header:
 * <ul>
 * <li>{@code 0} - a handle (vint) follows, then the text itself; the text is stored under that handle</li>
 * <li>{@code 1} - a handle (vint) follows; the text previously stored under it is returned</li>
 * <li>{@code 2} - the text follows directly and no handle is involved</li>
 * </ul>
 *
 * @return the text that was read, never {@code null}
 * @throws IOException if the header byte is not 0, 1 or 2, or if a handle is referenced
 *                     that has not been stored
 */
@Override
public Text readSharedText() throws IOException {
    byte header = in.readByte();
    switch (header) {
        case 0: {
            int handle = in.readVInt();
            Text text = in.readText();
            handlesText.put(handle, text);
            return text;
        }
        case 1: {
            int handle = in.readVInt();
            Text text = handlesText.get(handle);
            if (text == null) {
                // An unknown handle means the stream is out of sync; fail here rather than
                // handing a null back to the caller.
                throw new IOException("Expected a known text handle, got unknown handle [" + handle + "]");
            }
            return text;
        }
        case 2:
            return in.readText();
        default:
            throw new IOException("Expected handle header, got [" + header + "]");
    }
}
/**
 * Resets the underlying stream via {@code super.reset()} and then forgets every
 * handle collected so far (strings, identity strings and texts).
 *
 * @throws IOException if {@code super.reset()} fails; in that case no handle map is cleared
 */
@Override
public void reset() throws IOException {
    super.reset();
    // the three handle maps are independent of each other
    handlesText.clear();
    identityHandles.clear();
    handles.clear();
}
/**
 * Passes {@code in} to {@code super.reset(in)} and then forgets every handle
 * collected so far (strings, identity strings and texts).
 *
 * @param in the stream handed to {@code super.reset(in)}
 */
public void reset(StreamInput in) {
    super.reset(in);
    // the three handle maps are independent of each other
    handlesText.clear();
    identityHandles.clear();
    handles.clear();
}
/**
 * Forgets every handle collected so far (strings, identity strings and texts)
 * without touching the underlying stream.
 */
public void cleanHandles() {
    handlesText.clear();
    identityHandles.clear();
    handles.clear();
}
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.common.io.stream;
import gnu.trove.impl.Constants;
import gnu.trove.map.hash.TObjectIntHashMap;
import org.elasticsearch.common.text.Text;
import java.io.IOException;
import java.util.Arrays;
@ -36,9 +37,10 @@ public class HandlesStreamOutput extends AdapterStreamOutput {
private final int identityThreshold;
private final TObjectIntHashMap<String> handles = new TObjectIntHashMap<String>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
private final HandleTable identityHandles = new HandleTable(10, (float) 3.00);
private final TObjectIntHashMap<Text> handlesText = new TObjectIntHashMap<Text>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
public HandlesStreamOutput(StreamOutput out) {
this(out, DEFAULT_IDENTITY_THRESHOLD);
}
@ -105,10 +107,37 @@ public class HandlesStreamOutput extends AdapterStreamOutput {
}
}
/**
 * Writes a {@link Text}, replacing repeated short values with a numeric handle.
 * <p>
 * A one-byte header is written first:
 * <ul>
 * <li>{@code 0} - first occurrence: a newly assigned handle (vint) and the text follow</li>
 * <li>{@code 1} - repeated occurrence: only the previously assigned handle (vint) follows</li>
 * <li>{@code 2} - length is not below {@code identityThreshold}: the text follows, no handle is used</li>
 * </ul>
 *
 * @param text the text to write
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void writeSharedText(Text text) throws IOException {
    // use the byte length when bytes are available, otherwise the string length
    final int length = text.hasBytes() ? text.bytes().length() : text.string().length();

    if (length >= identityThreshold) {
        out.writeByte((byte) 2);
        out.writeText(text);
        return;
    }

    final int known = handlesText.get(text);
    if (known != -1) {
        // already written once: emit only the handle
        out.writeByte((byte) 1);
        out.writeVInt(known);
        return;
    }

    // first occurrence: the next handle is the current number of stored texts
    final int assigned = handlesText.size();
    handlesText.put(text, assigned);
    out.writeByte((byte) 0);
    out.writeVInt(assigned);
    out.writeText(text);
}
@Override
public void reset() throws IOException {
handles.clear();
identityHandles.clear();
handlesText.clear();
if (out != null) {
out.reset();
}
@ -117,6 +146,7 @@ public class HandlesStreamOutput extends AdapterStreamOutput {
/**
 * Forgets every handle collected so far (strings, identity strings and texts)
 * without touching the underlying stream.
 */
public void clear() {
    handlesText.clear();
    identityHandles.clear();
    handles.clear();
}
/**

View File

@ -198,6 +198,10 @@ public abstract class StreamInput extends InputStream {
return new StringAndBytesText(readBytesReference(length));
}
/**
 * Reads a shared {@link Text}. This implementation simply calls {@link #readText()}.
 *
 * @return the text that was read
 * @throws IOException if reading fails
 */
public Text readSharedText() throws IOException {
return readText();
}
@Nullable
public String readOptionalString() throws IOException {
if (readBoolean()) {

View File

@ -204,6 +204,10 @@ public abstract class StreamOutput extends OutputStream {
}
}
/**
 * Writes a shared {@link Text}. This implementation simply calls {@code writeText(text)}.
 *
 * @param text the text to write
 * @throws IOException if writing fails
 */
public void writeSharedText(Text text) throws IOException {
writeText(text);
}
public void writeString(String str) throws IOException {
int charCount = str.length();
writeVInt(charCount);

View File

@ -28,6 +28,7 @@ import org.elasticsearch.common.bytes.BytesReference;
public class BytesText implements Text {
private BytesReference bytes;
private int hash;
public BytesText(BytesReference bytes) {
this.bytes = bytes;
@ -64,7 +65,10 @@ public class BytesText implements Text {
@Override
public int hashCode() {
return bytes().hashCode();
if (hash == 0) {
hash = bytes.hashCode();
}
return hash;
}
@Override

View File

@ -44,6 +44,7 @@ public class StringAndBytesText implements Text {
private BytesReference bytes;
private String text;
private int hash;
public StringAndBytesText(BytesReference bytes) {
this.bytes = bytes;
@ -90,7 +91,10 @@ public class StringAndBytesText implements Text {
@Override
public int hashCode() {
return bytes().hashCode();
if (hash == 0) {
hash = bytes().hashCode();
}
return hash;
}
@Override

View File

@ -42,6 +42,7 @@ public class StringText implements Text {
}
private final String text;
private int hash;
public StringText(String text) {
this.text = text;
@ -75,7 +76,10 @@ public class StringText implements Text {
@Override
public int hashCode() {
// we use bytes here so we can be consistent with other text implementations
return bytes().hashCode();
if (hash == 0) {
hash = bytes().hashCode();
}
return hash;
}
@Override

View File

@ -21,12 +21,14 @@ package org.elasticsearch.common.text;
import org.elasticsearch.common.bytes.BytesReference;
import java.io.Serializable;
/**
* Text represents a (usually) long piece of text. We use this abstraction instead of {@link String}
* so we can represent it more efficiently in memory, as well as when serializing it over the
* network and when converting it to JSON.
*/
public interface Text extends Comparable<Text> {
public interface Text extends Comparable<Text>, Serializable {
/**
* Are bytes available without the need to be converted into bytes when calling {@link #bytes()}.

View File

@ -35,6 +35,8 @@ import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.compress.CompressedString;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.*;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.internal.*;
@ -237,6 +239,7 @@ public class DocumentMapper implements ToXContent {
private final Settings indexSettings;
private final String type;
private final StringAndBytesText typeText;
private final DocumentMapperParser docMapperParser;
@ -279,6 +282,7 @@ public class DocumentMapper implements ToXContent {
this.index = index;
this.indexSettings = indexSettings;
this.type = rootObjectMapper.name();
this.typeText = new StringAndBytesText(this.type);
this.docMapperParser = docMapperParser;
this.meta = meta;
this.rootObjectMapper = rootObjectMapper;
@ -341,6 +345,10 @@ public class DocumentMapper implements ToXContent {
return this.type;
}
/**
 * Returns the {@code typeText} field, the {@link Text} counterpart of {@code type()}.
 */
public Text typeText() {
return this.typeText;
}
public ImmutableMap<String, Object> meta() {
return this.meta;
}

View File

@ -23,21 +23,19 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import java.io.IOException;
import java.io.Serializable;
/**
* The target that the search request was executed on.
*
*
*/
public class SearchShardTarget implements Streamable, Serializable, Comparable<SearchShardTarget> {
private String nodeId;
private String index;
private Text nodeId;
private Text index;
private int shardId;
private SearchShardTarget() {
@ -45,27 +43,35 @@ public class SearchShardTarget implements Streamable, Serializable, Comparable<S
}
public SearchShardTarget(String nodeId, String index, int shardId) {
this.nodeId = nodeId;
this.index = index;
this.nodeId = nodeId == null ? null : new StringAndBytesText(nodeId);
this.index = new StringAndBytesText(index);
this.shardId = shardId;
}
/**
 * Returns the node id as a string.
 *
 * @return the node id, or {@code null} when no node id is set
 */
@Nullable
public String nodeId() {
    // nodeId can legitimately be null: the constructor stores null for a null argument
    // and readFrom only assigns it when the stream says it is present.
    return nodeId == null ? null : nodeId.string();
}
@Nullable
public String getNodeId() {
return nodeId;
return nodeId();
}
/**
 * Returns the node id as the stored {@link Text}; may be {@code null} when no node id is set.
 */
public Text nodeIdText() {
return this.nodeId;
}
public String index() {
return index;
return index.string();
}
public String getIndex() {
return index;
return index();
}
/**
 * Returns the index name as the stored {@link Text}.
 */
public Text indexText() {
return this.index;
}
public int shardId() {
@ -84,7 +90,7 @@ public class SearchShardTarget implements Streamable, Serializable, Comparable<S
@Override
public int compareTo(SearchShardTarget o) {
int i = index.compareTo(o.index());
int i = index.string().compareTo(o.index());
if (i == 0) {
i = shardId - o.shardId;
}
@ -94,9 +100,9 @@ public class SearchShardTarget implements Streamable, Serializable, Comparable<S
@Override
public void readFrom(StreamInput in) throws IOException {
if (in.readBoolean()) {
nodeId = in.readUTF();
nodeId = in.readSharedText();
}
index = in.readUTF();
index = in.readSharedText();
shardId = in.readVInt();
}
@ -106,9 +112,9 @@ public class SearchShardTarget implements Streamable, Serializable, Comparable<S
out.writeBoolean(false);
} else {
out.writeBoolean(true);
out.writeUTF(nodeId);
out.writeSharedText(nodeId);
}
out.writeUTF(index);
out.writeSharedText(index);
out.writeVInt(shardId);
}

View File

@ -24,10 +24,13 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.fieldvisitor.JustUidFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.UidAndSourceFieldsVisitor;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMappers;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.search.SearchHitField;
@ -149,7 +152,14 @@ public class FetchPhase implements SearchPhase {
}
}
InternalSearchHit searchHit = new InternalSearchHit(docId, fieldsVisitor.uid().id(), fieldsVisitor.uid().type(), sourceRequested ? fieldsVisitor.source() : null, searchFields);
DocumentMapper documentMapper = context.mapperService().documentMapper(fieldsVisitor.uid().type());
Text typeText;
if (documentMapper == null) {
typeText = new StringAndBytesText(fieldsVisitor.uid().type());
} else {
typeText = documentMapper.typeText();
}
InternalSearchHit searchHit = new InternalSearchHit(docId, fieldsVisitor.uid().id(), typeText, sourceRequested ? fieldsVisitor.source() : null, searchFields);
hits[index] = searchHit;

View File

@ -63,9 +63,8 @@ public class InternalSearchHit implements SearchHit {
private float score = Float.NEGATIVE_INFINITY;
private String id;
private String type;
private Text id;
private Text type;
private long version = -1;
@ -91,9 +90,9 @@ public class InternalSearchHit implements SearchHit {
}
public InternalSearchHit(int docId, String id, String type, BytesReference source, Map<String, SearchHitField> fields) {
public InternalSearchHit(int docId, String id, Text type, BytesReference source, Map<String, SearchHitField> fields) {
this.docId = docId;
this.id = id;
this.id = new StringAndBytesText(id);
this.type = type;
this.source = source;
this.fields = fields;
@ -147,7 +146,7 @@ public class InternalSearchHit implements SearchHit {
@Override
public String id() {
return id;
return id.string();
}
@Override
@ -157,7 +156,7 @@ public class InternalSearchHit implements SearchHit {
@Override
public String type() {
return type;
return type.string();
}
@Override
@ -381,9 +380,9 @@ public class InternalSearchHit implements SearchHit {
builder.startObject();
if (explanation() != null) {
builder.field("_shard", shard.shardId());
builder.field("_node", shard.nodeId());
builder.field("_node", shard.nodeIdText());
}
builder.field(Fields._INDEX, shard.index());
builder.field(Fields._INDEX, shard.indexText());
builder.field(Fields._TYPE, type);
builder.field(Fields._ID, id);
if (version != -1) {
@ -482,8 +481,8 @@ public class InternalSearchHit implements SearchHit {
public void readFrom(StreamInput in, InternalSearchHits.StreamContext context) throws IOException {
score = in.readFloat();
id = in.readString();
type = in.readString();
id = in.readText();
type = in.readSharedText();
version = in.readLong();
source = in.readBytesReference();
if (source.length() == 0) {
@ -617,8 +616,8 @@ public class InternalSearchHit implements SearchHit {
public void writeTo(StreamOutput out, InternalSearchHits.StreamContext context) throws IOException {
out.writeFloat(score);
out.writeString(id);
out.writeString(type);
out.writeText(id);
out.writeSharedText(type);
out.writeLong(version);
out.writeBytesReference(source);
if (explanation == null) {