diff --git a/src/main/java/org/elasticsearch/action/ActionModule.java b/src/main/java/org/elasticsearch/action/ActionModule.java index dc03a164528..f9d7e28c3a5 100644 --- a/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/src/main/java/org/elasticsearch/action/ActionModule.java @@ -119,6 +119,8 @@ import org.elasticsearch.action.search.type.*; import org.elasticsearch.action.suggest.SuggestAction; import org.elasticsearch.action.suggest.TransportSuggestAction; import org.elasticsearch.action.support.TransportAction; +import org.elasticsearch.action.termvector.TermVectorAction; +import org.elasticsearch.action.termvector.TransportSingleShardTermVectorAction; import org.elasticsearch.action.update.TransportUpdateAction; import org.elasticsearch.action.update.UpdateAction; import org.elasticsearch.common.inject.AbstractModule; @@ -210,6 +212,7 @@ public class ActionModule extends AbstractModule { registerAction(IndexAction.INSTANCE, TransportIndexAction.class); registerAction(GetAction.INSTANCE, TransportGetAction.class); + registerAction(TermVectorAction.INSTANCE, TransportSingleShardTermVectorAction.class); registerAction(DeleteAction.INSTANCE, TransportDeleteAction.class, TransportIndexDeleteAction.class, TransportShardDeleteAction.class); registerAction(CountAction.INSTANCE, TransportCountAction.class); diff --git a/src/main/java/org/elasticsearch/action/termvector/TermVectorAction.java b/src/main/java/org/elasticsearch/action/termvector/TermVectorAction.java new file mode 100644 index 00000000000..00c4997be06 --- /dev/null +++ b/src/main/java/org/elasticsearch/action/termvector/TermVectorAction.java @@ -0,0 +1,46 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.termvector;
+
+import org.elasticsearch.action.Action;
+
+import org.elasticsearch.client.Client;
+
+/** Action descriptor for term vector requests: names the action {@code "tv"} and creates the
+ * {@link TermVectorResponse} and {@link TermVectorRequestBuilder} objects that belong to it. */
+public class TermVectorAction extends Action<TermVectorRequest, TermVectorResponse, TermVectorRequestBuilder> {
+
+    public static final String NAME = "tv"; // name handed to the Action constructor
+    public static final TermVectorAction INSTANCE = new TermVectorAction(); // only instance; constructor is private
+
+    private TermVectorAction() {
+        super(NAME);
+    }
+
+    @Override
+    public TermVectorResponse newResponse() {
+        return new TermVectorResponse(); // fresh, empty response object
+    }
+
+    @Override
+    public TermVectorRequestBuilder newRequestBuilder(Client client) {
+        return new TermVectorRequestBuilder(client); // builder bound to the given client
+    }
+}
diff --git a/src/main/java/org/elasticsearch/action/termvector/TermVectorFields.java b/src/main/java/org/elasticsearch/action/termvector/TermVectorFields.java
new file mode 100644
index 00000000000..aa0ef394e4e
--- /dev/null
+++ b/src/main/java/org/elasticsearch/action/termvector/TermVectorFields.java
@@ -0,0 +1,469 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.termvector; + +import static org.apache.lucene.util.ArrayUtil.grow; +import gnu.trove.map.hash.TObjectLongHashMap; + +import java.io.IOException; +import java.util.Comparator; +import java.util.Iterator; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamInput; + +/** + * This class represents the result of a {@link TermVectorRequest}. It works + * exactly like the {@link Fields} class except for one thing: It can return + * offsets and payloads even if positions are not present. You must call + * nextPosition() anyway to move the counter although this method only returns + * -1 if no positions were returned by the {@link TermVectorRequest}. + * + * The data is stored in two byte arrays ({@code headerRef} and + * {@code termVectors}, both {@link BytesRef}) that have the following format: + *

+ * {@code headerRef}: Stores offsets per field in the {@code termVectors} array + * and some header information as {@link BytesRef}. Format is + *

+ * + * {@code termVectors}: Stores the actual term vectors as a {@link BytesRef}. + * + * Term vectors are stored in blocks, one for each field. The + * offsets in {@code headerRef} are used to find where the block for a field + * starts. Each block begins with a + * + * If the field statistics were requested ({@code hasFieldStatistics} is true, + * see {@code headerRef}), the following numbers are stored: + * + * + * After that, for each term it stores + *