diff --git a/src/main/java/org/elasticsearch/action/termvector/TermVectorResponse.java b/src/main/java/org/elasticsearch/action/termvector/TermVectorResponse.java index 056faf256e5..6650f82d8a3 100644 --- a/src/main/java/org/elasticsearch/action/termvector/TermVectorResponse.java +++ b/src/main/java/org/elasticsearch/action/termvector/TermVectorResponse.java @@ -19,13 +19,8 @@ package org.elasticsearch.action.termvector; -import java.io.IOException; -import java.util.EnumSet; -import java.util.Iterator; -import java.util.Set; - +import com.google.common.collect.Iterators; import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -36,7 +31,6 @@ import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.termvector.TermVectorRequest.Flag; -import org.elasticsearch.common.Base64; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; @@ -46,7 +40,10 @@ import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; -import com.google.common.collect.Iterators; +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.Set; public class TermVectorResponse extends ActionResponse implements ToXContent { @@ -82,6 +79,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { private String type; private String id; private long docVersion; + private boolean exists = false; private boolean sourceCopied = false; @@ -105,20 +103,27 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { out.writeString(type); out.writeString(id); out.writeVLong(docVersion); - final boolean docExists = documentExists(); + final boolean docExists = isExists(); out.writeBoolean(docExists); - if (docExists) { + out.writeBoolean(hasTermVectors()); + if (hasTermVectors()) { out.writeBytesReference(headerRef); out.writeBytesReference(termVectors); } } + private boolean hasTermVectors() { + assert (headerRef == null && termVectors == null) || (headerRef != null && termVectors != null); + return headerRef != null; + } + @Override public void readFrom(StreamInput in) throws IOException { index = in.readString(); type = in.readString(); id = in.readString(); docVersion = in.readVLong(); + exists = in.readBoolean(); if (in.readBoolean()) { headerRef = in.readBytesReference(); termVectors = in.readBytesReference(); @@ -126,7 +131,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { } public Fields getFields() throws IOException { - if (documentExists()) { + if (hasTermVectors() && isExists()) { if (!sourceCopied) { // make the bytes safe headerRef = headerRef.copyBytesArray(); termVectors = termVectors.copyBytesArray(); @@ -163,8 +168,8 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { builder.field(FieldStrings._TYPE, type); builder.field(FieldStrings._ID, id); builder.field(FieldStrings._VERSION, docVersion); - builder.field(FieldStrings.EXISTS, documentExists()); - if (!documentExists()) { + builder.field(FieldStrings.EXISTS, isExists()); + if (!isExists()) { builder.endObject(); return builder; } @@ -234,7 +239,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { builder.field(FieldStrings.END_OFFSET, 0, termFreq, currentEndOffset); } if (curTerms.hasPayloads()) { - builder.array(FieldStrings.PAYLOAD, (Object[])currentPayloads); + builder.array(FieldStrings.PAYLOAD, (Object[]) currentPayloads); } } @@ -251,7 +256,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { if (curTerms.hasPayloads()) { BytesRef curPaypoad = posEnum.getPayload(); currentPayloads[j] = new BytesArray(curPaypoad.bytes, 0, curPaypoad.length); - + } } } @@ -295,14 +300,19 @@ public class TermVectorResponse extends ActionResponse implements ToXContent { } } - public boolean documentExists() { - assert (headerRef == null && termVectors == null) || (headerRef != null && termVectors != null); - return headerRef != null; + public boolean isExists() { + return exists; + } + + public void setExists(boolean exists) { + this.exists = exists; } public void setFields(Fields fields, Set selectedFields, EnumSet flags, Fields topLevelFields) throws IOException { TermVectorWriter tvw = new TermVectorWriter(this); - tvw.setFields(fields, selectedFields, flags, topLevelFields); + if (fields != null) { + tvw.setFields(fields, selectedFields, flags, topLevelFields); + } } diff --git a/src/main/java/org/elasticsearch/action/termvector/TransportSingleShardTermVectorAction.java b/src/main/java/org/elasticsearch/action/termvector/TransportSingleShardTermVectorAction.java index 90ad6f106c2..ad1c60084fd 100644 --- a/src/main/java/org/elasticsearch/action/termvector/TransportSingleShardTermVectorAction.java +++ b/src/main/java/org/elasticsearch/action/termvector/TransportSingleShardTermVectorAction.java @@ -19,10 +19,6 @@ package org.elasticsearch.action.termvector; -import java.util.List; - -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; @@ -35,7 +31,6 @@ import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.routing.ShardIterator; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.engine.Engine; @@ -108,11 +103,11 @@ public class TransportSingleShardTermVectorAction extends TransportShardSingleOp try { Fields topLevelFields = MultiFields.getFields(topLevelReader); Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm); - if (docIdAndVersion != null) { termVectorResponse.setFields(docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId), request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setDocVersion(docIdAndVersion.version); + termVectorResponse.setExists(true); } else { } diff --git a/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTests.java b/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTests.java index 030d7d6b418..56f85726521 100644 --- a/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTests.java +++ b/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTests.java @@ -69,6 +69,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest { public void streamTest() throws Exception { TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c"); + outResponse.setExists(true); writeStandardTermVector(outResponse); // write @@ -84,6 +85,22 @@ public class GetTermVectorTests extends AbstractSharedClusterTest { // see if correct checkIfStandardTermVector(inResponse); + + outResponse = new TermVectorResponse("a", "b", "c"); + writeEmptyTermVector(outResponse); + // write + outBuffer = new ByteArrayOutputStream(); + out = new OutputStreamStreamOutput(outBuffer); + outResponse.writeTo(out); + + // read + esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray()); + esBuffer = new InputStreamStreamInput(esInBuffer); + inResponse = new TermVectorResponse("a", "b", "c"); + inResponse.readFrom(esBuffer); + assertTrue(inResponse.isExists()); + + } @@ -94,6 +111,36 @@ public class GetTermVectorTests extends AbstractSharedClusterTest { assertThat(fields.terms("desc"), Matchers.notNullValue()); assertThat(fields.size(), equalTo(2)); } + + private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException { + + Directory dir = FSDirectory.open(new File("/tmp/foo")); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)); + conf.setOpenMode(OpenMode.CREATE); + IndexWriter writer = new IndexWriter(dir, conf); + FieldType type = new FieldType(TextField.TYPE_STORED); + type.setStoreTermVectorOffsets(true); + type.setStoreTermVectorPayloads(false); + type.setStoreTermVectorPositions(true); + type.setStoreTermVectors(true); + type.freeze(); + Document d = new Document(); + d.add(new Field("id", "abc", StringField.TYPE_STORED)); + + writer.updateDocument(new Term("id", "abc"), d); + writer.commit(); + writer.close(); + DirectoryReader dr = DirectoryReader.open(dir); + IndexSearcher s = new IndexSearcher(dr); + TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1); + ScoreDoc[] scoreDocs = search.scoreDocs; + int doc = scoreDocs[0].doc; + Fields fields = dr.getTermVectors(doc); + EnumSet flags = EnumSet.of(Flag.Positions, Flag.Offsets); + outResponse.setFields(fields, null, flags, fields); + outResponse.setExists(true); + + } private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException { @@ -246,11 +293,54 @@ public class GetTermVectorTests extends AbstractSharedClusterTest { ActionFuture termVector = client().termVector(new TermVectorRequest("test", "type1", "" + i)); TermVectorResponse actionGet = termVector.actionGet(); assertThat(actionGet, Matchers.notNullValue()); - assertThat(actionGet.documentExists(), Matchers.equalTo(false)); + assertThat(actionGet.isExists(), Matchers.equalTo(false)); } } + + @Test + public void testExistingFieldWithNoTermVectorsNoNPE() throws Exception { + + run(addMapping(prepareCreate("test"), "type1", new Object[] { "existingfield", "type", "string", "term_vector", + "with_positions_offsets_payloads" })); + + ensureYellow(); + // when indexing a field that simply has a question mark, the term + // vectors will be null + client().prepareIndex("test", "type1", "0").setSource("existingfield", "?").execute().actionGet(); + refresh(); + String[] selectedFields = { "existingfield" }; + ActionFuture termVector = client().termVector( + new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields)); + // lets see if the null term vectors are caught... + termVector.actionGet(); + TermVectorResponse actionGet = termVector.actionGet(); + assertThat(actionGet.isExists(), Matchers.equalTo(true)); + + } + + @Test + public void testExistingFieldButNotInDocNPE() throws Exception { + + run(addMapping(prepareCreate("test"), "type1", new Object[] { "existingfield", "type", "string", "term_vector", + "with_positions_offsets_payloads" })); + + ensureYellow(); + // when indexing a field that simply has a question mark, the term + // vectors will be null + client().prepareIndex("test", "type1", "0").setSource("anotherexistingfield", 1).execute().actionGet(); + refresh(); + String[] selectedFields = { "existingfield" }; + ActionFuture termVector = client().termVector( + new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields)); + // lets see if the null term vectors are caught... + TermVectorResponse actionGet = termVector.actionGet(); + assertThat(actionGet.isExists(), Matchers.equalTo(true)); + + } + + @Test public void testSimpleTermVectors() throws ElasticSearchException, IOException { @@ -278,7 +368,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest { TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true) .setOffsets(true).setPositions(true).setSelectedFields(); TermVectorResponse response = resp.execute().actionGet(); - assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true)); + assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(1)); Terms terms = fields.terms("field"); @@ -387,7 +477,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest { TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)) .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested).setPositions(isPositionsRequested).setSelectedFields(); TermVectorResponse response = resp.execute().actionGet(); - assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true)); + assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0)); if (ft.storeTermVectors()) { diff --git a/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTestsCheckDocFreq.java b/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTestsCheckDocFreq.java index 62d260d8cb7..f7d949ad53b 100644 --- a/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTestsCheckDocFreq.java +++ b/src/test/java/org/elasticsearch/test/integration/termvectors/GetTermVectorTestsCheckDocFreq.java @@ -122,7 +122,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest { TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true) .setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields(); TermVectorResponse response = resp.execute().actionGet(); - assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true)); + assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(1)); Terms terms = fields.terms("field"); @@ -181,7 +181,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest { .setPositions(true).setTermStatistics(false).setFieldStatistics(true).setSelectedFields(); assertThat(resp.request().termStatistics(), equalTo(false)); TermVectorResponse response = resp.execute().actionGet(); - assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true)); + assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(1)); Terms terms = fields.terms("field"); @@ -238,7 +238,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest { .setPositions(true).setFieldStatistics(true).setTermStatistics(true).setSelectedFields(); assertThat(resp.request().fieldStatistics(), equalTo(true)); TermVectorResponse response = resp.execute().actionGet(); - assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true)); + assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(1)); Terms terms = fields.terms("field");