termvectors: fix null pointer exception if field has no term vectors

Retrieving term vectors for a document that does not contain the requested field
caused a null pointer exception, as did documents whose field has no term vectors,
for example because the field only contains "?".
In both cases an empty response is now returned.

Closes #3471
Britta Weber 2013-08-06 10:27:31 +02:00
parent ec770373ab
commit f64065c9d2
4 changed files with 126 additions and 31 deletions
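
As a rough illustration of the scenario described in the commit message, here is a minimal sketch adapted from the tests added below (the index "test", type "type1", and field "existingfield" are the names used in those tests); it is not part of the commit itself:

// the only value of the field is "?", which produces no tokens, so Lucene
// stores no term vectors for this document
client().prepareIndex("test", "type1", "0").setSource("existingfield", "?").execute().actionGet();
refresh();

// before this commit the request below ended in a NullPointerException;
// with the fix it returns a response that exists but carries no term vectors
String[] selectedFields = { "existingfield" };
TermVectorResponse response = client().termVector(
        new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields)).actionGet();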

View File

@ -19,13 +19,8 @@
package org.elasticsearch.action.termvector;
import java.io.IOException;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.Set;
import com.google.common.collect.Iterators;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -36,7 +31,6 @@ import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
import org.elasticsearch.common.Base64;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
@ -46,7 +40,10 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import com.google.common.collect.Iterators;
import java.io.IOException;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.Set;
public class TermVectorResponse extends ActionResponse implements ToXContent {
@ -82,6 +79,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
private String type;
private String id;
private long docVersion;
private boolean exists = false;
private boolean sourceCopied = false;
@ -105,20 +103,27 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
out.writeString(type);
out.writeString(id);
out.writeVLong(docVersion);
final boolean docExists = documentExists();
final boolean docExists = isExists();
out.writeBoolean(docExists);
if (docExists) {
out.writeBoolean(hasTermVectors());
if (hasTermVectors()) {
out.writeBytesReference(headerRef);
out.writeBytesReference(termVectors);
}
}
private boolean hasTermVectors() {
assert (headerRef == null && termVectors == null) || (headerRef != null && termVectors != null);
return headerRef != null;
}
@Override
public void readFrom(StreamInput in) throws IOException {
index = in.readString();
type = in.readString();
id = in.readString();
docVersion = in.readVLong();
exists = in.readBoolean();
if (in.readBoolean()) {
headerRef = in.readBytesReference();
termVectors = in.readBytesReference();
@ -126,7 +131,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
}
public Fields getFields() throws IOException {
if (documentExists()) {
if (hasTermVectors() && isExists()) {
if (!sourceCopied) { // make the bytes safe
headerRef = headerRef.copyBytesArray();
termVectors = termVectors.copyBytesArray();
@ -163,8 +168,8 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
builder.field(FieldStrings._TYPE, type);
builder.field(FieldStrings._ID, id);
builder.field(FieldStrings._VERSION, docVersion);
builder.field(FieldStrings.EXISTS, documentExists());
if (!documentExists()) {
builder.field(FieldStrings.EXISTS, isExists());
if (!isExists()) {
builder.endObject();
return builder;
}
@ -234,7 +239,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
builder.field(FieldStrings.END_OFFSET, 0, termFreq, currentEndOffset);
}
if (curTerms.hasPayloads()) {
builder.array(FieldStrings.PAYLOAD, (Object[])currentPayloads);
builder.array(FieldStrings.PAYLOAD, (Object[]) currentPayloads);
}
}
@ -251,7 +256,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
if (curTerms.hasPayloads()) {
BytesRef curPaypoad = posEnum.getPayload();
currentPayloads[j] = new BytesArray(curPaypoad.bytes, 0, curPaypoad.length);
}
}
}
@ -295,14 +300,19 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
}
}
public boolean documentExists() {
assert (headerRef == null && termVectors == null) || (headerRef != null && termVectors != null);
return headerRef != null;
public boolean isExists() {
return exists;
}
public void setExists(boolean exists) {
this.exists = exists;
}
public void setFields(Fields fields, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
TermVectorWriter tvw = new TermVectorWriter(this);
tvw.setFields(fields, selectedFields, flags, topLevelFields);
if (fields != null) {
tvw.setFields(fields, selectedFields, flags, topLevelFields);
}
}
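
With hasTermVectors() now separate from isExists(), a caller can tell a missing document apart from a document that simply has no term vectors. The following is only a hedged sketch of that distinction; requestBuilder is a placeholder for a TermVectorRequestBuilder as used in the tests further down, and it assumes, based on those tests, that getFields() returns an empty Fields instance rather than null when no term vectors were written:

TermVectorResponse response = requestBuilder.execute().actionGet();
if (!response.isExists()) {
    // the document was not found at all
} else {
    Fields fields = response.getFields();
    if (fields.size() == 0) {
        // the document exists, but no term vectors were returned for the requested fields
    } else {
        Terms terms = fields.terms("existingfield"); // field name taken from the tests below
        // ... iterate terms as usual
    }
}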

View File

@ -19,10 +19,6 @@
package org.elasticsearch.action.termvector;
import java.util.List;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
@ -35,7 +31,6 @@ import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.engine.Engine;
@ -108,11 +103,11 @@ public class TransportSingleShardTermVectorAction extends TransportShardSingleOp
try {
Fields topLevelFields = MultiFields.getFields(topLevelReader);
Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
if (docIdAndVersion != null) {
termVectorResponse.setFields(docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId), request.selectedFields(),
request.getFlags(), topLevelFields);
termVectorResponse.setDocVersion(docIdAndVersion.version);
termVectorResponse.setExists(true);
} else {
}

View File

@ -69,6 +69,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
public void streamTest() throws Exception {
TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c");
outResponse.setExists(true);
writeStandardTermVector(outResponse);
// write
@ -84,6 +85,22 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
// see if correct
checkIfStandardTermVector(inResponse);
outResponse = new TermVectorResponse("a", "b", "c");
writeEmptyTermVector(outResponse);
// write
outBuffer = new ByteArrayOutputStream();
out = new OutputStreamStreamOutput(outBuffer);
outResponse.writeTo(out);
// read
esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
esBuffer = new InputStreamStreamInput(esInBuffer);
inResponse = new TermVectorResponse("a", "b", "c");
inResponse.readFrom(esBuffer);
assertTrue(inResponse.isExists());
}
@ -94,6 +111,36 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
assertThat(fields.terms("desc"), Matchers.notNullValue());
assertThat(fields.size(), equalTo(2));
}
private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
Directory dir = FSDirectory.open(new File("/tmp/foo"));
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
conf.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, conf);
FieldType type = new FieldType(TextField.TYPE_STORED);
type.setStoreTermVectorOffsets(true);
type.setStoreTermVectorPayloads(false);
type.setStoreTermVectorPositions(true);
type.setStoreTermVectors(true);
type.freeze();
Document d = new Document();
d.add(new Field("id", "abc", StringField.TYPE_STORED));
writer.updateDocument(new Term("id", "abc"), d);
writer.commit();
writer.close();
DirectoryReader dr = DirectoryReader.open(dir);
IndexSearcher s = new IndexSearcher(dr);
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
ScoreDoc[] scoreDocs = search.scoreDocs;
int doc = scoreDocs[0].doc;
Fields fields = dr.getTermVectors(doc);
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
outResponse.setFields(fields, null, flags, fields);
outResponse.setExists(true);
}
private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
@ -246,11 +293,54 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
ActionFuture<TermVectorResponse> termVector = client().termVector(new TermVectorRequest("test", "type1", "" + i));
TermVectorResponse actionGet = termVector.actionGet();
assertThat(actionGet, Matchers.notNullValue());
assertThat(actionGet.documentExists(), Matchers.equalTo(false));
assertThat(actionGet.isExists(), Matchers.equalTo(false));
}
}
@Test
public void testExistingFieldWithNoTermVectorsNoNPE() throws Exception {
run(addMapping(prepareCreate("test"), "type1", new Object[] { "existingfield", "type", "string", "term_vector",
"with_positions_offsets_payloads" }));
ensureYellow();
// when indexing a field that simply has a question mark, the term
// vectors will be null
client().prepareIndex("test", "type1", "0").setSource("existingfield", "?").execute().actionGet();
refresh();
String[] selectedFields = { "existingfield" };
ActionFuture<TermVectorResponse> termVector = client().termVector(
new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields));
// lets see if the null term vectors are caught...
termVector.actionGet();
TermVectorResponse actionGet = termVector.actionGet();
assertThat(actionGet.isExists(), Matchers.equalTo(true));
}
@Test
public void testExistingFieldButNotInDocNPE() throws Exception {
run(addMapping(prepareCreate("test"), "type1", new Object[] { "existingfield", "type", "string", "term_vector",
"with_positions_offsets_payloads" }));
ensureYellow();
// the requested field is not present in this document, so there are no term
// vectors for it
client().prepareIndex("test", "type1", "0").setSource("anotherexistingfield", 1).execute().actionGet();
refresh();
String[] selectedFields = { "existingfield" };
ActionFuture<TermVectorResponse> termVector = client().termVector(
new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields));
// lets see if the null term vectors are caught...
TermVectorResponse actionGet = termVector.actionGet();
assertThat(actionGet.isExists(), Matchers.equalTo(true));
}
@Test
public void testSimpleTermVectors() throws ElasticSearchException, IOException {
@ -278,7 +368,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true)
.setOffsets(true).setPositions(true).setSelectedFields();
TermVectorResponse response = resp.execute().actionGet();
assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
Fields fields = response.getFields();
assertThat(fields.size(), equalTo(1));
Terms terms = fields.terms("field");
@ -387,7 +477,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
.setPayloads(isPayloadRequested).setOffsets(isOffsetRequested).setPositions(isPositionsRequested).setSelectedFields();
TermVectorResponse response = resp.execute().actionGet();
assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
Fields fields = response.getFields();
assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0));
if (ft.storeTermVectors()) {

View File

@ -122,7 +122,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true)
.setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields();
TermVectorResponse response = resp.execute().actionGet();
assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
Fields fields = response.getFields();
assertThat(fields.size(), equalTo(1));
Terms terms = fields.terms("field");
@ -181,7 +181,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
.setPositions(true).setTermStatistics(false).setFieldStatistics(true).setSelectedFields();
assertThat(resp.request().termStatistics(), equalTo(false));
TermVectorResponse response = resp.execute().actionGet();
assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
Fields fields = response.getFields();
assertThat(fields.size(), equalTo(1));
Terms terms = fields.terms("field");
@ -238,7 +238,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
.setPositions(true).setFieldStatistics(true).setTermStatistics(true).setSelectedFields();
assertThat(resp.request().fieldStatistics(), equalTo(true));
TermVectorResponse response = resp.execute().actionGet();
assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
Fields fields = response.getFields();
assertThat(fields.size(), equalTo(1));
Terms terms = fields.terms("field");