termvectors: fix null pointer exception if field has no term vectors
Retrieving term vectors for a document that does not have the requested field used to cause a null pointer exception. The same happened when the field has no term vectors, for example because the field only contains "?". Now an empty response is returned instead. Closes #3471
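For context, a minimal client-side sketch of how the new behaviour can be consumed. This is not part of the commit; it only uses the TermVectorRequest/TermVectorResponse methods visible in the diff below, and the Client instance, index, type, id and field names are placeholders.

    import org.apache.lucene.index.Fields;
    import org.elasticsearch.action.termvector.TermVectorRequest;
    import org.elasticsearch.action.termvector.TermVectorResponse;
    import org.elasticsearch.client.Client;

    public class TermVectorUsageSketch {

        // Fetch term vectors for one document and distinguish "document missing"
        // from "document exists but carries no term vectors".
        public static void printTermVectors(Client client) throws Exception {
            TermVectorResponse response = client.termVector(
                    new TermVectorRequest("test", "type1", "0").selectedFields(new String[] { "existingfield" }))
                    .actionGet();
            if (!response.isExists()) {
                System.out.println("document does not exist");
                return;
            }
            // With this fix the call below no longer throws a NullPointerException;
            // a field without term vectors yields an empty result.
            Fields fields = response.getFields();
            if (fields == null) { // defensive: treat like "no term vectors"
                return;
            }
            for (String field : fields) {
                System.out.println("term vectors returned for field: " + field);
            }
        }
    }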
parent ec770373ab
commit f64065c9d2
TermVectorResponse.java:

@@ -19,13 +19,8 @@
 package org.elasticsearch.action.termvector;
 
-import java.io.IOException;
-import java.util.EnumSet;
-import java.util.Iterator;
-import java.util.Set;
 
-import com.google.common.collect.Iterators;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;

@@ -36,7 +31,6 @@ import org.apache.lucene.util.UnicodeUtil;
 import org.elasticsearch.ElasticSearchIllegalStateException;
 import org.elasticsearch.action.ActionResponse;
 import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
-import org.elasticsearch.common.Base64;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;

@@ -46,7 +40,10 @@ import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentBuilderString;
 
+import com.google.common.collect.Iterators;
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.Set;
 
 public class TermVectorResponse extends ActionResponse implements ToXContent {

@@ -82,6 +79,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
     private String type;
     private String id;
     private long docVersion;
+    private boolean exists = false;
 
     private boolean sourceCopied = false;
 

@@ -105,20 +103,27 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
         out.writeString(type);
         out.writeString(id);
         out.writeVLong(docVersion);
-        final boolean docExists = documentExists();
+        final boolean docExists = isExists();
         out.writeBoolean(docExists);
-        if (docExists) {
+        out.writeBoolean(hasTermVectors());
+        if (hasTermVectors()) {
             out.writeBytesReference(headerRef);
             out.writeBytesReference(termVectors);
         }
     }
 
+    private boolean hasTermVectors() {
+        assert (headerRef == null && termVectors == null) || (headerRef != null && termVectors != null);
+        return headerRef != null;
+    }
+
     @Override
     public void readFrom(StreamInput in) throws IOException {
         index = in.readString();
         type = in.readString();
         id = in.readString();
         docVersion = in.readVLong();
+        exists = in.readBoolean();
         if (in.readBoolean()) {
             headerRef = in.readBytesReference();
             termVectors = in.readBytesReference();
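Note on the hunk above: the response now always writes an "exists" flag followed by a "has term vectors" flag, and only then the optional byte payload, while readFrom consumes the flags in the same order. A minimal sketch of that flag-then-payload pattern using plain java.io streams (an illustration, not the Elasticsearch StreamOutput/StreamInput API):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class FlagGuardedStreamSketch {

        static void write(DataOutputStream out, boolean exists, byte[] termVectors) throws IOException {
            out.writeBoolean(exists);                  // mirrors out.writeBoolean(docExists)
            boolean hasTermVectors = termVectors != null;
            out.writeBoolean(hasTermVectors);          // mirrors out.writeBoolean(hasTermVectors())
            if (hasTermVectors) {
                out.writeInt(termVectors.length);
                out.write(termVectors);
            }
        }

        static byte[] read(DataInputStream in) throws IOException {
            boolean exists = in.readBoolean();         // mirrors exists = in.readBoolean()
            if (!in.readBoolean()) {                   // mirrors if (in.readBoolean()) { ... }
                return exists ? new byte[0] : null;    // empty result vs. missing document
            }
            byte[] bytes = new byte[in.readInt()];
            in.readFully(bytes);
            return bytes;
        }

        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            write(new DataOutputStream(buffer), true, null); // exists, but no term vectors
            byte[] roundTripped = read(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
            System.out.println("term vector bytes: " + roundTripped.length); // prints 0
        }
    }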
@@ -126,7 +131,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
     }
 
     public Fields getFields() throws IOException {
-        if (documentExists()) {
+        if (hasTermVectors() && isExists()) {
             if (!sourceCopied) { // make the bytes safe
                 headerRef = headerRef.copyBytesArray();
                 termVectors = termVectors.copyBytesArray();

@@ -163,8 +168,8 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
         builder.field(FieldStrings._TYPE, type);
         builder.field(FieldStrings._ID, id);
         builder.field(FieldStrings._VERSION, docVersion);
-        builder.field(FieldStrings.EXISTS, documentExists());
-        if (!documentExists()) {
+        builder.field(FieldStrings.EXISTS, isExists());
+        if (!isExists()) {
             builder.endObject();
             return builder;
         }

@@ -234,7 +239,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
                     builder.field(FieldStrings.END_OFFSET, 0, termFreq, currentEndOffset);
                 }
                 if (curTerms.hasPayloads()) {
-                    builder.array(FieldStrings.PAYLOAD, (Object[])currentPayloads);
+                    builder.array(FieldStrings.PAYLOAD, (Object[]) currentPayloads);
                 }
             }

@@ -251,7 +256,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
                 if (curTerms.hasPayloads()) {
                     BytesRef curPaypoad = posEnum.getPayload();
                     currentPayloads[j] = new BytesArray(curPaypoad.bytes, 0, curPaypoad.length);
 
                 }
             }
         }

@@ -295,14 +300,19 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
         }
     }
 
-    public boolean documentExists() {
-        assert (headerRef == null && termVectors == null) || (headerRef != null && termVectors != null);
-        return headerRef != null;
+    public boolean isExists() {
+        return exists;
+    }
+
+    public void setExists(boolean exists) {
+        this.exists = exists;
     }
 
     public void setFields(Fields fields, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
         TermVectorWriter tvw = new TermVectorWriter(this);
-        tvw.setFields(fields, selectedFields, flags, topLevelFields);
+        if (fields != null) {
+            tvw.setFields(fields, selectedFields, flags, topLevelFields);
+        }
 
     }
TransportSingleShardTermVectorAction.java:

@@ -19,10 +19,6 @@
 package org.elasticsearch.action.termvector;
 
-import java.util.List;
-
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;

@@ -35,7 +31,6 @@ import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.routing.ShardIterator;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.uid.Versions;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.engine.Engine;

@@ -108,11 +103,11 @@ public class TransportSingleShardTermVectorAction extends TransportShardSingleOp
         try {
             Fields topLevelFields = MultiFields.getFields(topLevelReader);
             Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
 
             if (docIdAndVersion != null) {
                 termVectorResponse.setFields(docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId), request.selectedFields(),
                         request.getFlags(), topLevelFields);
                 termVectorResponse.setDocVersion(docIdAndVersion.version);
+                termVectorResponse.setExists(true);
             } else {
 
             }
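The setExists(true) call above works together with a Lucene contract: IndexReader.getTermVectors(docId) returns null when the document stored no term vectors. A hypothetical helper (not from this commit) that mirrors what the action does once the document has been resolved; the class, method and parameter names are placeholders:

    import java.io.IOException;

    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.IndexReader;
    import org.elasticsearch.action.termvector.TermVectorRequest;
    import org.elasticsearch.action.termvector.TermVectorResponse;

    public class TermVectorFillSketch {

        // Copies a resolved document's term vectors into the response.
        static void fill(TermVectorResponse response, IndexReader reader, int docId, long version,
                         TermVectorRequest request, Fields topLevelFields) throws IOException {
            // may be null: the document exists but stored no term vectors
            Fields termVectors = reader.getTermVectors(docId);
            response.setFields(termVectors, request.selectedFields(), request.getFlags(), topLevelFields);
            response.setDocVersion(version);
            response.setExists(true);
            // a null Fields is now tolerated by setFields(), so the caller gets an
            // empty response instead of a NullPointerException
        }
    }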
GetTermVectorTests.java:

@@ -69,6 +69,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
     public void streamTest() throws Exception {
 
         TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c");
+        outResponse.setExists(true);
         writeStandardTermVector(outResponse);
 
         // write

@@ -84,6 +85,22 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
 
         // see if correct
         checkIfStandardTermVector(inResponse);
 
+        outResponse = new TermVectorResponse("a", "b", "c");
+        writeEmptyTermVector(outResponse);
+        // write
+        outBuffer = new ByteArrayOutputStream();
+        out = new OutputStreamStreamOutput(outBuffer);
+        outResponse.writeTo(out);
+
+        // read
+        esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
+        esBuffer = new InputStreamStreamInput(esInBuffer);
+        inResponse = new TermVectorResponse("a", "b", "c");
+        inResponse.readFrom(esBuffer);
+        assertTrue(inResponse.isExists());
+
     }

@@ -94,6 +111,36 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
         assertThat(fields.terms("desc"), Matchers.notNullValue());
         assertThat(fields.size(), equalTo(2));
     }
 
+    private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
+
+        Directory dir = FSDirectory.open(new File("/tmp/foo"));
+        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
+        conf.setOpenMode(OpenMode.CREATE);
+        IndexWriter writer = new IndexWriter(dir, conf);
+        FieldType type = new FieldType(TextField.TYPE_STORED);
+        type.setStoreTermVectorOffsets(true);
+        type.setStoreTermVectorPayloads(false);
+        type.setStoreTermVectorPositions(true);
+        type.setStoreTermVectors(true);
+        type.freeze();
+        Document d = new Document();
+        d.add(new Field("id", "abc", StringField.TYPE_STORED));
+
+        writer.updateDocument(new Term("id", "abc"), d);
+        writer.commit();
+        writer.close();
+        DirectoryReader dr = DirectoryReader.open(dir);
+        IndexSearcher s = new IndexSearcher(dr);
+        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
+        ScoreDoc[] scoreDocs = search.scoreDocs;
+        int doc = scoreDocs[0].doc;
+        Fields fields = dr.getTermVectors(doc);
+        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
+        outResponse.setFields(fields, null, flags, fields);
+        outResponse.setExists(true);
+
+    }
+
     private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
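A possible variant of the fixture above, sketched against an in-memory RAMDirectory instead of the hard-coded /tmp/foo path. This is an assumption, not part of the commit, and Version.LUCENE_CURRENT stands in for the test's TEST_VERSION_CURRENT; the point is the same: a document whose only field stores no term vectors makes getTermVectors() return null, which is exactly the input the new setFields() guard has to accept.

    import java.io.IOException;
    import java.util.EnumSet;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
    import org.elasticsearch.action.termvector.TermVectorResponse;

    public class EmptyTermVectorFixtureSketch {

        static void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
            Directory dir = new RAMDirectory(); // in-memory, no /tmp path needed
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT));
            conf.setOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(dir, conf);
            Document d = new Document();
            // only a stored id field, no term-vector-enabled field at all
            d.add(new Field("id", "abc", StringField.TYPE_STORED));
            writer.updateDocument(new Term("id", "abc"), d);
            writer.commit();
            writer.close();

            DirectoryReader dr = DirectoryReader.open(dir);
            Fields fields = dr.getTermVectors(0); // null: no term vectors were stored
            EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
            outResponse.setFields(fields, null, flags, fields);
            outResponse.setExists(true);
            dr.close();
            dir.close();
        }
    }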
@@ -246,11 +293,54 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
             ActionFuture<TermVectorResponse> termVector = client().termVector(new TermVectorRequest("test", "type1", "" + i));
             TermVectorResponse actionGet = termVector.actionGet();
             assertThat(actionGet, Matchers.notNullValue());
-            assertThat(actionGet.documentExists(), Matchers.equalTo(false));
+            assertThat(actionGet.isExists(), Matchers.equalTo(false));
 
         }
 
     }
 
+    @Test
+    public void testExistingFieldWithNoTermVectorsNoNPE() throws Exception {
+
+        run(addMapping(prepareCreate("test"), "type1", new Object[] { "existingfield", "type", "string", "term_vector",
+                "with_positions_offsets_payloads" }));
+
+        ensureYellow();
+        // when indexing a field that simply has a question mark, the term
+        // vectors will be null
+        client().prepareIndex("test", "type1", "0").setSource("existingfield", "?").execute().actionGet();
+        refresh();
+        String[] selectedFields = { "existingfield" };
+        ActionFuture<TermVectorResponse> termVector = client().termVector(
+                new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields));
+        // lets see if the null term vectors are caught...
+        termVector.actionGet();
+        TermVectorResponse actionGet = termVector.actionGet();
+        assertThat(actionGet.isExists(), Matchers.equalTo(true));
+
+    }
+
+    @Test
+    public void testExistingFieldButNotInDocNPE() throws Exception {
+
+        run(addMapping(prepareCreate("test"), "type1", new Object[] { "existingfield", "type", "string", "term_vector",
+                "with_positions_offsets_payloads" }));
+
+        ensureYellow();
+        // the requested field is mapped but not present in this document, so
+        // its term vectors will be null
+        client().prepareIndex("test", "type1", "0").setSource("anotherexistingfield", 1).execute().actionGet();
+        refresh();
+        String[] selectedFields = { "existingfield" };
+        ActionFuture<TermVectorResponse> termVector = client().termVector(
+                new TermVectorRequest("test", "type1", "0").selectedFields(selectedFields));
+        // lets see if the null term vectors are caught...
+        TermVectorResponse actionGet = termVector.actionGet();
+        assertThat(actionGet.isExists(), Matchers.equalTo(true));
+
+    }
+
     @Test
     public void testSimpleTermVectors() throws ElasticSearchException, IOException {

@@ -278,7 +368,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
             TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true)
                     .setOffsets(true).setPositions(true).setSelectedFields();
             TermVectorResponse response = resp.execute().actionGet();
-            assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
+            assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
             Fields fields = response.getFields();
             assertThat(fields.size(), equalTo(1));
             Terms terms = fields.terms("field");

@@ -387,7 +477,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
             TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
                     .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested).setPositions(isPositionsRequested).setSelectedFields();
             TermVectorResponse response = resp.execute().actionGet();
-            assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
+            assertThat(infoString + "doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
             Fields fields = response.getFields();
             assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0));
             if (ft.storeTermVectors()) {
GetTermVectorTestsCheckDocFreq.java:

@@ -122,7 +122,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
             TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true)
                     .setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields();
             TermVectorResponse response = resp.execute().actionGet();
-            assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
+            assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
             Fields fields = response.getFields();
             assertThat(fields.size(), equalTo(1));
             Terms terms = fields.terms("field");

@@ -181,7 +181,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
                     .setPositions(true).setTermStatistics(false).setFieldStatistics(true).setSelectedFields();
             assertThat(resp.request().termStatistics(), equalTo(false));
             TermVectorResponse response = resp.execute().actionGet();
-            assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
+            assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
             Fields fields = response.getFields();
             assertThat(fields.size(), equalTo(1));
             Terms terms = fields.terms("field");

@@ -238,7 +238,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
                     .setPositions(true).setFieldStatistics(true).setTermStatistics(true).setSelectedFields();
             assertThat(resp.request().fieldStatistics(), equalTo(true));
             TermVectorResponse response = resp.execute().actionGet();
-            assertThat("doc id: " + i + " doesn't exists but should", response.documentExists(), equalTo(true));
+            assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
             Fields fields = response.getFields();
             assertThat(fields.size(), equalTo(1));
             Terms terms = fields.terms("field");