Term Vectors API: adds support for wildcards in selected fields

This could be useful to generate all term vectors or a chosen set of them.

Closes #7061
This commit is contained in:
Alex Ksikes 2014-07-28 15:59:27 +02:00
parent 2077d4be48
commit e3b3b6c055
5 changed files with 45 additions and 6 deletions

View File

@ -20,7 +20,8 @@ curl -XGET 'http://localhost:9200/twitter/tweet/1/_termvector?fields=text,...'
--------------------------------------------------
or by adding the requested fields in the request body (see
example below).
example below). Fields can also be specified with wildcards
in a similar way to the <<query-dsl-multi-match-query,multi match query>> added[1.4.0].
[float]
=== Return values

View File

@ -75,9 +75,7 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
return requests;
}
public void add(TermVectorRequest template, BytesReference data)
throws Exception {
public void add(TermVectorRequest template, BytesReference data) throws Exception {
XContentParser.Token token;
String currentFieldName = null;
if (data.length() > 0) {

View File

@ -358,7 +358,6 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
currentFieldName = parser.currentName();
} else if (currentFieldName != null) {
if (currentFieldName.equals("fields")) {
if (token == XContentParser.Token.START_ARRAY) {
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
fields.add(parser.text());

View File

@ -28,6 +28,7 @@ import org.elasticsearch.action.termvector.TermVectorResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.get.GetField;
@ -71,8 +72,12 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
Fields topLevelFields = MultiFields.getFields(topLevelReader);
Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
if (docIdAndVersion != null) {
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
/* handle potential wildcards in fields */
if (request.selectedFields() != null) {
handleFieldWildcards(request);
}
/* generate term vectors if not available */
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
if (request.selectedFields() != null) {
termVectorsByField = generateTermVectorsIfNeeded(termVectorsByField, request, uidTerm, false);
}
@ -90,6 +95,14 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
return termVectorResponse;
}
/**
 * Replaces the selected fields of the given request with the names obtained by
 * resolving each selected entry through the mapper service.
 *
 * Every entry of {@code request.selectedFields()} is passed as a pattern to
 * {@code simpleMatchToIndexNames}; the results are gathered in a set, so a name
 * matched by several patterns is kept only once. The request is modified in place.
 *
 * NOTE(review): callers are expected to check that the selected fields are not
 * null before invoking this method; no null check is done here.
 *
 * @param request the term vector request whose selected fields are rewritten
 */
private void handleFieldWildcards(TermVectorRequest request) {
    final Set<String> resolvedNames = new HashSet<>();
    for (String fieldPattern : request.selectedFields()) {
        // a single pattern may resolve to several names
        resolvedNames.addAll(indexShard.mapperService().simpleMatchToIndexNames(fieldPattern));
    }
    final String[] expandedFields = resolvedNames.toArray(Strings.EMPTY_ARRAY);
    request.selectedFields(expandedFields);
}
private Fields generateTermVectorsIfNeeded(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException {
List<String> validFields = new ArrayList<>();
for (String field : request.selectedFields()) {
@ -187,4 +200,5 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
return fields.size();
}
}
}

View File

@ -740,4 +740,31 @@ public class GetTermVectorTests extends AbstractTermVectorTests {
assertThat(iter0.next(), nullValue());
assertThat(iter1.next(), nullValue());
}
/**
 * Checks that a wildcard in the selected fields of a term vector request
 * matches every mapped field.
 *
 * The test maps 25 string fields ("field0" .. "field24"), each with
 * {@code term_vector} randomly set to "yes" or "no", indexes one document
 * holding a value for every field, and then requests term vectors for
 * "field*". The response must report the document as existing and contain
 * one entry per mapped field.
 */
@Test
public void testSimpleWildCards() throws ElasticsearchException, IOException {
    final int fieldCount = 25;

    XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
    mappingBuilder.startObject();
    mappingBuilder.startObject("type1");
    mappingBuilder.startObject("properties");

    XContentBuilder docBuilder = XContentFactory.jsonBuilder();
    docBuilder.startObject();

    for (int idx = 0; idx < fieldCount; idx++) {
        final String fieldName = "field" + idx;
        // randomly decide whether term vectors are stored for this field
        final String termVectorSetting;
        if (randomBoolean()) {
            termVectorSetting = "yes";
        } else {
            termVectorSetting = "no";
        }
        mappingBuilder.startObject(fieldName);
        mappingBuilder.field("type", "string");
        mappingBuilder.field("term_vector", termVectorSetting);
        mappingBuilder.endObject();

        docBuilder.field(fieldName, "some text here");
    }

    docBuilder.endObject();
    // close "properties", "type1" and the root object
    mappingBuilder.endObject();
    mappingBuilder.endObject();
    mappingBuilder.endObject();

    assertAcked(prepareCreate("test").addMapping("type1", mappingBuilder));
    ensureGreen();

    client().prepareIndex("test", "type1", "0").setSource(docBuilder).get();
    refresh();

    TermVectorResponse wildcardResponse = client().prepareTermVector("test", "type1", "0")
            .setSelectedFields("field*")
            .get();
    assertThat("Doc doesn't exists but should", wildcardResponse.isExists(), equalTo(true));
    assertThat("All term vectors should have been generated", wildcardResponse.getFields().size(), equalTo(fieldCount));
}
}