Allow string fields to store token counts

To use it, you send a string to a field of type 'token_count'. This
makes the most sense as part of a multi-field.
Nik Everett 2013-11-01 21:00:59 -04:00 committed by Adrien Grand
parent 3494ac252e
commit 7690b40ec6
8 changed files with 643 additions and 35 deletions

View File

@@ -212,6 +212,49 @@ defaults to `true` or to the parent `object` type setting.
|=======================================================================
[float]
[[token_count]]
==== Token Count
added[0.90.8]
The `token_count` type maps to the JSON string type but indexes and stores
the number of tokens in the string rather than the string itself. For
example:
[source,js]
--------------------------------------------------
{
    "tweet" : {
        "properties" : {
            "message" : {
                "type" : "multi_field",
                "fields" : {
                    "name" : {
                        "type" : "string"
                    },
                    "word_count" : {
                        "type" : "token_count",
                        "store" : "yes",
                        "analyzer" : "standard"
                    }
                }
            }
        }
    }
}
--------------------------------------------------
All the configuration that can be specified for a number can also be
specified for a `token_count`. The only extra configuration is the
required `analyzer` field, which specifies the analyzer to use to break
the string into tokens. For best performance, use an analyzer with no
token filters.
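Because the counted values are indexed as ordinary integers, they can be
queried like any other number. As a sketch against the example mapping
above (where the counted sub-field is addressed as `message.word_count`),
a search for messages between two and ten tokens long might look like:
[source,js]
--------------------------------------------------
{
    "query" : {
        "range" : {
            "message.word_count" : {
                "gte" : 2,
                "lte" : 10
            }
        }
    }
}
--------------------------------------------------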
[NOTE]
===================================================================
Technically the `token_count` type sums position increments rather than
counting tokens. This means that even if the analyzer filters out stop
words they are included in the count: removing a stop word leaves a gap
in the token positions, and that gap is still counted. For example, an
analyzer that strips `the` from `the quick brown fox` still yields a
count of 4.
===================================================================
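Because the example mapping above sets `"store" : "yes"` on the
`word_count` sub-field, the count can also be returned with each hit, for
example with a request along these lines:
[source,js]
--------------------------------------------------
{
    "query" : {
        "match_all" : {}
    },
    "fields" : ["message.word_count"]
}
--------------------------------------------------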
[float]
[[date]]
==== Date

View File

@@ -95,6 +95,7 @@ public class DocumentMapperParser extends AbstractIndexComponent {
                .put(DateFieldMapper.CONTENT_TYPE, new DateFieldMapper.TypeParser())
                .put(IpFieldMapper.CONTENT_TYPE, new IpFieldMapper.TypeParser())
                .put(StringFieldMapper.CONTENT_TYPE, new StringFieldMapper.TypeParser())
                .put(TokenCountFieldMapper.CONTENT_TYPE, new TokenCountFieldMapper.TypeParser())
                .put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser())
                .put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser())
                .put(MultiFieldMapper.CONTENT_TYPE, new MultiFieldMapper.TypeParser())

View File

@@ -133,6 +133,10 @@ public final class MapperBuilders {
        return new IntegerFieldMapper.Builder(name);
    }

    public static TokenCountFieldMapper.Builder tokenCountField(String name) {
        return new TokenCountFieldMapper.Builder(name);
    }

    public static LongFieldMapper.Builder longField(String name) {
        return new LongFieldMapper.Builder(name);
    }

View File

@@ -313,7 +313,10 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
                }
            }
        }
        addIntegerFields(fields, value, boost);
    }

    protected void addIntegerFields(List<Field> fields, int value, float boost) {
        if (fieldType.indexed() || fieldType.stored()) {
            CustomIntegerNumericField field = new CustomIntegerNumericField(this, value, fieldType);
            field.setBoost(boost);
@@ -326,6 +329,10 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
        }
    }

    protected Integer nullValue() {
        return nullValue;
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
View File

@@ -259,57 +259,69 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
    @Override
    protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
        ValueAndBoost valueAndBoost = parseCreateFieldForString(context, nullValue, boost);
        if (valueAndBoost.value() == null) {
            return;
        }
        if (ignoreAbove > 0 && valueAndBoost.value().length() > ignoreAbove) {
            return;
        }
        if (context.includeInAll(includeInAll, this)) {
            context.allEntries().addText(names.fullName(), valueAndBoost.value(), valueAndBoost.boost());
        }
        if (fieldType.indexed() || fieldType.stored()) {
            Field field = new StringField(names.indexName(), valueAndBoost.value(), fieldType);
            field.setBoost(valueAndBoost.boost());
            fields.add(field);
        }
        if (hasDocValues()) {
            fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(valueAndBoost.value())));
        }
        if (fields.isEmpty()) {
            context.ignoredValue(names.indexName(), valueAndBoost.value());
        }
    }

    /**
     * Parse a field as though it were a string.
     * @param context parse context used during parsing
     * @param nullValue value to use for null
     * @param defaultBoost default boost value returned unless overwritten in the field
     * @return the parsed field and the boost either parsed or defaulted
     * @throws IOException if thrown while parsing
     */
    public static ValueAndBoost parseCreateFieldForString(ParseContext context, String nullValue, float defaultBoost) throws IOException {
        if (context.externalValueSet()) {
            return new ValueAndBoost((String) context.externalValue(), defaultBoost);
        }
        XContentParser parser = context.parser();
        if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
            return new ValueAndBoost(nullValue, defaultBoost);
        }
        if (parser.currentToken() == XContentParser.Token.START_OBJECT) {
            XContentParser.Token token;
            String currentFieldName = null;
            String value = nullValue;
            float boost = defaultBoost;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else {
                    if ("value".equals(currentFieldName) || "_value".equals(currentFieldName)) {
                        value = parser.textOrNull();
                    } else if ("boost".equals(currentFieldName) || "_boost".equals(currentFieldName)) {
                        boost = parser.floatValue();
                    } else {
                        throw new ElasticSearchIllegalArgumentException("unknown property [" + currentFieldName + "]");
                    }
                }
            }
            return new ValueAndBoost(value, boost);
        }
        return new ValueAndBoost(parser.textOrNull(), defaultBoost);
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
@@ -437,4 +449,33 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
            value = null;
        }
    }

    /**
     * Parsed value and boost to be returned from {@link #parseCreateFieldForString}.
     */
    public static class ValueAndBoost {
        private final String value;
        private final float boost;

        public ValueAndBoost(String value, float boost) {
            this.value = value;
            this.boost = boost;
        }

        /**
         * Value of string field.
         * @return value of string field
         */
        public String value() {
            return value;
        }

        /**
         * Boost either parsed from the document or defaulted.
         * @return boost either parsed from the document or defaulted
         */
        public float boost() {
            return boost;
        }
    }
}

View File

@@ -0,0 +1,198 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.StringFieldMapper.ValueAndBoost;
import org.elasticsearch.index.similarity.SimilarityProvider;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeIntegerValue;
import static org.elasticsearch.index.mapper.MapperBuilders.tokenCountField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseNumberField;
/**
 * A {@link FieldMapper} that takes a string and writes a count of the tokens in that string
 * to the index. In most ways the mapper acts just like an {@link IntegerFieldMapper}.
 */
public class TokenCountFieldMapper extends IntegerFieldMapper {
    public static final String CONTENT_TYPE = "token_count";

    public static class Defaults extends IntegerFieldMapper.Defaults {
    }

    public static class Builder extends NumberFieldMapper.Builder<Builder, TokenCountFieldMapper> {
        private Integer nullValue = Defaults.NULL_VALUE;
        private NamedAnalyzer analyzer;

        public Builder(String name) {
            super(name, new FieldType(Defaults.FIELD_TYPE));
            builder = this;
        }

        public Builder nullValue(int nullValue) {
            this.nullValue = nullValue;
            return this;
        }

        public Builder analyzer(NamedAnalyzer analyzer) {
            this.analyzer = analyzer;
            return this;
        }

        public NamedAnalyzer analyzer() {
            return analyzer;
        }

        @Override
        public TokenCountFieldMapper build(BuilderContext context) {
            fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f);
            TokenCountFieldMapper fieldMapper = new TokenCountFieldMapper(buildNames(context), precisionStep, boost, fieldType, nullValue,
                    ignoreMalformed(context), postingsProvider, docValuesProvider, similarity, fieldDataSettings, context.indexSettings(),
                    analyzer);
            fieldMapper.includeInAll(includeInAll);
            return fieldMapper;
        }
    }

    public static class TypeParser implements Mapper.TypeParser {
        @Override
        @SuppressWarnings("unchecked")
        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
            TokenCountFieldMapper.Builder builder = tokenCountField(name);
            parseNumberField(builder, name, node, parserContext);
            for (Map.Entry<String, Object> entry : node.entrySet()) {
                String propName = Strings.toUnderscoreCase(entry.getKey());
                Object propNode = entry.getValue();
                if (propName.equals("null_value")) {
                    builder.nullValue(nodeIntegerValue(propNode));
                } else if (propName.equals("analyzer")) {
                    NamedAnalyzer analyzer = parserContext.analysisService().analyzer(propNode.toString());
                    if (analyzer == null) {
                        throw new MapperParsingException("Analyzer [" + propNode.toString() + "] not found for field [" + name + "]");
                    }
                    builder.analyzer(analyzer);
                }
            }
            if (builder.analyzer() == null) {
                throw new MapperParsingException("Analyzer must be set for field [" + name + "] but wasn't.");
            }
            return builder;
        }
    }

    private NamedAnalyzer analyzer;

    protected TokenCountFieldMapper(Names names, int precisionStep, float boost, FieldType fieldType, Integer nullValue,
            Explicit<Boolean> ignoreMalformed, PostingsFormatProvider postingsProvider, DocValuesFormatProvider docValuesProvider,
            SimilarityProvider similarity, Settings fieldDataSettings, Settings indexSettings, NamedAnalyzer analyzer) {
        super(names, precisionStep, boost, fieldType, nullValue, ignoreMalformed, postingsProvider, docValuesProvider, similarity,
                fieldDataSettings, indexSettings);
        this.analyzer = analyzer;
    }

    @Override
    protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
        ValueAndBoost valueAndBoost = StringFieldMapper.parseCreateFieldForString(context, null /* Our null value is an int so we handle it below */, boost);
        if (valueAndBoost.value() == null && nullValue() == null) {
            return;
        }

        if (fieldType.indexed() || fieldType.stored() || hasDocValues()) {
            int count;
            if (valueAndBoost.value() == null) {
                count = nullValue();
            } else {
                count = countPositions(analyzer.analyzer().tokenStream(name(), valueAndBoost.value()));
            }
            addIntegerFields(fields, count, valueAndBoost.boost());
        }
        if (fields.isEmpty()) {
            context.ignoredValue(names.indexName(), valueAndBoost.value());
        }
    }

    /**
     * Count position increments in a token stream. Package private for testing.
     * @param tokenStream token stream to count
     * @return number of position increments in a token stream
     * @throws IOException if tokenStream throws it
     */
    static int countPositions(TokenStream tokenStream) throws IOException {
        try {
            int count = 0;
            PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                count += position.getPositionIncrement();
            }
            tokenStream.end();
            // end() exposes the final position increment, so trailing gaps (e.g. removed trailing stop words) are counted too
            count += position.getPositionIncrement();
            return count;
        } finally {
            tokenStream.close();
        }
    }

    /**
     * Name of analyzer.
     * @return name of analyzer
     */
    public String analyzer() {
        return analyzer.name();
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    @Override
    public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
        super.merge(mergeWith, mergeContext);
        if (!this.getClass().equals(mergeWith.getClass())) {
            return;
        }
        if (!mergeContext.mergeFlags().simulate()) {
            this.analyzer = ((TokenCountFieldMapper) mergeWith).analyzer;
        }
    }

    @Override
    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
        super.doXContentBody(builder, includeDefaults, params);
        builder.field("analyzer", analyzer());
    }
}

View File

@@ -0,0 +1,222 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.core;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import com.google.common.collect.ImmutableList;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.TermsFacetBuilder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.*;
public class TokenCountFieldMapperIntegrationTests extends ElasticsearchIntegrationTest {
    @ParametersFactory
    public static Iterable<Object[]> buildParameters() {
        List<Object[]> parameters = new ArrayList<Object[]>();
        for (boolean storeCountedFields : new boolean[] { true, false }) {
            for (boolean loadCountedFields : new boolean[] { true, false }) {
                parameters.add(new Object[] { storeCountedFields, loadCountedFields });
            }
        }
        return parameters;
    }

    private final boolean storeCountedFields;
    private final boolean loadCountedFields;

    public TokenCountFieldMapperIntegrationTests(@Name("storeCountedFields") boolean storeCountedFields,
            @Name("loadCountedFields") boolean loadCountedFields) {
        this.storeCountedFields = storeCountedFields;
        this.loadCountedFields = loadCountedFields;
    }

    /**
     * It is possible to get the token count in a search response.
     */
    @Test
    public void searchReturnsTokenCount() throws ElasticSearchException, IOException {
        init();

        assertSearchReturns(searchById("single"), "single");
        assertSearchReturns(searchById("bulk1"), "bulk1");
        assertSearchReturns(searchById("bulk2"), "bulk2");
        assertSearchReturns(searchById("multi"), "multi");
        assertSearchReturns(searchById("multibulk1"), "multibulk1");
        assertSearchReturns(searchById("multibulk2"), "multibulk2");
    }

    /**
     * It is possible to search by token count.
     */
    @Test
    public void searchByTokenCount() throws ElasticSearchException, IOException {
        init();

        assertSearchReturns(searchByNumericRange(4, 4).get(), "single");
        assertSearchReturns(searchByNumericRange(10, 10).get(), "multibulk2");
        assertSearchReturns(searchByNumericRange(7, 10).get(), "multi", "multibulk1", "multibulk2");
        assertSearchReturns(searchByNumericRange(1, 10).get(), "single", "bulk1", "bulk2", "multi", "multibulk1", "multibulk2");
        assertSearchReturns(searchByNumericRange(12, 12).get());
    }

    /**
     * It is possible to facet by token count.
     */
    @Test
    public void facetByTokenCount() throws ElasticSearchException, IOException {
        init();

        String facetField = randomFrom(ImmutableList.of(
                "foo.token_count", "foo.token_count_unstored", "foo.token_count_with_doc_values"));
        SearchResponse result = searchByNumericRange(1, 10)
                .addFacet(new TermsFacetBuilder("facet").field(facetField)).get();
        assertSearchReturns(result, "single", "bulk1", "bulk2", "multi", "multibulk1", "multibulk2");
        assertThat(result.getFacets().facets().size(), equalTo(1));
        TermsFacet facet = (TermsFacet) result.getFacets().facets().get(0);
        assertThat(facet.getEntries().size(), equalTo(9));
    }

    private void init() throws ElasticSearchException, IOException {
        prepareCreate("test").addMapping("test", jsonBuilder().startObject()
                .startObject("test")
                    .startObject("properties")
                        .startObject("foo")
                            .field("type", "multi_field")
                            .startObject("fields")
                                .startObject("foo")
                                    .field("type", "string")
                                    .field("store", storeCountedFields)
                                    .field("analyzer", "simple")
                                .endObject()
                                .startObject("token_count")
                                    .field("type", "token_count")
                                    .field("analyzer", "standard")
                                    .field("store", true)
                                .endObject()
                                .startObject("token_count_unstored")
                                    .field("type", "token_count")
                                    .field("analyzer", "standard")
                                .endObject()
                                .startObject("token_count_with_doc_values")
                                    .field("type", "token_count")
                                    .field("analyzer", "standard")
                                    .startObject("fielddata")
                                        .field("format", LuceneTestCase.defaultCodecSupportsSortedSet() ? "doc_values" : null)
                                    .endObject()
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject().endObject()).get();
        ensureGreen();

        assertTrue(prepareIndex("single", "I have four terms").get().isCreated());
        BulkResponse bulk = client().prepareBulk()
                .add(prepareIndex("bulk1", "bulk three terms"))
                .add(prepareIndex("bulk2", "this has five bulk terms")).get();
        assertFalse(bulk.buildFailureMessage(), bulk.hasFailures());
        assertTrue(prepareIndex("multi", "two terms", "wow now I have seven lucky terms").get().isCreated());
        bulk = client().prepareBulk()
                .add(prepareIndex("multibulk1", "one", "oh wow now I have eight unlucky terms"))
                .add(prepareIndex("multibulk2", "six is a bunch of terms", "ten! ten terms is just crazy! too many too count!")).get();
        assertFalse(bulk.buildFailureMessage(), bulk.hasFailures());

        assertThat(refresh().getFailedShards(), equalTo(0));
    }

    private IndexRequestBuilder prepareIndex(String id, String... texts) throws IOException {
        return client().prepareIndex("test", "test", id).setSource("foo", texts);
    }

    private SearchResponse searchById(String id) {
        return prepareSearch().setQuery(QueryBuilders.termQuery("_id", id)).get();
    }

    private SearchRequestBuilder searchByNumericRange(int low, int high) {
        return prepareSearch().setQuery(QueryBuilders.rangeQuery(randomFrom(
                ImmutableList.of("foo.token_count", "foo.token_count_unstored", "foo.token_count_with_doc_values")
        )).gte(low).lte(high));
    }

    private SearchRequestBuilder prepareSearch() {
        SearchRequestBuilder request = client().prepareSearch("test").setTypes("test");
        request.addField("foo.token_count");
        if (loadCountedFields) {
            request.addField("foo");
        }
        return request;
    }

    private void assertSearchReturns(SearchResponse result, String... ids) {
        assertThat(result.getHits().getTotalHits(), equalTo((long) ids.length));
        assertThat(result.getHits().hits().length, equalTo(ids.length));
        List<String> foundIds = new ArrayList<String>();
        for (SearchHit hit : result.getHits()) {
            foundIds.add(hit.id());
        }
        assertThat(foundIds, containsInAnyOrder(ids));
        for (SearchHit hit : result.getHits()) {
            String id = hit.id();
            if (id.equals("single")) {
                assertSearchHit(hit, 4);
            } else if (id.equals("bulk1")) {
                assertSearchHit(hit, 3);
            } else if (id.equals("bulk2")) {
                assertSearchHit(hit, 5);
            } else if (id.equals("multi")) {
                assertSearchHit(hit, 2, 7);
            } else if (id.equals("multibulk1")) {
                assertSearchHit(hit, 1, 8);
            } else if (id.equals("multibulk2")) {
                assertSearchHit(hit, 6, 10);
            } else {
                throw new ElasticSearchException("Unexpected response!");
            }
        }
    }

    private void assertSearchHit(SearchHit hit, int... termCounts) {
        assertThat(hit.field("foo.token_count"), not(nullValue()));
        assertThat(hit.field("foo.token_count").values().size(), equalTo(termCounts.length));
        for (int i = 0; i < termCounts.length; i++) {
            assertThat((Integer) hit.field("foo.token_count").values().get(i), equalTo(termCounts[i]));
        }
        if (loadCountedFields && storeCountedFields) {
            assertThat(hit.field("foo").values().size(), equalTo(termCounts.length));
        }
    }
}

View File

@@ -0,0 +1,92 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperTestUtils;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import static org.elasticsearch.index.mapper.DocumentMapper.MergeFlags.mergeFlags;
import static org.hamcrest.Matchers.equalTo;
/**
 * Test for {@link TokenCountFieldMapper}.
 */
public class TokenCountFieldMapperTests extends ElasticsearchTestCase {
    @Test
    public void testMerge() throws IOException {
        String stage1Mapping = XContentFactory.jsonBuilder().startObject()
                .startObject("person")
                    .startObject("properties")
                        .startObject("tc")
                            .field("type", "token_count")
                            .field("analyzer", "keyword")
                        .endObject()
                    .endObject()
                .endObject().endObject().string();
        DocumentMapper stage1 = MapperTestUtils.newParser().parse(stage1Mapping);

        String stage2Mapping = XContentFactory.jsonBuilder().startObject()
                .startObject("person")
                    .startObject("properties")
                        .startObject("tc")
                            .field("type", "token_count")
                            .field("analyzer", "standard")
                        .endObject()
                    .endObject()
                .endObject().endObject().string();
        DocumentMapper stage2 = MapperTestUtils.newParser().parse(stage2Mapping);

        DocumentMapper.MergeResult mergeResult = stage1.merge(stage2, mergeFlags().simulate(true));
        assertThat(mergeResult.hasConflicts(), equalTo(false));
        // Just simulated, so the merge hasn't happened yet
        assertThat(((TokenCountFieldMapper) stage1.mappers().smartName("tc").mapper()).analyzer(), equalTo("keyword"));

        mergeResult = stage1.merge(stage2, mergeFlags().simulate(false));
        assertThat(mergeResult.hasConflicts(), equalTo(false));
        // Not simulated, so the merge has happened and the analyzer is updated
        assertThat(((TokenCountFieldMapper) stage1.mappers().smartName("tc").mapper()).analyzer(), equalTo("standard"));
    }

    @Test
    public void testCountPositions() throws IOException {
        // We're looking to make sure that we:
        Token t1 = new Token();      // Don't count tokens without an increment
        t1.setPositionIncrement(0);
        Token t2 = new Token();
        t2.setPositionIncrement(1);  // Count normal tokens with one increment
        Token t3 = new Token();
        t3.setPositionIncrement(2);  // Count funny tokens with more than one increment
        int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
        Token[] tokens = new Token[] {t1, t2, t3};
        Collections.shuffle(Arrays.asList(tokens), getRandom());
        TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
        // 0 + 1 + 2 + 4 == 7
        assertThat(TokenCountFieldMapper.countPositions(tokenStream), equalTo(7));
    }
}