diff --git a/src/main/java/org/elasticsearch/common/lucene/all/AllField.java b/src/main/java/org/elasticsearch/common/lucene/all/AllField.java index daf2b67d51f..e917b07fa82 100644 --- a/src/main/java/org/elasticsearch/common/lucene/all/AllField.java +++ b/src/main/java/org/elasticsearch/common/lucene/all/AllField.java @@ -23,7 +23,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.Version; import java.io.IOException; import java.io.Reader; @@ -56,11 +58,26 @@ public class AllField extends Field { return null; } + /** Returns the {@link AllEntries} containing the original text fields for the document. */ + public AllEntries getAllEntries() { + return allEntries; + } + + static { + assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Re-use the incoming AllTokenStream once we upgrade to Lucene 4.9"; + } + @Override public TokenStream tokenStream(Analyzer analyzer) throws IOException { try { allEntries.reset(); // reset the all entries, just in case it was read already - return AllTokenStream.allTokenStream(name, allEntries, analyzer); + if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { + // AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if + // positions are indexed: + return AllTokenStream.allTokenStream(name, allEntries, analyzer); + } else { + return analyzer.tokenStream(name, allEntries); + } } catch (IOException e) { throw new ElasticsearchException("Failed to create token stream"); } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java index 5b1bd7c344e..d3b0027153d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java @@ -174,7 +174,7 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna if (!autoBoost) { return new TermQuery(term); } - if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { return new AllTermQuery(term); } return new TermQuery(term); diff --git a/src/test/java/org/elasticsearch/index/mapper/all/SimpleAllMapperTests.java b/src/test/java/org/elasticsearch/index/mapper/all/SimpleAllMapperTests.java index d262b079182..6d6e1637418 100644 --- a/src/test/java/org/elasticsearch/index/mapper/all/SimpleAllMapperTests.java +++ b/src/test/java/org/elasticsearch/index/mapper/all/SimpleAllMapperTests.java @@ -62,7 +62,9 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + // One field is boosted so we should see AllTokenStream used: + assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.instanceOf(AllTokenStream.class)); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -79,7 +81,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -96,7 +98,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -107,6 +109,50 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { } + // #6187: make sure we see AllTermQuery even when offsets are indexed in the _all field: + @Test + public void testAllMappersWithOffsetsTermQuery() throws Exception { + String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/mapping_offsets_on_all.json"); + DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping); + byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); + Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); + AllField field = (AllField) doc.getField("_all"); + // _all field indexes positions, and mapping has boosts, so we should see AllTokenStream: + assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.instanceOf(AllTokenStream.class)); + AllEntries allEntries = field.getAllEntries(); + assertThat(allEntries.fields().size(), equalTo(3)); + assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); + assertThat(allEntries.fields().contains("name.last"), equalTo(true)); + assertThat(allEntries.fields().contains("simple1"), equalTo(true)); + FieldMapper mapper = docMapper.mappers().smartNameFieldMapper("_all"); + assertThat(field.fieldType().omitNorms(), equalTo(false)); + assertThat(mapper.queryStringTermQuery(new Term("_all", "foobar")), Matchers.instanceOf(AllTermQuery.class)); + } + + // #6187: if _all doesn't index positions then we never use AllTokenStream, even if some fields have boost + @Test + public void testBoostWithOmitPositions() throws Exception { + String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json"); + DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping); + byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); + Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); + AllField field = (AllField) doc.getField("_all"); + // _all field omits positions, so we should not get AllTokenStream even though fields are boosted + assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.not(Matchers.instanceOf(AllTokenStream.class))); + } + + // #6187: if no fields were boosted, we shouldn't use AllTokenStream + @Test + public void testNoBoost() throws Exception { + String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/noboost-mapping.json"); + DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping); + byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); + Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); + AllField field = (AllField) doc.getField("_all"); + // no fields have boost, so we should not see AllTokenStream: + assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.not(Matchers.instanceOf(AllTokenStream.class))); + } + @Test public void testSimpleAllMappersWithReparse() throws Exception { @@ -119,7 +165,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -134,7 +180,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); assertThat(allEntries.fields().contains("simple1"), equalTo(true)); @@ -155,7 +201,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); assertThat(allEntries.fields().contains("simple1"), equalTo(true)); @@ -246,7 +292,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { assertThat(field.fieldType().storeTermVectorPayloads(), equalTo(tv_payloads)); assertThat(field.fieldType().storeTermVectorPositions(), equalTo(tv_positions)); assertThat(field.fieldType().storeTermVectors(), equalTo(tv_stored)); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("foobar"), equalTo(true)); assertThat(allEntries.fields().contains("foo"), equalTo(true)); @@ -298,7 +344,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { Document doc = docMapper.parse(builder.bytes()).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields(), empty()); } @@ -318,7 +364,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase { Document doc = docMapper.parse(builder.bytes()).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); + AllEntries allEntries = field.getAllEntries(); assertThat(allEntries.fields(), hasSize(1)); assertThat(allEntries.fields(), hasItem("foo.bar")); } diff --git a/src/test/java/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json b/src/test/java/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json new file mode 100644 index 00000000000..f9be13cd1e4 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json @@ -0,0 +1,57 @@ +{ + "person":{ + "_all":{ + "enabled": true , + "index_options" : "freqs" + }, + "properties":{ + "name":{ + "type":"object", + "dynamic":false, + "properties":{ + "first":{ + "type":"string", + "store":"yes", + "include_in_all":false + }, + "last":{ + "type":"string", + "index":"not_analyzed", + "boost": 2.0 + } + } + }, + "address":{ + "type":"object", + "include_in_all":false, + "properties":{ + "first":{ + "properties":{ + "location":{ + "type":"string", + "store":"yes", + "index_name":"firstLocation" + } + } + }, + "last":{ + "properties":{ + "location":{ + "type":"string", + "include_in_all":true + } + } + } + } + }, + "simple1":{ + "type":"long", + "include_in_all":true + }, + "simple2":{ + "type":"long", + "include_in_all":false + } + } + } +} diff --git a/src/test/java/org/elasticsearch/index/mapper/all/mapping_offsets_on_all.json b/src/test/java/org/elasticsearch/index/mapper/all/mapping_offsets_on_all.json new file mode 100644 index 00000000000..43aa682685f --- /dev/null +++ b/src/test/java/org/elasticsearch/index/mapper/all/mapping_offsets_on_all.json @@ -0,0 +1,57 @@ +{ + "person":{ + "_all":{ + "enabled": true , + "index_options" : "offsets" + }, + "properties":{ + "name":{ + "type":"object", + "dynamic":false, + "properties":{ + "first":{ + "type":"string", + "store":"yes", + "include_in_all":false + }, + "last":{ + "type":"string", + "index":"not_analyzed", + "boost": 2.0 + } + } + }, + "address":{ + "type":"object", + "include_in_all":false, + "properties":{ + "first":{ + "properties":{ + "location":{ + "type":"string", + "store":"yes", + "index_name":"firstLocation" + } + } + }, + "last":{ + "properties":{ + "location":{ + "type":"string", + "include_in_all":true + } + } + } + } + }, + "simple1":{ + "type":"long", + "include_in_all":true + }, + "simple2":{ + "type":"long", + "include_in_all":false + } + } + } +}