Don't use AllTokenStream when no fields were boosted
AllTokenStream, used to index the _all field, adds some overhead, but it's not necessary when no fields were boosted or when positions are not indexed in the _all field. Closes #6187 Closes #6219
This commit is contained in:
parent
476e28f4ce
commit
9c45fe8f9b
|
@ -23,7 +23,9 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
@ -56,11 +58,26 @@ public class AllField extends Field {
|
|||
return null;
|
||||
}
|
||||
|
||||
/** Returns the {@link AllEntries} containing the original text fields for the document. */
|
||||
public AllEntries getAllEntries() {
|
||||
return allEntries;
|
||||
}
|
||||
|
||||
static {
|
||||
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Re-use the incoming AllTokenStream once we upgrade to Lucene 4.9";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
|
||||
try {
|
||||
allEntries.reset(); // reset the all entries, just in case it was read already
|
||||
return AllTokenStream.allTokenStream(name, allEntries, analyzer);
|
||||
if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
|
||||
// AllTokenStream maps boost to 4-byte payloads, so we only need to use it if any field had non-default (!= 1.0f) boost and if
|
||||
// positions are indexed:
|
||||
return AllTokenStream.allTokenStream(name, allEntries, analyzer);
|
||||
} else {
|
||||
return analyzer.tokenStream(name, allEntries);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchException("Failed to create token stream");
|
||||
}
|
||||
|
|
|
@ -174,7 +174,7 @@ public class AllFieldMapper extends AbstractFieldMapper<Void> implements Interna
|
|||
if (!autoBoost) {
|
||||
return new TermQuery(term);
|
||||
}
|
||||
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
|
||||
return new AllTermQuery(term);
|
||||
}
|
||||
return new TermQuery(term);
|
||||
|
|
|
@ -62,7 +62,9 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
// One field is boosted so we should see AllTokenStream used:
|
||||
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.instanceOf(AllTokenStream.class));
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(3));
|
||||
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
|
@ -79,7 +81,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(3));
|
||||
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
|
@ -96,7 +98,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(3));
|
||||
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
|
@ -107,6 +109,50 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
|
||||
}
|
||||
|
||||
// #6187: make sure we see AllTermQuery even when offsets are indexed in the _all field:
|
||||
@Test
|
||||
public void testAllMappersWithOffsetsTermQuery() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/mapping_offsets_on_all.json");
|
||||
DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping);
|
||||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
// _all field indexes positions, and mapping has boosts, so we should see AllTokenStream:
|
||||
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.instanceOf(AllTokenStream.class));
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(3));
|
||||
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("simple1"), equalTo(true));
|
||||
FieldMapper mapper = docMapper.mappers().smartNameFieldMapper("_all");
|
||||
assertThat(field.fieldType().omitNorms(), equalTo(false));
|
||||
assertThat(mapper.queryStringTermQuery(new Term("_all", "foobar")), Matchers.instanceOf(AllTermQuery.class));
|
||||
}
|
||||
|
||||
// #6187: if _all doesn't index positions then we never use AllTokenStream, even if some fields have boost
|
||||
@Test
|
||||
public void testBoostWithOmitPositions() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json");
|
||||
DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping);
|
||||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
// _all field omits positions, so we should not get AllTokenStream even though fields are boosted
|
||||
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.not(Matchers.instanceOf(AllTokenStream.class)));
|
||||
}
|
||||
|
||||
// #6187: if no fields were boosted, we shouldn't use AllTokenStream
|
||||
@Test
|
||||
public void testNoBoost() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/noboost-mapping.json");
|
||||
DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping);
|
||||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
// no fields have boost, so we should not see AllTokenStream:
|
||||
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.not(Matchers.instanceOf(AllTokenStream.class)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSimpleAllMappersWithReparse() throws Exception {
|
||||
|
@ -119,7 +165,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc();
|
||||
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(3));
|
||||
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
|
@ -134,7 +180,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
|
||||
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(2));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("simple1"), equalTo(true));
|
||||
|
@ -155,7 +201,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc();
|
||||
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(2));
|
||||
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("simple1"), equalTo(true));
|
||||
|
@ -246,7 +292,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
assertThat(field.fieldType().storeTermVectorPayloads(), equalTo(tv_payloads));
|
||||
assertThat(field.fieldType().storeTermVectorPositions(), equalTo(tv_positions));
|
||||
assertThat(field.fieldType().storeTermVectors(), equalTo(tv_stored));
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields().size(), equalTo(2));
|
||||
assertThat(allEntries.fields().contains("foobar"), equalTo(true));
|
||||
assertThat(allEntries.fields().contains("foo"), equalTo(true));
|
||||
|
@ -298,7 +344,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
|
||||
Document doc = docMapper.parse(builder.bytes()).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields(), empty());
|
||||
}
|
||||
|
||||
|
@ -318,7 +364,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
|
|||
|
||||
Document doc = docMapper.parse(builder.bytes()).rootDoc();
|
||||
AllField field = (AllField) doc.getField("_all");
|
||||
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
|
||||
AllEntries allEntries = field.getAllEntries();
|
||||
assertThat(allEntries.fields(), hasSize(1));
|
||||
assertThat(allEntries.fields(), hasItem("foo.bar"));
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"person":{
|
||||
"_all":{
|
||||
"enabled": true ,
|
||||
"index_options" : "freqs"
|
||||
},
|
||||
"properties":{
|
||||
"name":{
|
||||
"type":"object",
|
||||
"dynamic":false,
|
||||
"properties":{
|
||||
"first":{
|
||||
"type":"string",
|
||||
"store":"yes",
|
||||
"include_in_all":false
|
||||
},
|
||||
"last":{
|
||||
"type":"string",
|
||||
"index":"not_analyzed",
|
||||
"boost": 2.0
|
||||
}
|
||||
}
|
||||
},
|
||||
"address":{
|
||||
"type":"object",
|
||||
"include_in_all":false,
|
||||
"properties":{
|
||||
"first":{
|
||||
"properties":{
|
||||
"location":{
|
||||
"type":"string",
|
||||
"store":"yes",
|
||||
"index_name":"firstLocation"
|
||||
}
|
||||
}
|
||||
},
|
||||
"last":{
|
||||
"properties":{
|
||||
"location":{
|
||||
"type":"string",
|
||||
"include_in_all":true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"simple1":{
|
||||
"type":"long",
|
||||
"include_in_all":true
|
||||
},
|
||||
"simple2":{
|
||||
"type":"long",
|
||||
"include_in_all":false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"person":{
|
||||
"_all":{
|
||||
"enabled": true ,
|
||||
"index_options" : "offsets"
|
||||
},
|
||||
"properties":{
|
||||
"name":{
|
||||
"type":"object",
|
||||
"dynamic":false,
|
||||
"properties":{
|
||||
"first":{
|
||||
"type":"string",
|
||||
"store":"yes",
|
||||
"include_in_all":false
|
||||
},
|
||||
"last":{
|
||||
"type":"string",
|
||||
"index":"not_analyzed",
|
||||
"boost": 2.0
|
||||
}
|
||||
}
|
||||
},
|
||||
"address":{
|
||||
"type":"object",
|
||||
"include_in_all":false,
|
||||
"properties":{
|
||||
"first":{
|
||||
"properties":{
|
||||
"location":{
|
||||
"type":"string",
|
||||
"store":"yes",
|
||||
"index_name":"firstLocation"
|
||||
}
|
||||
}
|
||||
},
|
||||
"last":{
|
||||
"properties":{
|
||||
"location":{
|
||||
"type":"string",
|
||||
"include_in_all":true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"simple1":{
|
||||
"type":"long",
|
||||
"include_in_all":true
|
||||
},
|
||||
"simple2":{
|
||||
"type":"long",
|
||||
"include_in_all":false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue