Don't use AllTokenStream when no fields were boosted

AllTokenStream, used to index the _all field, adds some overhead, but
it's not necessary when no fields were boosted or when positions are
not indexed the _all field.

Closes #6187 Closes #6219
This commit is contained in:
mikemccand 2014-05-20 10:28:31 -04:00
parent 476e28f4ce
commit 9c45fe8f9b
5 changed files with 188 additions and 11 deletions

View File

@ -23,7 +23,9 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import java.io.IOException;
import java.io.Reader;
@ -56,11 +58,26 @@ public class AllField extends Field {
return null;
}
/** Returns the {@link AllEntries} containing the original text fields for the document. */
public AllEntries getAllEntries() {
return allEntries;
}
static {
assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Re-use the incoming AllTokenStream once we upgrade to Lucene 4.9";
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
try {
allEntries.reset(); // reset the all entries, just in case it was read already
return AllTokenStream.allTokenStream(name, allEntries, analyzer);
if (allEntries.customBoost() && fieldType().indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
// AllTokenStream maps boost to 4-byte payloads, so we only need to use it any field had non-default (!= 1.0f) boost and if
// positions are indexed:
return AllTokenStream.allTokenStream(name, allEntries, analyzer);
} else {
return analyzer.tokenStream(name, allEntries);
}
} catch (IOException e) {
throw new ElasticsearchException("Failed to create token stream");
}

View File

@ -174,7 +174,7 @@ public class AllFieldMapper extends AbstractFieldMapper<Void> implements Interna
if (!autoBoost) {
return new TermQuery(term);
}
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
return new AllTermQuery(term);
}
return new TermQuery(term);

View File

@ -62,7 +62,9 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
// One field is boosted so we should see AllTokenStream used:
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.instanceOf(AllTokenStream.class));
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(3));
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
@ -79,7 +81,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(3));
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
@ -96,7 +98,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(3));
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
@ -107,6 +109,50 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
}
// #6187: make sure we see AllTermQuery even when offsets are indexed in the _all field:
@Test
public void testAllMappersWithOffsetsTermQuery() throws Exception {
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/mapping_offsets_on_all.json");
DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping);
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
// _all field indexes positions, and mapping has boosts, so we should see AllTokenStream:
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.instanceOf(AllTokenStream.class));
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(3));
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
assertThat(allEntries.fields().contains("simple1"), equalTo(true));
FieldMapper mapper = docMapper.mappers().smartNameFieldMapper("_all");
assertThat(field.fieldType().omitNorms(), equalTo(false));
assertThat(mapper.queryStringTermQuery(new Term("_all", "foobar")), Matchers.instanceOf(AllTermQuery.class));
}
// #6187: if _all doesn't index positions then we never use AllTokenStream, even if some fields have boost
@Test
public void testBoostWithOmitPositions() throws Exception {
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json");
DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping);
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
// _all field omits positions, so we should not get AllTokenStream even though fields are boosted
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.not(Matchers.instanceOf(AllTokenStream.class)));
}
// #6187: if no fields were boosted, we shouldn't use AllTokenStream
@Test
public void testNoBoost() throws Exception {
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/noboost-mapping.json");
DocumentMapper docMapper = MapperTestUtils.newParser().parse(mapping);
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
// no fields have boost, so we should not see AllTokenStream:
assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer()), Matchers.not(Matchers.instanceOf(AllTokenStream.class)));
}
@Test
public void testSimpleAllMappersWithReparse() throws Exception {
@ -119,7 +165,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(3));
assertThat(allEntries.fields().contains("address.last.location"), equalTo(true));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
@ -134,7 +180,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
Document doc = docMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(2));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
assertThat(allEntries.fields().contains("simple1"), equalTo(true));
@ -155,7 +201,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(2));
assertThat(allEntries.fields().contains("name.last"), equalTo(true));
assertThat(allEntries.fields().contains("simple1"), equalTo(true));
@ -246,7 +292,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
assertThat(field.fieldType().storeTermVectorPayloads(), equalTo(tv_payloads));
assertThat(field.fieldType().storeTermVectorPositions(), equalTo(tv_positions));
assertThat(field.fieldType().storeTermVectors(), equalTo(tv_stored));
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields().size(), equalTo(2));
assertThat(allEntries.fields().contains("foobar"), equalTo(true));
assertThat(allEntries.fields().contains("foo"), equalTo(true));
@ -298,7 +344,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
Document doc = docMapper.parse(builder.bytes()).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields(), empty());
}
@ -318,7 +364,7 @@ public class SimpleAllMapperTests extends ElasticsearchTestCase {
Document doc = docMapper.parse(builder.bytes()).rootDoc();
AllField field = (AllField) doc.getField("_all");
AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries();
AllEntries allEntries = field.getAllEntries();
assertThat(allEntries.fields(), hasSize(1));
assertThat(allEntries.fields(), hasItem("foo.bar"));
}

View File

@ -0,0 +1,57 @@
{
"person":{
"_all":{
"enabled": true ,
"index_options" : "freqs"
},
"properties":{
"name":{
"type":"object",
"dynamic":false,
"properties":{
"first":{
"type":"string",
"store":"yes",
"include_in_all":false
},
"last":{
"type":"string",
"index":"not_analyzed",
"boost": 2.0
}
}
},
"address":{
"type":"object",
"include_in_all":false,
"properties":{
"first":{
"properties":{
"location":{
"type":"string",
"store":"yes",
"index_name":"firstLocation"
}
}
},
"last":{
"properties":{
"location":{
"type":"string",
"include_in_all":true
}
}
}
}
},
"simple1":{
"type":"long",
"include_in_all":true
},
"simple2":{
"type":"long",
"include_in_all":false
}
}
}
}

View File

@ -0,0 +1,57 @@
{
"person":{
"_all":{
"enabled": true ,
"index_options" : "offsets"
},
"properties":{
"name":{
"type":"object",
"dynamic":false,
"properties":{
"first":{
"type":"string",
"store":"yes",
"include_in_all":false
},
"last":{
"type":"string",
"index":"not_analyzed",
"boost": 2.0
}
}
},
"address":{
"type":"object",
"include_in_all":false,
"properties":{
"first":{
"properties":{
"location":{
"type":"string",
"store":"yes",
"index_name":"firstLocation"
}
}
},
"last":{
"properties":{
"location":{
"type":"string",
"include_in_all":true
}
}
}
}
},
"simple1":{
"type":"long",
"include_in_all":true
},
"simple2":{
"type":"long",
"include_in_all":false
}
}
}
}