mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 02:14:54 +00:00
Use index-prefix fields for terms of length min_chars - 1 (#36703)
The default index_prefixes settings index prefixes of between 2 and 5 characters in length. Currently, if a prefix search falls outside of this range at either end, we fall back to a standard prefix expansion, which is still very expensive for single-character prefixes. However, we have the option of using a wildcard expansion rather than a prefix expansion, so that a query of a* gets remapped to a? against the _index_prefix field - likely to be a very small set of terms, and certain to be much smaller than a* against the whole index. This commit adds this extra level of mapping for any prefix term whose length is one less than the min_chars parameter of the index_prefixes field.
This commit is contained in:
parent
132ccbec2f
commit
dd540ef618
@ -58,6 +58,21 @@ setup:
|
||||
- match: {hits.max_score: 2}
|
||||
- match: {hits.hits.0._score: 2}
|
||||
|
||||
- do:
|
||||
search:
|
||||
rest_total_hits_as_int: true
|
||||
index: test
|
||||
body:
|
||||
query:
|
||||
query_string:
|
||||
default_field: text
|
||||
query: s*
|
||||
boost: 2
|
||||
|
||||
- match: {hits.total: 1}
|
||||
- match: {hits.max_score: 2}
|
||||
- match: {hits.hits.0._score: 2}
|
||||
|
||||
- do:
|
||||
search:
|
||||
rest_total_hits_as_int: true
|
||||
|
@ -32,6 +32,7 @@ import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
@ -40,6 +41,9 @@ import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.intervals.IntervalsSource;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
@ -360,7 +364,7 @@ public class TextFieldMapper extends FieldMapper {
|
||||
}
|
||||
|
||||
boolean accept(int length) {
|
||||
return length >= minChars && length <= maxChars;
|
||||
return length >= minChars - 1 && length <= maxChars;
|
||||
}
|
||||
|
||||
void doXContent(XContentBuilder builder) throws IOException {
|
||||
@ -370,6 +374,22 @@ public class TextFieldMapper extends FieldMapper {
|
||||
builder.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
if (value.length() >= minChars) {
|
||||
return super.termQuery(value, context);
|
||||
}
|
||||
List<Automaton> automata = new ArrayList<>();
|
||||
automata.add(Automata.makeString(value));
|
||||
for (int i = value.length(); i < minChars; i++) {
|
||||
automata.add(Automata.makeAnyChar());
|
||||
}
|
||||
Automaton automaton = Operations.concatenate(automata);
|
||||
AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), automaton);
|
||||
query.setRewriteMethod(method);
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrefixFieldType clone() {
|
||||
return new PrefixFieldType(name(), minChars, maxChars);
|
||||
@ -402,7 +422,6 @@ public class TextFieldMapper extends FieldMapper {
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return Objects.hash(super.hashCode(), minChars, maxChars);
|
||||
}
|
||||
}
|
||||
@ -564,7 +583,7 @@ public class TextFieldMapper extends FieldMapper {
|
||||
if (prefixFieldType == null || prefixFieldType.accept(value.length()) == false) {
|
||||
return super.prefixQuery(value, method, context);
|
||||
}
|
||||
Query tq = prefixFieldType.termQuery(value, context);
|
||||
Query tq = prefixFieldType.prefixQuery(value, method, context);
|
||||
if (method == null || method == MultiTermQuery.CONSTANT_SCORE_REWRITE
|
||||
|| method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) {
|
||||
return new ConstantScoreQuery(tq);
|
||||
|
@ -31,10 +31,8 @@ import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
@ -71,10 +69,8 @@ import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.core.Is.is;
|
||||
|
||||
public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
@ -817,18 +813,13 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
|
||||
public void testIndexPrefixMapping() throws IOException {
|
||||
|
||||
QueryShardContext queryShardContext = indexService.newQueryShardContext(
|
||||
randomInt(20), null, () -> {
|
||||
throw new UnsupportedOperationException();
|
||||
}, null);
|
||||
|
||||
{
|
||||
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field")
|
||||
.field("type", "text")
|
||||
.field("analyzer", "standard")
|
||||
.startObject("index_prefixes")
|
||||
.field("min_chars", 1)
|
||||
.field("min_chars", 2)
|
||||
.field("max_chars", 10)
|
||||
.endObject()
|
||||
.endObject().endObject()
|
||||
@ -837,16 +828,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=1:10"));
|
||||
|
||||
FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
|
||||
MappedFieldType fieldType = fieldMapper.fieldType;
|
||||
|
||||
Query q = fieldType.prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext);
|
||||
|
||||
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
|
||||
q = fieldType.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext);
|
||||
assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);
|
||||
assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:10"));
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
|
||||
.bytes(XContentFactory.jsonBuilder()
|
||||
@ -870,17 +852,8 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
CompressedXContent json = new CompressedXContent(mapping);
|
||||
DocumentMapper mapper = parser.parse("type", json);
|
||||
|
||||
FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
|
||||
MappedFieldType fieldType = fieldMapper.fieldType;
|
||||
assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:5"));
|
||||
|
||||
Query q1 = fieldType.prefixQuery("g", CONSTANT_SCORE_REWRITE, queryShardContext);
|
||||
assertThat(q1, instanceOf(PrefixQuery.class));
|
||||
Query q2 = fieldType.prefixQuery("go", CONSTANT_SCORE_REWRITE, queryShardContext);
|
||||
assertThat(q2, instanceOf(ConstantScoreQuery.class));
|
||||
Query q5 = fieldType.prefixQuery("going", CONSTANT_SCORE_REWRITE, queryShardContext);
|
||||
assertThat(q5, instanceOf(ConstantScoreQuery.class));
|
||||
Query q6 = fieldType.prefixQuery("goings", CONSTANT_SCORE_REWRITE, queryShardContext);
|
||||
assertThat(q6, instanceOf(PrefixQuery.class));
|
||||
}
|
||||
|
||||
{
|
||||
@ -898,10 +871,8 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
.endObject().endObject()
|
||||
.endObject().endObject());
|
||||
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
|
||||
indexService.mapperService()
|
||||
.merge("type", new CompressedXContent(illegalMapping), MergeReason.MAPPING_UPDATE);
|
||||
});
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
|
||||
indexService.mapperService().merge("type", new CompressedXContent(illegalMapping), MergeReason.MAPPING_UPDATE));
|
||||
assertThat(e.getMessage(), containsString("Field [field._index_prefix] is defined twice in [type]"));
|
||||
|
||||
}
|
||||
|
@ -20,11 +20,18 @@ package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.junit.Before;
|
||||
|
||||
@ -32,6 +39,8 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
|
||||
|
||||
public class TextFieldTypeTests extends FieldTypeTestCase {
|
||||
@Override
|
||||
protected MappedFieldType createDefaultFieldType() {
|
||||
@ -143,4 +152,21 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
|
||||
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
|
||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testIndexPrefixes() {
|
||||
TextFieldMapper.TextFieldType ft = new TextFieldMapper.TextFieldType();
|
||||
ft.setName("field");
|
||||
ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field._index_prefix", 2, 10));
|
||||
|
||||
Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, null);
|
||||
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
|
||||
|
||||
q = ft.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, null);
|
||||
assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);
|
||||
|
||||
q = ft.prefixQuery("g", CONSTANT_SCORE_REWRITE, null);
|
||||
Automaton automaton
|
||||
= Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
|
||||
assertEquals(new ConstantScoreQuery(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton)), q);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user