percolator: small cleanup by using MemoryIndex#fromDocument(...) helper

Now also `position_increment_gap` is taken into account.

Closes #9386
This commit is contained in:
Martijn van Groningen 2016-04-12 09:54:06 +02:00
parent 226644ea2c
commit 6776586725
2 changed files with 45 additions and 36 deletions

View File

@ -20,10 +20,8 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
@ -52,13 +50,13 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.FieldNameAnalyzer;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.percolator.PercolatorFieldMapper;
import org.elasticsearch.index.percolator.PercolatorQueryCache;
@ -371,16 +369,26 @@ public class PercolatorQueryBuilder extends AbstractQueryBuilder<PercolatorQuery
.id("_temp_id")
.type(documentType));
Analyzer defaultAnalyzer = context.getAnalysisService().defaultIndexAnalyzer();
FieldNameAnalyzer fieldNameAnalyzer = (FieldNameAnalyzer) docMapper.mappers().indexAnalyzer();
// Need this custom impl because FieldNameAnalyzer is strict and the percolator sometimes isn't when
// 'index.percolator.map_unmapped_fields_as_string' is enabled:
Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
Analyzer analyzer = fieldNameAnalyzer.analyzers().get(fieldName);
if (analyzer != null) {
return analyzer;
} else {
return context.getAnalysisService().defaultIndexAnalyzer();
}
}
};
final IndexSearcher docSearcher;
if (doc.docs().size() > 1) {
assert docMapper.hasNestedObjects();
docSearcher = createMultiDocumentSearcher(docMapper, defaultAnalyzer, doc);
docSearcher = createMultiDocumentSearcher(analyzer, doc);
} else {
// TODO: we may want to bring the MemoryIndex thread-local cache back...
// but I'm unsure about the real benefits.
MemoryIndex memoryIndex = new MemoryIndex(true);
indexDoc(docMapper, defaultAnalyzer, doc.rootDoc(), memoryIndex);
MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc.rootDoc(), analyzer, true, false);
docSearcher = memoryIndex.createSearcher();
docSearcher.setQueryCache(null);
}
@ -411,15 +419,14 @@ public class PercolatorQueryBuilder extends AbstractQueryBuilder<PercolatorQuery
return document;
}
private IndexSearcher createMultiDocumentSearcher(DocumentMapper docMapper, Analyzer defaultAnalyzer, ParsedDocument doc) {
private IndexSearcher createMultiDocumentSearcher(Analyzer analyzer, ParsedDocument doc) {
IndexReader[] memoryIndices = new IndexReader[doc.docs().size()];
List<ParseContext.Document> docs = doc.docs();
int rootDocIndex = docs.size() - 1;
assert rootDocIndex > 0;
for (int i = 0; i < docs.size(); i++) {
ParseContext.Document d = docs.get(i);
MemoryIndex memoryIndex = new MemoryIndex(true);
indexDoc(docMapper, defaultAnalyzer, d, memoryIndex);
MemoryIndex memoryIndex = MemoryIndex.fromDocument(d, analyzer, true, false);
memoryIndices[i] = memoryIndex.createSearcher().getIndexReader();
}
try {
@ -443,27 +450,4 @@ public class PercolatorQueryBuilder extends AbstractQueryBuilder<PercolatorQuery
}
}
/**
 * Adds every indexable field of {@code document} to the given {@link MemoryIndex}.
 * <p>
 * Each field is analyzed with the analyzer configured for it in the document mapper;
 * fields unknown to the mapper (or when no mapper is available) fall back to
 * {@code defaultAnalyzer}. Unindexed uid fields are skipped entirely.
 *
 * @param documentMapper  mapper used to look up per-field analyzers, may be {@code null}
 * @param defaultAnalyzer analyzer used when no field-specific analyzer is mapped
 * @param document        the parsed document whose fields are indexed
 * @param memoryIndex     the in-memory index receiving the fields
 * @throws ElasticsearchException if producing a token stream fails with an {@link IOException}
 */
private void indexDoc(DocumentMapper documentMapper, Analyzer defaultAnalyzer, ParseContext.Document document,
                      MemoryIndex memoryIndex) {
    for (IndexableField field : document.getFields()) {
        // The uid field is stored-only here; there is nothing to analyze or index.
        boolean skippableUid = field.fieldType().indexOptions() == IndexOptions.NONE
                && field.name().equals(UidFieldMapper.NAME);
        if (skippableUid) {
            continue;
        }
        // Pick the mapped analyzer when the field is known, otherwise the default one.
        final Analyzer analyzer;
        if (documentMapper != null && documentMapper.mappers().getMapper(field.name()) != null) {
            analyzer = documentMapper.mappers().indexAnalyzer();
        } else {
            analyzer = defaultAnalyzer;
        }
        try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
            // tokenStream may be null for fields that produce no tokens (e.g. stored-only).
            if (tokenStream != null) {
                memoryIndex.addField(field.name(), tokenStream, field.boost());
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
}
}

View File

@ -19,9 +19,12 @@
package org.elasticsearch.search.percolator;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.index.percolator.PercolatorFieldMapper;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESSingleNodeTestCase;
@ -204,4 +207,26 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
equalTo("The quick brown <em>fox</em> jumps over the lazy dog"));
}
/**
 * Verifies that percolation honors a field's {@code position_increment_gap} for
 * multi-valued fields: a phrase query whose slop is smaller than the gap must not
 * match across values, while a slop equal to the gap does.
 */
public void testTakePositionOffsetGapIntoAccount() throws Exception {
    // A gap of 5 positions is inserted between the two values of "field".
    createIndex("test", client().admin().indices().prepareCreate("test")
            .addMapping("type", "field", "type=text,position_increment_gap=5"));
    // Query 1: slop 4 cannot bridge the 5-position gap, so it should not match.
    client().prepareIndex("test", PercolatorFieldMapper.TYPE_NAME, "1")
            .setSource(jsonBuilder().startObject()
                    .field("query", new MatchPhraseQueryBuilder("field", "brown fox").slop(4))
                    .endObject())
            .get();
    // Query 2: slop 5 exactly spans the gap, so it should match.
    client().prepareIndex("test", PercolatorFieldMapper.TYPE_NAME, "2")
            .setSource(jsonBuilder().startObject()
                    .field("query", new MatchPhraseQueryBuilder("field", "brown fox").slop(5))
                    .endObject())
            .get();
    client().admin().indices().prepareRefresh().get();
    BytesReference document = new BytesArray("{\"field\" : [\"brown\", \"fox\"]}");
    SearchResponse response = client().prepareSearch()
            .setQuery(QueryBuilders.percolatorQuery("type", document))
            .get();
    // Only the query whose slop covers the gap is expected to match.
    assertHitCount(response, 1);
    assertThat(response.getHits().getAt(0).getId(), equalTo("2"));
}
}