percolator: fix handling of nested documents

Nested documents were indexed as separate documents, but it was never checked
if the hits represent nested documents or not. Therefore, nested objects could
match not nested queries and nested queries could also match not nested documents.

Examples are in issue #6540 .

closes #6540
closes #6544
This commit is contained in:
Britta Weber 2014-06-18 09:24:53 +02:00
parent 69350dc426
commit fe89ea1ecf
3 changed files with 203 additions and 37 deletions

View File

@ -68,6 +68,7 @@ import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.percolator.stats.ShardPercolateService;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
import org.elasticsearch.index.service.IndexService;
import org.elasticsearch.index.shard.service.IndexShard;
import org.elasticsearch.indices.IndicesService;
@ -210,8 +211,9 @@ public class PercolatorService extends AbstractComponent {
// parse the source either into one MemoryIndex, if it is a single document or index multiple docs if nested
PercolatorIndex percolatorIndex;
boolean isNested = indexShard.mapperService().documentMapper(request.documentType()).hasNestedObjects();
if (parsedDocument.docs().size() > 1) {
assert indexShard.mapperService().documentMapper(request.documentType()).hasNestedObjects();
assert isNested;
percolatorIndex = multi;
} else {
percolatorIndex = single;
@ -232,7 +234,7 @@ public class PercolatorService extends AbstractComponent {
context.percolatorTypeId = action.id();
percolatorIndex.prepare(context, parsedDocument);
return action.doPercolate(request, context);
return action.doPercolate(request, context, isNested);
} finally {
context.close();
shardPercolateService.postPercolate(System.nanoTime() - startTime);
@ -418,7 +420,7 @@ public class PercolatorService extends AbstractComponent {
ReduceResult reduce(List<PercolateShardResponse> shardResults);
PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context);
PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested);
}
@ -443,13 +445,17 @@ public class PercolatorService extends AbstractComponent {
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context) {
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
Lucene.ExistsCollector collector = new Lucene.ExistsCollector();
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
collector.reset();
try {
context.docSearcher().search(entry.getValue(), collector);
if (isNested) {
context.docSearcher().search(entry.getValue(), NonNestedDocsFilter.INSTANCE, collector);
} else {
context.docSearcher().search(entry.getValue(), collector);
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
@ -477,11 +483,11 @@ public class PercolatorService extends AbstractComponent {
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context) {
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
Count countCollector = count(logger, context);
Count countCollector = count(logger, context, isNested);
queryBasedPercolating(percolatorSearcher, context, countCollector);
count = countCollector.counter();
} catch (Throwable e) {
@ -534,7 +540,7 @@ public class PercolatorService extends AbstractComponent {
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context) {
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
List<BytesRef> matches = new ArrayList<>();
List<Map<String, HighlightField>> hls = new ArrayList<>();
@ -547,7 +553,11 @@ public class PercolatorService extends AbstractComponent {
context.hitContext().cache().clear();
}
try {
context.docSearcher().search(entry.getValue(), collector);
if (isNested) {
context.docSearcher().search(entry.getValue(), NonNestedDocsFilter.INSTANCE, collector);
} else {
context.docSearcher().search(entry.getValue(), collector);
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
@ -583,10 +593,10 @@ public class PercolatorService extends AbstractComponent {
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context) {
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
Match match = match(logger, context, highlightPhase);
Match match = match(logger, context, highlightPhase, isNested);
queryBasedPercolating(percolatorSearcher, context, match);
List<BytesRef> matches = match.matches();
List<Map<String, HighlightField>> hls = match.hls();
@ -616,10 +626,10 @@ public class PercolatorService extends AbstractComponent {
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context) {
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
MatchAndScore matchAndScore = matchAndScore(logger, context, highlightPhase);
MatchAndScore matchAndScore = matchAndScore(logger, context, highlightPhase, isNested);
queryBasedPercolating(percolatorSearcher, context, matchAndScore);
List<BytesRef> matches = matchAndScore.matches();
List<Map<String, HighlightField>> hls = matchAndScore.hls();
@ -730,10 +740,10 @@ public class PercolatorService extends AbstractComponent {
}
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context) {
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
Engine.Searcher percolatorSearcher = context.indexShard().acquireSearcher("percolate");
try {
MatchAndSort matchAndSort = QueryCollector.matchAndSort(logger, context);
MatchAndSort matchAndSort = QueryCollector.matchAndSort(logger, context, isNested);
queryBasedPercolating(percolatorSearcher, context, matchAndSort);
TopDocs topDocs = matchAndSort.topDocs();
long count = topDocs.totalHits;
@ -785,7 +795,6 @@ public class PercolatorService extends AbstractComponent {
percolatorTypeFilter = context.indexService().cache().filter().cache(percolatorTypeFilter);
XFilteredQuery query = new XFilteredQuery(context.percolateQuery(), percolatorTypeFilter);
percolatorSearcher.searcher().search(query, percolateCollector);
for (Collector queryCollector : percolateCollector.facetAndAggregatorCollector) {
if (queryCollector instanceof XCollector) {
((XCollector) queryCollector).postCollection();

View File

@ -32,6 +32,7 @@ import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
import org.elasticsearch.search.aggregations.AggregationPhase;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregator;
@ -55,6 +56,7 @@ abstract class QueryCollector extends Collector {
final IndexSearcher searcher;
final ConcurrentMap<BytesRef, Query> queries;
final ESLogger logger;
boolean isNestedDoc = false;
final Lucene.ExistsCollector collector = new Lucene.ExistsCollector();
BytesRef current;
@ -63,12 +65,13 @@ abstract class QueryCollector extends Collector {
final List<Collector> facetAndAggregatorCollector;
QueryCollector(ESLogger logger, PercolateContext context) {
QueryCollector(ESLogger logger, PercolateContext context, boolean isNestedDoc) {
this.logger = logger;
this.queries = context.percolateQueries();
this.searcher = context.docSearcher();
final FieldMapper<?> idMapper = context.mapperService().smartNameFieldMapper(IdFieldMapper.NAME);
this.idFieldData = context.fieldData().getForField(idMapper);
this.isNestedDoc = isNestedDoc;
ImmutableList.Builder<Collector> facetAggCollectorBuilder = ImmutableList.builder();
if (context.facets() != null) {
@ -139,20 +142,20 @@ abstract class QueryCollector extends Collector {
}
static Match match(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase) {
return new Match(logger, context, highlightPhase);
static Match match(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase, boolean isNestedDoc) {
return new Match(logger, context, highlightPhase, isNestedDoc);
}
static Count count(ESLogger logger, PercolateContext context) {
return new Count(logger, context);
static Count count(ESLogger logger, PercolateContext context, boolean isNestedDoc) {
return new Count(logger, context, isNestedDoc);
}
static MatchAndScore matchAndScore(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase) {
return new MatchAndScore(logger, context, highlightPhase);
static MatchAndScore matchAndScore(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase, boolean isNestedDoc) {
return new MatchAndScore(logger, context, highlightPhase, isNestedDoc);
}
static MatchAndSort matchAndSort(ESLogger logger, PercolateContext context) {
return new MatchAndSort(logger, context);
static MatchAndSort matchAndSort(ESLogger logger, PercolateContext context, boolean isNestedDoc) {
return new MatchAndSort(logger, context, isNestedDoc);
}
@ -179,8 +182,8 @@ abstract class QueryCollector extends Collector {
final int size;
long counter = 0;
Match(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase) {
super(logger, context);
Match(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase, boolean isNestedDoc) {
super(logger, context, isNestedDoc);
this.limit = context.limit;
this.size = context.size();
this.context = context;
@ -202,7 +205,11 @@ abstract class QueryCollector extends Collector {
context.hitContext().cache().clear();
}
searcher.search(query, collector);
if (isNestedDoc) {
searcher.search(query, NonNestedDocsFilter.INSTANCE, collector);
} else {
searcher.search(query, collector);
}
if (collector.exists()) {
if (!limit || counter < size) {
matches.add(values.copyShared());
@ -236,8 +243,8 @@ abstract class QueryCollector extends Collector {
private final TopScoreDocCollector topDocsCollector;
MatchAndSort(ESLogger logger, PercolateContext context) {
super(logger, context);
MatchAndSort(ESLogger logger, PercolateContext context, boolean isNestedDoc) {
super(logger, context, isNestedDoc);
// TODO: Use TopFieldCollector.create(...) for ascending and decending scoring?
topDocsCollector = TopScoreDocCollector.create(context.size(), false);
}
@ -252,7 +259,11 @@ abstract class QueryCollector extends Collector {
// run the query
try {
collector.reset();
searcher.search(query, collector);
if (isNestedDoc) {
searcher.search(query, NonNestedDocsFilter.INSTANCE, collector);
} else {
searcher.search(query, collector);
}
if (collector.exists()) {
topDocsCollector.collect(doc);
postMatch(doc);
@ -294,8 +305,8 @@ abstract class QueryCollector extends Collector {
private Scorer scorer;
MatchAndScore(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase) {
super(logger, context);
MatchAndScore(ESLogger logger, PercolateContext context, HighlightPhase highlightPhase, boolean isNestedDoc) {
super(logger, context, isNestedDoc);
this.limit = context.limit;
this.size = context.size();
this.context = context;
@ -316,7 +327,11 @@ abstract class QueryCollector extends Collector {
context.parsedQuery(new ParsedQuery(query, ImmutableMap.<String, Filter>of()));
context.hitContext().cache().clear();
}
searcher.search(query, collector);
if (isNestedDoc) {
searcher.search(query, NonNestedDocsFilter.INSTANCE, collector);
} else {
searcher.search(query, collector);
}
if (collector.exists()) {
if (!limit || counter < size) {
matches.add(values.copyShared());
@ -360,8 +375,8 @@ abstract class QueryCollector extends Collector {
private long counter = 0;
Count(ESLogger logger, PercolateContext context) {
super(logger, context);
Count(ESLogger logger, PercolateContext context, boolean isNestedDoc) {
super(logger, context, isNestedDoc);
}
@Override
@ -374,7 +389,11 @@ abstract class QueryCollector extends Collector {
// run the query
try {
collector.reset();
searcher.search(query, collector);
if (isNestedDoc) {
searcher.search(query, NonNestedDocsFilter.INSTANCE, collector);
} else {
searcher.search(query, collector);
}
if (collector.exists()) {
counter++;
postMatch(doc);

View File

@ -61,6 +61,7 @@ import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilde
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders.exponentialDecayFunction;
import static org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders.scriptFunction;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.*;
@ -1843,4 +1844,141 @@ public class PercolatorTests extends ElasticsearchIntegrationTest {
.endArray().endObject();
return doc;
}
// issue
@Test
public void testNestedDocFilter() throws IOException {
String mapping = "{\n" +
" \"doc\": {\n" +
" \"properties\": {\n" +
" \"persons\": {\n" +
" \"type\": \"nested\"\n" +
" }\n" +
" }\n" +
" }\n" +
" }";
String doc = "{\n" +
" \"name\": \"obama\",\n" +
" \"persons\": [\n" +
" {\n" +
" \"foo\": \"bar\"\n" +
" }\n" +
" ]\n" +
" }";
String q1 = "{\n" +
" \"query\": {\n" +
" \"bool\": {\n" +
" \"must\": {\n" +
" \"match\": {\n" +
" \"name\": \"obama\"\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
"\"text\":\"foo\""+
"}";
String q2 = "{\n" +
" \"query\": {\n" +
" \"bool\": {\n" +
" \"must_not\": {\n" +
" \"match\": {\n" +
" \"name\": \"obama\"\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
"\"text\":\"foo\""+
"}";
String q3 = "{\n" +
" \"query\": {\n" +
" \"bool\": {\n" +
" \"must\": {\n" +
" \"match\": {\n" +
" \"persons.foo\": \"bar\"\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
"\"text\":\"foo\""+
"}";
String q4 = "{\n" +
" \"query\": {\n" +
" \"bool\": {\n" +
" \"must_not\": {\n" +
" \"match\": {\n" +
" \"persons.foo\": \"bar\"\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
"\"text\":\"foo\""+
"}";
String q5 = "{\n" +
" \"query\": {\n" +
" \"bool\": {\n" +
" \"must\": {\n" +
" \"nested\": {\n" +
" \"path\": \"persons\",\n" +
" \"query\": {\n" +
" \"match\": {\n" +
" \"persons.foo\": \"bar\"\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
"\"text\":\"foo\""+
"}";
String q6 = "{\n" +
" \"query\": {\n" +
" \"bool\": {\n" +
" \"must_not\": {\n" +
" \"nested\": {\n" +
" \"path\": \"persons\",\n" +
" \"query\": {\n" +
" \"match\": {\n" +
" \"persons.foo\": \"bar\"\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
"\"text\":\"foo\""+
"}";
assertAcked(client().admin().indices().prepareCreate("test").addMapping("doc", mapping));
ensureGreen("test");
client().prepareIndex("test", PercolatorService.TYPE_NAME).setSource(q1).setId("q1").get();
client().prepareIndex("test", PercolatorService.TYPE_NAME).setSource(q2).setId("q2").get();
client().prepareIndex("test", PercolatorService.TYPE_NAME).setSource(q3).setId("q3").get();
client().prepareIndex("test", PercolatorService.TYPE_NAME).setSource(q4).setId("q4").get();
client().prepareIndex("test", PercolatorService.TYPE_NAME).setSource(q5).setId("q5").get();
client().prepareIndex("test", PercolatorService.TYPE_NAME).setSource(q6).setId("q6").get();
refresh();
PercolateResponse response = client().preparePercolate()
.setIndices("test").setDocumentType("doc")
.setPercolateDoc(docBuilder().setDoc(doc))
.get();
assertMatchCount(response, 3l);
Set<String> expectedIds = new HashSet<>();
expectedIds.add("q1");
expectedIds.add("q4");
expectedIds.add("q5");
for (PercolateResponse.Match match : response.getMatches()) {
assertTrue(expectedIds.remove(match.getId().string()));
}
assertTrue(expectedIds.isEmpty());
response = client().preparePercolate().setOnlyCount(true)
.setIndices("test").setDocumentType("doc")
.setPercolateDoc(docBuilder().setDoc(doc))
.get();
assertMatchCount(response, 3l);
response = client().preparePercolate().setScore(randomBoolean()).setSortByScore(randomBoolean()).setOnlyCount(randomBoolean()).setSize(10).setPercolateQuery(QueryBuilders.termQuery("text", "foo"))
.setIndices("test").setDocumentType("doc")
.setPercolateDoc(docBuilder().setDoc(doc))
.get();
assertMatchCount(response, 3l);
}
}