ShieldIndexSearcherWrapper should create the scorer only once. elastic/elasticsearch#1725
Currently it first creates a scorer, then checks whether the role bits are sparse, and falls back to the bulk scorer if they are dense. The problem is that creating scorers and bulk scorers is very expensive for some queries, such as range, prefix, and terms queries, so it should instead check whether the bits are sparse first and only then decide whether to create the scorer or the bulk scorer.

Original commit: elastic/x-pack-elasticsearch@067d630099
This commit is contained in:
parent 52a91d7c6f
commit 71542594e6
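The gist of the change, simplified from the diff below (this fragment reuses the diff's own names; it is a sketch, not the verbatim implementation): the sparsity check now comes before scorer construction, so exactly one of Weight#scorer and Weight#bulkScorer is created per segment.

    // Per-segment flow after the fix (simplified from the diff below):
    BitSet roleQueryBits = reader.getRoleQueryBits();
    if (roleQueryBits instanceof SparseFixedBitSet) {
        // sparse role bits: create only the scorer and leap-frog it against the bits
        Scorer scorer = weight.scorer(ctx);
        if (scorer != null) {
            intersectScorerAndRoleBits(scorer, (SparseFixedBitSet) roleQueryBits,
                    leafCollector, reader.getWrappedLiveDocs());
        }
    } else {
        // dense role bits: create only the bulk scorer and let it drive collection
        BulkScorer bulkScorer = weight.bulkScorer(ctx);
        if (bulkScorer != null) {
            bulkScorer.score(leafCollector, reader.getLiveDocs());
        }
    }

Previously weight.scorer(ctx) was always created first, and the dense branch then built a bulk scorer on top of it, paying the expensive construction cost twice.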
DocumentSubsetReader.java

@@ -26,7 +26,8 @@ import java.io.IOException;
  */
 public final class DocumentSubsetReader extends FilterLeafReader {

-    public static DirectoryReader wrap(DirectoryReader in, BitsetFilterCache bitsetFilterCache, Query roleQuery) throws IOException {
+    public static DocumentSubsetDirectoryReader wrap(DirectoryReader in, BitsetFilterCache bitsetFilterCache,
+                                                     Query roleQuery) throws IOException {
         return new DocumentSubsetDirectoryReader(in, bitsetFilterCache, roleQuery);
     }

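A side effect of this hunk is that the return type of DocumentSubsetReader.wrap is narrowed from DirectoryReader to DocumentSubsetDirectoryReader, so callers that need the subclass (such as the new IndexSearcherWrapper below) no longer have to cast. An illustrative caller, assuming the new signature:

    // Before the change a cast was needed:
    //   DocumentSubsetDirectoryReader r =
    //           (DocumentSubsetDirectoryReader) DocumentSubsetReader.wrap(in, cache, roleQuery);
    // After it, the precise type is returned directly:
    DocumentSubsetDirectoryReader wrapped = DocumentSubsetReader.wrap(in, bitsetFilterCache, roleQuery);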
ShieldIndexSearcherWrapper.java

@@ -6,6 +6,7 @@
 package org.elasticsearch.shield.authz.accesscontrol;

 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BulkScorer;
@@ -156,50 +157,7 @@ public class ShieldIndexSearcherWrapper extends IndexSearcherWrapper {
         // The reasons why we return a custom searcher:
         // 1) in the case the role query is sparse then large part of the main query can be skipped
         // 2) If the role query doesn't match with any docs in a segment, that a segment can be skipped
-        IndexSearcher indexSearcher = new IndexSearcher(directoryReader) {
-
-            @Override
-            protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
-                for (LeafReaderContext ctx : leaves) { // search each subreader
-                    final LeafCollector leafCollector;
-                    try {
-                        leafCollector = collector.getLeafCollector(ctx);
-                    } catch (CollectionTerminatedException e) {
-                        // there is no doc of interest in this reader context
-                        // continue with the following leaf
-                        continue;
-                    }
-                    // The reader is always of type DocumentSubsetReader when we get here:
-                    DocumentSubsetReader reader = (DocumentSubsetReader) ctx.reader();
-
-                    BitSet roleQueryBits = reader.getRoleQueryBits();
-                    if (roleQueryBits == null) {
-                        // nothing matches with the role query, so skip this segment:
-                        continue;
-                    }
-
-                    Scorer scorer = weight.scorer(ctx);
-                    if (scorer != null) {
-                        try {
-                            // if the role query result set is sparse then we should use the SparseFixedBitSet for advancing:
-                            if (roleQueryBits instanceof SparseFixedBitSet) {
-                                SparseFixedBitSet sparseFixedBitSet = (SparseFixedBitSet) roleQueryBits;
-                                Bits realLiveDocs = reader.getWrappedLiveDocs();
-                                intersectScorerAndRoleBits(scorer, sparseFixedBitSet, leafCollector, realLiveDocs);
-                            } else {
-                                BulkScorer bulkScorer = weight.bulkScorer(ctx);
-                                Bits liveDocs = reader.getLiveDocs();
-                                bulkScorer.score(leafCollector, liveDocs);
-                            }
-                        } catch (CollectionTerminatedException e) {
-                            // collection was terminated prematurely
-                            // continue with the following leaf
-                        }
-                    }
-
-                }
-            }
-        };
+        IndexSearcher indexSearcher = new IndexSearcherWrapper((DocumentSubsetDirectoryReader) directoryReader);
         indexSearcher.setQueryCache(indexSearcher.getQueryCache());
         indexSearcher.setQueryCachingPolicy(indexSearcher.getQueryCachingPolicy());
         indexSearcher.setSimilarity(indexSearcher.getSimilarity(true));
@@ -208,6 +166,61 @@ public class ShieldIndexSearcherWrapper extends IndexSearcherWrapper {
         return searcher;
     }

+    static class IndexSearcherWrapper extends IndexSearcher {
+
+        public IndexSearcherWrapper(DocumentSubsetDirectoryReader r) {
+            super(r);
+        }
+
+        @Override
+        protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+            for (LeafReaderContext ctx : leaves) { // search each subreader
+                final LeafCollector leafCollector;
+                try {
+                    leafCollector = collector.getLeafCollector(ctx);
+                } catch (CollectionTerminatedException e) {
+                    // there is no doc of interest in this reader context
+                    // continue with the following leaf
+                    continue;
+                }
+                // The reader is always of type DocumentSubsetReader when we get here:
+                DocumentSubsetReader reader = (DocumentSubsetReader) ctx.reader();
+
+                BitSet roleQueryBits = reader.getRoleQueryBits();
+                if (roleQueryBits == null) {
+                    // nothing matches with the role query, so skip this segment:
+                    continue;
+                }
+
+                // if the role query result set is sparse then we should use the SparseFixedBitSet for advancing:
+                if (roleQueryBits instanceof SparseFixedBitSet) {
+                    Scorer scorer = weight.scorer(ctx);
+                    if (scorer != null) {
+                        SparseFixedBitSet sparseFixedBitSet = (SparseFixedBitSet) roleQueryBits;
+                        Bits realLiveDocs = reader.getWrappedLiveDocs();
+                        try {
+                            intersectScorerAndRoleBits(scorer, sparseFixedBitSet, leafCollector, realLiveDocs);
+                        } catch (CollectionTerminatedException e) {
+                            // collection was terminated prematurely
+                            // continue with the following leaf
+                        }
+                    }
+                } else {
+                    BulkScorer bulkScorer = weight.bulkScorer(ctx);
+                    if (bulkScorer != null) {
+                        Bits liveDocs = reader.getLiveDocs();
+                        try {
+                            bulkScorer.score(leafCollector, liveDocs);
+                        } catch (CollectionTerminatedException e) {
+                            // collection was terminated prematurely
+                            // continue with the following leaf
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     public Set<String> getAllowedMetaFields() {
         return allowedMetaFields;
     }
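intersectScorerAndRoleBits is called above but defined outside this diff. A minimal sketch of such a leap-frog intersection, assuming Lucene 6.x-era APIs (Scorer#iterator, ConjunctionDISI#intersectIterators); the committed helper in ShieldIndexSearcherWrapper may differ in detail:

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.lucene.search.ConjunctionDISI;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.LeafCollector;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.util.BitSetIterator;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.SparseFixedBitSet;

    final class RoleBitsIntersectionSketch {

        // Collect only documents that match both the query (scorer) and the role bits,
        // skipping deleted documents. ConjunctionDISI leap-frogs the iterators and leads
        // with the cheaper one, so sparse role bits let most of the query be skipped --
        // point 1) of the comment in the diff above. A production version would also
        // call collector.setScorer(scorer) first if scores are needed.
        static void intersectScorerAndRoleBits(Scorer scorer, SparseFixedBitSet roleBits,
                                               LeafCollector collector, Bits acceptDocs) throws IOException {
            DocIdSetIterator intersection = ConjunctionDISI.intersectIterators(Arrays.asList(
                    new BitSetIterator(roleBits, roleBits.approximateCardinality()),
                    scorer.iterator()));
            for (int doc = intersection.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS;
                    doc = intersection.nextDoc()) {
                if (acceptDocs == null || acceptDocs.get(doc)) {
                    collector.collect(doc);
                }
            }
        }
    }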
DocumentLevelSecurityTests.java

@@ -549,7 +549,7 @@ public class DocumentLevelSecurityTests extends ShieldIntegTestCase {

         searchResponse = client().prepareSearch("test")
                 .setQuery(hasParentQuery("parent", matchAllQuery()))
-                .addSort("_id", SortOrder.ASC)
+                .addSort("_uid", SortOrder.ASC)
                 .get();
         assertHitCount(searchResponse, 3L);
         assertThat(searchResponse.getHits().getAt(0).id(), equalTo("c1"));
ShieldIndexSearcherWrapperUnitTests.java

@@ -9,7 +9,9 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
@@ -17,15 +19,21 @@ import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.SparseFixedBitSet;
 import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
@@ -43,6 +51,7 @@ import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.index.similarity.SimilarityService;
 import org.elasticsearch.indices.IndicesModule;
 import org.elasticsearch.search.aggregations.LeafBucketCollector;
+import org.elasticsearch.shield.authz.accesscontrol.DocumentSubsetReader.DocumentSubsetDirectoryReader;
 import org.elasticsearch.shield.license.ShieldLicenseState;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
@@ -51,6 +60,8 @@ import org.junit.Before;

 import java.io.IOException;
 import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.Set;

 import static java.util.Collections.emptySet;
 import static java.util.Collections.singleton;
@@ -58,6 +69,7 @@ import static java.util.Collections.singletonMap;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.shield.authz.accesscontrol.ShieldIndexSearcherWrapper.intersectScorerAndRoleBits;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
 import static org.hamcrest.Matchers.sameInstance;
@@ -370,4 +382,152 @@ public class ShieldIndexSearcherWrapperUnitTests extends ESTestCase {
         }
     }

+    public void testIndexSearcherWrapperSparseNoDeletions() throws IOException {
+        doTestIndexSearcherWrapper(true, false);
+    }
+
+    public void testIndexSearcherWrapperDenseNoDeletions() throws IOException {
+        doTestIndexSearcherWrapper(false, false);
+    }
+
+    public void testIndexSearcherWrapperSparseWithDeletions() throws IOException {
+        doTestIndexSearcherWrapper(true, true);
+    }
+
+    public void testIndexSearcherWrapperDenseWithDeletions() throws IOException {
+        doTestIndexSearcherWrapper(false, true);
+    }
+
+    static class CreateScorerOnceWeight extends Weight {
+
+        private final Weight weight;
+        private final Set<Object> seenLeaves = Collections.newSetFromMap(new IdentityHashMap<>());
+
+        protected CreateScorerOnceWeight(Weight weight) {
+            super(weight.getQuery());
+            this.weight = weight;
+        }
+
+        @Override
+        public void extractTerms(Set<Term> terms) {
+            weight.extractTerms(terms);
+        }
+
+        @Override
+        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+            return weight.explain(context, doc);
+        }
+
+        @Override
+        public float getValueForNormalization() throws IOException {
+            return weight.getValueForNormalization();
+        }
+
+        @Override
+        public void normalize(float norm, float boost) {
+            weight.normalize(norm, boost);
+        }
+
+        @Override
+        public Scorer scorer(LeafReaderContext context) throws IOException {
+            assertTrue(seenLeaves.add(context.reader().getCoreCacheKey()));
+            return weight.scorer(context);
+        }
+
+        @Override
+        public BulkScorer bulkScorer(LeafReaderContext context)
+                throws IOException {
+            assertTrue(seenLeaves.add(context.reader().getCoreCacheKey()));
+            return weight.bulkScorer(context);
+        }
+    }
+
+    static class CreateScorerOnceQuery extends Query {
+
+        private final Query query;
+
+        CreateScorerOnceQuery(Query query) {
+            this.query = query;
+        }
+
+        @Override
+        public String toString(String field) {
+            return query.toString(field);
+        }
+
+        @Override
+        public Query rewrite(IndexReader reader) throws IOException {
+            Query queryRewritten = query.rewrite(reader);
+            if (query != queryRewritten) {
+                return new CreateScorerOnceQuery(queryRewritten);
+            }
+            return super.rewrite(reader);
+        }
+
+        @Override
+        public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+            return new CreateScorerOnceWeight(query.createWeight(searcher, needsScores));
+        }
+    }
+
+    public void doTestIndexSearcherWrapper(boolean sparse, boolean deletions) throws IOException {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(null));
+        Document doc = new Document();
+        StringField allowedField = new StringField("allowed", "yes", Store.NO);
+        doc.add(allowedField);
+        StringField fooField = new StringField("foo", "bar", Store.NO);
+        doc.add(fooField);
+        StringField deleteField = new StringField("delete", "no", Store.NO);
+        doc.add(deleteField);
+        w.addDocument(doc);
+        if (deletions) {
+            // add a document that matches foo:bar but will be deleted
+            deleteField.setStringValue("yes");
+            w.addDocument(doc);
+            deleteField.setStringValue("no");
+        }
+        allowedField.setStringValue("no");
+        w.addDocument(doc);
+        if (sparse) {
+            for (int i = 0; i < 1000; ++i) {
+                w.addDocument(doc);
+            }
+            w.forceMerge(1);
+        }
+        w.deleteDocuments(new Term("delete", "yes"));
+
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSettings settings = IndexSettingsModule.newIndexSettings("index", Settings.EMPTY);
+        BitsetFilterCache.Listener listener = new BitsetFilterCache.Listener() {
+            @Override
+            public void onCache(ShardId shardId, Accountable accountable) {
+
+            }
+
+            @Override
+            public void onRemoval(ShardId shardId, Accountable accountable) {
+
+            }
+        };
+        BitsetFilterCache cache = new BitsetFilterCache(settings, listener);
+        Query roleQuery = new TermQuery(new Term("allowed", "yes"));
+        BitSet bitSet = cache.getBitSetProducer(roleQuery).getBitSet(reader.leaves().get(0));
+        if (sparse) {
+            assertThat(bitSet, instanceOf(SparseFixedBitSet.class));
+        } else {
+            assertThat(bitSet, instanceOf(FixedBitSet.class));
+        }
+
+        DocumentSubsetDirectoryReader filteredReader = DocumentSubsetReader.wrap(reader, cache, roleQuery);
+        IndexSearcher searcher = new ShieldIndexSearcherWrapper.IndexSearcherWrapper(filteredReader);
+
+        // Searching a non-existing term will trigger a null scorer
+        assertEquals(0, searcher.count(new TermQuery(new Term("non_existing_field", "non_existing_value"))));
+
+        assertEquals(1, searcher.count(new TermQuery(new Term("foo", "bar"))));
+
+        // make sure scorers are created only once, see #1725
+        assertEquals(1, searcher.count(new CreateScorerOnceQuery(new MatchAllDocsQuery())));
+        IOUtils.close(reader, w, dir);
+    }
 }
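A note on the test scaffolding above: CreateScorerOnceWeight keys seenLeaves on getCoreCacheKey(), i.e. the identity of the underlying segment, and both scorer() and bulkScorer() add to the same set. If the searcher created a scorer and then also a bulk scorer for the same segment (the double construction this commit removes), the second assertTrue would fail, so the new assertEquals(1, searcher.count(new CreateScorerOnceQuery(new MatchAllDocsQuery()))) check is precisely the regression guard for elastic/elasticsearch#1725.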
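Background on why the test's sparse flag controls the bitset class (Lucene behavior, not part of this commit): BitsetFilterCache builds its cached bits via (in recent Lucene versions) BitSet.of(DocIdSetIterator, maxDoc), which picks a SparseFixedBitSet when the iterator's cost is below roughly maxDoc >>> 7, i.e. when fewer than about 1 in 128 documents match; the exact threshold is a Lucene implementation detail. With the ~1000 padding documents only a single document matches allowed:yes, so the cached role bits come back sparse; without the padding the match ratio is high enough to get a FixedBitSet. A sketch of that heuristic:

    import java.io.IOException;

    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BitSet;
    import org.apache.lucene.util.FixedBitSet;
    import org.apache.lucene.util.SparseFixedBitSet;

    final class BitSetChoiceSketch {

        // Approximates org.apache.lucene.util.BitSet.of: a cheap (low-cost)
        // iterator yields a SparseFixedBitSet, anything denser a FixedBitSet.
        static BitSet bitSetFor(DocIdSetIterator it, int maxDoc) throws IOException {
            BitSet set = it.cost() < (maxDoc >>> 7)
                    ? new SparseFixedBitSet(maxDoc)
                    : new FixedBitSet(maxDoc);
            set.or(it); // consumes the iterator, setting one bit per matching doc
            return set;
        }
    }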