Disable graph analysis at query time for shingle and cjk filters producing tokens of different size (#23920)

This change disables graph analysis of token streams containing a shingle or a cjk filters that produce shingle or ngram of different size. The graph analysis is disabled for phrase and boolean queries. Closes #23918
2017-04-06 08:55:00 +02:00 · 2017-04-06 08:55:00 +02:00 · 38009efedd
parent 203f8433c2
commit 38009efedd
10 changed files with 473 additions and 2 deletions
--- a/core/src/main/java/org/apache/lucene/analysis/miscellaneous/DisableGraphAttribute.java
+++ b/core/src/main/java/org/apache/lucene/analysis/miscellaneous/DisableGraphAttribute.java
@ -0,0 +1,32 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.apache.lucene.analysis.miscellaneous;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 /**
 * This attribute can be used to indicate that the {@link PositionLengthAttribute}
 * should not be taken in account in this {@link TokenStream}.
 * Query parsers can extract this information to decide if this token stream should be analyzed
 * as a graph or not.
 */
 public interface DisableGraphAttribute extends Attribute {}
--- a/core/src/main/java/org/apache/lucene/analysis/miscellaneous/DisableGraphAttributeImpl.java
+++ b/core/src/main/java/org/apache/lucene/analysis/miscellaneous/DisableGraphAttributeImpl.java
@ -0,0 +1,38 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.apache.lucene.analysis.miscellaneous;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 /** Default implementation of {@link DisableGraphAttribute}. */
 public class DisableGraphAttributeImpl extends AttributeImpl implements DisableGraphAttribute {
    public DisableGraphAttributeImpl() {}
    @Override
    public void clear() {}
    @Override
    public void reflectWith(AttributeReflector reflector) {
    }
    @Override
    public void copyTo(AttributeImpl target) {}
 }
--- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
+++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
@ -20,6 +20,7 @@
 package org.apache.lucene.queryparser.classic;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -49,6 +50,7 @@ import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.StringFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.support.QueryParsers;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import java.io.IOException;
 import java.util.ArrayList;
@ -56,7 +58,6 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import static java.util.Collections.unmodifiableMap;
 import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded;
@ -805,4 +806,30 @@ public class MapperQueryParser extends AnalyzingQueryParser {
        }
        return super.parse(query);
    }
    /**
     * Checks if graph analysis should be enabled for the field depending
     * on the provided {@link Analyzer}
     */
    protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
                                     String queryText, boolean quoted, int phraseSlop) {
        assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
        // Use the analyzer to get all the tokens, and then build an appropriate
        // query based on the analysis chain.
        try (TokenStream source = analyzer.tokenStream(field, queryText)) {
            if (source.hasAttribute(DisableGraphAttribute.class)) {
                /**
                 * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid
                 * paths explosion. See {@link ShingleTokenFilterFactory} for details.
                 */
                setEnableGraphQueries(false);
            }
            Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop);
            setEnableGraphQueries(true);
            return query;
        } catch (IOException e) {
            throw new RuntimeException("Error analyzing query text", e);
        }
    }
 }
--- a/core/src/main/java/org/elasticsearch/index/analysis/CJKBigramFilterFactory.java
+++ b/core/src/main/java/org/elasticsearch/index/analysis/CJKBigramFilterFactory.java
@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@ -74,7 +75,17 @@ public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {
    @Override
    public TokenStream create(TokenStream tokenStream) {
-        return new CJKBigramFilter(tokenStream, flags, outputUnigrams);
+        CJKBigramFilter filter = new CJKBigramFilter(tokenStream, flags, outputUnigrams);
        if (outputUnigrams) {
            /**
             * We disable the graph analysis on this token stream
             * because it produces bigrams AND unigrams.
             * Graph analysis on such token stream is useless and dangerous as it may create too many paths
             * since shingles of different size are not aligned in terms of positions.
             */
            filter.addAttribute(DisableGraphAttribute.class);
        }
        return filter;
    }
 }
--- a/core/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java
+++ b/core/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java
@ -20,6 +20,7 @@
 package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
@ -86,6 +87,15 @@ public class ShingleTokenFilterFactory extends AbstractTokenFilterFactory {
            filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
            filter.setTokenSeparator(tokenSeparator);
            filter.setFillerToken(fillerToken);
            if (outputUnigrams || (minShingleSize != maxShingleSize)) {
                /**
                 * We disable the graph analysis on this token stream
                 * because it produces shingles of different size.
                 * Graph analysis on such token stream is useless and dangerous as it may create too many paths
                 * since shingles of different size are not aligned in terms of positions.
                 */
                filter.addAttribute(DisableGraphAttribute.class);
            }
            return filter;
        }
--- a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java
+++ b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java
@ -19,6 +19,7 @@
 package org.elasticsearch.index.query;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -31,6 +32,7 @@ import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import java.io.IOException;
@ -167,6 +169,32 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
        return super.simplify(bq.build());
    }
    /**
     * Checks if graph analysis should be enabled for the field depending
     * on the provided {@link Analyzer}
     */
    protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
                                     String queryText, boolean quoted, int phraseSlop) {
        assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
        // Use the analyzer to get all the tokens, and then build an appropriate
        // query based on the analysis chain.
        try (TokenStream source = analyzer.tokenStream(field, queryText)) {
            if (source.hasAttribute(DisableGraphAttribute.class)) {
                /**
                 * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid
                 * paths explosion. See {@link ShingleTokenFilterFactory} for details.
                 */
                setEnableGraphQueries(false);
            }
            Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop);
            setEnableGraphQueries(true);
            return query;
        } catch (IOException e) {
            throw new RuntimeException("Error analyzing query text", e);
        }
    }
    private static Query wrapWithBoost(Query query, float boost) {
        if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
            return new BoostQuery(query, boost);
--- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
+++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
@ -20,6 +20,8 @@
 package org.elasticsearch.index.search;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.ExtendedCommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
@ -49,6 +51,7 @@ import org.elasticsearch.common.lucene.all.AllTermQuery;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.support.QueryParsers;
@ -320,6 +323,32 @@ public class MatchQuery {
            return blendTermsQuery(terms, mapper);
        }
        /**
         * Checks if graph analysis should be enabled for the field depending
         * on the provided {@link Analyzer}
         */
        protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
                                         String queryText, boolean quoted, int phraseSlop) {
            assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
            // Use the analyzer to get all the tokens, and then build an appropriate
            // query based on the analysis chain.
            try (TokenStream source = analyzer.tokenStream(field, queryText)) {
                if (source.hasAttribute(DisableGraphAttribute.class)) {
                    /**
                     * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid
                     * paths explosion. See {@link ShingleTokenFilterFactory} for details.
                     */
                    setEnableGraphQueries(false);
                }
                Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop);
                setEnableGraphQueries(true);
                return query;
            } catch (IOException e) {
                throw new RuntimeException("Error analyzing query text", e);
            }
        }
        public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
            final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
            return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);
--- a/core/src/test/java/org/elasticsearch/index/analysis/CJKFilterFactoryTests.java
+++ b/core/src/test/java/org/elasticsearch/index/analysis/CJKFilterFactoryTests.java
@ -19,7 +19,9 @@
 package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.ESTokenStreamTestCase;
@ -69,4 +71,25 @@ public class CJKFilterFactoryTests extends ESTokenStreamTestCase {
        tokenizer.setReader(new StringReader(source));
        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
    }
    public void testDisableGraph() throws IOException {
        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE);
        TokenFilterFactory allFlagsFactory = analysis.tokenFilter.get("cjk_all_flags");
        TokenFilterFactory hanOnlyFactory = analysis.tokenFilter.get("cjk_han_only");
        String source = "多くの学生が試験に落ちた。";
        Tokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader(source));
        try (TokenStream tokenStream = allFlagsFactory.create(tokenizer)) {
            // This config outputs different size of ngrams so graph analysis is disabled
            assertTrue(tokenStream.hasAttribute(DisableGraphAttribute.class));
        }
        tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader(source));
        try (TokenStream tokenStream = hanOnlyFactory.create(tokenizer)) {
            // This config uses only bigrams so graph analysis is enabled
            assertFalse(tokenStream.hasAttribute(DisableGraphAttribute.class));
        }
    }
 }
--- a/core/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java
+++ b/core/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.ESTokenStreamTestCase;
@ -80,4 +81,25 @@ public class ShingleTokenFilterFactoryTests extends ESTokenStreamTestCase {
        TokenStream stream = new StopFilter(tokenizer, StopFilter.makeStopSet("the"));
        assertTokenStreamContents(tokenFilter.create(stream), expected);
    }
    public void testDisableGraph() throws IOException {
        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE);
        TokenFilterFactory shingleFiller = analysis.tokenFilter.get("shingle_filler");
        TokenFilterFactory shingleInverse = analysis.tokenFilter.get("shingle_inverse");
        String source = "hello world";
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(source));
        try (TokenStream stream = shingleFiller.create(tokenizer)) {
            // This config uses different size of shingles so graph analysis is disabled
            assertTrue(stream.hasAttribute(DisableGraphAttribute.class));
        }
        tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(source));
        try (TokenStream stream = shingleInverse.create(tokenizer)) {
            // This config uses a single size of shingles so graph analysis is enabled
            assertFalse(stream.hasAttribute(DisableGraphAttribute.class));
        }
    }
 }
--- a/core/src/test/java/org/elasticsearch/index/query/DisableGraphQueryTests.java
+++ b/core/src/test/java/org/elasticsearch/index/query/DisableGraphQueryTests.java
@ -0,0 +1,251 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.elasticsearch.index.query;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.DisjunctionMaxQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.search.MatchQuery;
 import org.elasticsearch.test.ESSingleNodeTestCase;
 import org.junit.After;
 import org.junit.Before;
 import java.io.IOException;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 /**
 * Makes sure that graph analysis is disabled with shingle filters of different size
 */
 public class DisableGraphQueryTests extends ESSingleNodeTestCase {
    private static IndexService indexService;
    private static QueryShardContext shardContext;
    private static Query expectedQuery;
    private static Query expectedPhraseQuery;
    private static Query expectedQueryWithUnigram;
    private static Query expectedPhraseQueryWithUnigram;
    @Before
    public void setup() {
        Settings settings = Settings.builder()
            .put("index.analysis.filter.shingle.type", "shingle")
            .put("index.analysis.filter.shingle.output_unigrams", false)
            .put("index.analysis.filter.shingle.min_size", 2)
            .put("index.analysis.filter.shingle.max_size", 2)
            .put("index.analysis.filter.shingle_unigram.type", "shingle")
            .put("index.analysis.filter.shingle_unigram.output_unigrams", true)
            .put("index.analysis.filter.shingle_unigram.min_size", 2)
            .put("index.analysis.filter.shingle_unigram.max_size", 2)
            .put("index.analysis.analyzer.text_shingle.tokenizer", "whitespace")
            .put("index.analysis.analyzer.text_shingle.filter", "lowercase, shingle")
            .put("index.analysis.analyzer.text_shingle_unigram.tokenizer", "whitespace")
            .put("index.analysis.analyzer.text_shingle_unigram.filter",
                "lowercase, shingle_unigram")
            .build();
        indexService = createIndex("test", settings, "t",
            "text_shingle", "type=text,analyzer=text_shingle",
            "text_shingle_unigram", "type=text,analyzer=text_shingle_unigram");
        shardContext = indexService.newQueryShardContext(0, null, () -> 0L);
        // parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
        // that ignores position length attribute
         expectedQueryWithUnigram= new BooleanQuery.Builder()
            .add(
                new SynonymQuery(
                    new Term("text_shingle_unigram", "foo"),
                    new Term("text_shingle_unigram", "foo bar")
                ), BooleanClause.Occur.SHOULD)
            .add(
                new SynonymQuery(
                    new Term("text_shingle_unigram", "bar"),
                    new Term("text_shingle_unigram", "bar baz")
            ), BooleanClause.Occur.SHOULD)
            .add(
                new TermQuery(
                    new Term("text_shingle_unigram", "baz")
                ), BooleanClause.Occur.SHOULD)
            .build();
        // parsed query for "text_shingle_unigram:\"foo bar baz\" with query parsers
        // that ignores position length attribute
        expectedPhraseQueryWithUnigram = new MultiPhraseQuery.Builder()
            .add(
                new Term[] {
                    new Term("text_shingle_unigram", "foo"),
                    new Term("text_shingle_unigram", "foo bar")
                }, 0)
            .add(
                new Term[] {
                    new Term("text_shingle_unigram", "bar"),
                    new Term("text_shingle_unigram", "bar baz")
                }, 1)
            .add(
                new Term[] {
                    new Term("text_shingle_unigram", "baz"),
                }, 2)
            .build();
        // parsed query for "text_shingle:(foo bar baz)
        expectedQuery = new BooleanQuery.Builder()
            .add(
                new TermQuery(new Term("text_shingle", "foo bar")),
                BooleanClause.Occur.SHOULD
            )
            .add(
                new TermQuery(new Term("text_shingle","bar baz")),
                BooleanClause.Occur.SHOULD
            )
            .add(
                new TermQuery(new Term("text_shingle","baz biz")),
                BooleanClause.Occur.SHOULD
            )
            .build();
        // parsed query for "text_shingle:"foo bar baz"
        expectedPhraseQuery = new PhraseQuery.Builder()
            .add(
                new Term("text_shingle", "foo bar")
            )
            .add(
                new Term("text_shingle","bar baz")
            )
            .add(
                new Term("text_shingle","baz biz")
            )
            .build();
    }
    @After
    public void cleanup() {
        indexService = null;
        shardContext = null;
        expectedQuery = null;
        expectedPhraseQuery = null;
    }
    public void testMatchPhraseQuery() throws IOException {
        MatchPhraseQueryBuilder builder =
            new MatchPhraseQueryBuilder("text_shingle_unigram", "foo bar baz");
        Query query = builder.doToQuery(shardContext);
        assertThat(expectedPhraseQueryWithUnigram, equalTo(query));
        builder =
            new MatchPhraseQueryBuilder("text_shingle", "foo bar baz biz");
        query = builder.doToQuery(shardContext);
        assertThat(expectedPhraseQuery, equalTo(query));
    }
    public void testMatchQuery() throws IOException {
        MatchQueryBuilder builder =
            new MatchQueryBuilder("text_shingle_unigram", "foo bar baz");
        Query query = builder.doToQuery(shardContext);
        assertThat(expectedQueryWithUnigram, equalTo(query));
        builder = new MatchQueryBuilder("text_shingle", "foo bar baz biz");
        query = builder.doToQuery(shardContext);
        assertThat(expectedQuery, equalTo(query));
    }
    public void testMultiMatchQuery() throws IOException {
        MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("foo bar baz",
            "text_shingle_unigram");
        Query query = builder.doToQuery(shardContext);
        assertThat(expectedQueryWithUnigram, equalTo(query));
        builder.type(MatchQuery.Type.PHRASE);
        query = builder.doToQuery(shardContext);
        assertThat(expectedPhraseQueryWithUnigram, equalTo(query));
        builder = new MultiMatchQueryBuilder("foo bar baz biz", "text_shingle");
        query = builder.doToQuery(shardContext);
        assertThat(expectedQuery, equalTo(query));
        builder.type(MatchQuery.Type.PHRASE);
        query = builder.doToQuery(shardContext);
        assertThat(expectedPhraseQuery, equalTo(query));
    }
    public void testSimpleQueryString() throws IOException {
        SimpleQueryStringBuilder builder = new SimpleQueryStringBuilder("foo bar baz");
        builder.field("text_shingle_unigram");
        builder.flags(SimpleQueryStringFlag.NONE);
        Query query = builder.doToQuery(shardContext);
        assertThat(expectedQueryWithUnigram, equalTo(query));
        builder = new SimpleQueryStringBuilder("\"foo bar baz\"");
        builder.field("text_shingle_unigram");
        builder.flags(SimpleQueryStringFlag.PHRASE);
        query = builder.doToQuery(shardContext);
        assertThat(expectedPhraseQueryWithUnigram, equalTo(query));
        builder = new SimpleQueryStringBuilder("foo bar baz biz");
        builder.field("text_shingle");
        builder.flags(SimpleQueryStringFlag.NONE);
        query = builder.doToQuery(shardContext);
        assertThat(expectedQuery, equalTo(query));
        builder = new SimpleQueryStringBuilder("\"foo bar baz biz\"");
        builder.field("text_shingle");
        builder.flags(SimpleQueryStringFlag.PHRASE);
        query = builder.doToQuery(shardContext);
        assertThat(expectedPhraseQuery, equalTo(query));
    }
    public void testQueryString() throws IOException {
        QueryStringQueryBuilder builder = new QueryStringQueryBuilder("foo bar baz");
        builder.field("text_shingle_unigram");
        builder.splitOnWhitespace(false);
        Query query = builder.doToQuery(shardContext);
        assertThat(expectedQueryWithUnigram, equalTo(query));
        builder = new QueryStringQueryBuilder("\"foo bar baz\"");
        builder.field("text_shingle_unigram");
        builder.splitOnWhitespace(false);
        query = builder.doToQuery(shardContext);
        assertThat(query, instanceOf(DisjunctionMaxQuery.class));
        DisjunctionMaxQuery maxQuery = (DisjunctionMaxQuery) query;
        assertThat(maxQuery.getDisjuncts().size(), equalTo(1));
        assertThat(expectedPhraseQueryWithUnigram, equalTo(maxQuery.getDisjuncts().get(0)));
        builder = new QueryStringQueryBuilder("foo bar baz biz");
        builder.field("text_shingle");
        builder.splitOnWhitespace(false);
        query = builder.doToQuery(shardContext);
        assertThat(expectedQuery, equalTo(query));
        builder = new QueryStringQueryBuilder("\"foo bar baz biz\"");
        builder.field("text_shingle");
        builder.splitOnWhitespace(false);
        query = builder.doToQuery(shardContext);
        assertThat(query, instanceOf(DisjunctionMaxQuery.class));
        maxQuery = (DisjunctionMaxQuery) query;
        assertThat(maxQuery.getDisjuncts().size(), equalTo(1));
        assertThat(expectedPhraseQuery, equalTo(maxQuery.getDisjuncts().get(0)));
    }
 }