From 683be6fc645fe3e917caeb883d3f29c63a6763a2 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 10:00:01 +0100 Subject: [PATCH] lucene 4: converted QueryParser/Builders to Lucene 4 --- .../index/query/BoolFilterParser.java | 2 +- .../index/query/BoostingQueryParser.java | 2 +- .../index/query/CustomScoreQueryParser.java | 6 +- .../index/query/FuzzyQueryBuilder.java | 4 +- .../index/query/FuzzyQueryParser.java | 6 +- .../index/query/IdsFilterParser.java | 2 +- .../index/query/IdsQueryParser.java | 2 +- .../index/query/MatchAllQueryParser.java | 4 +- .../index/query/MatchQueryBuilder.java | 13 +++- .../index/query/MatchQueryParser.java | 2 + .../index/query/NestedQueryParser.java | 18 +++-- .../index/query/QueryParseContext.java | 7 +- .../index/query/QueryStringQueryParser.java | 15 ++-- .../index/query/RangeFilterParser.java | 20 ++--- .../index/query/RangeQueryParser.java | 20 ++--- .../index/query/ScriptFilterParser.java | 12 ++- .../index/query/TypeFilterParser.java | 9 ++- .../index/search/MatchQuery.java | 73 +++++++++++-------- .../search/NumericRangeFieldDataFilter.java | 41 ++++++----- .../elasticsearch/index/search/UidFilter.java | 55 ++++++-------- 20 files changed, 174 insertions(+), 139 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java b/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java index 5313001311c..1c2dc1b3ef0 100644 --- a/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilterClause; +import org.apache.lucene.queries.FilterClause; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.XBooleanFilter; import org.elasticsearch.common.xcontent.XContentParser; diff --git a/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java b/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java index e42f93d4a19..a5a40f50770 100644 --- a/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.query; -import org.apache.lucene.search.BoostingQuery; +import org.apache.lucene.queries.BoostingQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; diff --git a/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java b/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java index f792efd6f4a..81d0694579f 100644 --- a/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Query; @@ -122,8 +123,9 @@ public class CustomScoreQueryParser implements QueryParser { } @Override - public void setNextReader(IndexReader reader) { - script.setNextReader(reader); + public void setNextReader(AtomicReaderContext ctx) { + //LUCENE 4 UPGRADE should this pass on an ARC or just an AtomicReader?
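+ // An AtomicReaderContext is the per-segment handle in Lucene 4; a script that only needs
+ // the segment reader itself can call ctx.reader() to get the AtomicReader back, so passing
+ // the whole context keeps both options open.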
+ script.setNextReader(ctx); } @Override diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java index f5360f30292..6c94e9ea82f 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java @@ -43,7 +43,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer private Integer maxExpansions; //LUCENE 4 UPGRADE we need a testcase for this + documentation - private Boolean transpositions = true; + private Boolean transpositions; /** * Constructs a new term query. @@ -101,7 +101,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer if (boost != -1) { builder.field("boost", boost); } - if (!transpositions) { + if (transpositions != null) { builder.field("transpositions", transpositions); } if (minSimilarity != null) { diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java index 4da392305f0..07749b928d2 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java @@ -64,7 +64,7 @@ public class FuzzyQueryParser implements QueryParser { String minSimilarity = "0.5"; int prefixLength = FuzzyQuery.defaultPrefixLength; int maxExpansions = FuzzyQuery.defaultMaxExpansions; - boolean transpositions = true; + boolean transpositions = false; MultiTermQuery.RewriteMethod rewriteMethod = null; token = parser.nextToken(); if (token == XContentParser.Token.START_OBJECT) { @@ -113,8 +113,8 @@ public class FuzzyQueryParser implements QueryParser { } } if (query == null) { - //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), + //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float + int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), value.codePointCount(0, value.length())); query = new FuzzyQuery(new Term(fieldName, value), edits, prefixLength, maxExpansions, transpositions); } diff --git a/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java b/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java index 59e341e3a3e..a3231f1df5a 100644 --- a/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java @@ -98,7 +98,7 @@ public class IdsFilterParser implements FilterParser { types = parseContext.mapperService().types(); } - UidFilter filter = new UidFilter(types, ids, parseContext.indexCache().bloomCache()); + UidFilter filter = new UidFilter(types, ids); if (filterName != null) { parseContext.addNamedFilter(filterName, filter); } diff --git a/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java b/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java index 3c4bd5264c4..3230c79e316 100644 --- a/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java @@ -102,7 +102,7 @@ public class IdsQueryParser implements QueryParser { types = parseContext.mapperService().types(); } - UidFilter filter = new UidFilter(types, ids, parseContext.indexCache().bloomCache()); + UidFilter filter = new UidFilter(types, ids); // no need for constant score filter, since 
we don't cache the filter, and it always takes deletes into account ConstantScoreQuery query = new ConstantScoreQuery(filter); query.setBoost(boost); diff --git a/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java index 21621d30c5c..6ce238c51c4 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java @@ -71,7 +71,9 @@ public class MatchAllQueryParser implements QueryParser { return Queries.MATCH_ALL_QUERY; } - MatchAllDocsQuery query = new MatchAllDocsQuery(normsField); + //LUCENE 4 UPGRADE the norms field is not supported anymore; we need to find another way or drop the functionality + //MatchAllDocsQuery query = new MatchAllDocsQuery(normsField); + MatchAllDocsQuery query = new MatchAllDocsQuery(); query.setBoost(boost); return query; } diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index 44d52dfc935..c20944790cc 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -77,6 +77,8 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer private String fuzzyRewrite = null; private Boolean lenient; + + private Boolean fuzzyTranspositions = null; /** * Constructs a new text query. */ @@ -163,6 +165,12 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer this.fuzzyRewrite = fuzzyRewrite; return this; } + + public MatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + //LUCENE 4 UPGRADE add documentation + this.fuzzyTranspositions = fuzzyTranspositions; + return this; + } /** * Sets whether format based failures will be ignored.
@@ -211,7 +219,10 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer if (fuzzyRewrite != null) { builder.field("fuzzy_rewrite", fuzzyRewrite); } - + if (fuzzyTranspositions != null) { + //LUCENE 4 UPGRADE we need to document this & test this + builder.field("fuzzy_transpositions", fuzzyTranspositions); + } if (lenient != null) { builder.field("lenient", lenient); } diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java index 0df86025a94..2c4251fb060 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java @@ -122,6 +122,8 @@ public class MatchQueryParser implements QueryParser { matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null)); } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) { matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null)); + } else if ("fuzzy_transpositions".equals(currentFieldName)) { + matchQuery.setTranspositions(parser.booleanValue()); } else if ("lenient".equals(currentFieldName)) { matchQuery.setLenient(parser.booleanValue()); } else { diff --git a/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java b/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java index 98bd960bc46..8fc3e3f56e7 100644 --- a/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java @@ -19,8 +19,15 @@ package org.elasticsearch.index.query; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.*; +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.DeletionAwareConstantScoreQuery; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; @@ -30,8 +37,6 @@ import org.elasticsearch.index.search.nested.BlockJoinQuery; import org.elasticsearch.index.search.nested.NonNestedDocsFilter; import org.elasticsearch.search.internal.SearchContext; -import java.io.IOException; - public class NestedQueryParser implements QueryParser { public static final String NAME = "nested"; @@ -184,8 +189,9 @@ public class NestedQueryParser implements QueryParser { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits liveDocs) throws IOException { + //LUCENE 4 UPGRADE just passing on ctx and live docs here + return filter.getDocIdSet(ctx, liveDocs); } } } diff --git a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java index 18cc8eb584f..4f597a90695 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java +++ b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java @@ -21,11 +21,12 @@ package org.elasticsearch.index.query; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -import org.apache.lucene.queryParser.MapperQueryParser; -import
org.apache.lucene.queryParser.QueryParserSettings; + +import org.apache.lucene.queryparser.classic.MapperQueryParser; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.Index; diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java index 9600090c2a4..04ea25d4d63 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java @@ -22,9 +22,10 @@ package org.elasticsearch.index.query; import com.google.common.collect.Lists; import gnu.trove.impl.Constants; import gnu.trove.map.hash.TObjectFloatHashMap; -import org.apache.lucene.queryParser.MapperQueryParser; -import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParserSettings; + +import org.apache.lucene.queryparser.classic.MapperQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.Strings; @@ -130,9 +131,9 @@ public class QueryStringQueryParser implements QueryParser { } else if ("default_operator".equals(currentFieldName) || "defaultOperator".equals(currentFieldName)) { String op = parser.text(); if ("or".equalsIgnoreCase(op)) { - qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.OR); + qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.OR); } else if ("and".equalsIgnoreCase(op)) { - qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND); + qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.AND); } else { throw new QueryParsingException(parseContext.index(), "Query default operator [" + op + "] is not allowed"); } @@ -196,7 +197,7 @@ public class QueryStringQueryParser implements QueryParser { qpSettings.defaultQuoteAnalyzer(parseContext.mapperService().searchQuoteAnalyzer()); if (qpSettings.escape()) { - qpSettings.queryString(org.apache.lucene.queryParser.QueryParser.escape(qpSettings.queryString())); + qpSettings.queryString(org.apache.lucene.queryparser.classic.QueryParser.escape(qpSettings.queryString())); } qpSettings.queryTypes(parseContext.queryTypes()); @@ -220,7 +221,7 @@ public class QueryStringQueryParser implements QueryParser { } parseContext.indexCache().queryParserCache().put(qpSettings, query); return query; - } catch (ParseException e) { + } catch (org.apache.lucene.queryparser.classic.ParseException e) { throw new QueryParsingException(parseContext.index(), "Failed to parse query [" + qpSettings.queryString() + "]", e); } } diff --git a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java index 820f8a9c66f..b24f72fd13f 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import 
org.apache.lucene.search.Filter; import org.apache.lucene.search.TermRangeFilter; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.cache.filter.support.CacheKeyFilter; @@ -53,8 +54,8 @@ public class RangeFilterParser implements FilterParser { boolean cache = true; CacheKeyFilter.Key cacheKey = null; String fieldName = null; - String from = null; - String to = null; + BytesRef from = null; + BytesRef to = null; boolean includeLower = true; boolean includeUpper = true; @@ -71,24 +72,24 @@ public class RangeFilterParser implements FilterParser { currentFieldName = parser.currentName(); } else { if ("from".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); } else if ("to".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); } else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) { includeLower = parser.booleanValue(); } else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) { includeUpper = parser.booleanValue(); } else if ("gt".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = false; } else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = true; } else if ("lt".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = false; } else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = true; } else { throw new QueryParsingException(parseContext.index(), "[range] filter does not support [" + currentFieldName + "]"); @@ -116,7 +117,8 @@ public class RangeFilterParser implements FilterParser { MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - filter = smartNameFieldMappers.mapper().rangeFilter(from, to, includeLower, includeUpper, parseContext); + //LUCENE 4 UPGRADE range filter should use bytesref too? 
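+ // Note: from/to can legitimately still be null here (open-ended gt/lt ranges), so any
+ // conversion back to String for the mapper call below has to be null-guarded.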
+ filter = smartNameFieldMappers.mapper().rangeFilter(from == null ? null : from.utf8ToString(), to == null ? null : to.utf8ToString(), includeLower, includeUpper, parseContext); } } if (filter == null) { diff --git a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java index 69c02f42ab4..b79b3a07ee3 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; @@ -59,8 +60,8 @@ public class RangeQueryParser implements QueryParser { throw new QueryParsingException(parseContext.index(), "[range] query malformed, after field missing start object"); } - String from = null; - String to = null; + BytesRef from = null; + BytesRef to = null; boolean includeLower = true; boolean includeUpper = true; float boost = 1.0f; @@ -71,9 +72,9 @@ currentFieldName = parser.currentName(); } else { if ("from".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); } else if ("to".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); } else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) { includeLower = parser.booleanValue(); } else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) { @@ -81,16 +82,16 @@ } else if ("boost".equals(currentFieldName)) { boost = parser.floatValue(); } else if ("gt".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = false; } else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = true; } else if ("lt".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = false; } else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = true; } else { throw new QueryParsingException(parseContext.index(), "[range] query does not support [" + currentFieldName + "]"); @@ -108,7 +109,8 @@ MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - query = smartNameFieldMappers.mapper().rangeQuery(from, to, includeLower, includeUpper, parseContext); + //LUCENE 4 UPGRADE Mapper#rangeQuery should use bytesref as well?
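+ // TermRangeQuery (and TermRangeFilter) in Lucene 4 take BytesRef bounds directly, e.g.
+ // new TermRangeQuery(field, from, to, includeLower, includeUpper), so a BytesRef-based
+ // mapper API would avoid the utf8ToString() round trip below.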
+ query = smartNameFieldMappers.mapper().rangeQuery(from == null ? null : from.utf8ToString(), to == null ? null : to.utf8ToString(), includeLower, includeUpper, parseContext); } } if (query == null) { diff --git a/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java b/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java index d77557d487d..5e42153e019 100644 --- a/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java @@ -20,9 +20,14 @@ package org.elasticsearch.index.query; import com.google.common.collect.Maps; + +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredDocIdSet; +import org.apache.lucene.util.Bits; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.inject.Inject; @@ -160,9 +165,10 @@ public class ScriptFilterParser implements FilterParser { } @Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - searchScript.setNextReader(reader); - return new ScriptDocSet(reader, searchScript); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + searchScript.setNextReader(context.reader()); + // LUCENE 4 UPGRADE: we can simply wrap this here since it is not cacheable and if we are not top level we will get a null passed anyway + return BitsFilteredDocIdSet.wrap(new ScriptDocSet(context.reader(), searchScript), acceptDocs); } static class ScriptDocSet extends GetDocSet { diff --git a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java index b4f0a0a4bc4..f1b5c227130 100644 --- a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java @@ -19,7 +19,9 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.TermFilter; import org.elasticsearch.common.xcontent.XContentParser; @@ -57,14 +59,15 @@ public class TypeFilterParser implements FilterParser { if (token != XContentParser.Token.VALUE_STRING) { throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name"); } - String type = parser.text(); + BytesRef type = parser.bytes(null); // move to the next token parser.nextToken(); Filter filter; - DocumentMapper documentMapper = parseContext.mapperService().documentMapper(type); + //LUCENE 4 UPGRADE document mapper should use bytesref as well?
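+ // In Lucene 4 a Term can be built straight from a BytesRef (new Term(field, bytes)), which
+ // the TermFilter fallback below already relies on; only this mapper lookup still forces the
+ // utf8ToString() conversion.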
+ DocumentMapper documentMapper = parseContext.mapperService().documentMapper(type.utf8ToString()); if (documentMapper == null) { - filter = new TermFilter(TypeFieldMapper.TERM_FACTORY.createTerm(type)); + filter = new TermFilter(new Term(TypeFieldMapper.TERM_FACTORY.field(), type)); } else { filter = documentMapper.typeFilter(); } diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 55eed4b1787..4134f5fb506 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -24,10 +24,14 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; @@ -64,6 +68,9 @@ public class MatchQuery { protected String fuzziness = null; protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; protected int maxExpansions = FuzzyQuery.defaultMaxExpansions; + //LUCENE 4 UPGRADE we need a default value for this! + protected boolean transpositions = false; + protected MultiTermQuery.RewriteMethod rewriteMethod; protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod; @@ -101,6 +108,10 @@ public class MatchQuery { public void setMaxExpansions(int maxExpansions) { this.maxExpansions = maxExpansions; } + + public void setTranspositions(boolean transpositions) { + this.transpositions = transpositions; + } public void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) { this.rewriteMethod = rewriteMethod; @@ -116,13 +127,13 @@ public class MatchQuery { public Query parse(Type type, String fieldName, String text) { FieldMapper mapper = null; - Term fieldTerm; + final String field; MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) { mapper = smartNameFieldMappers.mapper(); - fieldTerm = mapper.names().indexNameTerm(); + field = mapper.names().indexName(); } else { - fieldTerm = new Term(fieldName); + field = fieldName; } if (mapper != null && mapper.useFieldQueryWithQueryString()) { @@ -169,13 +180,13 @@ public class MatchQuery { } // Logic similar to QueryParser#getFieldQuery - - TokenStream source; + final TokenStream source; try { - source = analyzer.reusableTokenStream(fieldTerm.field(), new FastStringReader(text)); + source = analyzer.tokenStream(field, new FastStringReader(text)); source.reset(); - } catch (IOException e) { - source = analyzer.tokenStream(fieldTerm.field(), new FastStringReader(text)); + } catch (IOException ex) { + //LUCENE 4 UPGRADE not sure what to do here; Lucene 3.6 had a tokenStream that didn't throw an exception.
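+ // In Lucene 4 the Analyzer handles reuse internally and tokenStream() declares
+ // IOException, so the old reusableTokenStream() fallback no longer exists; rethrowing
+ // as a parse exception is a reasonable stopgap.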
+ throw new ElasticSearchParseException("failed to process query", ex); } CachingTokenFilter buffer = new CachingTokenFilter(source); CharTermAttribute termAtt = null; @@ -183,12 +194,7 @@ public class MatchQuery { int numTokens = 0; boolean success = false; - try { - buffer.reset(); - success = true; - } catch (IOException e) { - // success==false if we hit an exception - } + buffer.reset(); + success = true; if (success) { if (buffer.hasAttribute(CharTermAttribute.class)) { termAtt = buffer.getAttribute(CharTermAttribute.class); @@ -233,29 +239,26 @@ public class MatchQuery { return MatchNoDocsQuery.INSTANCE; } else if (type == Type.BOOLEAN) { if (numTokens == 1) { - String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } - Query q = newTermQuery(mapper, fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + Query q = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef()))); return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); } BooleanQuery q = new BooleanQuery(positionCount == 1); for (int i = 0; i < numTokens; i++) { - String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } - - Query currentQuery = newTermQuery(mapper, fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + Query currentQuery = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef()))); q.add(currentQuery, occur); } return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); @@ -266,12 +269,10 @@ List multiTerms = new ArrayList(); int position = -1; for (int i = 0; i < numTokens; i++) { - String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -288,7 +289,8 @@ multiTerms.clear(); } position += positionIncrement; - multiTerms.add(fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef()))); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); @@ -303,13 +305,11 @@ for (int i = 0; i < numTokens; i++) { - String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -319,9 +319,10 @@ if (enablePositionIncrements) { position += positionIncrement; - pq.add(fieldTerm.createTerm(term), position); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + pq.add(new Term(field, termToByteRef(termAtt, new BytesRef())), position); } else { pq.add(new Term(field, termToByteRef(termAtt, new BytesRef()))); } } return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext); @@ -333,12 +334,10 @@ List multiTerms = new ArrayList(); int position =
-1; for (int i = 0; i < numTokens; i++) { - String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -355,7 +354,8 @@ public class MatchQuery { multiTerms.clear(); } position += positionIncrement; - multiTerms.add(fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef()))); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); @@ -376,7 +376,11 @@ public class MatchQuery { QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); } } - FuzzyQuery query = new FuzzyQuery(term, Float.parseFloat(fuzziness), fuzzyPrefixLength, maxExpansions); + String text = term.text(); + //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float + int edits = FuzzyQuery.floatToEdits(Float.parseFloat(fuzziness), + text.codePointCount(0, text.length())); + FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions); QueryParsers.setRewriteMethod(query, rewriteMethod); return query; } @@ -388,4 +392,9 @@ public class MatchQuery { } return new TermQuery(term); } + + private static BytesRef termToByteRef(CharTermAttribute attr, BytesRef ref) { + UnicodeUtil.UTF16toUTF8WithHash(attr.buffer(), 0, attr.length(), ref); + return ref; + } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java b/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java index 7a4adbb9cb9..60761fc9476 100644 --- a/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java +++ b/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.GetDocSet; @@ -43,7 +44,7 @@ import java.io.IOException; * */ public abstract class NumericRangeFieldDataFilter extends Filter { - + // LUCENE 4 UPGRADE: this filter doesn't respect acceptDocs yet! 
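+ // One way to honor acceptDocs without touching every factory below would be to wrap each
+ // returned DocSet the way ScriptFilterParser does above, i.e.
+ // return BitsFilteredDocIdSet.wrap(docSet, acceptedDocs);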
final FieldDataCache fieldDataCache; final String field; final T lowerVal; @@ -121,7 +122,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newByteRange(FieldDataCache fieldDataCache, String field, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final byte inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { byte i = lowerVal.byteValue(); @@ -143,8 +144,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, reader, field); - return new GetDocSet(reader.maxDoc()) { + final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -181,7 +182,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newShortRange(FieldDataCache fieldDataCache, String field, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final short inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { short i = lowerVal.shortValue(); @@ -203,8 +204,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final ShortFieldData fieldData = (ShortFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.SHORT, reader, field); - return new GetDocSet(reader.maxDoc()) { + final ShortFieldData fieldData = (ShortFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.SHORT, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -240,7 +241,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newIntRange(FieldDataCache fieldDataCache, String field, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final int inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { int i = lowerVal.intValue(); @@ -262,8 +263,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final IntFieldData fieldData = (IntFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.INT, reader, field); - return new GetDocSet(reader.maxDoc()) { + final IntFieldData fieldData = (IntFieldData) 
this.fieldDataCache.cache(FieldDataType.DefaultTypes.INT, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -299,7 +300,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newLongRange(FieldDataCache fieldDataCache, String field, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final long inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { long i = lowerVal.longValue(); @@ -321,8 +322,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final LongFieldData fieldData = (LongFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.LONG, reader, field); - return new GetDocSet(reader.maxDoc()) { + final LongFieldData fieldData = (LongFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.LONG, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -358,7 +359,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newFloatRange(FieldDataCache fieldDataCache, String field, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final float inclusiveLowerPoint, inclusiveUpperPoint; @@ -384,8 +385,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final FloatFieldData fieldData = (FloatFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.FLOAT, reader, field); - return new GetDocSet(reader.maxDoc()) { + final FloatFieldData fieldData = (FloatFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.FLOAT, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -421,7 +422,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newDoubleRange(FieldDataCache fieldDataCache, String field, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final double inclusiveLowerPoint, inclusiveUpperPoint; @@ -447,8 +448,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return 
DocSet.EMPTY_DOC_SET; - final DoubleFieldData fieldData = (DoubleFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.DOUBLE, reader, field); - return new GetDocSet(reader.maxDoc()) { + final DoubleFieldData fieldData = (DoubleFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.DOUBLE, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { diff --git a/src/main/java/org/elasticsearch/index/search/UidFilter.java b/src/main/java/org/elasticsearch/index/search/UidFilter.java index 99d320114bb..59665b6fd95 100644 --- a/src/main/java/org/elasticsearch/index/search/UidFilter.java +++ b/src/main/java/org/elasticsearch/index/search/UidFilter.java @@ -19,16 +19,15 @@ package org.elasticsearch.index.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.UnicodeUtil; -import org.elasticsearch.common.Unicode; -import org.elasticsearch.common.bloom.BloomFilter; -import org.elasticsearch.index.cache.bloom.BloomCache; import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.internal.UidFieldMapper; @@ -40,17 +39,12 @@ import java.util.List; public class UidFilter extends Filter { final Term[] uids; - - private final BloomCache bloomCache; - - // LUCENE 4 UPGRADE: We removed the bloom cache, so once we rewrite this filter, do it without - public UidFilter(Collection types, List ids, BloomCache bloomCache) { - this.bloomCache = bloomCache; + public UidFilter(Collection types, List ids) { this.uids = new Term[types.size() * ids.size()]; int i = 0; for (String type : types) { for (String id : ids) { - uids[i++] = UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(type, id)); + uids[i++] = new Term(UidFieldMapper.NAME, Uid.createUid(type, id)); } } if (this.uids.length > 1) { @@ -66,33 +60,26 @@ ... // - If we have a single id, we can create a SingleIdDocIdSet to save on mem // - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - BloomFilter filter = bloomCache.filter(reader, UidFieldMapper.NAME, true); + // LUCENE 4 UPGRADE: this filter now respects acceptDocs; maybe we need to change this + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { FixedBitSet set = null; - TermDocs td = null; - UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); - try { - for (Term uid : uids) { - Unicode.fromStringAsUtf8(uid.text(), utf8); - if (!filter.isPresent(utf8.result, 0, utf8.length)) { - continue; - } - if (td == null) { - td = reader.termDocs(); - } - td.seek(uid); - // no need for batching, its on the UID, there will be only one doc - while (td.next()) { + final AtomicReader reader = ctx.reader(); + final Terms terms = reader.terms(UidFieldMapper.NAME); + if (terms == null) { + // the uid field does not exist in this segment + return null; + } + final TermsEnum termsEnum = terms.iterator(null); + DocsEnum docsEnum = null; + for (Term uid : uids) { + if (termsEnum.seekExact(uid.bytes(), false)) { + docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0); + int doc; + while ((doc =
docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) { // no need for batching, it's on the UID, there will be only + // one doc if (set == null) { set = new FixedBitSet(reader.maxDoc()); } - set.set(td.doc()); + set.set(doc); } } - } finally { - if (td != null) { - td.close(); - } + } return set; }
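For reference, here is a self-contained sketch of the per-segment filter idiom that the converted UidFilter follows: one getDocIdSet call per segment, terms located with TermsEnum.seekExact, and matching docs collected from a DocsEnum that already honors acceptDocs. The class is illustrative only (it is not part of this commit, and the name TermSetFilter is invented), but every Lucene call mirrors the 4.0 API used in the patch:

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.FixedBitSet;

    public class TermSetFilter extends Filter {

        private final Term[] terms; // all terms must target the same field

        public TermSetFilter(Term... terms) {
            this.terms = terms;
            // sorted terms make the sequential seekExact calls cheaper, as UidFilter does
            Arrays.sort(this.terms);
        }

        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptDocs) throws IOException {
            final AtomicReader reader = ctx.reader(); // one segment per call in Lucene 4
            final Terms vocab = reader.terms(terms[0].field());
            if (vocab == null) {
                return null; // field absent in this segment
            }
            final TermsEnum termsEnum = vocab.iterator(null);
            DocsEnum docsEnum = null;
            FixedBitSet set = null;
            for (Term t : terms) {
                if (!termsEnum.seekExact(t.bytes(), false)) {
                    continue; // term absent in this segment
                }
                // acceptDocs already masks deleted/filtered documents
                docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
                int doc;
                while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                    if (set == null) {
                        set = new FixedBitSet(reader.maxDoc());
                    }
                    set.set(doc);
                }
            }
            return set; // null means "no matches in this segment"
        }
    }

Returning null rather than an empty set lets the caller skip the segment entirely, which is also what the converted UidFilter does when none of the uids occur in a segment.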