lucene 4: converted QueryParser/Builders to Lucene 4

This commit is contained in:
Simon Willnauer 2012-10-28 10:00:01 +01:00 committed by Shay Banon
parent 5bd8e1b337
commit 683be6fc64
20 changed files with 174 additions and 139 deletions

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.query;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.queries.FilterClause;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.xcontent.XContentParser;

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.query;
import org.apache.lucene.search.BoostingQuery;
import org.apache.lucene.queries.BoostingQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
@ -122,8 +123,9 @@ public class CustomScoreQueryParser implements QueryParser {
}
@Override
public void setNextReader(IndexReader reader) {
script.setNextReader(reader);
public void setNextReader(AtomicReaderContext ctx) {
//LUCENE 4 UPGRADE should this pass on an AtomicReaderContext or just an atomic reader?
script.setNextReader(ctx);
}
@Override

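Note (illustrative, not part of the commit): the setNextReader change above reflects the general Lucene 4 move to per-segment callbacks that receive an AtomicReaderContext; the segment reader is reached via context.reader() and context.docBase maps segment-local doc ids to top-level ids. A minimal sketch of that contract using a trivial Lucene 4 Collector (the counting logic is made up):

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

// Illustrative only: a trivial hit counter showing the Lucene 4 per-segment callback.
public class CountingCollector extends Collector {

    private int docBase;
    private int count;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // scores are not needed for counting
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        // the segment reader is context.reader(); docBase maps segment-local ids to top-level ids
        docBase = context.docBase;
    }

    @Override
    public void collect(int doc) throws IOException {
        count++; // the top-level doc id would be docBase + doc
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }

    public int count() {
        return count;
    }
}
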
View File

@ -43,7 +43,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer
private Integer maxExpansions;
//LUCENE 4 UPGRADE we need a testcase for this + documentation
private Boolean transpositions = true;
private Boolean transpositions;
/**
* Constructs a new term query.
@ -101,7 +101,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer
if (boost != -1) {
builder.field("boost", boost);
}
if (!transpositions) {
if (transpositions != null) {
builder.field("transpositions", transpositions);
}
if (minSimilarity != null) {

View File

@ -64,7 +64,7 @@ public class FuzzyQueryParser implements QueryParser {
String minSimilarity = "0.5";
int prefixLength = FuzzyQuery.defaultPrefixLength;
int maxExpansions = FuzzyQuery.defaultMaxExpansions;
boolean transpositions = true;
boolean transpositions = false;
MultiTermQuery.RewriteMethod rewriteMethod = null;
token = parser.nextToken();
if (token == XContentParser.Token.START_OBJECT) {
@ -113,8 +113,8 @@ public class FuzzyQueryParser implements QueryParser {
}
}
if (query == null) {
//LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float
int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity),
value.codePointCount(0, value.length()));
query = new FuzzyQuery(new Term(fieldName, value), edits, prefixLength, maxExpansions, transpositions);
}
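
Illustration (not part of the commit): FuzzyQuery.floatToEdits maps the legacy float min_similarity onto the integer edit distance that the Lucene 4 FuzzyQuery constructor expects. A standalone sketch with a made-up field name and value:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;

public class FuzzyEditsExample {
    public static void main(String[] args) {
        String value = "elasticsearch";
        // the legacy API took a float similarity (e.g. 0.5); Lucene 4 wants an edit distance
        int edits = FuzzyQuery.floatToEdits(0.5f, value.codePointCount(0, value.length()));
        // transpositions=false mirrors the parser default in this commit
        FuzzyQuery query = new FuzzyQuery(new Term("name", value), edits,
                FuzzyQuery.defaultPrefixLength, FuzzyQuery.defaultMaxExpansions, false);
        System.out.println(edits + " -> " + query);
    }
}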

View File

@ -98,7 +98,7 @@ public class IdsFilterParser implements FilterParser {
types = parseContext.mapperService().types();
}
UidFilter filter = new UidFilter(types, ids, parseContext.indexCache().bloomCache());
UidFilter filter = new UidFilter(types, ids);
if (filterName != null) {
parseContext.addNamedFilter(filterName, filter);
}

View File

@ -102,7 +102,7 @@ public class IdsQueryParser implements QueryParser {
types = parseContext.mapperService().types();
}
UidFilter filter = new UidFilter(types, ids, parseContext.indexCache().bloomCache());
UidFilter filter = new UidFilter(types, ids);
// no need for constant score filter, since we don't cache the filter, and it always takes deletes into account
ConstantScoreQuery query = new ConstantScoreQuery(filter);
query.setBoost(boost);

View File

@ -71,7 +71,9 @@ public class MatchAllQueryParser implements QueryParser {
return Queries.MATCH_ALL_QUERY;
}
MatchAllDocsQuery query = new MatchAllDocsQuery(normsField);
//LUCENE 4 UPGRADE norms field is not supported anymore; need to find another way or drop the functionality
//MatchAllDocsQuery query = new MatchAllDocsQuery(normsField);
MatchAllDocsQuery query = new MatchAllDocsQuery();
query.setBoost(boost);
return query;
}

View File

@ -77,6 +77,8 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer
private String fuzzyRewrite = null;
private Boolean lenient;
private Boolean fuzzyTranspositions = null;
/**
* Constructs a new text query.
@ -163,6 +165,12 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer
this.fuzzyRewrite = fuzzyRewrite;
return this;
}
public MatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) {
//LUCENE 4 UPGRADE add documentation
this.fuzzyTranspositions = fuzzyTranspositions;
return this;
}
/**
* Sets whether format based failures will be ignored.
@ -211,7 +219,10 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer
if (fuzzyRewrite != null) {
builder.field("fuzzy_rewrite", fuzzyRewrite);
}
if (fuzzyTranspositions != null) {
//LUCENE 4 UPGRADE we need to document this & test this
builder.field("fuzzy_transpositions", fuzzyTranspositions);
}
if (lenient != null) {
builder.field("lenient", lenient);
}
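
Illustration (not part of the commit): the new fuzzy_transpositions option would be set from the Java API roughly as below; the (name, text) constructor is assumed from the existing builder and the values are made up.

import org.elasticsearch.index.query.MatchQueryBuilder;

public class FuzzyTranspositionsExample {
    public static MatchQueryBuilder build() {
        // field name and text are made up; the (name, text) constructor is assumed from the existing builder
        return new MatchQueryBuilder("title", "quikc brown fox")
                .fuzzyTranspositions(true); // option added in this commit
    }
}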

View File

@ -122,6 +122,8 @@ public class MatchQueryParser implements QueryParser {
matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
} else if ("fuzzy_transpositions".equals(fieldName)) {
matchQuery.setTranspositions(parser.booleanValue());
} else if ("lenient".equals(currentFieldName)) {
matchQuery.setLenient(parser.booleanValue());
} else {

View File

@ -19,8 +19,15 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DeletionAwareConstantScoreQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
@ -30,8 +37,6 @@ import org.elasticsearch.index.search.nested.BlockJoinQuery;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
public class NestedQueryParser implements QueryParser {
public static final String NAME = "nested";
@ -184,8 +189,9 @@ public class NestedQueryParser implements QueryParser {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
return filter.getDocIdSet(reader);
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits liveDocs) throws IOException {
//LUCENE 4 UPGRADE just passing on ctx and live docs here
return filter.getDocIdSet(ctx, liveDocs);
}
}
}
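
Illustration (not part of the commit): the anonymous filter above follows the Lucene 4 Filter contract, where getDocIdSet receives the per-segment AtomicReaderContext plus the acceptDocs Bits it is expected to honour. A bare-bones delegating filter in that style:

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

// Illustrative only: wraps another filter and forwards the Lucene 4 per-segment arguments.
public class DelegatingFilter extends Filter {

    private final Filter delegate;

    public DelegatingFilter(Filter delegate) {
        this.delegate = delegate;
    }

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        // acceptDocs carries deletions (and any outer filtering); passing it through keeps them applied
        return delegate.getDocIdSet(context, acceptDocs);
    }
}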

View File

@ -21,11 +21,12 @@ package org.elasticsearch.index.query;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.queryParser.MapperQueryParser;
import org.apache.lucene.queryParser.QueryParserSettings;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.queryparser.classic.QueryParserSettings;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.Index;

View File

@ -22,9 +22,10 @@ package org.elasticsearch.index.query;
import com.google.common.collect.Lists;
import gnu.trove.impl.Constants;
import gnu.trove.map.hash.TObjectFloatHashMap;
import org.apache.lucene.queryParser.MapperQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParserSettings;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParserSettings;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.Strings;
@ -130,9 +131,9 @@ public class QueryStringQueryParser implements QueryParser {
} else if ("default_operator".equals(currentFieldName) || "defaultOperator".equals(currentFieldName)) {
String op = parser.text();
if ("or".equalsIgnoreCase(op)) {
qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.OR);
qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.OR);
} else if ("and".equalsIgnoreCase(op)) {
qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.AND);
} else {
throw new QueryParsingException(parseContext.index(), "Query default operator [" + op + "] is not allowed");
}
@ -196,7 +197,7 @@ public class QueryStringQueryParser implements QueryParser {
qpSettings.defaultQuoteAnalyzer(parseContext.mapperService().searchQuoteAnalyzer());
if (qpSettings.escape()) {
qpSettings.queryString(org.apache.lucene.queryParser.QueryParser.escape(qpSettings.queryString()));
qpSettings.queryString(org.apache.lucene.queryparser.classic.QueryParser.escape(qpSettings.queryString()));
}
qpSettings.queryTypes(parseContext.queryTypes());
@ -220,7 +221,7 @@ public class QueryStringQueryParser implements QueryParser {
}
parseContext.indexCache().queryParserCache().put(qpSettings, query);
return query;
} catch (ParseException e) {
} catch (org.apache.lucene.queryparser.classic.ParseException e) {
throw new QueryParsingException(parseContext.index(), "Failed to parse query [" + qpSettings.queryString() + "]", e);
}
}
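
Illustration (not part of the commit): the classic query parser now lives in org.apache.lucene.queryparser.classic, and ParseException moved with it. Basic standalone usage of the relocated parser, with arbitrary field, analyzer and query string:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class ClassicQueryParserExample {
    public static void main(String[] args) throws ParseException {
        QueryParser parser = new QueryParser(Version.LUCENE_40, "body", new StandardAnalyzer(Version.LUCENE_40));
        parser.setDefaultOperator(QueryParser.Operator.AND);
        Query query = parser.parse("quick +brown fox");
        System.out.println(query);
    }
}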

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.cache.filter.support.CacheKeyFilter;
@ -53,8 +54,8 @@ public class RangeFilterParser implements FilterParser {
boolean cache = true;
CacheKeyFilter.Key cacheKey = null;
String fieldName = null;
String from = null;
String to = null;
BytesRef from = null;
BytesRef to = null;
boolean includeLower = true;
boolean includeUpper = true;
@ -71,24 +72,24 @@ public class RangeFilterParser implements FilterParser {
currentFieldName = parser.currentName();
} else {
if ("from".equals(currentFieldName)) {
from = parser.textOrNull();
from = parser.bytesOrNull(from);
} else if ("to".equals(currentFieldName)) {
to = parser.textOrNull();
to = parser.bytesOrNull(to);
} else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) {
includeLower = parser.booleanValue();
} else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) {
includeUpper = parser.booleanValue();
} else if ("gt".equals(currentFieldName)) {
from = parser.textOrNull();
from = parser.bytesOrNull(from);
includeLower = false;
} else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) {
from = parser.textOrNull();
from = parser.bytesOrNull(from);
includeLower = true;
} else if ("lt".equals(currentFieldName)) {
to = parser.textOrNull();
to = parser.bytesOrNull(to);
includeUpper = false;
} else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) {
to = parser.textOrNull();
to = parser.bytesOrNull(to);
includeUpper = true;
} else {
throw new QueryParsingException(parseContext.index(), "[range] filter does not support [" + currentFieldName + "]");
@ -116,7 +117,8 @@ public class RangeFilterParser implements FilterParser {
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
if (smartNameFieldMappers != null) {
if (smartNameFieldMappers.hasMapper()) {
filter = smartNameFieldMappers.mapper().rangeFilter(from, to, includeLower, includeUpper, parseContext);
//LUCENE 4 UPGRADE range filter should use bytesref too?
filter = smartNameFieldMappers.mapper().rangeFilter(from.utf8ToString(), to.utf8ToString(), includeLower, includeUpper, parseContext);
}
}
if (filter == null) {

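Illustration (not part of the commit): the range endpoints are now carried as BytesRef because Lucene 4's term range filter compares raw term bytes; utf8ToString() converts back where an API still wants a String. A small sketch with made-up field and values:

import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.util.BytesRef;

public class BytesRefRangeExample {
    public static void main(String[] args) {
        BytesRef from = new BytesRef("aaa");
        BytesRef to = new BytesRef("zzz");
        // Lucene 4 term range filters compare raw term bytes, not Strings
        TermRangeFilter filter = new TermRangeFilter("name", from, to, true, false);
        // utf8ToString() converts back where an API still expects a String
        System.out.println(filter + " from=" + from.utf8ToString() + " to=" + to.utf8ToString());
    }
}
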
View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MapperService;
@ -59,8 +60,8 @@ public class RangeQueryParser implements QueryParser {
throw new QueryParsingException(parseContext.index(), "[range] query malformed, after field missing start object");
}
String from = null;
String to = null;
BytesRef from = null;
BytesRef to = null;
boolean includeLower = true;
boolean includeUpper = true;
float boost = 1.0f;
@ -71,9 +72,9 @@ public class RangeQueryParser implements QueryParser {
currentFieldName = parser.currentName();
} else {
if ("from".equals(currentFieldName)) {
from = parser.textOrNull();
from = parser.bytesOrNull(from);
} else if ("to".equals(currentFieldName)) {
to = parser.textOrNull();
to = parser.bytesOrNull(to);
} else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) {
includeLower = parser.booleanValue();
} else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) {
@ -81,16 +82,16 @@ public class RangeQueryParser implements QueryParser {
} else if ("boost".equals(currentFieldName)) {
boost = parser.floatValue();
} else if ("gt".equals(currentFieldName)) {
from = parser.textOrNull();
from = parser.bytesOrNull(from);
includeLower = false;
} else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) {
from = parser.textOrNull();
from = parser.bytesOrNull(from);
includeLower = true;
} else if ("lt".equals(currentFieldName)) {
to = parser.textOrNull();
to = parser.bytesOrNull(to);
includeUpper = false;
} else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) {
to = parser.textOrNull();
to = parser.bytesOrNull(to);
includeUpper = true;
} else {
throw new QueryParsingException(parseContext.index(), "[range] query does not support [" + currentFieldName + "]");
@ -108,7 +109,8 @@ public class RangeQueryParser implements QueryParser {
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
if (smartNameFieldMappers != null) {
if (smartNameFieldMappers.hasMapper()) {
query = smartNameFieldMappers.mapper().rangeQuery(from, to, includeLower, includeUpper, parseContext);
//LUCENE 4 UPGRADE Mapper#rangeQuery should use bytesref as well?
query = smartNameFieldMappers.mapper().rangeQuery(from.utf8ToString(), to.utf8ToString(), includeLower, includeUpper, parseContext);
}
}
if (query == null) {

View File

@ -20,9 +20,14 @@
package org.elasticsearch.index.query;
import com.google.common.collect.Maps;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredDocIdSet;
import org.apache.lucene.util.Bits;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.inject.Inject;
@ -160,9 +165,10 @@ public class ScriptFilterParser implements FilterParser {
}
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
searchScript.setNextReader(reader);
return new ScriptDocSet(reader, searchScript);
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
searchScript.setNextReader(context.reader());
// LUCENE 4 UPGRADE: we can simply wrap this here since it is not cacheable, and if we are not top-level we will get a null acceptDocs passed anyway
return BitsFilteredDocIdSet.wrap(new ScriptDocSet(context.reader(), searchScript), acceptDocs);
}
static class ScriptDocSet extends GetDocSet {

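Illustration (not part of the commit): BitsFilteredDocIdSet.wrap applies acceptDocs on top of a DocIdSet produced by code that cannot honour them itself, and returns the set unchanged when acceptDocs is null. A minimal sketch (innerFilter is a hypothetical wrapped filter):

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

// Illustrative only: apply acceptDocs on top of a filter that does not honour them itself.
public class AcceptDocsFilter extends Filter {

    private final Filter innerFilter; // hypothetical filter that ignores acceptDocs

    public AcceptDocsFilter(Filter innerFilter) {
        this.innerFilter = innerFilter;
    }

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        DocIdSet inner = innerFilter.getDocIdSet(context, null);
        // wrap() returns the set unchanged when acceptDocs is null, otherwise it hides non-accepted docs
        return BitsFilteredDocIdSet.wrap(inner, acceptDocs);
    }
}
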
View File

@ -19,7 +19,9 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.common.xcontent.XContentParser;
@ -57,14 +59,15 @@ public class TypeFilterParser implements FilterParser {
if (token != XContentParser.Token.VALUE_STRING) {
throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name");
}
String type = parser.text();
BytesRef type = parser.bytes(null);
// move to the next token
parser.nextToken();
Filter filter;
DocumentMapper documentMapper = parseContext.mapperService().documentMapper(type);
//LUCENE 4 UPGRADE document mapper should use bytesref as well?
DocumentMapper documentMapper = parseContext.mapperService().documentMapper(type.utf8ToString());
if (documentMapper == null) {
filter = new TermFilter(TypeFieldMapper.TERM_FACTORY.createTerm(type));
filter = new TermFilter(new Term(TypeFieldMapper.TERM_FACTORY.field(), type));
} else {
filter = documentMapper.typeFilter();
}

View File

@ -24,10 +24,14 @@ import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
@ -64,6 +68,9 @@ public class MatchQuery {
protected String fuzziness = null;
protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
protected int maxExpansions = FuzzyQuery.defaultMaxExpansions;
//LUCENE 4 UPGRADE we need a default value for this!
protected boolean transpositions = false;
protected MultiTermQuery.RewriteMethod rewriteMethod;
protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod;
@ -101,6 +108,10 @@ public class MatchQuery {
public void setMaxExpansions(int maxExpansions) {
this.maxExpansions = maxExpansions;
}
public void setTranspositions(boolean transpositions) {
this.transpositions = transpositions;
}
public void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) {
this.rewriteMethod = rewriteMethod;
@ -116,13 +127,13 @@ public class MatchQuery {
public Query parse(Type type, String fieldName, String text) {
FieldMapper mapper = null;
Term fieldTerm;
final String field;
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
mapper = smartNameFieldMappers.mapper();
fieldTerm = mapper.names().indexNameTerm();
field = mapper.names().indexName();
} else {
fieldTerm = new Term(fieldName);
field = fieldName;
}
if (mapper != null && mapper.useFieldQueryWithQueryString()) {
@ -169,13 +180,13 @@ public class MatchQuery {
}
// Logic similar to QueryParser#getFieldQuery
TokenStream source;
final TokenStream source;
try {
source = analyzer.reusableTokenStream(fieldTerm.field(), new FastStringReader(text));
source = analyzer.tokenStream(field, new FastStringReader(text));
source.reset();
} catch (IOException e) {
source = analyzer.tokenStream(fieldTerm.field(), new FastStringReader(text));
} catch(IOException ex) {
//LUCENE 4 UPGRADE not sure what to do here really; Lucene 3.6 had a tokenStream that didn't throw an exception.
throw new ElasticSearchParseException("failed to process query", ex);
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
CharTermAttribute termAtt = null;
@ -183,12 +194,7 @@ public class MatchQuery {
int numTokens = 0;
boolean success = false;
try {
buffer.reset();
success = true;
} catch (IOException e) {
// success==false if we hit an exception
}
buffer.reset();
if (success) {
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
@ -233,29 +239,26 @@ public class MatchQuery {
return MatchNoDocsQuery.INSTANCE;
} else if (type == Type.BOOLEAN) {
if (numTokens == 1) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
Query q = newTermQuery(mapper, fieldTerm.createTerm(term));
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
Query q = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef())));
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
}
BooleanQuery q = new BooleanQuery(positionCount == 1);
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
Query currentQuery = newTermQuery(mapper, fieldTerm.createTerm(term));
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
Query currentQuery = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef())));
q.add(currentQuery, occur);
}
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
@ -266,12 +269,10 @@ public class MatchQuery {
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -288,7 +289,8 @@ public class MatchQuery {
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(fieldTerm.createTerm(term));
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef())));
}
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
@ -303,13 +305,11 @@ public class MatchQuery {
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -319,9 +319,10 @@ public class MatchQuery {
if (enablePositionIncrements) {
position += positionIncrement;
pq.add(fieldTerm.createTerm(term), position);
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
pq.add(new Term(field, termToByteRef(termAtt, new BytesRef())), position);
} else {
pq.add(fieldTerm.createTerm(term));
pq.add(new Term(field, termToByteRef(termAtt, new BytesRef())));
}
}
return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
@ -333,12 +334,10 @@ public class MatchQuery {
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -355,7 +354,8 @@ public class MatchQuery {
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(fieldTerm.createTerm(term));
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef())));
}
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
@ -376,7 +376,11 @@ public class MatchQuery {
QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod);
}
}
FuzzyQuery query = new FuzzyQuery(term, Float.parseFloat(fuzziness), fuzzyPrefixLength, maxExpansions);
String text = term.text();
//LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float
int edits = FuzzyQuery.floatToEdits(Float.parseFloat(fuzziness),
text.codePointCount(0, text.length()));
FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions);
QueryParsers.setRewriteMethod(query, rewriteMethod);
return query;
}
@ -388,4 +392,9 @@ public class MatchQuery {
}
return new TermQuery(term);
}
private static BytesRef termToByteRef(CharTermAttribute attr, BytesRef ref) {
UnicodeUtil.UTF16toUTF8WithHash(attr.buffer(), 0, attr.length(), ref);
return ref;
}
}
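
Illustration (not part of the commit): MatchQuery now builds terms straight from the analyzer's char buffer via UnicodeUtil instead of going through String. A stripped-down version of that token-to-Term loop, with placeholder analyzer, field and text:

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;

public class AnalyzedTermsExample {

    // Tokenize text and turn each token's UTF-16 buffer directly into a UTF-8 Term.
    public static List<Term> analyze(Analyzer analyzer, String field, String text) throws IOException {
        List<Term> terms = new ArrayList<Term>();
        TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            BytesRef bytes = new BytesRef();
            UnicodeUtil.UTF16toUTF8WithHash(termAtt.buffer(), 0, termAtt.length(), bytes);
            terms.add(new Term(field, bytes));
        }
        stream.end();
        stream.close();
        return terms;
    }

    public static void main(String[] args) throws IOException {
        System.out.println(analyze(new StandardAnalyzer(Version.LUCENE_40), "body", "Quick brown fox"));
    }
}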

View File

@ -19,9 +19,10 @@
package org.elasticsearch.index.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.GetDocSet;
@ -43,7 +44,7 @@ import java.io.IOException;
*
*/
public abstract class NumericRangeFieldDataFilter<T> extends Filter {
// LUCENE 4 UPGRADE: this filter doesn't respect acceptDocs yet!
final FieldDataCache fieldDataCache;
final String field;
final T lowerVal;
@ -121,7 +122,7 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
public static NumericRangeFieldDataFilter<Byte> newByteRange(FieldDataCache fieldDataCache, String field, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) {
return new NumericRangeFieldDataFilter<Byte>(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
final byte inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
byte i = lowerVal.byteValue();
@ -143,8 +144,8 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocSet.EMPTY_DOC_SET;
final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, reader, field);
return new GetDocSet(reader.maxDoc()) {
final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, ctx.reader(), field);
return new GetDocSet(ctx.reader().maxDoc()) {
@Override
public boolean isCacheable() {
@ -181,7 +182,7 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
public static NumericRangeFieldDataFilter<Short> newShortRange(FieldDataCache fieldDataCache, String field, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) {
return new NumericRangeFieldDataFilter<Short>(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
final short inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
short i = lowerVal.shortValue();
@ -203,8 +204,8 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocSet.EMPTY_DOC_SET;
final ShortFieldData fieldData = (ShortFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.SHORT, reader, field);
return new GetDocSet(reader.maxDoc()) {
final ShortFieldData fieldData = (ShortFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.SHORT, ctx.reader(), field);
return new GetDocSet(ctx.reader().maxDoc()) {
@Override
public boolean isCacheable() {
@ -240,7 +241,7 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
public static NumericRangeFieldDataFilter<Integer> newIntRange(FieldDataCache fieldDataCache, String field, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) {
return new NumericRangeFieldDataFilter<Integer>(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
final int inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
int i = lowerVal.intValue();
@ -262,8 +263,8 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocSet.EMPTY_DOC_SET;
final IntFieldData fieldData = (IntFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.INT, reader, field);
return new GetDocSet(reader.maxDoc()) {
final IntFieldData fieldData = (IntFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.INT, ctx.reader(), field);
return new GetDocSet(ctx.reader().maxDoc()) {
@Override
public boolean isCacheable() {
@ -299,7 +300,7 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
public static NumericRangeFieldDataFilter<Long> newLongRange(FieldDataCache fieldDataCache, String field, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) {
return new NumericRangeFieldDataFilter<Long>(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
final long inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
long i = lowerVal.longValue();
@ -321,8 +322,8 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocSet.EMPTY_DOC_SET;
final LongFieldData fieldData = (LongFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.LONG, reader, field);
return new GetDocSet(reader.maxDoc()) {
final LongFieldData fieldData = (LongFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.LONG, ctx.reader(), field);
return new GetDocSet(ctx.reader().maxDoc()) {
@Override
public boolean isCacheable() {
@ -358,7 +359,7 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
public static NumericRangeFieldDataFilter<Float> newFloatRange(FieldDataCache fieldDataCache, String field, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) {
return new NumericRangeFieldDataFilter<Float>(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
// we transform the floating point numbers to sortable integers
// using NumericUtils to easier find the next bigger/lower value
final float inclusiveLowerPoint, inclusiveUpperPoint;
@ -384,8 +385,8 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocSet.EMPTY_DOC_SET;
final FloatFieldData fieldData = (FloatFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.FLOAT, reader, field);
return new GetDocSet(reader.maxDoc()) {
final FloatFieldData fieldData = (FloatFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.FLOAT, ctx.reader(), field);
return new GetDocSet(ctx.reader().maxDoc()) {
@Override
public boolean isCacheable() {
@ -421,7 +422,7 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
public static NumericRangeFieldDataFilter<Double> newDoubleRange(FieldDataCache fieldDataCache, String field, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) {
return new NumericRangeFieldDataFilter<Double>(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
// we transform the floating point numbers to sortable integers
// using NumericUtils to easier find the next bigger/lower value
final double inclusiveLowerPoint, inclusiveUpperPoint;
@ -447,8 +448,8 @@ public abstract class NumericRangeFieldDataFilter<T> extends Filter {
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocSet.EMPTY_DOC_SET;
final DoubleFieldData fieldData = (DoubleFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.DOUBLE, reader, field);
return new GetDocSet(reader.maxDoc()) {
final DoubleFieldData fieldData = (DoubleFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.DOUBLE, ctx.reader(), field);
return new GetDocSet(ctx.reader().maxDoc()) {
@Override
public boolean isCacheable() {

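Illustration (not part of the commit): the float and double variants above compare endpoints through NumericUtils' sortable-integer encoding, so plain int/long comparisons respect numeric order, including negative values. A tiny demonstration with arbitrary values:

import org.apache.lucene.util.NumericUtils;

public class SortableFloatExample {
    public static void main(String[] args) {
        int lower = NumericUtils.floatToSortableInt(-1.5f);
        int upper = NumericUtils.floatToSortableInt(2.25f);
        // comparing the sortable ints is equivalent to comparing the original floats
        System.out.println(lower < upper);                          // true
        System.out.println(NumericUtils.sortableIntToFloat(lower)); // -1.5
    }
}
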
View File

@ -19,16 +19,15 @@
package org.elasticsearch.index.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.bloom.BloomFilter;
import org.elasticsearch.index.cache.bloom.BloomCache;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
@ -40,17 +39,12 @@ import java.util.List;
public class UidFilter extends Filter {
final Term[] uids;
private final BloomCache bloomCache;
// LUCENE 4 UPGRADE: We removed the bloom cache, so once we rewrite this filter, do it without the bloom cache
public UidFilter(Collection<String> types, List<String> ids, BloomCache bloomCache) {
this.bloomCache = bloomCache;
public UidFilter(Collection<String> types, List<String> ids) {
this.uids = new Term[types.size() * ids.size()];
int i = 0;
for (String type : types) {
for (String id : ids) {
uids[i++] = UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(type, id));
uids[i++] = new Term(UidFieldMapper.NAME, Uid.createUid(type, id));
}
}
if (this.uids.length > 1) {
@ -66,33 +60,26 @@ public class UidFilter extends Filter {
// - If we have a single id, we can create a SingleIdDocIdSet to save on mem
// - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
BloomFilter filter = bloomCache.filter(reader, UidFieldMapper.NAME, true);
// LUCENE 4 UPGRADE: this filter now respects acceptDocs; maybe we need to change this
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
FixedBitSet set = null;
TermDocs td = null;
UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
try {
for (Term uid : uids) {
Unicode.fromStringAsUtf8(uid.text(), utf8);
if (!filter.isPresent(utf8.result, 0, utf8.length)) {
continue;
}
if (td == null) {
td = reader.termDocs();
}
td.seek(uid);
// no need for batching, its on the UID, there will be only one doc
while (td.next()) {
final AtomicReader reader = ctx.reader();
final TermsEnum termsEnum = reader.terms(UidFieldMapper.NAME).iterator(null);
DocsEnum docsEnum = null;
for (Term uid : uids) {
if (termsEnum.seekExact(uid.bytes(), false)) {
docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0);
int doc;
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
// no need for batching, it's on the UID, there will be only
// one doc
if (set == null) {
set = new FixedBitSet(reader.maxDoc());
}
set.set(td.doc());
set.set(doc);
}
}
} finally {
if (td != null) {
td.close();
}
}
return set;
}
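
Illustration (not part of the commit): the rewritten UidFilter uses the Lucene 4 postings API: look up the field's Terms, seek the TermsEnum exactly to each term, then walk a DocsEnum that already honours acceptDocs. A standalone sketch of that pattern with placeholder field and term:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;

public class ExactTermDocs {

    // Collect every accepted document that contains the given term, Lucene 4 style.
    public static FixedBitSet docsFor(AtomicReader reader, String field, BytesRef term, Bits acceptDocs) throws IOException {
        FixedBitSet result = new FixedBitSet(reader.maxDoc());
        Terms terms = reader.terms(field);
        if (terms == null) {
            return result; // field not indexed in this segment
        }
        TermsEnum termsEnum = terms.iterator(null);
        if (termsEnum.seekExact(term, false)) {
            DocsEnum docsEnum = termsEnum.docs(acceptDocs, null, 0); // 0 = no freqs needed
            int doc;
            while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                result.set(doc);
            }
        }
        return result;
    }
}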