Do not set analyzers on numeric fields.

When it comes to query parsing, a field is either tokenized, in which case the
value goes through analysis with its search_analyzer, or it is not tokenized, in
which case the raw string should be passed to termQuery(). Since numeric fields
are not tokenized but also declare a search analyzer, values currently go through
analysis twice.
This commit is contained in:
Adrien Grand 2016-04-12 15:24:09 +02:00
parent f5bade1a0d
commit 3bf6f4076c
30 changed files with 24 additions and 849 deletions

View File

@ -305,33 +305,4 @@ public class Analysis {
}
}
/**
 * Tells whether the given token stream is able to expose character terms.
 * <p>Most analyzers produce character terms (CharTermAttribute), but some
 * token streams only carry binary terms (BinaryTermAttribute, of which
 * CharTermAttribute is a special kind). {@link LegacyNumericTokenStream} is
 * one such stream, which makes it unsuitable for highlighting and
 * more-like-this queries since those expect character terms.</p>
 *
 * @param tokenStream the stream to probe
 * @return {@code true} if a CharTermAttribute could be added to the stream,
 *         {@code false} if adding it raised an IllegalArgumentException
 */
public static boolean isCharacterTokenStream(TokenStream tokenStream) {
    boolean hasCharacterTerms;
    try {
        tokenStream.addAttribute(CharTermAttribute.class);
        hasCharacterTerms = true;
    } catch (IllegalArgumentException e) {
        // Adding the attribute was refused: treat the stream as binary-only.
        hasCharacterTerms = false;
    }
    return hasCharacterTerms;
}
/**
 * Tells whether the {@link TokenStream}s that <code>analyzer</code> produces
 * for <code>fieldName</code> expose character terms. The check is performed
 * on a stream built over the empty string, which is closed before returning.
 *
 * @see #isCharacterTokenStream(TokenStream)
 */
public static boolean generatesCharacterTokenStream(Analyzer analyzer, String fieldName) throws IOException {
    final boolean characterTerms;
    try (TokenStream probe = analyzer.tokenStream(fieldName, "")) {
        characterTerms = isCharacterTokenStream(probe);
    }
    return characterTerms;
}
}

View File

@ -1,43 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import java.io.IOException;
/**
 * Base class for analyzers whose only component is a {@link NumericTokenizer}.
 * Subclasses supply the concrete tokenizer through
 * {@link #createNumericTokenizer(char[])}.
 *
 * @param <T> the tokenizer type produced by this analyzer
 */
public abstract class NumericAnalyzer<T extends NumericTokenizer> extends Analyzer {

    /**
     * Builds the analysis chain, which consists solely of the numeric tokenizer.
     * An {@link IOException} raised while creating the tokenizer is rethrown
     * wrapped in a {@link RuntimeException}.
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        // LUCENE 4 UPGRADE: in reusableTokenStream the buffer size was char[120]
        // Not sure if this is intentional or not
        final char[] scratch = new char[32];
        final T tokenizer;
        try {
            tokenizer = createNumericTokenizer(scratch);
        } catch (IOException e) {
            throw new RuntimeException("Failed to create numeric tokenizer", e);
        }
        return new TokenStreamComponents(tokenizer);
    }

    /**
     * Creates the tokenizer backing this analyzer.
     *
     * @param buffer scratch buffer the tokenizer reads its input into
     * @return a new tokenizer instance
     * @throws IOException if the tokenizer cannot be created
     */
    protected abstract T createNumericTokenizer(char[] buffer) throws IOException;
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.elasticsearch.common.joda.FormatDateTimeFormatter;
import org.joda.time.format.DateTimeFormatter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * A {@link NumericAnalyzer} producing {@link NumericDateTokenizer}s for a given
 * precision step and date parser. Named instances are cached per format string
 * and precision step.
 */
public class NumericDateAnalyzer extends NumericAnalyzer<NumericDateTokenizer> {

    /** Named analyzers keyed first by format string, then by precision step. */
    private static final Map<String, IntObjectHashMap<NamedAnalyzer>> globalAnalyzers = new HashMap<>();

    /**
     * Returns the globally scoped named analyzer for the given format and
     * precision step, creating and caching it on first request.
     *
     * @param formatter     supplies both the cache key (its format string) and the parser
     * @param precisionStep precision step; {@link Integer#MAX_VALUE} is named {@code _date/max}
     * @return the cached or newly created named analyzer
     */
    public static synchronized NamedAnalyzer buildNamedAnalyzer(FormatDateTimeFormatter formatter, int precisionStep) {
        final String formatKey = formatter.format();
        IntObjectHashMap<NamedAnalyzer> byPrecision = globalAnalyzers.get(formatKey);
        if (byPrecision == null) {
            byPrecision = new IntObjectHashMap<>();
            globalAnalyzers.put(formatKey, byPrecision);
        }

        final NamedAnalyzer cached = byPrecision.get(precisionStep);
        if (cached != null) {
            return cached;
        }

        final String analyzerName;
        if (precisionStep == Integer.MAX_VALUE) {
            analyzerName = "_date/max";
        } else {
            analyzerName = "_date/" + precisionStep;
        }
        final NumericDateAnalyzer delegate = new NumericDateAnalyzer(precisionStep, formatter.parser());
        final NamedAnalyzer created = new NamedAnalyzer(analyzerName, AnalyzerScope.GLOBAL, delegate);
        byPrecision.put(precisionStep, created);
        return created;
    }

    private final int precisionStep;
    private final DateTimeFormatter dateTimeFormatter;

    /**
     * @param precisionStep     precision step handed to every tokenizer created by this analyzer
     * @param dateTimeFormatter formatter the tokenizers use to parse date strings
     */
    public NumericDateAnalyzer(int precisionStep, DateTimeFormatter dateTimeFormatter) {
        this.dateTimeFormatter = dateTimeFormatter;
        this.precisionStep = precisionStep;
    }

    /** Creates a date tokenizer configured with this analyzer's precision step and formatter. */
    @Override
    protected NumericDateTokenizer createNumericTokenizer(char[] buffer) throws IOException {
        return new NumericDateTokenizer(precisionStep, buffer, dateTimeFormatter);
    }
}

View File

@ -1,40 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import org.joda.time.format.DateTimeFormatter;
import java.io.IOException;
/**
 * {@link NumericTokenizer} that parses its input with a Joda
 * {@link DateTimeFormatter} and feeds the resulting milliseconds to the
 * underlying numeric stream as a long value.
 */
public class NumericDateTokenizer extends NumericTokenizer {

    /**
     * @param precisionStep     precision step of the underlying {@link LegacyNumericTokenStream}
     * @param buffer            scratch buffer passed on to the parent tokenizer
     * @param dateTimeFormatter formatter used to parse values; stored by the parent as its {@code extra} object
     * @throws IOException declared by the parent constructor
     */
    public NumericDateTokenizer(int precisionStep, char[] buffer, DateTimeFormatter dateTimeFormatter) throws IOException {
        super(new LegacyNumericTokenStream(precisionStep), buffer, dateTimeFormatter);
    }

    /** Parses {@code value} to epoch milliseconds and sets it on the stream. */
    @Override
    protected void setValue(LegacyNumericTokenStream tokenStream, String value) {
        final DateTimeFormatter formatter = (DateTimeFormatter) extra;
        final long millis = formatter.parseMillis(value);
        tokenStream.setLongValue(millis);
    }
}

View File

@ -1,59 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.hppc.IntObjectHashMap;
import java.io.IOException;
/**
 * A {@link NumericAnalyzer} producing {@link NumericDoubleTokenizer}s. Named
 * analyzers for {@link Integer#MAX_VALUE} and for every multiple of 4 from 0 to
 * 64 are pre-built with global scope; any other precision step yields a fresh
 * index-scoped instance.
 */
public class NumericDoubleAnalyzer extends NumericAnalyzer<NumericDoubleTokenizer> {

    /** Pre-built, globally scoped analyzers keyed by precision step. */
    private static final IntObjectHashMap<NamedAnalyzer> builtIn = new IntObjectHashMap<>();

    static {
        builtIn.put(Integer.MAX_VALUE,
                new NamedAnalyzer("_double/max", AnalyzerScope.GLOBAL, new NumericDoubleAnalyzer(Integer.MAX_VALUE)));
        for (int step = 0; step <= 64; step += 4) {
            final NumericDoubleAnalyzer delegate = new NumericDoubleAnalyzer(step);
            builtIn.put(step, new NamedAnalyzer("_double/" + step, AnalyzerScope.GLOBAL, delegate));
        }
    }

    /**
     * Returns the pre-built analyzer for {@code precisionStep} when one exists,
     * otherwise a new index-scoped analyzer named {@code _double/<precisionStep>}.
     */
    public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
        final NamedAnalyzer prebuilt = builtIn.get(precisionStep);
        if (prebuilt != null) {
            return prebuilt;
        }
        return new NamedAnalyzer("_double/" + precisionStep, AnalyzerScope.INDEX, new NumericDoubleAnalyzer(precisionStep));
    }

    private final int precisionStep;

    /** @param precisionStep precision step handed to every tokenizer created by this analyzer */
    public NumericDoubleAnalyzer(int precisionStep) {
        this.precisionStep = precisionStep;
    }

    /** Creates a double tokenizer configured with this analyzer's precision step. */
    @Override
    protected NumericDoubleTokenizer createNumericTokenizer(char[] buffer) throws IOException {
        return new NumericDoubleTokenizer(precisionStep, buffer);
    }
}

View File

@ -1,39 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import java.io.IOException;
/**
 * {@link NumericTokenizer} that parses its input as a {@code double} and sets
 * it on the underlying numeric stream.
 */
public class NumericDoubleTokenizer extends NumericTokenizer {

    /**
     * @param precisionStep precision step of the underlying {@link LegacyNumericTokenStream}
     * @param buffer        scratch buffer passed on to the parent tokenizer
     * @throws IOException declared by the parent constructor
     */
    public NumericDoubleTokenizer(int precisionStep, char[] buffer) throws IOException {
        super(new LegacyNumericTokenStream(precisionStep), buffer, null);
    }

    /** Parses {@code value} with {@link Double#parseDouble(String)} and sets it on the stream. */
    @Override
    protected void setValue(LegacyNumericTokenStream tokenStream, String value) {
        final double parsed = Double.parseDouble(value);
        tokenStream.setDoubleValue(parsed);
    }
}

View File

@ -1,59 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.hppc.IntObjectHashMap;
import java.io.IOException;
/**
 * A {@link NumericAnalyzer} producing {@link NumericFloatTokenizer}s. Named
 * analyzers for {@link Integer#MAX_VALUE} and for every multiple of 4 from 0 to
 * 64 are pre-built with global scope; any other precision step yields a fresh
 * index-scoped instance.
 */
public class NumericFloatAnalyzer extends NumericAnalyzer<NumericFloatTokenizer> {

    /** Pre-built, globally scoped analyzers keyed by precision step. */
    private static final IntObjectHashMap<NamedAnalyzer> builtIn = new IntObjectHashMap<>();

    static {
        builtIn.put(Integer.MAX_VALUE,
                new NamedAnalyzer("_float/max", AnalyzerScope.GLOBAL, new NumericFloatAnalyzer(Integer.MAX_VALUE)));
        for (int step = 0; step <= 64; step += 4) {
            final NumericFloatAnalyzer delegate = new NumericFloatAnalyzer(step);
            builtIn.put(step, new NamedAnalyzer("_float/" + step, AnalyzerScope.GLOBAL, delegate));
        }
    }

    /**
     * Returns the pre-built analyzer for {@code precisionStep} when one exists,
     * otherwise a new index-scoped analyzer named {@code _float/<precisionStep>}.
     */
    public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
        final NamedAnalyzer prebuilt = builtIn.get(precisionStep);
        if (prebuilt != null) {
            return prebuilt;
        }
        return new NamedAnalyzer("_float/" + precisionStep, AnalyzerScope.INDEX, new NumericFloatAnalyzer(precisionStep));
    }

    private final int precisionStep;

    /** @param precisionStep precision step handed to every tokenizer created by this analyzer */
    public NumericFloatAnalyzer(int precisionStep) {
        this.precisionStep = precisionStep;
    }

    /** Creates a float tokenizer configured with this analyzer's precision step. */
    @Override
    protected NumericFloatTokenizer createNumericTokenizer(char[] buffer) throws IOException {
        return new NumericFloatTokenizer(precisionStep, buffer);
    }
}

View File

@ -1,39 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import java.io.IOException;
/**
 * {@link NumericTokenizer} that parses its input as a {@code float} and sets
 * it on the underlying numeric stream.
 */
public class NumericFloatTokenizer extends NumericTokenizer {

    /**
     * @param precisionStep precision step of the underlying {@link LegacyNumericTokenStream}
     * @param buffer        scratch buffer passed on to the parent tokenizer
     * @throws IOException declared by the parent constructor
     */
    public NumericFloatTokenizer(int precisionStep, char[] buffer) throws IOException {
        super(new LegacyNumericTokenStream(precisionStep), buffer, null);
    }

    /** Parses {@code value} with {@link Float#parseFloat(String)} and sets it on the stream. */
    @Override
    protected void setValue(LegacyNumericTokenStream tokenStream, String value) {
        final float parsed = Float.parseFloat(value);
        tokenStream.setFloatValue(parsed);
    }
}

View File

@ -1,59 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.hppc.IntObjectHashMap;
import java.io.IOException;
/**
 * A {@link NumericAnalyzer} producing {@link NumericIntegerTokenizer}s. Named
 * analyzers for {@link Integer#MAX_VALUE} and for every multiple of 4 from 0 to
 * 64 are pre-built with global scope; any other precision step yields a fresh
 * index-scoped instance.
 */
public class NumericIntegerAnalyzer extends NumericAnalyzer<NumericIntegerTokenizer> {

    /** Pre-built, globally scoped analyzers keyed by precision step. */
    private static final IntObjectHashMap<NamedAnalyzer> builtIn = new IntObjectHashMap<>();

    static {
        builtIn.put(Integer.MAX_VALUE,
                new NamedAnalyzer("_int/max", AnalyzerScope.GLOBAL, new NumericIntegerAnalyzer(Integer.MAX_VALUE)));
        for (int step = 0; step <= 64; step += 4) {
            final NumericIntegerAnalyzer delegate = new NumericIntegerAnalyzer(step);
            builtIn.put(step, new NamedAnalyzer("_int/" + step, AnalyzerScope.GLOBAL, delegate));
        }
    }

    /**
     * Returns the pre-built analyzer for {@code precisionStep} when one exists,
     * otherwise a new index-scoped analyzer named {@code _int/<precisionStep>}.
     */
    public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
        final NamedAnalyzer prebuilt = builtIn.get(precisionStep);
        if (prebuilt != null) {
            return prebuilt;
        }
        return new NamedAnalyzer("_int/" + precisionStep, AnalyzerScope.INDEX, new NumericIntegerAnalyzer(precisionStep));
    }

    private final int precisionStep;

    /** @param precisionStep precision step handed to every tokenizer created by this analyzer */
    public NumericIntegerAnalyzer(int precisionStep) {
        this.precisionStep = precisionStep;
    }

    /** Creates an integer tokenizer configured with this analyzer's precision step. */
    @Override
    protected NumericIntegerTokenizer createNumericTokenizer(char[] buffer) throws IOException {
        return new NumericIntegerTokenizer(precisionStep, buffer);
    }
}

View File

@ -1,39 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import java.io.IOException;
/**
 * {@link NumericTokenizer} that parses its input as an {@code int} and sets it
 * on the underlying numeric stream.
 */
public class NumericIntegerTokenizer extends NumericTokenizer {

    /**
     * @param precisionStep precision step of the underlying {@link LegacyNumericTokenStream}
     * @param buffer        scratch buffer passed on to the parent tokenizer
     * @throws IOException declared by the parent constructor
     */
    public NumericIntegerTokenizer(int precisionStep, char[] buffer) throws IOException {
        super(new LegacyNumericTokenStream(precisionStep), buffer, null);
    }

    /** Parses {@code value} with {@link Integer#parseInt(String)} and sets it on the stream. */
    @Override
    protected void setValue(LegacyNumericTokenStream tokenStream, String value) {
        final int parsed = Integer.parseInt(value);
        tokenStream.setIntValue(parsed);
    }
}

View File

@ -1,59 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.hppc.IntObjectHashMap;
import java.io.IOException;
/**
 * A {@link NumericAnalyzer} producing {@link NumericLongTokenizer}s. Named
 * analyzers for {@link Integer#MAX_VALUE} and for every multiple of 4 from 0 to
 * 64 are pre-built with global scope; any other precision step yields a fresh
 * index-scoped instance.
 */
public class NumericLongAnalyzer extends NumericAnalyzer<NumericLongTokenizer> {

    /** Pre-built, globally scoped analyzers keyed by precision step. */
    private static final IntObjectHashMap<NamedAnalyzer> builtIn = new IntObjectHashMap<>();

    static {
        builtIn.put(Integer.MAX_VALUE,
                new NamedAnalyzer("_long/max", AnalyzerScope.GLOBAL, new NumericLongAnalyzer(Integer.MAX_VALUE)));
        for (int step = 0; step <= 64; step += 4) {
            final NumericLongAnalyzer delegate = new NumericLongAnalyzer(step);
            builtIn.put(step, new NamedAnalyzer("_long/" + step, AnalyzerScope.GLOBAL, delegate));
        }
    }

    /**
     * Returns the pre-built analyzer for {@code precisionStep} when one exists,
     * otherwise a new index-scoped analyzer named {@code _long/<precisionStep>}.
     */
    public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
        final NamedAnalyzer prebuilt = builtIn.get(precisionStep);
        if (prebuilt != null) {
            return prebuilt;
        }
        return new NamedAnalyzer("_long/" + precisionStep, AnalyzerScope.INDEX, new NumericLongAnalyzer(precisionStep));
    }

    private final int precisionStep;

    /** @param precisionStep precision step handed to every tokenizer created by this analyzer */
    public NumericLongAnalyzer(int precisionStep) {
        this.precisionStep = precisionStep;
    }

    /** Creates a long tokenizer configured with this analyzer's precision step. */
    @Override
    protected NumericLongTokenizer createNumericTokenizer(char[] buffer) throws IOException {
        return new NumericLongTokenizer(precisionStep, buffer);
    }
}

View File

@ -1,39 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import java.io.IOException;
/**
 * {@link NumericTokenizer} that parses its input as a {@code long} and sets it
 * on the underlying numeric stream.
 */
public class NumericLongTokenizer extends NumericTokenizer {

    /**
     * @param precisionStep precision step of the underlying {@link LegacyNumericTokenStream}
     * @param buffer        scratch buffer passed on to the parent tokenizer
     * @throws IOException declared by the parent constructor
     */
    public NumericLongTokenizer(int precisionStep, char[] buffer) throws IOException {
        super(new LegacyNumericTokenStream(precisionStep), buffer, null);
    }

    /** Parses {@code value} with {@link Long#parseLong(String)} and sets it on the stream. */
    @Override
    protected void setValue(LegacyNumericTokenStream tokenStream, String value) {
        final long parsed = Long.parseLong(value);
        tokenStream.setLongValue(parsed);
    }
}

View File

@ -1,99 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.elasticsearch.common.io.Streams;
import java.io.IOException;
import java.util.Iterator;
/**
* Base {@link Tokenizer} that wraps a {@link LegacyNumericTokenStream}.
* <p>On the first {@link #incrementToken()} after a {@link #reset()}, the whole
* input is read into a fixed-size buffer, turned into a string and handed to
* {@link #setValue(LegacyNumericTokenStream, String)}; every token is then
* produced by the wrapped numeric stream.</p>
*/
public abstract class NumericTokenizer extends Tokenizer {
/**
* Make this tokenizer get attributes from the delegate token stream.
* The returned factory answers every attribute request with whatever
* <code>source.addAttribute</code> returns for that attribute class.
*/
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
return new AttributeFactory() {
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
return (AttributeImpl) source.addAttribute(attClass);
}
};
}
/** The wrapped stream that actually produces the tokens. */
private final LegacyNumericTokenStream numericTokenStream;
/** Scratch buffer the input is read into; its length caps the accepted input size. */
private final char[] buffer;
/** Opaque subclass-specific object supplied to the constructor (may be null). */
protected final Object extra;
/**
* True once the input has been consumed for the current cycle. It starts out
* true, so the input is only read after {@link #reset()} has been called.
*/
private boolean started;
/**
* @param numericTokenStream the stream to delegate token production to
* @param buffer scratch buffer used to read the input
* @param extra arbitrary object made available to subclasses through {@link #extra}
*/
protected NumericTokenizer(LegacyNumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException {
super(delegatingAttributeFactory(numericTokenStream));
this.numericTokenStream = numericTokenStream;
// Add attributes from the numeric token stream, this works fine because the attribute factory delegates to numericTokenStream
for (Iterator<Class<? extends Attribute>> it = numericTokenStream.getAttributeClassesIterator(); it.hasNext();) {
addAttribute(it.next());
}
this.extra = extra;
this.buffer = buffer;
started = true;
}
/** Resets the tokenizer and arms it so the next {@link #incrementToken()} reads the input. */
@Override
public void reset() throws IOException {
super.reset();
started = false;
}
/**
* Reads and parses the input on the first call after {@link #reset()}, then
* delegates to the wrapped numeric stream.
*
* @throws IOException if the input holds more characters than the buffer can take
*/
@Override
public final boolean incrementToken() throws IOException {
if (!started) {
// reset() must be idempotent, this is why we read data in incrementToken
final int len = Streams.readFully(input, buffer);
// A full buffer with more input still pending means the value did not fit.
if (len == buffer.length && input.read() != -1) {
throw new IOException("Cannot read numeric data larger than " + buffer.length + " chars");
}
setValue(numericTokenStream, new String(buffer, 0, len));
numericTokenStream.reset();
started = true;
}
return numericTokenStream.incrementToken();
}
/** Ends this tokenizer and then the wrapped numeric stream. */
@Override
public void end() throws IOException {
super.end();
numericTokenStream.end();
}
/** Closes this tokenizer and then the wrapped numeric stream. */
@Override
public void close() throws IOException {
super.close();
numericTokenStream.close();
}
/**
* Parses <code>value</code> and sets it on <code>tokenStream</code>.
*
* @param tokenStream the wrapped numeric stream to configure
* @param value the full input read from the tokenizer's reader
*/
protected abstract void setValue(LegacyNumericTokenStream tokenStream, String value);
}

View File

@ -38,7 +38,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericIntegerAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -85,12 +84,6 @@ public class ByteFieldMapper extends NumberFieldMapper {
return (ByteFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/**
 * Builds a named analyzer for byte fields, backed by a
 * {@link NumericIntegerAnalyzer} with the given precision step.
 */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final String analyzerName;
    if (precisionStep == Integer.MAX_VALUE) {
        analyzerName = "_byte/max";
    } else {
        analyzerName = "_byte/" + precisionStep;
    }
    final NumericIntegerAnalyzer delegate = new NumericIntegerAnalyzer(precisionStep);
    return new NamedAnalyzer(analyzerName, delegate);
}
@Override
protected int maxPrecisionStep() {
return 32;

View File

@ -44,7 +44,6 @@ import org.elasticsearch.common.util.LocaleUtils;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericDateAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -141,11 +140,6 @@ public class DateFieldMapper extends NumberFieldMapper {
return this;
}
/**
 * Builds the named date analyzer for this field's date-time formatter and the
 * given precision step.
 */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer dateAnalyzer =
            NumericDateAnalyzer.buildNamedAnalyzer(fieldType().dateTimeFormatter, precisionStep);
    return dateAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 64;

View File

@ -40,7 +40,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericDoubleAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -87,11 +86,6 @@ public class DoubleFieldMapper extends NumberFieldMapper {
return (DoubleFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/** Builds the named double analyzer for the given precision step. */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer doubleAnalyzer = NumericDoubleAnalyzer.buildNamedAnalyzer(precisionStep);
    return doubleAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 64;

View File

@ -41,7 +41,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericFloatAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -88,11 +87,6 @@ public class FloatFieldMapper extends NumberFieldMapper {
return (FloatFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/** Builds the named float analyzer for the given precision step. */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer floatAnalyzer = NumericFloatAnalyzer.buildNamedAnalyzer(precisionStep);
    return floatAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 32;

View File

@ -40,7 +40,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericIntegerAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -92,12 +91,6 @@ public class IntegerFieldMapper extends NumberFieldMapper {
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
return (IntegerFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/** Builds the named integer analyzer for the given precision step. */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer integerAnalyzer = NumericIntegerAnalyzer.buildNamedAnalyzer(precisionStep);
    return integerAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 32;

View File

@ -40,7 +40,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericLongAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -92,11 +91,6 @@ public class LongFieldMapper extends NumberFieldMapper {
return (LongFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/** Builds the named long analyzer for the given precision step. */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer longAnalyzer = NumericLongAnalyzer.buildNamedAnalyzer(precisionStep);
    return longAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 64;

View File

@ -127,12 +127,8 @@ public abstract class NumberFieldMapper extends FieldMapper implements AllFieldM
if (precisionStep <= 0 || precisionStep >= maxPrecisionStep()) {
fieldType.setNumericPrecisionStep(Integer.MAX_VALUE);
}
fieldType.setIndexAnalyzer(makeNumberAnalyzer(fieldType.numericPrecisionStep()));
fieldType.setSearchAnalyzer(makeNumberAnalyzer(Integer.MAX_VALUE));
}
protected abstract NamedAnalyzer makeNumberAnalyzer(int precisionStep);
protected abstract int maxPrecisionStep();
}

View File

@ -39,7 +39,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericIntegerAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -88,12 +87,6 @@ public class ShortFieldMapper extends NumberFieldMapper {
return (ShortFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/**
 * Builds a named analyzer for short fields, backed by a
 * {@link NumericIntegerAnalyzer} with the given precision step.
 */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final String analyzerName;
    if (precisionStep == Integer.MAX_VALUE) {
        analyzerName = "_short/max";
    } else {
        analyzerName = "_short/" + precisionStep;
    }
    final NumericIntegerAnalyzer delegate = new NumericIntegerAnalyzer(precisionStep);
    return new NamedAnalyzer(analyzerName, delegate);
}
@Override
protected int maxPrecisionStep() {
return 32;

View File

@ -28,7 +28,6 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericIntegerAnalyzer;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
@ -82,11 +81,6 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
return (TokenCountFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/** Builds the named integer analyzer used for token counts at the given precision step. */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer countAnalyzer = NumericIntegerAnalyzer.buildNamedAnalyzer(precisionStep);
    return countAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 32;

View File

@ -22,12 +22,12 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.AlreadyExpiredException;
import org.elasticsearch.index.analysis.NumericLongAnalyzer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
@ -61,9 +61,9 @@ public class TTLFieldMapper extends MetadataFieldMapper {
TTL_FIELD_TYPE.setStored(true);
TTL_FIELD_TYPE.setTokenized(false);
TTL_FIELD_TYPE.setNumericPrecisionStep(Defaults.PRECISION_STEP_64_BIT);
TTL_FIELD_TYPE.setIndexAnalyzer(NumericLongAnalyzer.buildNamedAnalyzer(Defaults.PRECISION_STEP_64_BIT));
TTL_FIELD_TYPE.setSearchAnalyzer(NumericLongAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE));
TTL_FIELD_TYPE.setName(NAME);
TTL_FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
TTL_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
TTL_FIELD_TYPE.freeze();
}

View File

@ -26,9 +26,9 @@ import org.elasticsearch.action.TimestampParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.joda.FormatDateTimeFormatter;
import org.elasticsearch.common.joda.Joda;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NumericDateAnalyzer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
@ -65,8 +65,8 @@ public class TimestampFieldMapper extends MetadataFieldMapper {
FIELD_TYPE.setNumericPrecisionStep(Defaults.PRECISION_STEP_64_BIT);
FIELD_TYPE.setName(NAME);
FIELD_TYPE.setDateTimeFormatter(DATE_TIME_FORMATTER);
FIELD_TYPE.setIndexAnalyzer(NumericDateAnalyzer.buildNamedAnalyzer(DATE_TIME_FORMATTER, Defaults.PRECISION_STEP_64_BIT));
FIELD_TYPE.setSearchAnalyzer(NumericDateAnalyzer.buildNamedAnalyzer(DATE_TIME_FORMATTER, Integer.MAX_VALUE));
FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
FIELD_TYPE.setHasDocValues(true);
FIELD_TYPE.freeze();
}

View File

@ -42,8 +42,6 @@ import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericAnalyzer;
import org.elasticsearch.index.analysis.NumericTokenizer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
@ -131,12 +129,6 @@ public class IpFieldMapper extends NumberFieldMapper {
return (IpFieldMapper) fieldMapper.includeInAll(includeInAll);
}
/**
 * Builds a {@link NamedAnalyzer} wrapping a {@link NumericIpAnalyzer} for the given precision step.
 * <p>The analyzer is named {@code _ip/max} when the step is {@link Integer#MAX_VALUE} and
 * {@code _ip/<precisionStep>} otherwise.</p>
 *
 * @param precisionStep precision step passed to the wrapped {@code NumericIpAnalyzer}
 * @return a new named analyzer
 */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final String analyzerName;
    if (precisionStep == Integer.MAX_VALUE) {
        analyzerName = "_ip/max";
    } else {
        analyzerName = "_ip/" + precisionStep;
    }
    final NumericIpAnalyzer ipAnalyzer = new NumericIpAnalyzer(precisionStep);
    return new NamedAnalyzer(analyzerName, ipAnalyzer);
}
@Override
protected int maxPrecisionStep() {
return 64;
@ -362,29 +354,4 @@ public class IpFieldMapper extends NumberFieldMapper {
}
/**
 * {@link NumericAnalyzer} whose tokenizers are {@link NumericIpTokenizer}s created with a
 * fixed precision step.
 */
public static class NumericIpAnalyzer extends NumericAnalyzer<NumericIpTokenizer> {

    /** Precision step given to every tokenizer this analyzer creates. */
    private final int precisionStep;

    /**
     * @param precisionStep precision step to hand to the created tokenizers
     */
    public NumericIpAnalyzer(int precisionStep) {
        this.precisionStep = precisionStep;
    }

    /**
     * Creates a tokenizer that uses this analyzer's precision step and the supplied buffer.
     *
     * @param buffer buffer forwarded as-is to the {@link NumericIpTokenizer} constructor
     * @return a new tokenizer instance
     * @throws IOException if the tokenizer constructor throws
     */
    @Override
    protected NumericIpTokenizer createNumericTokenizer(char[] buffer) throws IOException {
        final NumericIpTokenizer ipTokenizer = new NumericIpTokenizer(this.precisionStep, buffer);
        return ipTokenizer;
    }
}
/**
 * {@link NumericTokenizer} that feeds its {@link LegacyNumericTokenStream} with the long
 * value obtained by passing the input string through {@code ipToLong}.
 */
public static class NumericIpTokenizer extends NumericTokenizer {

    /**
     * @param precisionStep precision step of the underlying {@link LegacyNumericTokenStream}
     * @param buffer        buffer forwarded to the {@link NumericTokenizer} constructor
     * @throws IOException if the parent constructor throws
     */
    public NumericIpTokenizer(int precisionStep, char[] buffer) throws IOException {
        // NOTE(review): the meaning of the trailing null argument is defined by NumericTokenizer,
        // which is not visible here - confirm against that class
        super(new LegacyNumericTokenStream(precisionStep), buffer, null);
    }

    /**
     * Converts {@code value} with {@code ipToLong} (defined outside this class) and sets the
     * result as the long value of {@code tokenStream}.
     */
    @Override
    protected void setValue(LegacyNumericTokenStream tokenStream, String value) {
        final long numericIp = ipToLong(value);
        tokenStream.setLongValue(numericIp);
    }
}
}

View File

@ -52,8 +52,10 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.core.KeywordFieldMapper.KeywordFieldType;
import org.elasticsearch.index.mapper.core.StringFieldMapper.StringFieldType;
import org.elasticsearch.index.mapper.core.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.search.internal.SearchContext;
@ -62,7 +64,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -94,6 +95,9 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
public static final boolean DEFAULT_INCLUDE = false;
public static final boolean DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS = true;
/**
 * Field type classes that more_like_this accepts: string, text and keyword.
 * Membership is tested against {@code fieldType.getClass()} where the like-fields are
 * collected, so only these exact classes match; any other mapped field type is either
 * rejected with an {@link IllegalArgumentException} or skipped.
 */
private static final Set<Class<? extends MappedFieldType>> SUPPORTED_FIELD_TYPES = new HashSet<>(
Arrays.asList(StringFieldType.class, TextFieldType.class, KeywordFieldType.class));
private interface Field {
ParseField FIELDS = new ParseField("fields");
ParseField LIKE = new ParseField("like");
@ -1032,12 +1036,18 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
} else {
for (String field : fields) {
MappedFieldType fieldType = context.fieldMapper(field);
if (fieldType != null && SUPPORTED_FIELD_TYPES.contains(fieldType.getClass()) == false) {
if (failOnUnsupportedField) {
throw new IllegalArgumentException("more_like_this only supports text/keyword fields: [" + field + "]");
} else {
// skip
continue;
}
}
moreLikeFields.add(fieldType == null ? field : fieldType.name());
}
}
// possibly remove unsupported fields
removeUnsupportedFields(moreLikeFields, analyzerObj, failOnUnsupportedField);
if (moreLikeFields.isEmpty()) {
return null;
}
@ -1059,20 +1069,6 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
}
}
/**
 * Filters {@code moreLikeFields} in place, keeping only the fields for which
 * {@code Analysis.generatesCharacterTokenStream(analyzer, field)} returns {@code true}.
 *
 * @param moreLikeFields         field names to check; unsupported entries are removed through
 *                               the list's iterator, so the list must support removal
 * @param analyzer               analyzer handed to {@code Analysis.generatesCharacterTokenStream}
 * @param failOnUnsupportedField when {@code true}, the first unsupported field raises an
 *                               exception instead of being removed
 * @return the same {@code moreLikeFields} instance that was passed in
 * @throws IllegalArgumentException if an unsupported field is found and
 *                                  {@code failOnUnsupportedField} is {@code true}
 * @throws IOException              if the token stream check throws
 */
private static List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
    final Iterator<String> candidates = moreLikeFields.iterator();
    while (candidates.hasNext()) {
        final String candidate = candidates.next();
        if (Analysis.generatesCharacterTokenStream(analyzer, candidate)) {
            // supported field: keep it
            continue;
        }
        if (failOnUnsupportedField) {
            throw new IllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + candidate + "]");
        }
        candidates.remove();
    }
    return moreLikeFields;
}
private Query handleItems(QueryShardContext context, MoreLikeThisQuery mltQuery, Item[] likeItems, Item[] unlikeItems,
boolean include, List<String> moreLikeFields, boolean useDefaultField) throws IOException {
// set default index, type and fields if not specified

View File

@ -1,59 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.LegacyNumericTokenStream;
import org.apache.lucene.analysis.LegacyNumericTokenStream.LegacyNumericTermAttribute;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
/**
 * Checks that a {@link NumericDoubleAnalyzer} produces the same tokens as a plain
 * {@link LegacyNumericTokenStream} that was given the same double value and precision step.
 */
public class NumericAnalyzerTests extends ESTestCase {

    /**
     * Advances both streams in lockstep and compares, for every token, the position
     * increment, the raw numeric value and the shift. Also checks that both streams
     * produce the same number of tokens.
     *
     * @throws IOException if one of the token streams throws
     */
    public void testAttributeEqual() throws IOException {
        final int precisionStep = 8;
        final double value = randomDouble();

        NumericDoubleAnalyzer analyzer = new NumericDoubleAnalyzer(precisionStep);

        // try-with-resources: both streams get closed even when an assertion fails
        try (TokenStream ts1 = analyzer.tokenStream("dummy", String.valueOf(value));
             LegacyNumericTokenStream ts2 = new LegacyNumericTokenStream(precisionStep)) {
            ts2.setDoubleValue(value);

            // The "2" attributes have to come from ts2. Reading them from ts1 would make every
            // comparison below compare ts1 with itself, so the test could never fail.
            final LegacyNumericTermAttribute numTerm1 = ts1.addAttribute(LegacyNumericTermAttribute.class);
            final LegacyNumericTermAttribute numTerm2 = ts2.addAttribute(LegacyNumericTermAttribute.class);
            final PositionIncrementAttribute posInc1 = ts1.addAttribute(PositionIncrementAttribute.class);
            final PositionIncrementAttribute posInc2 = ts2.addAttribute(PositionIncrementAttribute.class);

            ts1.reset();
            ts2.reset();
            while (ts1.incrementToken()) {
                assertThat(ts2.incrementToken(), is(true));
                assertThat(posInc1, equalTo(posInc2));
                // can't use equalTo directly on the numeric attribute cause it doesn't implement equals (LUCENE-5070)
                assertThat(numTerm1.getRawValue(), equalTo(numTerm2.getRawValue()));
                assertThat(numTerm1.getShift(), equalTo(numTerm2.getShift()));
            }
            // ts1 is exhausted, so ts2 must be exhausted as well
            assertThat(ts2.incrementToken(), is(false));
            ts1.end();
            ts2.end();
        }
    }
}

View File

@ -272,7 +272,7 @@ public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLik
queryBuilder.toQuery(createShardContext());
fail("should have failed with IllegalArgumentException for field: " + unsupportedField);
} catch (IllegalArgumentException e) {
assertThat(e.getMessage(), containsString("more_like_this doesn't support binary/numeric fields"));
assertThat(e.getMessage(), containsString("more_like_this only supports text/keyword fields"));
}
}

View File

@ -29,8 +29,6 @@ import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericLongAnalyzer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
@ -77,11 +75,6 @@ public class Murmur3FieldMapper extends LongFieldMapper {
defaultFieldType.setHasDocValues(true);
}
/**
 * Builds the named analyzer for this mapper by delegating to
 * {@link NumericLongAnalyzer#buildNamedAnalyzer(int)}.
 *
 * @param precisionStep precision step handed through unchanged to the long analyzer
 * @return the analyzer returned by {@code NumericLongAnalyzer.buildNamedAnalyzer}
 */
@Override
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
    final NamedAnalyzer longAnalyzer = NumericLongAnalyzer.buildNamedAnalyzer(precisionStep);
    return longAnalyzer;
}
@Override
protected int maxPrecisionStep() {
return 64;

View File

@ -21,9 +21,9 @@ package org.elasticsearch.index.mapper.size;
import org.apache.lucene.document.Field;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NumericIntegerAnalyzer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
@ -53,8 +53,8 @@ public class SizeFieldMapper extends MetadataFieldMapper {
SIZE_FIELD_TYPE.setStored(true);
SIZE_FIELD_TYPE.setNumericPrecisionStep(Defaults.PRECISION_STEP_32_BIT);
SIZE_FIELD_TYPE.setName(NAME);
SIZE_FIELD_TYPE.setIndexAnalyzer(NumericIntegerAnalyzer.buildNamedAnalyzer(Defaults.PRECISION_STEP_32_BIT));
SIZE_FIELD_TYPE.setSearchAnalyzer(NumericIntegerAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE));
SIZE_FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
SIZE_FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
SIZE_FIELD_TYPE.freeze();
}
}