Upgrade to lucene-8.0.0-snapshot-31d7dfe6b1 (#35224)

parent 9f4b93fd5e · commit a5e1f4d3a2
@@ -1,5 +1,5 @@
 elasticsearch = 7.0.0
-lucene = 8.0.0-snapshot-7d0a7782fa
+lucene = 8.0.0-snapshot-31d7dfe6b1
 
 # optional dependencies
 spatial4j = 0.7
@@ -44,7 +44,6 @@ import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.UpperCaseFilter;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.cz.CzechAnalyzer;
@@ -308,7 +307,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
         tokenizers.put("char_group", CharGroupTokenizerFactory::new);
         tokenizers.put("classic", ClassicTokenizerFactory::new);
         tokenizers.put("letter", LetterTokenizerFactory::new);
-        tokenizers.put("lowercase", LowerCaseTokenizerFactory::new);
+        // TODO deprecate and remove in API
+        tokenizers.put("lowercase", XLowerCaseTokenizerFactory::new);
         tokenizers.put("path_hierarchy", PathHierarchyTokenizerFactory::new);
         tokenizers.put("PathHierarchy", PathHierarchyTokenizerFactory::new);
         tokenizers.put("pattern", PatternTokenizerFactory::new);
@@ -503,7 +503,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
                 () -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
         tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1), null));
         tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new, null));
-        tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", LowerCaseTokenizer::new, () -> new TokenFilterFactory() {
+        // TODO deprecate and remove in API
+        tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new, () -> new TokenFilterFactory() {
             @Override
             public String name() {
                 return "lowercase";
@@ -0,0 +1,113 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.analysis.common;
+
+import org.apache.lucene.analysis.CharacterUtils;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.util.CharTokenizer;
+
+import java.io.IOException;
+
+@Deprecated
+class XLowerCaseTokenizer extends Tokenizer {
+
+    private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+
+    private static final int IO_BUFFER_SIZE = 4096;
+
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+    private final CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
+
+    @Override
+    public final boolean incrementToken() throws IOException {
+        clearAttributes();
+        int length = 0;
+        int start = -1; // this variable is always initialized
+        int end = -1;
+        char[] buffer = termAtt.buffer();
+        while (true) {
+            if (bufferIndex >= dataLen) {
+                offset += dataLen;
+                CharacterUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+                if (ioBuffer.getLength() == 0) {
+                    dataLen = 0; // so next offset += dataLen won't decrement offset
+                    if (length > 0) {
+                        break;
+                    } else {
+                        finalOffset = correctOffset(offset);
+                        return false;
+                    }
+                }
+                dataLen = ioBuffer.getLength();
+                bufferIndex = 0;
+            }
+            // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
+            final int c = Character.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength());
+            final int charCount = Character.charCount(c);
+            bufferIndex += charCount;
+
+            if (Character.isLetter(c)) {               // if it's a token char
+                if (length == 0) {                     // start of token
+                    assert start == -1;
+                    start = offset + bufferIndex - charCount;
+                    end = start;
+                } else if (length >= buffer.length - 1) { // check if a supplementary could run out of bounds
+                    buffer = termAtt.resizeBuffer(2 + length); // make sure a supplementary fits in the buffer
+                }
+                end += charCount;
+                length += Character.toChars(Character.toLowerCase(c), buffer, length); // buffer it, normalized
+                int maxTokenLen = CharTokenizer.DEFAULT_MAX_WORD_LEN;
+                if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
+                    break;
+                }
+            } else if (length > 0) {           // at non-Letter w/ chars
+                break;                         // return 'em
+            }
+        }
+
+        termAtt.setLength(length);
+        assert start != -1;
+        offsetAtt.setOffset(correctOffset(start), finalOffset = correctOffset(end));
+        return true;
+
+    }
+
+    @Override
+    public final void end() throws IOException {
+        super.end();
+        // set final offset
+        offsetAtt.setOffset(finalOffset, finalOffset);
+    }
+
+    @Override
+    public void reset() throws IOException {
+        super.reset();
+        bufferIndex = 0;
+        offset = 0;
+        dataLen = 0;
+        finalOffset = 0;
+        ioBuffer.reset(); // make sure to reset the IO buffer!!
+    }
+
+}
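Note: this class is carried here because the Lucene snapshot drops LowerCaseTokenizer; it reproduces the old behavior (split on non-letters and lowercase in the same pass) so that existing "lowercase" tokenizer configurations keep working. A minimal usage sketch follows; it is hedged in that the class is package-private, so a caller would have to sit in org.elasticsearch.analysis.common, and the input string is made up:

    Tokenizer tok = new XLowerCaseTokenizer();
    tok.setReader(new java.io.StringReader("Hello WORLD 42"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
        // prints "hello" then "world"; "42" is dropped because digits are not letters
        System.out.println(term.toString());
    }
    tok.end();
    tok.close();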
@@ -20,26 +20,21 @@
 package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
-import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 
-public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implements MultiTermAwareComponent {
+@Deprecated
+// NORELEASE we should prevent the usage on indices created after 7.0 in order to be able to remove in 8
+public class XLowerCaseTokenizerFactory extends AbstractTokenizerFactory {
 
-    LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    public XLowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, settings);
     }
 
     @Override
     public Tokenizer create() {
-        return new LowerCaseTokenizer();
+        return new XLowerCaseTokenizer();
     }
-
-    @Override
-    public Object getMultiTermComponent() {
-        return this;
-    }
 }
@@ -48,7 +48,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         tokenizers.put("edgengram", EdgeNGramTokenizerFactory.class);
         tokenizers.put("classic", ClassicTokenizerFactory.class);
         tokenizers.put("letter", LetterTokenizerFactory.class);
-        tokenizers.put("lowercase", LowerCaseTokenizerFactory.class);
+        // tokenizers.put("lowercase", XLowerCaseTokenizerFactory.class);
         tokenizers.put("pathhierarchy", PathHierarchyTokenizerFactory.class);
         tokenizers.put("pattern", PatternTokenizerFactory.class);
         tokenizers.put("uax29urlemail", UAX29URLEmailTokenizerFactory.class);
@@ -223,7 +223,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
     protected Map<String, Class<?>> getPreConfiguredTokenizers() {
         Map<String, Class<?>> tokenizers = new TreeMap<>(super.getPreConfiguredTokenizers());
         tokenizers.put("keyword", null);
-        tokenizers.put("lowercase", null);
+        tokenizers.put("lowercase", Void.class);
         tokenizers.put("classic", null);
         tokenizers.put("uax_url_email", org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class);
         tokenizers.put("path_hierarchy", null);
@@ -0,0 +1 @@
+8db13c6e146c851614c9f862f1eac67431f9b509
@@ -1 +0,0 @@
-cc072b68aac06a2fb9569ab7adce05302f130948
@@ -75,7 +75,7 @@ public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointDataDimensionCount());
         assertFalse(pointField.fieldType().stored());
         assertEquals(1230, pointField.numericValue().longValue());
         IndexableField dvField = fields[1];
@@ -149,7 +149,7 @@ public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(1, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointDataDimensionCount());
         assertEquals(1230, pointField.numericValue().longValue());
     }
 
@@ -173,7 +173,7 @@ public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(3, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointDataDimensionCount());
         assertEquals(1230, pointField.numericValue().doubleValue(), 0d);
         IndexableField dvField = fields[1];
         assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
@@ -202,7 +202,7 @@ public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointDataDimensionCount());
         assertEquals(1230, pointField.numericValue().longValue());
         IndexableField dvField = fields[1];
         assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
@@ -317,7 +317,7 @@ public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointDataDimensionCount());
         assertFalse(pointField.fieldType().stored());
         assertEquals(25, pointField.numericValue().longValue());
         IndexableField dvField = fields[1];
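Background on the rename, as far as this upgrade is concerned: Lucene 8 splits the single pointDimensionCount() into pointDataDimensionCount() (how many dimensions each point stores) and pointIndexDimensionCount() (how many of those dimensions the BKD tree actually indexes). A small illustrative sketch, assuming the Lucene 8 FieldType API; the dimension values are arbitrary:

    import org.apache.lucene.document.FieldType;

    FieldType ft = new FieldType();
    ft.setDimensions(2, 1, Long.BYTES);   // 2 data dimensions, only the first is indexed
    assert ft.pointDataDimensionCount() == 2;
    assert ft.pointIndexDimensionCount() == 1;

The test renames above pick the data-dimension accessor because they assert on what was stored per point, not on how the tree is searched.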
@@ -327,7 +327,7 @@ public class PercolatorFieldMapper extends FieldMapper {
                     extractedTerms.add(builder.toBytesRef());
                 }
             }
-            if (info.getPointDimensionCount() == 1) { // not != 0 because range fields are not supported
+            if (info.getPointIndexDimensionCount() == 1) { // not != 0 because range fields are not supported
                 PointValues values = reader.getPointValues(info.name);
                 List<byte[]> encodedPointValues = new ArrayList<>();
                 encodedPointValues.add(values.getMinPackedValue().clone());
@@ -38,7 +38,7 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
@@ -1090,7 +1090,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
             String queryToString = shardSearcher.doc(controlTopDocs.scoreDocs[i].doc).get("query_to_string");
             logger.error("controlTopDocs.scoreDocs[{}].query_to_string={}", i, queryToString);
 
-            TermsEnum tenum = MultiFields.getFields(shardSearcher.getIndexReader()).terms(fieldType.queryTermsField.name()).iterator();
+            TermsEnum tenum = MultiTerms.getTerms(shardSearcher.getIndexReader(), fieldType.queryTermsField.name()).iterator();
             StringBuilder builder = new StringBuilder();
             for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
                 PostingsEnum penum = tenum.postings(null);
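Migration note: Lucene 8 removes the Fields-centric view from MultiFields, so merged per-field term access goes through MultiTerms instead. A minimal before/after sketch, with reader and field standing in for any open IndexReader and field name:

    // Lucene 7:
    // TermsEnum te = MultiFields.getFields(reader).terms(field).iterator();

    // Lucene 8:
    Terms terms = MultiTerms.getTerms(reader, field); // may be null when the field has no terms
    TermsEnum te = terms == null ? null : terms.iterator();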
@@ -0,0 +1 @@
+b474e1a2d7f0172338a08f159849a6c491781d70
@@ -1 +0,0 @@
-429eb7e780c5a6e5200041a1f5b98bccd2623aaf
@@ -0,0 +1 @@
+fc547e69837bcb808f1782bfa35490645bab9cae
@@ -1 +0,0 @@
-837fca1b1d7ca1dc002e53171801526644e52818
@@ -0,0 +1 @@
+e08961a2ec9414947693659ff79bb7e21a410298
@@ -1 +0,0 @@
-1dde903172ade259cb26cbe320c25bc1d1356f89
@@ -0,0 +1 @@
+09280919225656c7ce2a14af29666a02bd86c540
@@ -1 +0,0 @@
-b6ca20e96a989e6e6706b8b7b8ad8c82d2a03576
@@ -0,0 +1 @@
+880f10393cdefff7575fbf5b2ced890666ec81dc
@@ -1 +0,0 @@
-c96a2f25dea18b383423a41aca296734353d4bbd
@@ -0,0 +1 @@
+b41451a9d4e30b8a9a14ccdd7553e5796f77cf44
@@ -1 +0,0 @@
-09363c5ce111d024a6da22a5ea8dbaf54d91dbd0
@@ -0,0 +1 @@
+145fd2c803d682c2cb2d78e6e350e09a09a09ea0
@@ -1 +0,0 @@
-13c3840d49480014118de99ef6e07a9e55c50172
@@ -20,12 +20,9 @@
 package org.elasticsearch.index.mapper.annotatedtext;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -69,20 +66,21 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import static org.elasticsearch.index.mapper.TypeParsers.parseTextField;
 
 /** A {@link FieldMapper} for full-text fields with annotation markup e.g.
  *
  * "New mayor is [John Smith](type=person&value=John%20Smith) "
  *
  * A special Analyzer wraps the default choice of analyzer in order
  * to strip the text field of annotation markup and inject the related
  * entity annotation tokens as supplementary tokens at the relevant points
  * in the token stream.
  * This code is largely a copy of TextFieldMapper which is less than ideal -
  * my attempts to subclass TextFieldMapper failed but we can revisit this.
  **/
 public class AnnotatedTextFieldMapper extends FieldMapper {
@@ -100,7 +98,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
     public static class Builder extends FieldMapper.Builder<Builder, AnnotatedTextFieldMapper> {
 
         private int positionIncrementGap = POSITION_INCREMENT_GAP_USE_ANALYZER;
 
         public Builder(String name) {
             super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
             builder = this;
@@ -118,7 +116,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
             this.positionIncrementGap = positionIncrementGap;
             return this;
         }
 
         @Override
         public Builder docValues(boolean docValues) {
             if (docValues) {
@@ -141,8 +139,8 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap));
                 fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap));
             } else {
                 //Using the analyzer's default BUT need to do the same thing AnalysisRegistry.processAnalyzerFactory
                 // does to splice in new default of posIncGap=100 by wrapping the analyzer
                 if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
                     int overrideInc = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
                     fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), overrideInc));
@@ -162,7 +160,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         public Mapper.Builder<AnnotatedTextFieldMapper.Builder, AnnotatedTextFieldMapper> parse(
                 String fieldName, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
             AnnotatedTextFieldMapper.Builder builder = new AnnotatedTextFieldMapper.Builder(fieldName);
 
             builder.fieldType().setIndexAnalyzer(parserContext.getIndexAnalyzers().getDefaultIndexAnalyzer());
             builder.fieldType().setSearchAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchAnalyzer());
             builder.fieldType().setSearchQuoteAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer());
@@ -181,7 +179,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
             }
         }
 
 
     /**
      * Parses markdown-like syntax into plain text and AnnotationTokens with offsets for
      * annotations found in texts
@@ -190,32 +188,32 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         public final String textPlusMarkup;
         public final String textMinusMarkup;
         List<AnnotationToken> annotations;
 
         // Format is markdown-like syntax for URLs eg:
         // "New mayor is [John Smith](type=person&value=John%20Smith) "
         static Pattern markdownPattern = Pattern.compile("\\[([^\\]\\[]*)\\]\\(([^\\)\\(]*)\\)");
 
         public static AnnotatedText parse (String textPlusMarkup) {
             List<AnnotationToken> annotations =new ArrayList<>();
             Matcher m = markdownPattern.matcher(textPlusMarkup);
             int lastPos = 0;
             StringBuilder sb = new StringBuilder();
             while(m.find()){
                 if(m.start() > lastPos){
                     sb.append(textPlusMarkup.substring(lastPos, m.start()));
                 }
 
                 int startOffset = sb.length();
                 int endOffset = sb.length() + m.group(1).length();
                 sb.append(m.group(1));
                 lastPos = m.end();
 
                 String[] pairs = m.group(2).split("&");
                 String value = null;
                 for (String pair : pairs) {
                     String[] kv = pair.split("=");
                     try {
                         if(kv.length == 2){
                             throw new ElasticsearchParseException("key=value pairs are not supported in annotations");
                         }
                         if(kv.length == 1) {
@@ -230,9 +228,9 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                         }
                     } catch (UnsupportedEncodingException uee){
                         throw new ElasticsearchParseException("Unsupported encoding parsing annotated text", uee);
                     }
                 }
             }
             if(lastPos < textPlusMarkup.length()){
                 sb.append(textPlusMarkup.substring(lastPos));
             }
@@ -242,13 +240,13 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         protected AnnotatedText(String textMinusMarkup, String textPlusMarkup, List<AnnotationToken> annotations) {
             this.textMinusMarkup = textMinusMarkup;
             this.textPlusMarkup = textPlusMarkup;
             this.annotations = annotations;
         }
 
         public static final class AnnotationToken {
             public final int offset;
             public final int endOffset;
 
             public final String value;
             public AnnotationToken(int offset, int endOffset, String value) {
                 this.offset = offset;
@@ -259,12 +257,12 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
             public String toString() {
                 return value +" ("+offset+" - "+endOffset+")";
             }
 
             public boolean intersects(int start, int end) {
                 return (start <= offset && end >= offset) || (start <= endOffset && end >= endOffset)
                     || (start >= offset && end <= endOffset);
             }
 
             @Override
             public int hashCode() {
                 final int prime = 31;
@@ -274,7 +272,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 result = prime * result + Objects.hashCode(value);
                 return result;
             }
 
             @Override
             public boolean equals(Object obj) {
                 if (this == obj)
@@ -287,16 +285,16 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 return Objects.equals(endOffset, other.endOffset) && Objects.equals(offset, other.offset)
                     && Objects.equals(value, other.value);
             }
 
         }
 
         @Override
         public String toString() {
             StringBuilder sb = new StringBuilder();
             sb.append(textMinusMarkup);
             sb.append("\n");
             annotations.forEach(a -> {
                 sb.append(a);
                 sb.append("\n");
             });
             return sb.toString();
@@ -308,10 +306,10 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
 
         public AnnotationToken getAnnotation(int index) {
             return annotations.get(index);
         }
     }
 
     // A utility class for use with highlighters where the content being highlighted
     // needs plain text format for highlighting but marked-up format for token discovery.
     // The class takes markedup format field values and returns plain text versions.
     // When asked to tokenize plain-text versions by the highlighter it tokenizes the
@@ -330,7 +328,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
             annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
         }
     }
 
     public String [] getPlainTextValuesForHighlighter(){
         String [] result = new String[annotations.length];
         for (int i = 0; i < annotations.length; i++) {
@@ -338,127 +336,75 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         }
         return result;
     }
 
     public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
         List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
         int fieldValueOffset =0;
         for (AnnotatedText fieldValueAnnotations : this.annotations) {
             //This is called from a highlighter where all of the field values are concatenated
             // so each annotation offset will need to be adjusted so that it takes into account
             // the previous values AND the MULTIVAL delimiter
             for (AnnotationToken token : fieldValueAnnotations.annotations) {
                 if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
                     intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
                         token.endOffset + fieldValueOffset, token.value));
                 }
             }
             //add 1 for the fieldvalue separator character
             fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
         }
         return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
     }
 
     @Override
     public Analyzer getWrappedAnalyzer(String fieldName) {
         return delegate;
     }
 
     @Override
     protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-        if(components instanceof AnnotatedHighlighterTokenStreamComponents){
-            // already wrapped.
-            return components;
-        }
         AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
-        return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, this.annotations);
+        AtomicInteger readerNum = new AtomicInteger(0);
+        return new TokenStreamComponents(r -> {
+            String plainText = readToString(r);
+            AnnotatedText at = this.annotations[readerNum.getAndIncrement()];
+            assert at.textMinusMarkup.equals(plainText);
+            injector.setAnnotations(at);
+            components.getSource().accept(new StringReader(at.textMinusMarkup));
+        }, injector);
     }
 }
-private static final class AnnotatedHighlighterTokenStreamComponents extends TokenStreamComponents{
-
-    private AnnotationsInjector annotationsInjector;
-    private AnnotatedText[] annotations;
-    int readerNum = 0;
-
-    AnnotatedHighlighterTokenStreamComponents(Tokenizer source, AnnotationsInjector annotationsFilter,
-                                              AnnotatedText[] annotations) {
-        super(source, annotationsFilter);
-        this.annotationsInjector = annotationsFilter;
-        this.annotations = annotations;
-    }
-
-    @Override
-    protected void setReader(Reader reader) {
-        String plainText = readToString(reader);
-        AnnotatedText at = this.annotations[readerNum++];
-        assert at.textMinusMarkup.equals(plainText);
-        // This code is reliant on the behaviour of highlighter logic - it
-        // takes plain text multi-value fields and then calls the same analyzer
-        // for each field value in turn. This class has cached the annotations
-        // associated with each plain-text value and are arranged in the same order
-        annotationsInjector.setAnnotations(at);
-        super.setReader(new StringReader(at.textMinusMarkup));
-    }
-
-}
 
 
 public static final class AnnotationAnalyzerWrapper extends AnalyzerWrapper {
 
     private final Analyzer delegate;
 
-    public AnnotationAnalyzerWrapper (Analyzer delegate) {
+    public AnnotationAnalyzerWrapper(Analyzer delegate) {
         super(delegate.getReuseStrategy());
         this.delegate = delegate;
    }
 
-    /**
-     * Wraps {@link StandardAnalyzer}.
-     */
-    public AnnotationAnalyzerWrapper() {
-        this(new StandardAnalyzer());
-    }
-
    @Override
    public Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate;
    }
 
    @Override
    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-        if(components instanceof AnnotatedTokenStreamComponents){
-            // already wrapped.
+        if (components.getTokenStream() instanceof AnnotationsInjector) {
+            // already wrapped
            return components;
        }
        AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
-        return new AnnotatedTokenStreamComponents(components.getTokenizer(), injector);
-    }
-}
-
-//This Analyzer is not "wrappable" because of a limitation in Lucene https://issues.apache.org/jira/browse/LUCENE-8352
-private static final class AnnotatedTokenStreamComponents extends TokenStreamComponents{
-    private AnnotationsInjector annotationsInjector;
-
-    AnnotatedTokenStreamComponents(Tokenizer source, AnnotationsInjector annotationsInjector) {
-        super(source, annotationsInjector);
-        this.annotationsInjector = annotationsInjector;
-    }
-
-    @Override
-    protected void setReader(Reader reader) {
-        // Sneaky code to change the content downstream components will parse.
-        // Replace the marked-up content Reader with a plain text Reader and prime the
-        // annotations injector with the AnnotatedTokens that need to be injected
-        // as plain-text parsing progresses.
-        AnnotatedText annotations = AnnotatedText.parse(readToString(reader));
-        annotationsInjector.setAnnotations(annotations);
-        super.setReader(new StringReader(annotations.textMinusMarkup));
+        return new TokenStreamComponents(r -> {
+            AnnotatedText annotations = AnnotatedText.parse(readToString(r));
+            injector.setAnnotations(annotations);
+            components.getSource().accept(new StringReader(annotations.textMinusMarkup));
+        }, injector);
    }
 }
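The two deleted inner classes relied on overriding TokenStreamComponents.setReader, an extension point the Lucene 8 API removes (the deleted comment links LUCENE-8352). The replacement constructs components from a Consumer<Reader> "source" plus the final TokenStream, so reader interception moves into the lambda, as the hunk above shows. A stripped-down sketch of the same pattern, with LowerCaseFilter standing in for AnnotationsInjector and any real pre-processing of the Reader elided:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.AnalyzerWrapper;
    import org.apache.lucene.analysis.LowerCaseFilter;

    final class WrappingSketch extends AnalyzerWrapper {
        private final Analyzer delegate;

        WrappingSketch(Analyzer delegate) {
            super(delegate.getReuseStrategy());
            this.delegate = delegate;
        }

        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            return delegate;
        }

        @Override
        protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            // The source consumer receives the Reader; transform it here if needed,
            // then hand it on to the wrapped components' own source.
            return new TokenStreamComponents(
                (java.io.Reader r) -> components.getSource().accept(r),
                new LowerCaseFilter(components.getTokenStream()));
        }
    }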
@@ -467,15 +413,15 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 buffer.append(arr, 0, numCharsRead);
             }
             reader.close();
             return buffer.toString();
         } catch (IOException e) {
             throw new UncheckedIOException("IO Error reading field content", e);
         }
     }
 
 
     public static final class AnnotationsInjector extends TokenFilter {
 
         private AnnotatedText annotatedText;
         AnnotatedText.AnnotationToken nextAnnotationForInjection = null;
         private int currentAnnotationIndex = 0;
@@ -502,8 +448,8 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 nextAnnotationForInjection = null;
             }
         }
 
 
 
         @Override
         public void reset() throws IOException {
@@ -512,7 +458,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
             inputExhausted = false;
             super.reset();
         }
 
         // Abstracts if we are pulling from some pre-cached buffer of
         // text tokens or directly from the wrapped TokenStream
         private boolean internalNextToken() throws IOException{
@@ -524,7 +470,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                     pendingStates.clear();
                 }
                 return true;
             }
             if(inputExhausted) {
                 return false;
             }
@@ -579,28 +525,28 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
             posLenAtt.setPositionLength(annotationPosLen);
             textOffsetAtt.setOffset(nextAnnotationForInjection.offset, nextAnnotationForInjection.endOffset);
             setType(nextAnnotationForInjection);
 
             // We may have multiple annotations at this location - stack them up
             final int annotationOffset = nextAnnotationForInjection.offset;
             final AnnotatedText.AnnotationToken firstAnnotationAtThisPos = nextAnnotationForInjection;
             while (nextAnnotationForInjection != null && nextAnnotationForInjection.offset == annotationOffset) {
 
 
                 setType(nextAnnotationForInjection);
                 termAtt.resizeBuffer(nextAnnotationForInjection.value.length());
                 termAtt.copyBuffer(nextAnnotationForInjection.value.toCharArray(), 0, nextAnnotationForInjection.value.length());
 
                 if (nextAnnotationForInjection == firstAnnotationAtThisPos) {
                     posAtt.setPositionIncrement(firstSpannedTextPosInc);
                     //Put at the head of the queue of tokens to be emitted
                     pendingStates.add(0, captureState());
                 } else {
                     posAtt.setPositionIncrement(0);
                     //Put after the head of the queue of tokens to be emitted
                     pendingStates.add(1, captureState());
                 }
 
 
                 // Flag the inject annotation as null to prevent re-injection.
                 currentAnnotationIndex++;
                 if (currentAnnotationIndex < annotatedText.numAnnotations()) {
@@ -614,7 +560,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         }
 
     }
 
 
     public static final class AnnotatedTextFieldType extends StringFieldType {
 
@@ -625,7 +571,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         protected AnnotatedTextFieldType(AnnotatedTextFieldType ref) {
             super(ref);
         }
 
         @Override
         public void setIndexAnalyzer(NamedAnalyzer delegate) {
             if(delegate.analyzer() instanceof AnnotationAnalyzerWrapper){
@@ -655,7 +601,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 return new NormsFieldExistsQuery(name());
             }
         }
 
         @Override
         public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
             PhraseQuery.Builder builder = new PhraseQuery.Builder();
@@ -678,7 +624,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
 
             return builder.build();
         }
 
         @Override
         public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
 
@@ -713,12 +659,12 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
                 mpqb.add(multiTerms.toArray(new Term[0]));
             }
             return mpqb.build();
         }
     }
 
     private int positionIncrementGap;
     protected AnnotatedTextFieldMapper(String simpleName, AnnotatedTextFieldType fieldType, MappedFieldType defaultFieldType,
                                        int positionIncrementGap,
                                        Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         assert fieldType.tokenized();
@@ -774,6 +720,6 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
 
         if (includeDefaults || positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
             builder.field("position_increment_gap", positionIncrementGap);
         }
     }
 }
@@ -57,7 +57,7 @@ import java.util.Locale;
 import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
 import static org.hamcrest.CoreMatchers.equalTo;
 
 public class AnnotatedTextHighlighterTests extends ESTestCase {
 
     private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
                                        Query query, Locale locale, BreakIterator breakIterator,
@@ -63,7 +63,7 @@ public class SizeMappingTests extends ESSingleNodeTestCase {
         boolean points = false;
         for (IndexableField field : doc.rootDoc().getFields("_size")) {
             stored |= field.fieldType().stored();
-            points |= field.fieldType().pointDimensionCount() > 0;
+            points |= field.fieldType().pointIndexDimensionCount() > 0;
         }
         assertTrue(stored);
         assertTrue(points);
@@ -0,0 +1 @@
+6bb87c96d76cdc70be77261d39376613b0a8860c
@@ -1 +0,0 @@
-dce55e44af096cb9029cb26d22a14d8a9c5223ce
@@ -0,0 +1 @@
+1b29b3e3b080ec32073c007a1940e5aa7b195316
@@ -1 +0,0 @@
-d1d941758dc91ea7c2d515dd97b5d9b23b0f1874
@@ -0,0 +1 @@
+3757a90f73f505d40e6e200d1bacbff897f67548
@@ -1 +0,0 @@
-e884b8ce62a2102b24bfdbe8911674cd5b0d06d9
@@ -0,0 +1 @@
+c918cc5ac54e5a4dba4740e9e45a93ebd3c95c77
@@ -1 +0,0 @@
-3870972c07d7fa41a3bc58eb65952da53a16a406
@@ -0,0 +1 @@
+6cff1fa9ac25c840589d9a39a42ed4629b594cf4
@@ -1 +0,0 @@
-b8f0b73cfd01fc48735f1e06f16f7ccb47fc183e
@@ -0,0 +1 @@
+2a843337e03493ab5f3498b5dd232fa9abb9e765
@@ -1 +0,0 @@
-1d253fae720355e2ff40d529d62c2b3de403d0d0
@@ -0,0 +1 @@
+afda00bbee5fb8b4c36867eabb83267b3b2b8c10
@@ -1 +0,0 @@
-d9ca14bcda331a425d2d7c16022fdfd1c6942924
@@ -0,0 +1 @@
+a2d8bc6a0486cfa6b4de8c1103017b35c0193544
@@ -1 +0,0 @@
-200454bbfe5ec93d941d9a9d27703883122a4522
@@ -0,0 +1 @@
+79a3b80245a9cf00f24f5d6e298a8e1a887760f1
@@ -1 +0,0 @@
-47915a125e54c845a4b540201cda88dc7612da08
@@ -0,0 +1 @@
+37c9970ec38f64e7ccecbe17efbabdaabe8da2ea
@@ -1 +0,0 @@
-e5d49e1c6ee7550234539314e600e2893e13cb80
@@ -0,0 +1 @@
+7103c3482c728a9788922aa39e39a5ed2bdd3a11
@@ -1 +0,0 @@
-68081b60905f1b53b3705b9cfa4403b8aba44352
@@ -0,0 +1 @@
+89d389c1020fac58f462819ad822c9b09e52f563
@@ -1 +0,0 @@
-c99d56a453cecc7258300fd04b438713b944f1b9
@@ -0,0 +1 @@
+b62e34e522f3afa9c3f1655b97b995ff6ba2592d
@@ -1 +0,0 @@
-2471966478f829b6455556346014f02ff59f50c0
@@ -0,0 +1 @@
+0c92f6b03eb226586b431a834dca90a1f2cd85b8
@@ -1 +0,0 @@
-46e012be699251306ad13f4582c30d79cea4b307
@@ -0,0 +1 @@
+3a659287ba728f7a0d81694ce32e9ef741a13c19
@@ -1 +0,0 @@
-dea19dd9e971d2a0171e7d78662f732b45148a27
@@ -53,7 +53,7 @@ public class CustomFieldQuery extends FieldQuery {
     }
 
     @Override
-    void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
+    protected void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
         if (sourceQuery instanceof BoostQuery) {
             BoostQuery bq = (BoostQuery) sourceQuery;
             sourceQuery = bq.getQuery();
@@ -38,10 +38,10 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
@@ -609,7 +609,7 @@ public final class XMoreLikeThis {
     public Query like(int docNum) throws IOException {
         if (fieldNames == null) {
             // gather list of valid fields from lucene
-            Collection<String> fields = MultiFields.getIndexedFields(ir);
+            Collection<String> fields = FieldInfos.getIndexedFields(ir);
             fieldNames = fields.toArray(new String[fields.size()]);
         }
 
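`MultiFields.getIndexedFields` no longer exists in the new snapshot; the set of indexed field names now comes from a static helper on `FieldInfos`, exactly as the hunk above shows. The replacement in isolation:

```java
import java.util.Collection;

import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;

class IndexedFieldsSketch {
    // All field names that are indexed in any segment of the reader.
    static String[] indexedFieldNames(IndexReader reader) {
        Collection<String> fields = FieldInfos.getIndexedFields(reader);
        return fields.toArray(new String[0]);
    }
}
```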
@@ -54,13 +54,13 @@ final class TranslogLeafReader extends LeafReader {
     private final Translog.Index operation;
     private static final FieldInfo FAKE_SOURCE_FIELD
         = new FieldInfo(SourceFieldMapper.NAME, 1, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(),
-            0, 0, false);
+            0, 0, 0, false);
     private static final FieldInfo FAKE_ROUTING_FIELD
         = new FieldInfo(RoutingFieldMapper.NAME, 2, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(),
-            0, 0, false);
+            0, 0, 0, false);
     private static final FieldInfo FAKE_ID_FIELD
         = new FieldInfo(IdFieldMapper.NAME, 3, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(),
-            0, 0, false);
+            0, 0, 0, false);
     private final Version indexVersionCreated;
 
     TranslogLeafReader(Translog.Index operation, Version indexVersionCreated) {
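The extra `0` is not noise: the Lucene 8 `FieldInfo` constructor takes the point data-dimension and index-dimension counts as separate arguments, so these synthetic fields now pass three zeros (data dims, index dims, bytes per dim) ahead of the soft-deletes flag. A sketch of the call shape, with a hypothetical field name and number:

```java
import java.util.Collections;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;

class FakeFieldInfoSketch {
    // A non-indexed, synthetic FieldInfo: no postings, no doc values, no points.
    static FieldInfo fake(String name, int number) {
        return new FieldInfo(name, number, false, false, false,
            IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(),
            0, 0, 0, false); // pointDataDims, pointIndexDims, pointNumBytes, softDeletes
    }
}
```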
@@ -409,7 +409,7 @@ public abstract class MappedFieldType extends FieldType {
     }
 
     protected final void failIfNotIndexed() {
-        if (indexOptions() == IndexOptions.NONE && pointDimensionCount() == 0) {
+        if (indexOptions() == IndexOptions.NONE && pointDataDimensionCount() == 0) {
             // we throw an IAE rather than an ISE so that it translates to a 4xx code rather than 5xx code on the http layer
             throw new IllegalArgumentException("Cannot search on field [" + name() + "] since it is not indexed.");
         }
@@ -268,7 +268,7 @@ public class TextFieldMapper extends FieldMapper {
 
         @Override
         protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-            return new TokenStreamComponents(components.getTokenizer(), new FixedShingleFilter(components.getTokenStream(), 2));
+            return new TokenStreamComponents(components.getSource(), new FixedShingleFilter(components.getTokenStream(), 2));
         }
     }
 
@@ -293,7 +293,7 @@ public class TextFieldMapper extends FieldMapper {
         @Override
         protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
             TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false);
-            return new TokenStreamComponents(components.getTokenizer(), filter);
+            return new TokenStreamComponents(components.getSource(), filter);
         }
     }
 
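Both wrappers change for the same reason: in Lucene 8, `TokenStreamComponents` is no longer constructed from a `Tokenizer` but from a `Consumer<Reader>` source, retrieved via `getSource()`. A self-contained sketch of the wrapping pattern used in the two hunks above:

```java
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.FixedShingleFilter;

class WrapComponentsSketch {
    // Keep the original source, append one more filter to the token chain.
    static TokenStreamComponents withBigramShingles(TokenStreamComponents components) {
        TokenStream shingled = new FixedShingleFilter(components.getTokenStream(), 2);
        return new TokenStreamComponents(components.getSource(), shingled);
    }
}
```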
@@ -22,8 +22,9 @@ package org.elasticsearch.index.termvectors;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.memory.MemoryIndex;
@@ -98,7 +99,7 @@ public class TermVectorsService {
         try (Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), false, request.type(), request.id(), uidTerm)
                 .version(request.version()).versionType(request.versionType()));
              Engine.Searcher searcher = indexShard.acquireSearcher("term_vector")) {
-            Fields topLevelFields = MultiFields.getFields(get.searcher() != null ? get.searcher().reader() : searcher.reader());
+            Fields topLevelFields = fields(get.searcher() != null ? get.searcher().reader() : searcher.reader());
             DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
             /* from an artificial document */
             if (request.doc() != null) {
@@ -152,6 +153,25 @@ public class TermVectorsService {
         return termVectorsResponse;
     }
 
+    public static Fields fields(IndexReader reader) {
+        return new Fields() {
+            @Override
+            public Iterator<String> iterator() {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public Terms terms(String field) throws IOException {
+                return MultiTerms.getTerms(reader, field);
+            }
+
+            @Override
+            public int size() {
+                throw new UnsupportedOperationException();
+            }
+        };
+    }
+
     private static void handleFieldWildcards(IndexShard indexShard, TermVectorsRequest request) {
         Set<String> fieldNames = new HashSet<>();
         for (String pattern : request.selectedFields()) {
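`MultiFields.getFields` was removed from Lucene, so the service now adapts `MultiTerms.getTerms` into the `Fields` shape its callers expect; only `terms(String)` is implemented because that is the only method the term-vector paths invoke. A usage sketch against the adapter added above:

```java
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.elasticsearch.index.termvectors.TermVectorsService;

class TopLevelTermsSketch {
    // Merged top-level terms for one field via the new Fields adapter.
    static Terms topLevelTerms(IndexReader reader, String field) throws IOException {
        return TermVectorsService.fields(reader).terms(field);
    }
}
```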
@@ -270,7 +290,7 @@ public class TermVectorsService {
             }
         }
         /* and read vectors from it */
-        return MultiFields.getFields(index.createSearcher().getIndexReader());
+        return index.createSearcher().getIndexReader().getTermVectors(0);
     }
 
     private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVectorsRequest request) throws IOException {
@@ -360,5 +380,4 @@ public class TermVectorsService {
             return fields.size();
         }
     }
-
 }
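For vectors generated through a `MemoryIndex`, the removed `MultiFields.getFields(reader)` call is replaced by asking the reader for document 0's term vectors, which works because a `MemoryIndex` holds exactly one document. A sketch of the pattern, under those assumptions:

```java
import java.io.IOException;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.memory.MemoryIndex;

class MemoryIndexVectorsSketch {
    // A MemoryIndex indexes a single document, so its vectors live at doc 0.
    static Fields vectors(String field, String text) throws IOException {
        MemoryIndex index = new MemoryIndex(true, false); // store offsets, no payloads
        index.addField(field, text, new WhitespaceAnalyzer());
        return index.createSearcher().getIndexReader().getTermVectors(0);
    }
}
```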
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -72,7 +72,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
     public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
                                     double nonErrorLikelihood, int numCandidates) throws IOException {
         this(spellchecker, field, suggestMode, reader, nonErrorLikelihood,
-            numCandidates, null, null, MultiFields.getTerms(reader, field));
+            numCandidates, null, null, MultiTerms.getTerms(reader, field));
     }
 
     public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
@@ -21,7 +21,7 @@ package org.elasticsearch.search.suggest.phrase;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.spell.DirectSpellChecker;
@@ -78,14 +78,14 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
         for (int i = 0; i < numGenerators; i++) {
             PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
             DirectSpellChecker directSpellChecker = generator.createDirectSpellChecker();
-            Terms terms = MultiFields.getTerms(indexReader, generator.field());
+            Terms terms = MultiTerms.getTerms(indexReader, generator.field());
             if (terms != null) {
                 gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
                     indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
             }
         }
         final String suggestField = suggestion.getField();
-        final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
+        final Terms suggestTerms = MultiTerms.getTerms(indexReader, suggestField);
         if (gens.size() > 0 && suggestTerms != null) {
             final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(),
                 suggestion.getTokenLimit());
@@ -19,7 +19,7 @@
 package org.elasticsearch.search.suggest.phrase;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
@@ -45,7 +45,7 @@ public abstract class WordScorer {
     private final boolean useTotalTermFreq;
 
     public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
-        this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator);
+        this(reader, MultiTerms.getTerms(reader, field), field, realWordLikelyHood, separator);
     }
 
     public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
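Every `MultiFields.getTerms(reader, field)` call in the suggest package moves to `MultiTerms.getTerms(reader, field)`, the Lucene 8 home of the leaf-merging terms view; behavior is unchanged, including the `null` result when no segment has the field. The replacement in isolation:

```java
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;

class MergedTermsSketch {
    // Terms merged across all leaves; null if the field is absent everywhere.
    static Terms mergedTerms(IndexReader reader, String field) throws IOException {
        return MultiTerms.getTerms(reader, field);
    }
}
```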
@@ -77,7 +77,7 @@ public class DateFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(8, pointField.fieldType().pointNumBytes());
         assertFalse(pointField.fieldType().stored());
         assertEquals(1457654400000L, pointField.numericValue().longValue());
@@ -128,7 +128,7 @@ public class DateFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(1, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
     }
 
     public void testStore() throws Exception {
@@ -150,7 +150,7 @@ public class DateFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(3, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         IndexableField dvField = fields[1];
         assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
         IndexableField storedField = fields[2];
@@ -304,7 +304,7 @@ public class DateFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(8, pointField.fieldType().pointNumBytes());
         assertFalse(pointField.fieldType().stored());
         assertEquals(1457654400000L, pointField.numericValue().longValue());
@@ -78,7 +78,7 @@ public class IpFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(16, pointField.fieldType().pointNumBytes());
         assertFalse(pointField.fieldType().stored());
         assertEquals(new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::1"))), pointField.binaryValue());
@@ -129,7 +129,7 @@ public class IpFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(1, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::1"))), pointField.binaryValue());
     }
 
@@ -152,7 +152,7 @@ public class IpFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(3, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         IndexableField dvField = fields[1];
         assertEquals(DocValuesType.SORTED_SET, dvField.fieldType().docValuesType());
         IndexableField storedField = fields[2];
@@ -240,7 +240,7 @@ public class IpFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(16, pointField.fieldType().pointNumBytes());
         assertFalse(pointField.fieldType().stored());
         assertEquals(new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::1"))), pointField.binaryValue());
@@ -71,7 +71,7 @@ public class IpRangeFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField dvField = fields[0];
         assertEquals(DocValuesType.BINARY, dvField.fieldType().docValuesType());
         IndexableField pointField = fields[1];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
         IndexableField storedField = fields[2];
         assertTrue(storedField.fieldType().stored());
         String strVal =
@@ -66,7 +66,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertFalse(pointField.fieldType().stored());
         assertEquals(123, pointField.numericValue().doubleValue(), 0d);
         IndexableField dvField = fields[1];
@@ -117,7 +117,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(1, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(123, pointField.numericValue().doubleValue(), 0d);
     }
 
@@ -141,7 +141,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(3, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(123, pointField.numericValue().doubleValue(), 0d);
         IndexableField dvField = fields[1];
         assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
@@ -170,7 +170,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertEquals(123, pointField.numericValue().doubleValue(), 0d);
         IndexableField dvField = fields[1];
         assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
@@ -344,7 +344,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(2, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(1, pointField.fieldType().pointDimensionCount());
+        assertEquals(1, pointField.fieldType().pointIndexDimensionCount());
         assertFalse(pointField.fieldType().stored());
         assertEquals(123, pointField.numericValue().doubleValue(), 0d);
         IndexableField dvField = fields[1];
@@ -136,7 +136,7 @@ public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         assertEquals(DocValuesType.BINARY, dvField.fieldType().docValuesType());
 
         IndexableField pointField = fields[1];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
         assertFalse(pointField.fieldType().stored());
     }
 
@@ -188,7 +188,7 @@ public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(1, fields.length);
         IndexableField pointField = fields[0];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
     }
 
     @Override
@@ -216,7 +216,7 @@ public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField dvField = fields[0];
         assertEquals(DocValuesType.BINARY, dvField.fieldType().docValuesType());
         IndexableField pointField = fields[1];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
         IndexableField storedField = fields[2];
         assertTrue(storedField.fieldType().stored());
         String strVal = "5";
@@ -255,7 +255,7 @@ public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField dvField = fields[0];
         assertEquals(DocValuesType.BINARY, dvField.fieldType().docValuesType());
         IndexableField pointField = fields[1];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
 
         // date_range ignores the coerce parameter and epoch_millis date format truncates floats (see issue: #14641)
         if (type.equals("date_range") == false) {
@@ -353,7 +353,7 @@ public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField dvField = fields[0];
         assertEquals(DocValuesType.BINARY, dvField.fieldType().docValuesType());
         IndexableField pointField = fields[1];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
         assertFalse(pointField.fieldType().stored());
         storedField = fields[2];
         assertTrue(storedField.fieldType().stored());
@@ -406,7 +406,7 @@ public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase {
         IndexableField dvField = fields[0];
         assertEquals(DocValuesType.BINARY, dvField.fieldType().docValuesType());
         IndexableField pointField = fields[1];
-        assertEquals(2, pointField.fieldType().pointDimensionCount());
+        assertEquals(2, pointField.fieldType().pointIndexDimensionCount());
         assertFalse(pointField.fieldType().stored());
         IndexableField storedField = fields[2];
         assertTrue(storedField.fieldType().stored());
@@ -21,9 +21,6 @@ package org.elasticsearch.index.query;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedBinaryTokenStream;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.ExtendedCommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
@@ -52,7 +49,6 @@ import org.elasticsearch.test.AbstractQueryTestCase;
 import org.hamcrest.Matcher;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -408,26 +404,18 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuilder> {
         query.setAnalyzer(new MockGraphAnalyzer(createGiantGraphMultiTerms()));
         expectThrows(BooleanQuery.TooManyClauses.class, () -> query.parse(Type.PHRASE, STRING_FIELD_NAME, ""));
     }
 
     private static class MockGraphAnalyzer extends Analyzer {
-        final CannedBinaryTokenStream.BinaryToken[] tokens;
 
-        private MockGraphAnalyzer(CannedBinaryTokenStream.BinaryToken[] tokens ) {
-            this.tokens = tokens;
+        CannedBinaryTokenStream tokenStream;
+
+        MockGraphAnalyzer(CannedBinaryTokenStream.BinaryToken[] tokens) {
+            this.tokenStream = new CannedBinaryTokenStream(tokens);
         }
 
         @Override
         protected TokenStreamComponents createComponents(String fieldName) {
-            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-            return new TokenStreamComponents(tokenizer) {
-                @Override
-                public TokenStream getTokenStream() {
-                    return new CannedBinaryTokenStream(tokens);
-                }
-
-                @Override
-                protected void setReader(final Reader reader) {
-                }
-            };
+            return new TokenStreamComponents(r -> {}, tokenStream);
         }
     }
 
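With `TokenStreamComponents` accepting an arbitrary `Consumer<Reader>` source, the test analyzer no longer needs a `MockTokenizer` plus an anonymous subclass overriding `getTokenStream()` and `setReader()`; a no-op consumer and the canned stream are enough. The rewritten pattern as a standalone sketch (class name is illustrative):

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedBinaryTokenStream;

class CannedGraphAnalyzerSketch extends Analyzer {
    private final CannedBinaryTokenStream tokenStream;

    CannedGraphAnalyzerSketch(CannedBinaryTokenStream.BinaryToken... tokens) {
        this.tokenStream = new CannedBinaryTokenStream(tokens);
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(r -> {}, tokenStream); // the reader is ignored
    }
}
```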
@@ -21,7 +21,6 @@ package org.elasticsearch.index.query;
 
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
|
@ -256,7 +255,7 @@ public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLik
|
||||||
for (String fieldName : fieldNames) {
|
for (String fieldName : fieldNames) {
|
||||||
index.addField(fieldName, text, new WhitespaceAnalyzer());
|
index.addField(fieldName, text, new WhitespaceAnalyzer());
|
||||||
}
|
}
|
||||||
return MultiFields.getFields(index.createSearcher().getIndexReader());
|
return index.createSearcher().getIndexReader().getTermVectors(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@@ -56,7 +56,7 @@ public class LeafFieldsLookupTests extends ESTestCase {
         when(mapperService.fullName("alias")).thenReturn(fieldType);
 
         FieldInfo mockFieldInfo = new FieldInfo("field", 1, false, false, true,
-            IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, false);
+            IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
 
         LeafReader leafReader = mock(LeafReader.class);
         doAnswer(invocation -> {
@@ -36,7 +36,7 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.lucene.store.Directory;
@@ -110,7 +110,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         }
 
         DirectoryReader ir = DirectoryReader.open(writer);
-        WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
+        WordScorer wordScorer = new LaplaceScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
             new BytesRef(" "), 0.5f);
 
         NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
@@ -135,7 +135,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         assertThat(result.cutoffScore, equalTo(Double.MIN_VALUE));
 
         suggester = new NoisyChannelSpellChecker(0.85);
-        wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+        wordScorer = new LaplaceScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
             new BytesRef(" "), 0.5f);
         corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4,
             ir, "body", wordScorer, 0, 2).corrections;
@@ -159,7 +159,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
 
         // Test some of the highlighting corner cases
         suggester = new NoisyChannelSpellChecker(0.85);
-        wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+        wordScorer = new LaplaceScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
             new BytesRef(" "), 0.5f);
         corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4,
             ir, "body", wordScorer, 1, 2).corrections;
@@ -196,7 +196,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         spellchecker.setMinPrefix(1);
         spellchecker.setMinQueryLength(1);
         suggester = new NoisyChannelSpellChecker(0.85);
-        wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+        wordScorer = new LaplaceScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
             new BytesRef(" "), 0.5f);
         corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4,
             ir, "body", wordScorer, 1, 2).corrections;
@@ -204,7 +204,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
 
         generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85,
-            10, null, analyzer, MultiFields.getTerms(ir, "body"));
+            10, null, analyzer, MultiTerms.getTerms(ir, "body"));
         corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4,
             ir, "body", wordScorer, 1, 2).corrections;
         assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
@@ -212,7 +212,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
 
         // Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
         generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85,
-            10, null, analyzer, MultiFields.getTerms(ir, "body"));
+            10, null, analyzer, MultiTerms.getTerms(ir, "body"));
         corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir,
             "body", wordScorer, 1, 2).corrections;
         assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
@@ -280,7 +280,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         }
 
         DirectoryReader ir = DirectoryReader.open(writer);
-        LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
+        LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
             new BytesRef(" "), 0.5f);
         NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
         DirectSpellChecker spellchecker = new DirectSpellChecker();
@@ -288,7 +288,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir,
             0.95, 10);
         DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir,
-            0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
+            0.95, 10, wrapper, wrapper, MultiTerms.getTerms(ir, "body_reverse"));
         CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
 
         Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1,
@@ -388,7 +388,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         }
 
         DirectoryReader ir = DirectoryReader.open(writer);
-        WordScorer wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+        WordScorer wordScorer = new LinearInterpolatingScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
             new BytesRef(" "), 0.5, 0.4, 0.1);
 
         NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
@@ -406,7 +406,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         assertThat(corrections.length, equalTo(0));
         // assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
 
-        wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+        wordScorer = new LinearInterpolatingScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
             new BytesRef(" "), 0.5, 0.4, 0.1);
         corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4,
             ir, "body", wordScorer, 0, 3).corrections;
@@ -457,20 +457,20 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
         spellchecker.setMinPrefix(1);
         spellchecker.setMinQueryLength(1);
         suggester = new NoisyChannelSpellChecker(0.95);
-        wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
+        wordScorer = new LinearInterpolatingScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
             new BytesRef(" "), 0.5, 0.4, 0.1);
         corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4,
             ir, "body", wordScorer, 1, 3).corrections;
         assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
 
         generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95,
-            10, null, analyzer, MultiFields.getTerms(ir, "body"));
+            10, null, analyzer, MultiTerms.getTerms(ir, "body"));
         corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4,
             ir, "body", wordScorer, 1, 3).corrections;
         assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
 
 
-        wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+        wordScorer = new StupidBackoffScorer(ir, MultiTerms.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
             new BytesRef(" "), 0.4);
         corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2,
             ir, "body", wordScorer, 0, 3).corrections;
|
||||||
|
@ -492,7 +492,7 @@ public class NoisyChannelSpellCheckerTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
try (DirectoryReader ir = DirectoryReader.open(dir)) {
|
try (DirectoryReader ir = DirectoryReader.open(dir)) {
|
||||||
WordScorer wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.95d,
|
WordScorer wordScorer = new StupidBackoffScorer(ir, MultiTerms.getTerms(ir, "field"), "field", 0.95d,
|
||||||
new BytesRef(" "), 0.4f);
|
new BytesRef(" "), 0.4f);
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
|
|
|
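The recurring change in the spell-checker test hunks above is a Lucene 8 API move: the static helper that exposes a merged, index-wide view of a field's terms now lives on MultiTerms instead of MultiFields, with the same arguments and semantics. A minimal sketch of a migrated call site, assuming Lucene 8 on the classpath; the class and method names are illustrative, and "body" is simply the field the tests use.

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;

// Call-site migration sketch:
//   Lucene 7: Terms terms = MultiFields.getTerms(reader, "body");
//   Lucene 8: the helper moved to MultiTerms, same signature.
class MultiTermsMigrationSketch {
    static long countBodyTerms(DirectoryReader reader) throws IOException {
        // Merged view of the "body" terms across all segments; null if the field is absent.
        Terms terms = MultiTerms.getTerms(reader, "body");
        return terms == null ? 0 : terms.size();
    }
}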
@@ -28,7 +28,7 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.store.RAMDirectory;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.lucene.BytesRefs;
@@ -118,7 +118,7 @@ public abstract class SmoothingModelTestCase extends ESTestCase {
         writer.addDocument(doc);
         DirectoryReader ir = DirectoryReader.open(writer);
 
-        WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
+        WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiTerms.getTerms(ir, "field"), "field", 0.9d,
                 BytesRefs.toBytesRef(" "));
         assertWordScorer(wordScorer, testModel);
     }
@@ -77,7 +77,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("edgengram", MovedToAnalysisCommon.class)
         .put("keyword", MovedToAnalysisCommon.class)
         .put("letter", MovedToAnalysisCommon.class)
-        .put("lowercase", MovedToAnalysisCommon.class)
         .put("ngram", MovedToAnalysisCommon.class)
         .put("pathhierarchy", MovedToAnalysisCommon.class)
         .put("pattern", MovedToAnalysisCommon.class)
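The "lowercase" entry drops out of this map because Lucene 8 removes LowerCaseTokenizer from analysis-common, so there is no longer an upstream factory for this test to account for. The conventional stand-in is a letter tokenizer followed by a lowercase filter; the sketch below assumes that equivalence, and the analyzer class is illustrative rather than part of this commit.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;

// Tokenize on letter runs, then lowercase each token: behaviorally what the
// removed LowerCaseTokenizer did in a single step.
class LowercaseTokenizerStandIn extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new LetterTokenizer();
        TokenStream sink = new LowerCaseFilter(source);
        return new TokenStreamComponents(source, sink);
    }
}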
@@ -198,7 +198,7 @@ public class SourceOnlySnapshot {
         List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
         for (FieldInfo fieldInfo : fieldInfos) {
             fieldInfoCopy.add(new FieldInfo(fieldInfo.name, fieldInfo.number,
-                false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, fieldInfo.attributes(), 0, 0,
+                false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, fieldInfo.attributes(), 0, 0, 0,
                 fieldInfo.isSoftDeletesField()));
         }
         FieldInfos newFieldInfos = new FieldInfos(fieldInfoCopy.toArray(new FieldInfo[0]));
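The extra 0 in this hunk reflects a wider FieldInfo constructor in Lucene 8: the old single point-dimension count is split into separate data-dimension and index-dimension counts, so three integers (data dimensions, index dimensions, bytes per dimension) now precede the soft-deletes flag. A sketch of the call with the arguments labeled; the comments follow the 8.0 parameter names as I read them, and the wrapper class is illustrative.

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;

// Rebuild a FieldInfo stripped of indexing structures, as the snapshot code above does.
class FieldInfoCopySketch {
    static FieldInfo strippedCopy(FieldInfo fieldInfo) {
        return new FieldInfo(fieldInfo.name, fieldInfo.number,
            false, false, false,          // storeTermVector, omitNorms, storePayloads
            IndexOptions.NONE, DocValuesType.NONE,
            -1,                           // dvGen: no doc-values generation
            fieldInfo.attributes(),
            0,                            // pointDataDimensionCount
            0,                            // pointIndexDimensionCount (new in 8.0)
            0,                            // pointNumBytes
            fieldInfo.isSoftDeletesField());
    }
}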
@@ -148,7 +148,7 @@ public class FieldSubsetReaderTests extends ESTestCase {
         assertEquals(Integer.BYTES, points.getBytesPerDimension());
 
         // number of dimensions
-        assertEquals(1, points.getNumDimensions());
+        assertEquals(1, points.getNumIndexDimensions());
 
         // walk the trees: we should see stuff in fieldA
         AtomicBoolean sawDoc = new AtomicBoolean(false);
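Correspondingly, PointValues in Lucene 8 replaces getNumDimensions() with a data/index pair, getNumDataDimensions() and getNumIndexDimensions(), allowing a field to store more dimensions per point than it uses to organize the BKD tree. A minimal sketch reading both counts; "fieldA" matches the test's field, while the class and method names are illustrative.

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;

// Read both dimension counts for a points field; they differ only when some
// dimensions are stored with each point but not used to split the BKD tree.
class PointDimensionsSketch {
    static void printDims(LeafReader leaf) throws IOException {
        PointValues points = leaf.getPointValues("fieldA");
        if (points != null) {
            System.out.println("data dims:  " + points.getNumDataDimensions());
            System.out.println("index dims: " + points.getNumIndexDimensions());
        }
    }
}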
@@ -0,0 +1 @@
+3757a90f73f505d40e6e200d1bacbff897f67548
@@ -1 +0,0 @@
-e884b8ce62a2102b24bfdbe8911674cd5b0d06d9