Run spotless and exclude checkstyle on modules module (#1442)

Signed-off-by: Owais Kazi <owaiskazi19@gmail.com>
Owais Kazi 2021-10-27 13:24:43 -07:00 committed by GitHub
parent 3f6e1df9eb
commit 37ac3788a3
609 changed files with 32807 additions and 21983 deletions

View File

@ -25,6 +25,8 @@
<suppress files="plugins[/\\]examples" checks="." />
<!-- Excludes checkstyle run on libs module -->
<suppress files="libs" checks="." />
<!-- Excludes checkstyle run on modules module -->
<suppress files="modules" checks="." />
<!--
Truly temporary suppressions suppression of snippets included in

View File

@ -57,24 +57,6 @@ import org.opensearch.gradle.BuildPlugin
// Do not add new sub-projects here!
def projectPathsToExclude = [
':modules:aggs-matrix-stats',
':modules:analysis-common',
':modules:ingest-common',
':modules:ingest-geoip',
':modules:ingest-user-agent',
':modules:lang-expression',
':modules:lang-mustache',
':modules:lang-painless',
':modules:lang-painless:spi',
':modules:mapper-extras',
':modules:parent-join',
':modules:percolator',
':modules:rank-eval',
':modules:reindex',
':modules:repository-url',
':modules:systemd',
':modules:tasks',
':modules:transport-netty4',
':plugins:analysis-icu',
':plugins:analysis-kuromoji',
':plugins:analysis-nori',

View File

@ -45,7 +45,9 @@ import static java.util.Collections.singletonList;
public class MatrixAggregationPlugin extends Plugin implements SearchPlugin {
@Override
public List<AggregationSpec> getAggregations() {
return singletonList(new AggregationSpec(MatrixStatsAggregationBuilder.NAME, MatrixStatsAggregationBuilder::new,
new MatrixStatsParser()).addResultReader(InternalMatrixStats::new));
return singletonList(
new AggregationSpec(MatrixStatsAggregationBuilder.NAME, MatrixStatsAggregationBuilder::new, new MatrixStatsParser())
.addResultReader(InternalMatrixStats::new)
);
}
}
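For context, the AggregationSpec registered above is what exposes matrix_stats to callers; a minimal, hedged sketch of building that aggregation on the client side, mirroring the builder usage in MatrixStatsAggregatorTests later in this commit (the aggregation name "my_agg", the field names, and the surrounding SearchSourceBuilder are illustrative assumptions, not part of this diff):

// Hedged sketch, not part of this commit: construct the aggregation registered above.
// Names ("my_agg", "price", "quantity") are illustrative assumptions.
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
    .fields(Arrays.asList("price", "quantity"));
SearchSourceBuilder source = new SearchSourceBuilder().aggregation(aggBuilder);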

View File

@ -54,8 +54,13 @@ public class InternalMatrixStats extends InternalAggregation implements MatrixSt
private final MatrixStatsResults results;
/** per shard ctor */
InternalMatrixStats(String name, long count, RunningStats multiFieldStatsResults, MatrixStatsResults results,
Map<String, Object> metadata) {
InternalMatrixStats(
String name,
long count,
RunningStats multiFieldStatsResults,
MatrixStatsResults results,
Map<String, Object> metadata
) {
super(name, metadata);
assert count >= 0;
this.stats = multiFieldStatsResults;
@ -248,7 +253,7 @@ public class InternalMatrixStats extends InternalAggregation implements MatrixSt
public InternalAggregation reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
// merge stats across all shards
List<InternalAggregation> aggs = new ArrayList<>(aggregations);
aggs.removeIf(p -> ((InternalMatrixStats)p).stats == null);
aggs.removeIf(p -> ((InternalMatrixStats) p).stats == null);
// return empty result iff all stats are null
if (aggs.isEmpty()) {
@ -284,7 +289,6 @@ public class InternalMatrixStats extends InternalAggregation implements MatrixSt
if (super.equals(obj) == false) return false;
InternalMatrixStats other = (InternalMatrixStats) obj;
return Objects.equals(this.stats, other.stats) &&
Objects.equals(this.results, other.results);
return Objects.equals(this.stats, other.stats) && Objects.equals(this.results, other.results);
}
}

View File

@ -39,18 +39,25 @@ import org.opensearch.search.aggregations.Aggregation;
public interface MatrixStats extends Aggregation {
/** return the total document count */
long getDocCount();
/** return total field count (differs from docCount if there are missing values) */
long getFieldCount(String field);
/** return the field mean */
double getMean(String field);
/** return the field variance */
double getVariance(String field);
/** return the skewness of the distribution */
double getSkewness(String field);
/** return the kurtosis of the distribution */
double getKurtosis(String field);
/** return the covariance between field x and field y */
double getCovariance(String fieldX, String fieldY);
/** return the correlation coefficient of field x and field y */
double getCorrelation(String fieldX, String fieldY);
}
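A short, hedged sketch of consuming this interface once a search has run; the builder side appears in the tests later in this diff, while the aggregation name, field names, and the SearchResponse variable here are illustrative assumptions:

// Hedged sketch, not part of this commit: read matrix_stats values via the interface above.
// "response" is assumed to be an org.opensearch.action.search.SearchResponse; names are illustrative.
MatrixStats stats = response.getAggregations().get("my_stats");
long docs = stats.getDocCount();                         // total document count
double meanPrice = stats.getMean("price");               // per-field mean
double cov = stats.getCovariance("price", "quantity");   // covariance of the two fields
double corr = stats.getCorrelation("price", "quantity"); // correlation coefficient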

View File

@ -55,8 +55,11 @@ public class MatrixStatsAggregationBuilder extends ArrayValuesSourceAggregationB
super(name);
}
protected MatrixStatsAggregationBuilder(MatrixStatsAggregationBuilder clone,
AggregatorFactories.Builder factoriesBuilder, Map<String, Object> metadata) {
protected MatrixStatsAggregationBuilder(
MatrixStatsAggregationBuilder clone,
AggregatorFactories.Builder factoriesBuilder,
Map<String, Object> metadata
) {
super(clone, factoriesBuilder, metadata);
this.multiValueMode = clone.multiValueMode;
}
@ -88,10 +91,12 @@ public class MatrixStatsAggregationBuilder extends ArrayValuesSourceAggregationB
}
@Override
protected MatrixStatsAggregatorFactory innerBuild(QueryShardContext queryShardContext,
Map<String, ValuesSourceConfig> configs,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
protected MatrixStatsAggregatorFactory innerBuild(
QueryShardContext queryShardContext,
Map<String, ValuesSourceConfig> configs,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder
) throws IOException {
return new MatrixStatsAggregatorFactory(name, configs, multiValueMode, queryShardContext, parent, subFactoriesBuilder, metadata);
}

View File

@ -60,8 +60,14 @@ final class MatrixStatsAggregator extends MetricsAggregator {
/** array of descriptive stats, per shard, needed to compute the correlation */
ObjectArray<RunningStats> stats;
MatrixStatsAggregator(String name, Map<String, ValuesSource.Numeric> valuesSources, SearchContext context,
Aggregator parent, MultiValueMode multiValueMode, Map<String,Object> metadata) throws IOException {
MatrixStatsAggregator(
String name,
Map<String, ValuesSource.Numeric> valuesSources,
SearchContext context,
Aggregator parent,
MultiValueMode multiValueMode,
Map<String, Object> metadata
) throws IOException {
super(name, context, parent, metadata);
if (valuesSources != null && !valuesSources.isEmpty()) {
this.valuesSources = new ArrayValuesSource.NumericArrayValuesSource(valuesSources, multiValueMode);
@ -77,8 +83,7 @@ final class MatrixStatsAggregator extends MetricsAggregator {
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
if (valuesSources == null) {
return LeafBucketCollector.NO_OP_COLLECTOR;
}

View File

@ -51,36 +51,38 @@ final class MatrixStatsAggregatorFactory extends ArrayValuesSourceAggregatorFact
private final MultiValueMode multiValueMode;
MatrixStatsAggregatorFactory(String name,
Map<String, ValuesSourceConfig> configs,
MultiValueMode multiValueMode,
QueryShardContext queryShardContext,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
MatrixStatsAggregatorFactory(
String name,
Map<String, ValuesSourceConfig> configs,
MultiValueMode multiValueMode,
QueryShardContext queryShardContext,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata
) throws IOException {
super(name, configs, queryShardContext, parent, subFactoriesBuilder, metadata);
this.multiValueMode = multiValueMode;
}
@Override
protected Aggregator createUnmapped(SearchContext searchContext,
Aggregator parent,
Map<String, Object> metadata)
throws IOException {
protected Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map<String, Object> metadata) throws IOException {
return new MatrixStatsAggregator(name, null, searchContext, parent, multiValueMode, metadata);
}
@Override
protected Aggregator doCreateInternal(Map<String, ValuesSource> valuesSources,
SearchContext searchContext,
Aggregator parent,
CardinalityUpperBound cardinality,
Map<String, Object> metadata) throws IOException {
protected Aggregator doCreateInternal(
Map<String, ValuesSource> valuesSources,
SearchContext searchContext,
Aggregator parent,
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {
Map<String, ValuesSource.Numeric> typedValuesSources = new HashMap<>(valuesSources.size());
for (Map.Entry<String, ValuesSource> entry : valuesSources.entrySet()) {
if (entry.getValue() instanceof ValuesSource.Numeric == false) {
throw new AggregationExecutionException("ValuesSource type " + entry.getValue().toString() +
"is not supported for aggregation " + this.name());
throw new AggregationExecutionException(
"ValuesSource type " + entry.getValue().toString() + "is not supported for aggregation " + this.name()
);
}
// TODO: There must be a better option than this.
typedValuesSources.put(entry.getKey(), (ValuesSource.Numeric) entry.getValue());

View File

@ -49,8 +49,13 @@ public class MatrixStatsParser extends ArrayValuesSourceParser.NumericValuesSour
}
@Override
protected boolean token(String aggregationName, String currentFieldName, XContentParser.Token token, XContentParser parser,
Map<ParseField, Object> otherOptions) throws IOException {
protected boolean token(
String aggregationName,
String currentFieldName,
XContentParser.Token token,
XContentParser parser,
Map<ParseField, Object> otherOptions
) throws IOException {
if (ArrayValuesSourceAggregationBuilder.MULTIVALUE_MODE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
if (token == XContentParser.Token.VALUE_STRING) {
otherOptions.put(ArrayValuesSourceAggregationBuilder.MULTIVALUE_MODE_FIELD, parser.text());
@ -61,10 +66,14 @@ public class MatrixStatsParser extends ArrayValuesSourceParser.NumericValuesSour
}
@Override
protected MatrixStatsAggregationBuilder createFactory(String aggregationName, ValuesSourceType valuesSourceType,
ValueType targetValueType, Map<ParseField, Object> otherOptions) {
protected MatrixStatsAggregationBuilder createFactory(
String aggregationName,
ValuesSourceType valuesSourceType,
ValueType targetValueType,
Map<ParseField, Object> otherOptions
) {
MatrixStatsAggregationBuilder builder = new MatrixStatsAggregationBuilder(aggregationName);
String mode = (String)otherOptions.get(ArrayValuesSourceAggregationBuilder.MULTIVALUE_MODE_FIELD);
String mode = (String) otherOptions.get(ArrayValuesSourceAggregationBuilder.MULTIVALUE_MODE_FIELD);
if (mode != null) {
builder.multiValueMode(MultiValueMode.fromString(mode));
}

View File

@ -213,7 +213,7 @@ class MatrixStatsResults implements Writeable {
// update skewness
results.skewness.put(fieldName, Math.sqrt(results.docCount) * results.skewness.get(fieldName) / Math.pow(var, 1.5D));
// update kurtosis
results.kurtosis.put(fieldName, (double)results.docCount * results.kurtosis.get(fieldName) / (var * var));
results.kurtosis.put(fieldName, (double) results.docCount * results.kurtosis.get(fieldName) / (var * var));
// update variances
results.variances.put(fieldName, results.variances.get(fieldName) / nM1);
}
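Read as formulas, and assuming var at this point still holds the running (unnormalized) sum of squared deviations M2, the three updates above are the usual moment-based estimators, with M3 and M4 the running third- and fourth-power deviation sums and n the document count:

\mathrm{variance} = \frac{M_2}{n-1}, \qquad
\mathrm{skewness} = \frac{\sqrt{n}\, M_3}{M_2^{3/2}} = \frac{m_3}{m_2^{3/2}}, \qquad
\mathrm{kurtosis} = \frac{n\, M_4}{M_2^{2}} = \frac{m_4}{m_2^{2}},
\quad\text{where } m_k = M_k / n.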
@ -248,8 +248,7 @@ class MatrixStatsResults implements Writeable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
MatrixStatsResults that = (MatrixStatsResults) o;
return Objects.equals(results, that.results) &&
Objects.equals(correlation, that.correlation);
return Objects.equals(results, that.results) && Objects.equals(correlation, that.correlation);
}
@Override

View File

@ -164,8 +164,11 @@ public class ParsedMatrixStats extends ParsedAggregation implements MatrixStats
return values.get(fieldName);
}
private static final ObjectParser<ParsedMatrixStats, Void> PARSER =
new ObjectParser<>(ParsedMatrixStats.class.getSimpleName(), true, ParsedMatrixStats::new);
private static final ObjectParser<ParsedMatrixStats, Void> PARSER = new ObjectParser<>(
ParsedMatrixStats.class.getSimpleName(),
true,
ParsedMatrixStats::new
);
static {
declareAggregationFields(PARSER);
PARSER.declareLong(ParsedMatrixStats::setDocCount, CommonFields.DOC_COUNT);
@ -200,21 +203,27 @@ public class ParsedMatrixStats extends ParsedAggregation implements MatrixStats
Map<String, Double> covariances;
Map<String, Double> correlations;
private static final ObjectParser<ParsedMatrixStatsResult, Void> RESULT_PARSER =
new ObjectParser<>(ParsedMatrixStatsResult.class.getSimpleName(), true, ParsedMatrixStatsResult::new);
private static final ObjectParser<ParsedMatrixStatsResult, Void> RESULT_PARSER = new ObjectParser<>(
ParsedMatrixStatsResult.class.getSimpleName(),
true,
ParsedMatrixStatsResult::new
);
static {
RESULT_PARSER.declareString((result, name) -> result.name = name,
new ParseField(InternalMatrixStats.Fields.NAME));
RESULT_PARSER.declareLong((result, count) -> result.count = count,
new ParseField(InternalMatrixStats.Fields.COUNT));
RESULT_PARSER.declareDouble((result, mean) -> result.mean = mean,
new ParseField(InternalMatrixStats.Fields.MEAN));
RESULT_PARSER.declareDouble((result, variance) -> result.variance = variance,
new ParseField(InternalMatrixStats.Fields.VARIANCE));
RESULT_PARSER.declareDouble((result, skewness) -> result.skewness = skewness,
new ParseField(InternalMatrixStats.Fields.SKEWNESS));
RESULT_PARSER.declareDouble((result, kurtosis) -> result.kurtosis = kurtosis,
new ParseField(InternalMatrixStats.Fields.KURTOSIS));
RESULT_PARSER.declareString((result, name) -> result.name = name, new ParseField(InternalMatrixStats.Fields.NAME));
RESULT_PARSER.declareLong((result, count) -> result.count = count, new ParseField(InternalMatrixStats.Fields.COUNT));
RESULT_PARSER.declareDouble((result, mean) -> result.mean = mean, new ParseField(InternalMatrixStats.Fields.MEAN));
RESULT_PARSER.declareDouble(
(result, variance) -> result.variance = variance,
new ParseField(InternalMatrixStats.Fields.VARIANCE)
);
RESULT_PARSER.declareDouble(
(result, skewness) -> result.skewness = skewness,
new ParseField(InternalMatrixStats.Fields.SKEWNESS)
);
RESULT_PARSER.declareDouble(
(result, kurtosis) -> result.kurtosis = kurtosis,
new ParseField(InternalMatrixStats.Fields.KURTOSIS)
);
RESULT_PARSER.declareObject((ParsedMatrixStatsResult result, Map<String, Object> covars) -> {
result.covariances = new LinkedHashMap<>(covars.size());

View File

@ -93,25 +93,25 @@ public class RunningStats implements Writeable, Cloneable {
public RunningStats(StreamInput in) throws IOException {
this();
// read doc count
docCount = (Long)in.readGenericValue();
docCount = (Long) in.readGenericValue();
// read fieldSum
fieldSum = convertIfNeeded((Map<String, Double>)in.readGenericValue());
fieldSum = convertIfNeeded((Map<String, Double>) in.readGenericValue());
// counts
counts = convertIfNeeded((Map<String, Long>)in.readGenericValue());
counts = convertIfNeeded((Map<String, Long>) in.readGenericValue());
// means
means = convertIfNeeded((Map<String, Double>)in.readGenericValue());
means = convertIfNeeded((Map<String, Double>) in.readGenericValue());
// variances
variances = convertIfNeeded((Map<String, Double>)in.readGenericValue());
variances = convertIfNeeded((Map<String, Double>) in.readGenericValue());
// skewness
skewness = convertIfNeeded((Map<String, Double>)in.readGenericValue());
skewness = convertIfNeeded((Map<String, Double>) in.readGenericValue());
// kurtosis
kurtosis = convertIfNeeded((Map<String, Double>)in.readGenericValue());
kurtosis = convertIfNeeded((Map<String, Double>) in.readGenericValue());
// read covariances
covariances = convertIfNeeded((Map<String, HashMap<String, Double>>)in.readGenericValue());
covariances = convertIfNeeded((Map<String, HashMap<String, Double>>) in.readGenericValue());
}
// Convert Map to HashMap if it isn't
private static <K, V> HashMap<K, V> convertIfNeeded(Map<K,V> map) {
private static <K, V> HashMap<K, V> convertIfNeeded(Map<K, V> map) {
if (map instanceof HashMap) {
return (HashMap<K, V>) map;
} else {
@ -235,7 +235,7 @@ public class RunningStats implements Writeable, Cloneable {
this.counts.put(fieldName, other.counts.get(fieldName).longValue());
this.fieldSum.put(fieldName, other.fieldSum.get(fieldName).doubleValue());
this.variances.put(fieldName, other.variances.get(fieldName).doubleValue());
this.skewness.put(fieldName , other.skewness.get(fieldName).doubleValue());
this.skewness.put(fieldName, other.skewness.get(fieldName).doubleValue());
this.kurtosis.put(fieldName, other.kurtosis.get(fieldName).doubleValue());
if (other.covariances.containsKey(fieldName)) {
this.covariances.put(fieldName, other.covariances.get(fieldName));
@ -338,14 +338,14 @@ public class RunningStats implements Writeable, Cloneable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
RunningStats that = (RunningStats) o;
return docCount == that.docCount &&
Objects.equals(fieldSum, that.fieldSum) &&
Objects.equals(counts, that.counts) &&
Objects.equals(means, that.means) &&
Objects.equals(variances, that.variances) &&
Objects.equals(skewness, that.skewness) &&
Objects.equals(kurtosis, that.kurtosis) &&
Objects.equals(covariances, that.covariances);
return docCount == that.docCount
&& Objects.equals(fieldSum, that.fieldSum)
&& Objects.equals(counts, that.counts)
&& Objects.equals(means, that.means)
&& Objects.equals(variances, that.variances)
&& Objects.equals(skewness, that.skewness)
&& Objects.equals(kurtosis, that.kurtosis)
&& Objects.equals(covariances, that.covariances);
}
@Override

View File

@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.support;
import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.index.fielddata.NumericDoubleValues;
import org.opensearch.search.MultiValueMode;
import org.opensearch.search.aggregations.support.ValuesSource;
import java.io.IOException;
import java.util.Map;

View File

@ -50,13 +50,13 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValuesSourceAggregationBuilder<AB>>
extends AbstractAggregationBuilder<AB> {
public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValuesSourceAggregationBuilder<AB>> extends
AbstractAggregationBuilder<AB> {
public static final ParseField MULTIVALUE_MODE_FIELD = new ParseField("mode");
public abstract static class LeafOnly<AB extends ArrayValuesSourceAggregationBuilder<AB>>
extends ArrayValuesSourceAggregationBuilder<AB> {
public abstract static class LeafOnly<AB extends ArrayValuesSourceAggregationBuilder<AB>> extends ArrayValuesSourceAggregationBuilder<
AB> {
protected LeafOnly(String name) {
super(name);
@ -65,8 +65,9 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
protected LeafOnly(LeafOnly<AB> clone, Builder factoriesBuilder, Map<String, Object> metadata) {
super(clone, factoriesBuilder, metadata);
if (factoriesBuilder.count() > 0) {
throw new AggregationInitializationException("Aggregator [" + name + "] of type ["
+ getType() + "] cannot accept sub-aggregations");
throw new AggregationInitializationException(
"Aggregator [" + name + "] of type [" + getType() + "] cannot accept sub-aggregations"
);
}
}
@ -79,8 +80,9 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
@Override
public AB subAggregations(Builder subFactories) {
throw new AggregationInitializationException("Aggregator [" + name + "] of type [" +
getType() + "] cannot accept sub-aggregations");
throw new AggregationInitializationException(
"Aggregator [" + name + "] of type [" + getType() + "] cannot accept sub-aggregations"
);
}
@Override
@ -102,8 +104,11 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
super(name);
}
protected ArrayValuesSourceAggregationBuilder(ArrayValuesSourceAggregationBuilder<AB> clone,
Builder factoriesBuilder, Map<String, Object> metadata) {
protected ArrayValuesSourceAggregationBuilder(
ArrayValuesSourceAggregationBuilder<AB> clone,
Builder factoriesBuilder,
Map<String, Object> metadata
) {
super(clone, factoriesBuilder, metadata);
this.fields = new ArrayList<>(clone.fields);
this.userValueTypeHint = clone.userValueTypeHint;
@ -112,8 +117,7 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
this.missing = clone.missing;
}
protected ArrayValuesSourceAggregationBuilder(StreamInput in)
throws IOException {
protected ArrayValuesSourceAggregationBuilder(StreamInput in) throws IOException {
super(in);
read(in);
}
@ -123,7 +127,7 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
*/
@SuppressWarnings("unchecked")
private void read(StreamInput in) throws IOException {
fields = (ArrayList<String>)in.readGenericValue();
fields = (ArrayList<String>) in.readGenericValue();
userValueTypeHint = in.readOptionalWriteable(ValueType::readFromStream);
format = in.readOptionalString();
missingMap = in.readMap();
@ -203,8 +207,11 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
}
@Override
protected final ArrayValuesSourceAggregatorFactory doBuild(QueryShardContext queryShardContext, AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
protected final ArrayValuesSourceAggregatorFactory doBuild(
QueryShardContext queryShardContext,
AggregatorFactory parent,
Builder subFactoriesBuilder
) throws IOException {
Map<String, ValuesSourceConfig> configs = resolveConfig(queryShardContext);
ArrayValuesSourceAggregatorFactory factory = innerBuild(queryShardContext, configs, parent, subFactoriesBuilder);
return factory;
@ -213,17 +220,27 @@ public abstract class ArrayValuesSourceAggregationBuilder<AB extends ArrayValues
protected Map<String, ValuesSourceConfig> resolveConfig(QueryShardContext queryShardContext) {
HashMap<String, ValuesSourceConfig> configs = new HashMap<>();
for (String field : fields) {
ValuesSourceConfig config = ValuesSourceConfig.resolveUnregistered(queryShardContext, userValueTypeHint, field, null,
missingMap.get(field), null, format, CoreValuesSourceType.BYTES);
ValuesSourceConfig config = ValuesSourceConfig.resolveUnregistered(
queryShardContext,
userValueTypeHint,
field,
null,
missingMap.get(field),
null,
format,
CoreValuesSourceType.BYTES
);
configs.put(field, config);
}
return configs;
}
protected abstract ArrayValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext,
Map<String, ValuesSourceConfig> configs,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException;
protected abstract ArrayValuesSourceAggregatorFactory innerBuild(
QueryShardContext queryShardContext,
Map<String, ValuesSourceConfig> configs,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder
) throws IOException;
@Override
public final XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {

View File

@ -43,24 +43,29 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public abstract class ArrayValuesSourceAggregatorFactory
extends AggregatorFactory {
public abstract class ArrayValuesSourceAggregatorFactory extends AggregatorFactory {
protected Map<String, ValuesSourceConfig> configs;
public ArrayValuesSourceAggregatorFactory(String name, Map<String, ValuesSourceConfig> configs,
QueryShardContext queryShardContext, AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
public ArrayValuesSourceAggregatorFactory(
String name,
Map<String, ValuesSourceConfig> configs,
QueryShardContext queryShardContext,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata
) throws IOException {
super(name, queryShardContext, parent, subFactoriesBuilder, metadata);
this.configs = configs;
}
@Override
public Aggregator createInternal(SearchContext searchContext,
Aggregator parent,
CardinalityUpperBound cardinality,
Map<String, Object> metadata) throws IOException {
public Aggregator createInternal(
SearchContext searchContext,
Aggregator parent,
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {
HashMap<String, ValuesSource> valuesSources = new HashMap<>();
for (Map.Entry<String, ValuesSourceConfig> config : configs.entrySet()) {
@ -79,9 +84,8 @@ public abstract class ArrayValuesSourceAggregatorFactory
* Create the {@linkplain Aggregator} when none of the configured
* fields can be resolved to a {@link ValuesSource}.
*/
protected abstract Aggregator createUnmapped(SearchContext searchContext,
Aggregator parent,
Map<String, Object> metadata) throws IOException;
protected abstract Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map<String, Object> metadata)
throws IOException;
/**
* Create the {@linkplain Aggregator} when any of the configured
@ -91,10 +95,12 @@ public abstract class ArrayValuesSourceAggregatorFactory
* that the {@link Aggregator} created by this method
* will be asked to collect.
*/
protected abstract Aggregator doCreateInternal(Map<String, ValuesSource> valuesSources,
SearchContext searchContext,
Aggregator parent,
CardinalityUpperBound cardinality,
Map<String, Object> metadata) throws IOException;
protected abstract Aggregator doCreateInternal(
Map<String, ValuesSource> valuesSources,
SearchContext searchContext,
Aggregator parent,
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException;
}

View File

@ -80,8 +80,7 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
}
@Override
public final ArrayValuesSourceAggregationBuilder<?> parse(String aggregationName, XContentParser parser)
throws IOException {
public final ArrayValuesSourceAggregationBuilder<?> parse(String aggregationName, XContentParser parser) throws IOException {
List<String> fields = null;
String format = null;
@ -98,12 +97,22 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
} else if (formattable && CommonFields.FORMAT.match(currentFieldName, parser.getDeprecationHandler())) {
format = parser.text();
} else if (CommonFields.VALUE_TYPE.match(currentFieldName, parser.getDeprecationHandler())) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]. " +
"Multi-field aggregations do not support scripts.");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token "
+ token
+ " ["
+ currentFieldName
+ "] in ["
+ aggregationName
+ "]. "
+ "Multi-field aggregations do not support scripts."
);
} else if (!token(aggregationName, currentFieldName, token, parser, otherOptions)) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "].");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]."
);
}
} else if (token == XContentParser.Token.START_OBJECT) {
if (CommonFields.MISSING.match(currentFieldName, parser.getDeprecationHandler())) {
@ -112,41 +121,69 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
parseMissingAndAdd(aggregationName, currentFieldName, parser, missingMap);
}
} else if (Script.SCRIPT_PARSE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]. " +
"Multi-field aggregations do not support scripts.");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token "
+ token
+ " ["
+ currentFieldName
+ "] in ["
+ aggregationName
+ "]. "
+ "Multi-field aggregations do not support scripts."
);
} else if (!token(aggregationName, currentFieldName, token, parser, otherOptions)) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "].");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]."
);
}
} else if (token == XContentParser.Token.START_ARRAY) {
if (Script.SCRIPT_PARSE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]. " +
"Multi-field aggregations do not support scripts.");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token "
+ token
+ " ["
+ currentFieldName
+ "] in ["
+ aggregationName
+ "]. "
+ "Multi-field aggregations do not support scripts."
);
} else if (CommonFields.FIELDS.match(currentFieldName, parser.getDeprecationHandler())) {
fields = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token == XContentParser.Token.VALUE_STRING) {
fields.add(parser.text());
} else {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "].");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]."
);
}
}
} else if (!token(aggregationName, currentFieldName, token, parser, otherOptions)) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "].");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]."
);
}
} else if (!token(aggregationName, currentFieldName, token, parser, otherOptions)) {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "].");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]."
);
}
}
ArrayValuesSourceAggregationBuilder<?> factory = createFactory(aggregationName, this.valuesSourceType, this.targetValueType,
otherOptions);
ArrayValuesSourceAggregationBuilder<?> factory = createFactory(
aggregationName,
this.valuesSourceType,
this.targetValueType,
otherOptions
);
if (fields != null) {
factory.fields(fields);
}
@ -159,8 +196,12 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
return factory;
}
private void parseMissingAndAdd(final String aggregationName, final String currentFieldName,
XContentParser parser, final Map<String, Object> missing) throws IOException {
private void parseMissingAndAdd(
final String aggregationName,
final String currentFieldName,
XContentParser parser,
final Map<String, Object> missing
) throws IOException {
XContentParser.Token token = parser.currentToken();
if (token == null) {
token = parser.nextToken();
@ -169,15 +210,18 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
if (token == XContentParser.Token.FIELD_NAME) {
final String fieldName = parser.currentName();
if (missing.containsKey(fieldName)) {
throw new ParsingException(parser.getTokenLocation(),
"Missing field [" + fieldName + "] already defined as [" + missing.get(fieldName)
+ "] in [" + aggregationName + "].");
throw new ParsingException(
parser.getTokenLocation(),
"Missing field [" + fieldName + "] already defined as [" + missing.get(fieldName) + "] in [" + aggregationName + "]."
);
}
parser.nextToken();
missing.put(fieldName, parser.objectText());
} else {
throw new ParsingException(parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]");
throw new ParsingException(
parser.getTokenLocation(),
"Unexpected token " + token + " [" + currentFieldName + "] in [" + aggregationName + "]"
);
}
}
@ -199,10 +243,12 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
* method
* @return the created factory
*/
protected abstract ArrayValuesSourceAggregationBuilder<?> createFactory(String aggregationName,
ValuesSourceType valuesSourceType,
ValueType targetValueType,
Map<ParseField, Object> otherOptions);
protected abstract ArrayValuesSourceAggregationBuilder<?> createFactory(
String aggregationName,
ValuesSourceType valuesSourceType,
ValueType targetValueType,
Map<ParseField, Object> otherOptions
);
/**
* Allows subclasses of {@link ArrayValuesSourceParser} to parse extra
@ -227,6 +273,11 @@ public abstract class ArrayValuesSourceParser<VS extends ValuesSource> implement
* @throws IOException
* if an error occurs whilst parsing
*/
protected abstract boolean token(String aggregationName, String currentFieldName, XContentParser.Token token, XContentParser parser,
Map<ParseField, Object> otherOptions) throws IOException;
protected abstract boolean token(
String aggregationName,
String currentFieldName,
XContentParser.Token token,
XContentParser parser,
Map<ParseField, Object> otherOptions
) throws IOException;
}

View File

@ -107,35 +107,35 @@ public class InternalMatrixStatsTests extends InternalAggregationTestCase<Intern
MatrixStatsResults matrixStatsResults = instance.getResults();
Map<String, Object> metadata = instance.getMetadata();
switch (between(0, 3)) {
case 0:
name += randomAlphaOfLength(5);
break;
case 1:
String[] fields = Arrays.copyOf(this.fields, this.fields.length + 1);
fields[fields.length - 1] = "field_" + (fields.length - 1);
double[] values = new double[fields.length];
for (int i = 0; i < fields.length; i++) {
values[i] = randomDouble() * 200;
}
runningStats = new RunningStats();
runningStats.add(fields, values);
break;
case 2:
if (matrixStatsResults == null) {
matrixStatsResults = new MatrixStatsResults(runningStats);
} else {
matrixStatsResults = null;
}
break;
case 3:
default:
if (metadata == null) {
metadata = new HashMap<>(1);
} else {
metadata = new HashMap<>(instance.getMetadata());
}
metadata.put(randomAlphaOfLength(15), randomInt());
break;
case 0:
name += randomAlphaOfLength(5);
break;
case 1:
String[] fields = Arrays.copyOf(this.fields, this.fields.length + 1);
fields[fields.length - 1] = "field_" + (fields.length - 1);
double[] values = new double[fields.length];
for (int i = 0; i < fields.length; i++) {
values[i] = randomDouble() * 200;
}
runningStats = new RunningStats();
runningStats.add(fields, values);
break;
case 2:
if (matrixStatsResults == null) {
matrixStatsResults = new MatrixStatsResults(runningStats);
} else {
matrixStatsResults = null;
}
break;
case 3:
default:
if (metadata == null) {
metadata = new HashMap<>(1);
} else {
metadata = new HashMap<>(instance.getMetadata());
}
metadata.put(randomAlphaOfLength(15), randomInt());
break;
}
return new InternalMatrixStats(name, docCount, runningStats, matrixStatsResults, metadata);
}
@ -159,7 +159,7 @@ public class InternalMatrixStatsTests extends InternalAggregationTestCase<Intern
double valueB = randomDouble();
bValues.add(valueB);
runningStats.add(new String[]{"a", "b"}, new double[]{valueA, valueB});
runningStats.add(new String[] { "a", "b" }, new double[] { valueA, valueB });
if (++valuePerShardCounter == valuesPerShard) {
shardResults.add(new InternalMatrixStats("_name", 1L, runningStats, null, Collections.emptyMap()));
runningStats = new RunningStats();
@ -176,7 +176,11 @@ public class InternalMatrixStatsTests extends InternalAggregationTestCase<Intern
ScriptService mockScriptService = mockScriptService();
MockBigArrays bigArrays = new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService());
InternalAggregation.ReduceContext context = InternalAggregation.ReduceContext.forFinalReduction(
bigArrays, mockScriptService, b -> {}, PipelineTree.EMPTY);
bigArrays,
mockScriptService,
b -> {},
PipelineTree.EMPTY
);
InternalMatrixStats reduced = (InternalMatrixStats) shardResults.get(0).reduce(shardResults, context);
multiPassStats.assertNearlyEqual(reduced.getResults());
}

View File

@ -54,18 +54,17 @@ import java.util.List;
public class MatrixStatsAggregatorTests extends AggregatorTestCase {
public void testNoData() throws Exception {
MappedFieldType ft =
new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE);
MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE);
try (Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
try (Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
if (randomBoolean()) {
indexWriter.addDocument(Collections.singleton(new StringField("another_field", "value", Field.Store.NO)));
}
try (IndexReader reader = indexWriter.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Collections.singletonList("field"));
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg").fields(
Collections.singletonList("field")
);
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ft);
assertNull(stats.getStats());
assertEquals(0L, stats.getDocCount());
@ -76,15 +75,15 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase {
public void testUnmapped() throws Exception {
MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE);
try (Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
try (Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
if (randomBoolean()) {
indexWriter.addDocument(Collections.singleton(new StringField("another_field", "value", Field.Store.NO)));
}
try (IndexReader reader = indexWriter.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Collections.singletonList("bogus"));
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg").fields(
Collections.singletonList("bogus")
);
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ft);
assertNull(stats.getStats());
assertEquals(0L, stats.getDocCount());
@ -98,8 +97,7 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase {
String fieldB = "b";
MappedFieldType ftB = new NumberFieldMapper.NumberFieldType(fieldB, NumberFieldMapper.NumberType.DOUBLE);
try (Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
try (Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
int numDocs = scaledRandomIntBetween(8192, 16384);
Double[] fieldAValues = new Double[numDocs];
@ -118,8 +116,9 @@ public class MatrixStatsAggregatorTests extends AggregatorTestCase {
multiPassStats.computeStats(Arrays.asList(fieldAValues), Arrays.asList(fieldBValues));
try (IndexReader reader = indexWriter.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Arrays.asList(fieldA, fieldB));
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg").fields(
Arrays.asList(fieldA, fieldB)
);
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
multiPassStats.assertNearlyEqual(stats);
assertTrue(MatrixAggregationInspectionHelper.hasValue(stats));

View File

@ -142,8 +142,8 @@ class MultiPassStats {
assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4));
assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4));
// covariances
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey), stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey), stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
// correlation
assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
@ -166,8 +166,8 @@ class MultiPassStats {
assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4));
assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4));
// covariances
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey), stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey), stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
// correlation
assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));

View File

@ -49,9 +49,9 @@ public class RunningStatsTests extends BaseMatrixStatsTestCase {
int start = 0;
RunningStats stats = null;
List<Double> fieldAShard, fieldBShard;
for (int s = 0; s < numShards-1; start = ++s * (int)obsPerShard) {
fieldAShard = fieldA.subList(start, start + (int)obsPerShard);
fieldBShard = fieldB.subList(start, start + (int)obsPerShard);
for (int s = 0; s < numShards - 1; start = ++s * (int) obsPerShard) {
fieldAShard = fieldA.subList(start, start + (int) obsPerShard);
fieldBShard = fieldB.subList(start, start + (int) obsPerShard);
if (stats == null) {
stats = createRunningStats(fieldAShard, fieldBShard);
} else {

View File

@ -37,7 +37,7 @@ import org.opensearch.test.rest.yaml.ClientYamlTestCandidate;
import org.opensearch.test.rest.yaml.OpenSearchClientYamlSuiteTestCase;
public class MatrixStatsClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestCase {
public MatrixStatsClientYamlTestSuiteIT(@Name("yaml")ClientYamlTestCandidate testCandidate) {
public MatrixStatsClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
super(testCandidate);
}

View File

@ -55,8 +55,12 @@ public class QueryStringWithAnalyzersIT extends OpenSearchIntegTestCase {
* Validates that we properly split fields using the word delimiter filter in query_string.
*/
public void testCustomWordDelimiterQueryString() {
assertAcked(client().admin().indices().prepareCreate("test")
.setSettings(Settings.builder()
assertAcked(
client().admin()
.indices()
.prepareCreate("test")
.setSettings(
Settings.builder()
.put("analysis.analyzer.my_analyzer.type", "custom")
.put("analysis.analyzer.my_analyzer.tokenizer", "whitespace")
.put("analysis.analyzer.my_analyzer.filter", "custom_word_delimiter")
@ -67,21 +71,17 @@ public class QueryStringWithAnalyzersIT extends OpenSearchIntegTestCase {
.put("analysis.filter.custom_word_delimiter.catenate_words", "false")
.put("analysis.filter.custom_word_delimiter.split_on_case_change", "false")
.put("analysis.filter.custom_word_delimiter.split_on_numerics", "false")
.put("analysis.filter.custom_word_delimiter.stem_english_possessive", "false"))
.addMapping("type1",
"field1", "type=text,analyzer=my_analyzer",
"field2", "type=text,analyzer=my_analyzer"));
.put("analysis.filter.custom_word_delimiter.stem_english_possessive", "false")
)
.addMapping("type1", "field1", "type=text,analyzer=my_analyzer", "field2", "type=text,analyzer=my_analyzer")
);
client().prepareIndex("test", "type1", "1").setSource(
"field1", "foo bar baz",
"field2", "not needed").get();
client().prepareIndex("test", "type1", "1").setSource("field1", "foo bar baz", "field2", "not needed").get();
refresh();
SearchResponse response = client()
.prepareSearch("test")
.setQuery(
queryStringQuery("foo.baz").defaultOperator(Operator.AND)
.field("field1").field("field2")).get();
SearchResponse response = client().prepareSearch("test")
.setQuery(queryStringQuery("foo.baz").defaultOperator(Operator.AND).field("field1").field("field2"))
.get();
assertHitCount(response, 1L);
}
}

View File

@ -45,16 +45,14 @@ import org.opensearch.index.analysis.TokenFilterFactory;
/**
* Factory for ASCIIFoldingFilter.
*/
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory
implements NormalizingTokenFilterFactory {
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
public static final ParseField PRESERVE_ORIGINAL = new ParseField("preserve_original");
public static final boolean DEFAULT_PRESERVE_ORIGINAL = false;
private final boolean preserveOriginal;
public ASCIIFoldingTokenFilterFactory(IndexSettings indexSettings, Environment environment,
String name, Settings settings) {
public ASCIIFoldingTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
preserveOriginal = settings.getAsBoolean(PRESERVE_ORIGINAL.getPreferredName(), DEFAULT_PRESERVE_ORIGINAL);
}

View File

@ -43,8 +43,10 @@ import java.util.Map;
public class AnalysisPainlessExtension implements PainlessExtension {
private static final Whitelist WHITELIST =
WhitelistLoader.loadFromResourceFiles(AnalysisPainlessExtension.class, "painless_whitelist.txt");
private static final Whitelist WHITELIST = WhitelistLoader.loadFromResourceFiles(
AnalysisPainlessExtension.class,
"painless_whitelist.txt"
);
@Override
public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() {

View File

@ -124,7 +124,7 @@ public abstract class AnalysisPredicateScript {
AnalysisPredicateScript newInstance();
}
public static final String[] PARAMETERS = new String[]{ "token" };
public static final String[] PARAMETERS = new String[] { "token" };
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("analysis", Factory.class);
}

View File

@ -111,12 +111,12 @@ public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (outputUnigrams) {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() +
"] cannot be used to parse synonyms");
}
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
}
}
return this;

View File

@ -43,7 +43,7 @@ import org.opensearch.index.analysis.AbstractTokenizerFactory;
import java.util.HashSet;
import java.util.Set;
public class CharGroupTokenizerFactory extends AbstractTokenizerFactory{
public class CharGroupTokenizerFactory extends AbstractTokenizerFactory {
static final String MAX_TOKEN_LENGTH = "max_token_length";
@ -67,8 +67,7 @@ public class CharGroupTokenizerFactory extends AbstractTokenizerFactory{
if (c.length() == 1) {
tokenizeOnChars.add((int) c.charAt(0));
}
else if (c.charAt(0) == '\\') {
} else if (c.charAt(0) == '\\') {
tokenizeOnChars.add((int) parseEscapedChar(c));
} else {
switch (c) {
@ -98,8 +97,7 @@ public class CharGroupTokenizerFactory extends AbstractTokenizerFactory{
int len = s.length();
char c = s.charAt(0);
if (c == '\\') {
if (1 >= len)
throw new RuntimeException("Invalid escaped char in [" + s + "]");
if (1 >= len) throw new RuntimeException("Invalid escaped char in [" + s + "]");
c = s.charAt(1);
switch (c) {
case '\\':

View File

@ -81,16 +81,16 @@ public interface CharMatcher {
@Override
public boolean isTokenChar(int c) {
switch (Character.getType(c)) {
case Character.START_PUNCTUATION:
case Character.END_PUNCTUATION:
case Character.OTHER_PUNCTUATION:
case Character.CONNECTOR_PUNCTUATION:
case Character.DASH_PUNCTUATION:
case Character.INITIAL_QUOTE_PUNCTUATION:
case Character.FINAL_QUOTE_PUNCTUATION:
return true;
default:
return false;
case Character.START_PUNCTUATION:
case Character.END_PUNCTUATION:
case Character.OTHER_PUNCTUATION:
case Character.CONNECTOR_PUNCTUATION:
case Character.DASH_PUNCTUATION:
case Character.INITIAL_QUOTE_PUNCTUATION:
case Character.FINAL_QUOTE_PUNCTUATION:
return true;
default:
return false;
}
}
},
@ -98,13 +98,13 @@ public interface CharMatcher {
@Override
public boolean isTokenChar(int c) {
switch (Character.getType(c)) {
case Character.CURRENCY_SYMBOL:
case Character.MATH_SYMBOL:
case Character.OTHER_SYMBOL:
case Character.MODIFIER_SYMBOL:
return true;
default:
return false;
case Character.CURRENCY_SYMBOL:
case Character.MATH_SYMBOL:
case Character.OTHER_SYMBOL:
case Character.MODIFIER_SYMBOL:
return true;
default:
return false;
}
}
}
@ -112,36 +112,39 @@ public interface CharMatcher {
final class Builder {
private final Set<CharMatcher> matchers;
Builder() {
matchers = new HashSet<>();
}
public Builder or(CharMatcher matcher) {
matchers.add(matcher);
return this;
}
public CharMatcher build() {
switch (matchers.size()) {
case 0:
return new CharMatcher() {
@Override
public boolean isTokenChar(int c) {
return false;
}
};
case 1:
return matchers.iterator().next();
default:
return new CharMatcher() {
@Override
public boolean isTokenChar(int c) {
for (CharMatcher matcher : matchers) {
if (matcher.isTokenChar(c)) {
return true;
}
case 0:
return new CharMatcher() {
@Override
public boolean isTokenChar(int c) {
return false;
}
return false;
}
};
};
case 1:
return matchers.iterator().next();
default:
return new CharMatcher() {
@Override
public boolean isTokenChar(int c) {
for (CharMatcher matcher : matchers) {
if (matcher.isTokenChar(c)) {
return true;
}
}
return false;
}
};
}
}
}

View File

@ -174,12 +174,19 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
private final SetOnce<ScriptService> scriptService = new SetOnce<>();
@Override
public Collection<Object> createComponents(Client client, ClusterService clusterService, ThreadPool threadPool,
ResourceWatcherService resourceWatcherService, ScriptService scriptService,
NamedXContentRegistry xContentRegistry, Environment environment,
NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry,
IndexNameExpressionResolver expressionResolver,
Supplier<RepositoriesService> repositoriesServiceSupplier) {
public Collection<Object> createComponents(
Client client,
ClusterService clusterService,
ThreadPool threadPool,
ResourceWatcherService resourceWatcherService,
ScriptService scriptService,
NamedXContentRegistry xContentRegistry,
Environment environment,
NodeEnvironment nodeEnvironment,
NamedWriteableRegistry namedWriteableRegistry,
IndexNameExpressionResolver expressionResolver,
Supplier<RepositoriesService> repositoriesServiceSupplier
) {
this.scriptService.set(scriptService);
return Collections.emptyList();
}
@ -253,8 +260,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.put("classic", ClassicFilterFactory::new);
filters.put("czech_stem", CzechStemTokenFilterFactory::new);
filters.put("common_grams", requiresAnalysisSettings(CommonGramsTokenFilterFactory::new));
filters.put("condition",
requiresAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get())));
filters.put(
"condition",
requiresAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get()))
);
filters.put("decimal_digit", DecimalDigitFilterFactory::new);
filters.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
filters.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
@ -262,9 +271,11 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
deprecationLogger.deprecate("edgeNGram_deprecation",
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
deprecationLogger.deprecate(
"edgeNGram_deprecation",
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead."
);
return new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings);
});
filters.put("elision", requiresAnalysisSettings(ElisionTokenFilterFactory::new));
@ -287,17 +298,21 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.put("multiplexer", MultiplexerTokenFilterFactory::new);
filters.put("ngram", NGramTokenFilterFactory::new);
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
deprecationLogger.deprecate("nGram_deprecation",
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
deprecationLogger.deprecate(
"nGram_deprecation",
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead."
);
return new NGramTokenFilterFactory(indexSettings, environment, name, settings);
});
filters.put("pattern_capture", requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
filters.put("porter_stem", PorterStemTokenFilterFactory::new);
filters.put("predicate_token_filter",
requiresAnalysisSettings((i, e, n, s) -> new PredicateTokenFilterScriptFactory(i, n, s, scriptService.get())));
filters.put(
"predicate_token_filter",
requiresAnalysisSettings((i, e, n, s) -> new PredicateTokenFilterScriptFactory(i, n, s, scriptService.get()))
);
filters.put("remove_duplicates", RemoveDuplicatesTokenFilterFactory::new);
filters.put("reverse", ReverseTokenFilterFactory::new);
filters.put("russian_stem", RussianStemTokenFilterFactory::new);
@ -336,18 +351,22 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
tokenizers.put("thai", ThaiTokenizerFactory::new);
tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_6_0)) {
deprecationLogger.deprecate("nGram_tokenizer_deprecation",
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [ngram] instead.");
deprecationLogger.deprecate(
"nGram_tokenizer_deprecation",
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [ngram] instead."
);
}
return new NGramTokenizerFactory(indexSettings, environment, name, settings);
});
tokenizers.put("ngram", NGramTokenizerFactory::new);
tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_6_0)) {
deprecationLogger.deprecate("edgeNGram_tokenizer_deprecation",
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [edge_ngram] instead.");
deprecationLogger.deprecate(
"edgeNGram_tokenizer_deprecation",
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [edge_ngram] instead."
);
}
return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
});
@ -370,13 +389,27 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.OPENSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.OPENSEARCH,
() -> new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE,
() -> new SnowballAnalyzer("English", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"standard_html_strip",
CachingStrategy.OPENSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)
)
);
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"pattern",
CachingStrategy.OPENSEARCH,
() -> new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)
)
);
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"snowball",
CachingStrategy.LUCENE,
() -> new SnowballAnalyzer("English", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)
)
);
// Language analyzers:
analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, ArabicAnalyzer::new));
@ -387,8 +420,13 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, BulgarianAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, CatalanAnalyzer::new));
// chinese analyzer: only for old indices, best effort
analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.ONE,
() -> new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)));
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"chinese",
CachingStrategy.ONE,
() -> new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)
)
);
analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, CJKAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, CzechAnalyzer::new));
analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, DanishAnalyzer::new));
@ -426,9 +464,11 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
filters.add(PreConfiguredCharFilter.openSearchVersion("htmlStrip", false, (reader, version) -> {
if (version.onOrAfter(LegacyESVersion.V_6_3_0)) {
deprecationLogger.deprecate("htmlStrip_deprecation",
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead.");
deprecationLogger.deprecate(
"htmlStrip_deprecation",
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead."
);
}
return new HTMLStripCharFilter(reader);
}));
@ -447,44 +487,60 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.add(PreConfiguredTokenFilter.singleton("cjk_bigram", false, CJKBigramFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("cjk_width", true, CJKWidthFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("classic", false, ClassicFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("common_grams", false, false,
input -> new CommonGramsFilter(input, CharArraySet.EMPTY_SET)));
filters.add(
PreConfiguredTokenFilter.singleton("common_grams", false, false, input -> new CommonGramsFilter(input, CharArraySet.EMPTY_SET))
);
filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new));
filters.add(PreConfiguredTokenFilter.openSearchVersion("delimited_payload_filter", false, (input, version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead");
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
);
}
if (version.onOrAfter(LegacyESVersion.V_6_2_0)) {
deprecationLogger.deprecate("analysis_delimited_payload_filter",
"Deprecated [delimited_payload_filter] used, replaced by [delimited_payload]");
deprecationLogger.deprecate(
"analysis_delimited_payload_filter",
"Deprecated [delimited_payload_filter] used, replaced by [delimited_payload]"
);
}
return new DelimitedPayloadTokenFilter(input,
return new DelimitedPayloadTokenFilter(
input,
DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER);
DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER
);
}));
filters.add(PreConfiguredTokenFilter.singleton("delimited_payload", false, input ->
new DelimitedPayloadTokenFilter(input,
DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER)));
filters.add(
PreConfiguredTokenFilter.singleton(
"delimited_payload",
false,
input -> new DelimitedPayloadTokenFilter(
input,
DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER
)
)
);
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer())));
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input ->
new EdgeNGramTokenFilter(input, 1)));
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input -> new EdgeNGramTokenFilter(input, 1)));
filters.add(PreConfiguredTokenFilter.openSearchVersion("edgeNGram", false, false, (reader, version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [edge_ngram] instead.");
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [edge_ngram] instead."
);
} else {
deprecationLogger.deprecate("edgeNGram_deprecation",
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
deprecationLogger.deprecate(
"edgeNGram_deprecation",
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead."
);
}
return new EdgeNGramTokenFilter(reader, 1);
}));
filters.add(PreConfiguredTokenFilter.singleton("elision", true,
input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES)));
}));
filters.add(
PreConfiguredTokenFilter.singleton("elision", true, input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES))
);
filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer())));
filters.add(PreConfiguredTokenFilter.singleton("german_normalization", true, GermanNormalizationFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("german_stem", false, GermanStemFilter::new));
@ -494,19 +550,30 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.add(PreConfiguredTokenFilter.singleton("kstem", false, KStemFilter::new));
// TODO this one seems useless
filters.add(PreConfiguredTokenFilter.singleton("length", false, input -> new LengthFilter(input, 0, Integer.MAX_VALUE)));
filters.add(PreConfiguredTokenFilter.singleton("limit", false, input ->
new LimitTokenCountFilter(input,
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
filters.add(
PreConfiguredTokenFilter.singleton(
"limit",
false,
input -> new LimitTokenCountFilter(
input,
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS
)
)
);
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
filters.add(PreConfiguredTokenFilter.openSearchVersion("nGram", false, false, (reader, version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [ngram] instead.");
throw new IllegalArgumentException(
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [ngram] instead."
);
} else {
deprecationLogger.deprecate("nGram_deprecation",
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
deprecationLogger.deprecate(
"nGram_deprecation",
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead."
);
}
return new NGramTokenFilter(reader, 1, 2, false);
}));
@ -531,28 +598,39 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.add(PreConfiguredTokenFilter.singleton("sorani_normalization", true, SoraniNormalizationFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("stemmer", false, PorterStemFilter::new));
// The stop filter is in lucene-core but the English stop words set is in lucene-analyzers-common
filters.add(PreConfiguredTokenFilter.singleton("stop", false,
input -> new StopFilter(input, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)));
filters.add(
PreConfiguredTokenFilter.singleton("stop", false, input -> new StopFilter(input, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET))
);
filters.add(PreConfiguredTokenFilter.singleton("trim", true, TrimFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("word_delimiter", false, false, input ->
new WordDelimiterFilter(input,
WordDelimiterFilter.GENERATE_WORD_PARTS
| WordDelimiterFilter.GENERATE_NUMBER_PARTS
| WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
| WordDelimiterFilter.SPLIT_ON_NUMERICS
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null)));
filters.add(
PreConfiguredTokenFilter.singleton(
"word_delimiter",
false,
false,
input -> new WordDelimiterFilter(
input,
WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS
| WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE,
null
)
)
);
filters.add(PreConfiguredTokenFilter.openSearchVersion("word_delimiter_graph", false, false, (input, version) -> {
boolean adjustOffsets = version.onOrAfter(LegacyESVersion.V_7_3_0);
return new WordDelimiterGraphFilter(input, adjustOffsets, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
WordDelimiterGraphFilter.GENERATE_WORD_PARTS
| WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
| WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
| WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
| WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null);
return new WordDelimiterGraphFilter(
input,
adjustOffsets,
WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
| WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
| WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE,
null
);
}));
return filters;
}
@ -582,17 +660,21 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
// Temporary shim for aliases. TODO deprecate after they are moved
tokenizers.add(PreConfiguredTokenizer.openSearchVersion("nGram", (version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_6_0)) {
deprecationLogger.deprecate("nGram_tokenizer_deprecation",
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [ngram] instead.");
deprecationLogger.deprecate(
"nGram_tokenizer_deprecation",
"The [nGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [ngram] instead."
);
}
return new NGramTokenizer();
}));
tokenizers.add(PreConfiguredTokenizer.openSearchVersion("edgeNGram", (version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_6_0)) {
deprecationLogger.deprecate("edgeNGram_tokenizer_deprecation",
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [edge_ngram] instead.");
deprecationLogger.deprecate(
"edgeNGram_tokenizer_deprecation",
"The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [edge_ngram] instead."
);
}
if (version.onOrAfter(LegacyESVersion.V_7_3_0)) {
return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);


@ -47,7 +47,7 @@ import org.opensearch.index.analysis.TokenFilterFactory;
public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CommonGramsTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CommonGramsTokenFilterFactory.class);
private final CharArraySet words;
@ -63,7 +63,8 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
if (this.words == null) {
throw new IllegalArgumentException(
"missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
"missing or empty [common_words] or [common_words_path] configuration for common_grams token filter"
);
}
}
@ -82,11 +83,12 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
} else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
}
return this;
}
}


@ -38,7 +38,6 @@ import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
/**
* Uses the {@link org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter} to decompound tokens using a dictionary.
*
@ -52,7 +51,6 @@ public class DictionaryCompoundWordTokenFilterFactory extends AbstractCompoundWo
@Override
public TokenStream create(TokenStream tokenStream) {
return new DictionaryCompoundWordTokenFilter(tokenStream, wordList, minWordSize,
minSubwordSize, maxSubwordSize, onlyLongestMatch);
return new DictionaryCompoundWordTokenFilter(tokenStream, wordList, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
}
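
For context, the single-line return above calls Lucene's DictionaryCompoundWordTokenFilter constructor directly. The following stand-alone sketch shows that same constructor in isolation; the class name, tokenizer choice, toy dictionary, and numeric values are illustrative assumptions, not part of this commit.

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

import java.io.StringReader;
import java.util.Arrays;

// Illustrative sketch only; not part of the change above.
public class DecompoundSketch {
    public static TokenStream decompound(String text) {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(text));
        // Toy dictionary; a real factory loads its word list from the analysis settings.
        CharArraySet dictionary = new CharArraySet(Arrays.asList("foot", "ball"), true);
        // Argument order matches the factory call above:
        // stream, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch
        return new DictionaryCompoundWordTokenFilter(tokenizer, dictionary, 5, 2, 15, false);
    }
}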


@ -43,10 +43,9 @@ import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(EdgeNGramTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(EdgeNGramTokenFilterFactory.class);
private final int minGram;
@ -67,10 +66,13 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
}
static int parseSide(String side) {
switch(side) {
case "front": return SIDE_FRONT;
case "back": return SIDE_BACK;
default: throw new IllegalArgumentException("invalid side: " + side);
switch (side) {
case "front":
return SIDE_FRONT;
case "back":
return SIDE_BACK;
default:
throw new IllegalArgumentException("invalid side: " + side);
}
}
@ -102,10 +104,11 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
} else {
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
}


@ -54,5 +54,7 @@ public class EstonianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Esto
}
@Override
public EstonianAnalyzer get() { return this.analyzer; }
public EstonianAnalyzer get() {
return this.analyzer;
}
}


@ -41,7 +41,6 @@ import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.opensearch.index.analysis.Analysis;
/**
* Builds an OpenRefine Fingerprint analyzer. Uses the default settings from the various components
* (Standard Tokenizer and lowercase + stop + fingerprint + ascii-folding filters)
@ -53,7 +52,7 @@ public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider<A
public static int DEFAULT_MAX_OUTPUT_SIZE = 255;
public static CharArraySet DEFAULT_STOP_WORDS = CharArraySet.EMPTY_SET;
public static final char DEFAULT_SEPARATOR = ' ';
public static final char DEFAULT_SEPARATOR = ' ';
private final FingerprintAnalyzer analyzer;
@ -61,7 +60,7 @@ public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider<A
super(indexSettings, name, settings);
char separator = parseSeparator(settings);
int maxOutputSize = settings.getAsInt(MAX_OUTPUT_SIZE.getPreferredName(),DEFAULT_MAX_OUTPUT_SIZE);
int maxOutputSize = settings.getAsInt(MAX_OUTPUT_SIZE.getPreferredName(), DEFAULT_MAX_OUTPUT_SIZE);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, DEFAULT_STOP_WORDS);
this.analyzer = new FingerprintAnalyzer(stopWords, separator, maxOutputSize);
@ -80,7 +79,8 @@ public class FingerprintAnalyzerProvider extends AbstractIndexAnalyzerProvider<A
return customSeparator.charAt(0);
}
throw new IllegalArgumentException("Setting [separator] must be a single, non-null character. ["
+ customSeparator + "] was provided.");
throw new IllegalArgumentException(
"Setting [separator] must be a single, non-null character. [" + customSeparator + "] was provided."
);
}
}


@ -47,7 +47,7 @@ import static org.opensearch.analysis.common.FingerprintAnalyzerProvider.MAX_OUT
public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(FingerprintTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(FingerprintTokenFilterFactory.class);
private final char separator;
private final int maxOutputSize;
@ -69,10 +69,11 @@ public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
} else {
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
}


@ -45,8 +45,7 @@ public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider<GreekAn
GreekAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GreekAnalyzer(
Analysis.parseStopWords(env, settings, GreekAnalyzer.getDefaultStopSet()));
analyzer = new GreekAnalyzer(Analysis.parseStopWords(env, settings, GreekAnalyzer.getDefaultStopSet()));
analyzer.setVersion(version);
}


@ -73,7 +73,14 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
@Override
public TokenStream create(TokenStream tokenStream) {
return new HyphenationCompoundWordTokenFilter(tokenStream, hyphenationTree, wordList, minWordSize,
minSubwordSize, maxSubwordSize, onlyLongestMatch);
return new HyphenationCompoundWordTokenFilter(
tokenStream,
hyphenationTree,
wordList,
minWordSize,
minSubwordSize,
maxSubwordSize,
onlyLongestMatch
);
}
}


@ -64,7 +64,8 @@ public class KeepTypesFilterFactory extends AbstractTokenFilterFactory {
static final String KEEP_TYPES_MODE_KEY = "mode";
enum KeepTypesMode {
INCLUDE, EXCLUDE;
INCLUDE,
EXCLUDE;
@Override
public String toString() {
@ -78,8 +79,15 @@ public class KeepTypesFilterFactory extends AbstractTokenFilterFactory {
} else if (lc.equals("exclude")) {
return EXCLUDE;
} else {
throw new IllegalArgumentException("`keep_types` tokenfilter mode can only be [" + KeepTypesMode.INCLUDE + "] or ["
+ KeepTypesMode.EXCLUDE + "] but was [" + modeString + "].");
throw new IllegalArgumentException(
"`keep_types` tokenfilter mode can only be ["
+ KeepTypesMode.INCLUDE
+ "] or ["
+ KeepTypesMode.EXCLUDE
+ "] but was ["
+ modeString
+ "]."
);
}
}
}
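
The include/exclude handling above follows a small enum-plus-parser pattern. As a rough, self-contained illustration of that pattern (the enum name, method name, and message wording are invented for the example, not taken from the code above):

import java.util.Locale;

// Illustrative sketch only; not part of the change above.
enum FilterModeSketch {
    INCLUDE,
    EXCLUDE;

    static FilterModeSketch fromString(String mode) {
        String lc = mode.toLowerCase(Locale.ROOT);
        if (lc.equals("include")) {
            return INCLUDE;
        } else if (lc.equals("exclude")) {
            return EXCLUDE;
        } else {
            // Reject anything else with a message that lists the accepted values.
            throw new IllegalArgumentException("mode can only be [include] or [exclude] but was [" + mode + "]");
        }
    }
}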


@ -80,8 +80,9 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
final String keepWordsPath = settings.get(KEEP_WORDS_PATH_KEY, null);
if ((arrayKeepWords == null && keepWordsPath == null) || (arrayKeepWords != null && keepWordsPath != null)) {
// we don't allow both or none
throw new IllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
+ KEEP_WORDS_PATH_KEY + "` to be configured");
throw new IllegalArgumentException(
"keep requires either `" + KEEP_WORDS_KEY + "` or `" + KEEP_WORDS_PATH_KEY + "` to be configured"
);
}
if (settings.get(ENABLE_POS_INC_KEY) != null) {
throw new IllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain");


@ -68,15 +68,13 @@ public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory
KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
boolean ignoreCase =
settings.getAsBoolean("ignore_case", false);
boolean ignoreCase = settings.getAsBoolean("ignore_case", false);
String patternString = settings.get("keywords_pattern");
if (patternString != null) {
// a pattern for matching keywords is specified, as opposed to a
// set of keyword strings to match against
if (settings.get("keywords") != null || settings.get("keywords_path") != null) {
throw new IllegalArgumentException(
"cannot specify both `keywords_pattern` and `keywords` or `keywords_path`");
throw new IllegalArgumentException("cannot specify both `keywords_pattern` and `keywords` or `keywords_path`");
}
keywordPattern = Pattern.compile(patternString);
keywordLookup = null;
@ -84,8 +82,8 @@ public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory
Set<?> rules = Analysis.getWordSet(env, settings, "keywords");
if (rules == null) {
throw new IllegalArgumentException(
"keyword filter requires either `keywords`, `keywords_path`, " +
"or `keywords_pattern` to be configured");
"keyword filter requires either `keywords`, `keywords_path`, " + "or `keywords_pattern` to be configured"
);
}
// a set of keywords (or a path to them) is specified
keywordLookup = new CharArraySet(rules, ignoreCase);


@ -40,18 +40,20 @@ import org.opensearch.index.IndexSettings;
public class LegacyDelimitedPayloadTokenFilterFactory extends DelimitedPayloadTokenFilterFactory {
private static final DeprecationLogger deprecationLogger =
DeprecationLogger.getLogger(LegacyDelimitedPayloadTokenFilterFactory.class);
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(LegacyDelimitedPayloadTokenFilterFactory.class);
LegacyDelimitedPayloadTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead");
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
);
}
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_6_2_0)) {
deprecationLogger.deprecate("analysis_legacy_delimited_payload_filter",
"Deprecated [delimited_payload_filter] used, replaced by [delimited_payload]");
deprecationLogger.deprecate(
"analysis_legacy_delimited_payload_filter",
"Deprecated [delimited_payload_filter] used, replaced by [delimited_payload]"
);
}
}
}


@ -77,5 +77,3 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
}
}


@ -77,12 +77,10 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
private void parseRules(List<String> rules, NormalizeCharMap.Builder map) {
for (String rule : rules) {
Matcher m = rulePattern.matcher(rule);
if (!m.find())
throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
if (!m.find()) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
String lhs = parseString(m.group(1).trim());
String rhs = parseString(m.group(2).trim());
if (lhs == null || rhs == null)
throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal mapping.");
if (lhs == null || rhs == null) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal mapping.");
map.add(lhs, rhs);
}
}
@ -96,8 +94,7 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
while (readPos < len) {
char c = s.charAt(readPos++);
if (c == '\\') {
if (readPos >= len)
throw new RuntimeException("Invalid escaped char in [" + s + "]");
if (readPos >= len) throw new RuntimeException("Invalid escaped char in [" + s + "]");
c = s.charAt(readPos++);
switch (c) {
case '\\':
@ -119,8 +116,7 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
c = '\f';
break;
case 'u':
if (readPos + 3 >= len)
throw new RuntimeException("Invalid escaped char in [" + s + "]");
if (readPos + 3 >= len) throw new RuntimeException("Invalid escaped char in [" + s + "]");
c = (char) Integer.parseInt(s.substring(readPos, readPos + 4), 16);
readPos += 4;
break;
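
The branch above turns the four hex digits that follow a backslash-u escape into a single character. A minimal JDK-only illustration of that conversion, with made-up names and values, is:

// Illustrative sketch only; not part of the change above.
public class HexEscapeSketch {
    public static void main(String[] args) {
        // The mapping-rule parser reads the four hex digits after a backslash-u
        // escape and converts them to one char; this is that conversion in isolation.
        String hexDigits = "00e9";
        char decoded = (char) Integer.parseInt(hexDigits, 16);
        System.out.println(decoded);   // prints: é
    }
}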


@ -56,7 +56,7 @@ import java.util.function.Function;
public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(MultiplexerTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(MultiplexerTokenFilterFactory.class);
private List<String> filterNames;
private final boolean preserveOriginal;
@ -76,22 +76,27 @@ public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
} else {
if (preserveOriginal) {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return IDENTITY_FILTER;
}
throw new IllegalArgumentException("Token filter [" + name()
+ "] cannot be used to parse synonyms unless [preserve_original] is [true]");
throw new IllegalArgumentException(
"Token filter [" + name() + "] cannot be used to parse synonyms unless [preserve_original] is [true]"
);
}
}
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
public TokenFilterFactory getChainAwareTokenFilterFactory(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
List<TokenFilterFactory> filters = new ArrayList<>();
if (preserveOriginal) {
filters.add(IDENTITY_FILTER);
@ -139,15 +144,17 @@ public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
} else {
if (preserveOriginal) {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return IDENTITY_FILTER;
}
throw new IllegalArgumentException("Token filter [" + name()
+ "] cannot be used to parse synonyms unless [preserve_original] is [true]");
throw new IllegalArgumentException(
"Token filter [" + name() + "] cannot be used to parse synonyms unless [preserve_original] is [true]"
);
}
}


@ -42,10 +42,9 @@ import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(NGramTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(NGramTokenFilterFactory.class);
private final int minGram;
private final int maxGram;
@ -62,12 +61,21 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+ maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.");
+ maxAllowedNgramDiff
+ "] but was ["
+ ngramDiff
+ "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
+ "] index level setting."
);
} else {
deprecationLogger.deprecate("ngram_big_difference",
deprecationLogger.deprecate(
"ngram_big_difference",
"Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
+ "expected difference must be less than or equal to: [" + maxAllowedNgramDiff + "]");
+ "expected difference must be less than or equal to: ["
+ maxAllowedNgramDiff
+ "]"
);
}
}
preserveOriginal = settings.getAsBoolean(PRESERVE_ORIG_KEY, false);
@ -82,10 +90,11 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
} else {
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
}


@ -71,9 +71,9 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
// Populate with unicode categories from java.lang.Character
for (Field field : Character.class.getFields()) {
if (!field.getName().startsWith("DIRECTIONALITY")
&& Modifier.isPublic(field.getModifiers())
&& Modifier.isStatic(field.getModifiers())
&& field.getType() == byte.class) {
&& Modifier.isPublic(field.getModifiers())
&& Modifier.isStatic(field.getModifiers())
&& field.getType() == byte.class) {
try {
matchers.put(field.getName().toLowerCase(Locale.ROOT), CharMatcher.ByUnicodeCategory.of(field.getByte(null)));
} catch (Exception e) {
@ -96,8 +96,14 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
CharMatcher matcher = MATCHERS.get(characterClass);
if (matcher == null) {
if (characterClass.equals("custom") == false) {
throw new IllegalArgumentException("Unknown token type: '" + characterClass + "', must be one of " + Stream
.of(MATCHERS.keySet(), Collections.singleton("custom")).flatMap(x -> x.stream()).collect(Collectors.toSet()));
throw new IllegalArgumentException(
"Unknown token type: '"
+ characterClass
+ "', must be one of "
+ Stream.of(MATCHERS.keySet(), Collections.singleton("custom"))
.flatMap(x -> x.stream())
.collect(Collectors.toSet())
);
}
String customCharacters = settings.get("custom_token_chars");
if (customCharacters == null) {
@ -128,12 +134,21 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+ maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.");
+ maxAllowedNgramDiff
+ "] but was ["
+ ngramDiff
+ "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
+ "] index level setting."
);
} else {
deprecationLogger.deprecate("ngram_big_difference",
deprecationLogger.deprecate(
"ngram_big_difference",
"Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
+ "expected difference must be less than or equal to: [" + maxAllowedNgramDiff + "]");
+ "expected difference must be less than or equal to: ["
+ maxAllowedNgramDiff
+ "]"
);
}
}
this.matcher = parseTokenChars(settings);
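
Both n-gram factories touched by this commit guard the gap between max_gram and min_gram against the index-level max_ngram_diff limit before building anything. The sketch below shows that guard on its own; the values and the simplified message are assumptions for illustration, not the exact text used above.

// Illustrative sketch only; not part of the change above.
public class NgramDiffGuardSketch {
    public static void main(String[] args) {
        int minGram = 1;
        int maxGram = 4;
        int maxAllowedNgramDiff = 1;   // stands in for the index-level max_ngram_diff setting

        int ngramDiff = maxGram - minGram;
        if (ngramDiff > maxAllowedNgramDiff) {
            throw new IllegalArgumentException(
                "The difference between max_gram and min_gram must be less than or equal to ["
                    + maxAllowedNgramDiff + "] but was [" + ngramDiff + "]"
            );
        }
    }
}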


@ -31,7 +31,6 @@
package org.opensearch.analysis.common;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter;


@ -50,5 +50,3 @@ public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory {
return new PorterStemFilter(tokenStream);
}
}


@ -58,8 +58,7 @@ public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFact
private final AnalysisPredicateScript.Factory factory;
private final List<String> filterNames;
ScriptedConditionTokenFilterFactory(IndexSettings indexSettings, String name,
Settings settings, ScriptService scriptService) {
ScriptedConditionTokenFilterFactory(IndexSettings indexSettings, String name, Settings settings, ScriptService scriptService) {
super(indexSettings, name, settings);
Settings scriptSettings = settings.getAsSettings("script");
@ -81,16 +80,20 @@ public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFact
}
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
public TokenFilterFactory getChainAwareTokenFilterFactory(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
List<TokenFilterFactory> filters = new ArrayList<>();
List<TokenFilterFactory> existingChain = new ArrayList<>(previousTokenFilters);
for (String filter : filterNames) {
TokenFilterFactory tff = allFilters.apply(filter);
if (tff == null) {
throw new IllegalArgumentException("ScriptedConditionTokenFilter [" + name() +
"] refers to undefined token filter [" + filter + "]");
throw new IllegalArgumentException(
"ScriptedConditionTokenFilter [" + name() + "] refers to undefined token filter [" + filter + "]"
);
}
tff = tff.getChainAwareTokenFilterFactory(tokenizer, charFilters, existingChain, allFilters);
filters.add(tff);
@ -121,8 +124,7 @@ public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFact
private final AnalysisPredicateScript script;
private final AnalysisPredicateScript.Token token;
ScriptedConditionTokenFilter(TokenStream input, Function<TokenStream, TokenStream> inputFactory,
AnalysisPredicateScript script) {
ScriptedConditionTokenFilter(TokenStream input, Function<TokenStream, TokenStream> inputFactory, AnalysisPredicateScript script) {
super(input, inputFactory);
this.script = script;
this.token = new AnalysisPredicateScript.Token(this);


@ -55,37 +55,33 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
*/
@Deprecated
public final class SnowballAnalyzer extends Analyzer {
private String name;
private CharArraySet stopSet;
private String name;
private CharArraySet stopSet;
/** Builds the named analyzer with no stop words. */
SnowballAnalyzer(String name) {
this.name = name;
}
/** Builds the named analyzer with no stop words. */
SnowballAnalyzer(String name) {
this.name = name;
}
/** Builds the named analyzer with the given stop words. */
SnowballAnalyzer(String name, CharArraySet stopWords) {
this(name);
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}
/** Builds the named analyzer with the given stop words. */
SnowballAnalyzer(String name, CharArraySet stopWords) {
this(name);
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link LowerCaseFilter}, a {@link StopFilter},
/** Constructs a {@link StandardTokenizer} filtered by a {@link LowerCaseFilter}, a {@link StopFilter},
and a {@link SnowballFilter} */
@Override
public TokenStreamComponents createComponents(String fieldName) {
final Tokenizer tokenizer = new StandardTokenizer();
TokenStream result = tokenizer;
// remove the possessive 's for english stemmers
if (name.equals("English") || name.equals("Porter") || name.equals("Lovins"))
result = new EnglishPossessiveFilter(result);
// Use a special lowercase filter for turkish, the stemmer expects it.
if (name.equals("Turkish"))
result = new TurkishLowerCaseFilter(result);
else
result = new LowerCaseFilter(result);
if (stopSet != null)
result = new StopFilter(result, stopSet);
result = new SnowballFilter(result, name);
return new TokenStreamComponents(tokenizer, result);
}
@Override
public TokenStreamComponents createComponents(String fieldName) {
final Tokenizer tokenizer = new StandardTokenizer();
TokenStream result = tokenizer;
// remove the possessive 's for english stemmers
if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result);
// Use a special lowercase filter for turkish, the stemmer expects it.
if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result);
else result = new LowerCaseFilter(result);
if (stopSet != null) result = new StopFilter(result, stopSet);
result = new SnowballFilter(result, name);
return new TokenStreamComponents(tokenizer, result);
}
}


@ -50,6 +50,7 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
public StandardHtmlStripAnalyzer() {
super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
}
/**
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/


@ -43,8 +43,7 @@ import org.opensearch.index.analysis.Analysis;
public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {
private static final DeprecationLogger DEPRECATION_LOGGER =
DeprecationLogger.getLogger(StandardHtmlStripAnalyzerProvider.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(StandardHtmlStripAnalyzerProvider.class);
private final StandardHtmlStripAnalyzer analyzer;
@ -52,19 +51,23 @@ public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProv
* @deprecated in 6.5, can not create in 7.0, and we remove this in 8.0
*/
@Deprecated
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
analyzer = new StandardHtmlStripAnalyzer(stopWords);
analyzer.setVersion(version);
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
throw new IllegalArgumentException(
"[standard_html_strip] analyzer is not supported for new indices, "
+ "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
);
} else {
DEPRECATION_LOGGER.deprecate("standard_html_strip_deprecation",
"Deprecated analyzer [standard_html_strip] used, " +
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
DEPRECATION_LOGGER.deprecate(
"standard_html_strip_deprecation",
"Deprecated analyzer [standard_html_strip] used, "
+ "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
);
}
}


@ -142,138 +142,139 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
// English stemmers
} else if ("english".equalsIgnoreCase(language)) {
return new PorterStemFilter(tokenStream);
} else if ("light_english".equalsIgnoreCase(language) || "lightEnglish".equalsIgnoreCase(language)
|| "kstem".equalsIgnoreCase(language)) {
return new KStemFilter(tokenStream);
} else if ("lovins".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new LovinsStemmer());
} else if ("porter".equalsIgnoreCase(language)) {
return new PorterStemFilter(tokenStream);
} else if ("porter2".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new EnglishStemmer());
} else if ("minimal_english".equalsIgnoreCase(language) || "minimalEnglish".equalsIgnoreCase(language)) {
return new EnglishMinimalStemFilter(tokenStream);
} else if ("possessive_english".equalsIgnoreCase(language) || "possessiveEnglish".equalsIgnoreCase(language)) {
return new EnglishPossessiveFilter(tokenStream);
} else if ("light_english".equalsIgnoreCase(language)
|| "lightEnglish".equalsIgnoreCase(language)
|| "kstem".equalsIgnoreCase(language)) {
return new KStemFilter(tokenStream);
} else if ("lovins".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new LovinsStemmer());
} else if ("porter".equalsIgnoreCase(language)) {
return new PorterStemFilter(tokenStream);
} else if ("porter2".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new EnglishStemmer());
} else if ("minimal_english".equalsIgnoreCase(language) || "minimalEnglish".equalsIgnoreCase(language)) {
return new EnglishMinimalStemFilter(tokenStream);
} else if ("possessive_english".equalsIgnoreCase(language) || "possessiveEnglish".equalsIgnoreCase(language)) {
return new EnglishPossessiveFilter(tokenStream);
} else if ("estonian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new EstonianStemmer());
} else if ("estonian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new EstonianStemmer());
// Finnish stemmers
} else if ("finnish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new FinnishStemmer());
} else if ("light_finish".equalsIgnoreCase(language) || "lightFinish".equalsIgnoreCase(language)) {
// leaving this for backward compatibility
return new FinnishLightStemFilter(tokenStream);
} else if ("light_finnish".equalsIgnoreCase(language) || "lightFinnish".equalsIgnoreCase(language)) {
return new FinnishLightStemFilter(tokenStream);
// Finnish stemmers
} else if ("finnish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new FinnishStemmer());
} else if ("light_finish".equalsIgnoreCase(language) || "lightFinish".equalsIgnoreCase(language)) {
// leaving this for backward compatibility
return new FinnishLightStemFilter(tokenStream);
} else if ("light_finnish".equalsIgnoreCase(language) || "lightFinnish".equalsIgnoreCase(language)) {
return new FinnishLightStemFilter(tokenStream);
// French stemmers
} else if ("french".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new FrenchStemmer());
} else if ("light_french".equalsIgnoreCase(language) || "lightFrench".equalsIgnoreCase(language)) {
return new FrenchLightStemFilter(tokenStream);
} else if ("minimal_french".equalsIgnoreCase(language) || "minimalFrench".equalsIgnoreCase(language)) {
return new FrenchMinimalStemFilter(tokenStream);
// French stemmers
} else if ("french".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new FrenchStemmer());
} else if ("light_french".equalsIgnoreCase(language) || "lightFrench".equalsIgnoreCase(language)) {
return new FrenchLightStemFilter(tokenStream);
} else if ("minimal_french".equalsIgnoreCase(language) || "minimalFrench".equalsIgnoreCase(language)) {
return new FrenchMinimalStemFilter(tokenStream);
// Galician stemmers
} else if ("galician".equalsIgnoreCase(language)) {
return new GalicianStemFilter(tokenStream);
} else if ("minimal_galician".equalsIgnoreCase(language)) {
return new GalicianMinimalStemFilter(tokenStream);
// Galician stemmers
} else if ("galician".equalsIgnoreCase(language)) {
return new GalicianStemFilter(tokenStream);
} else if ("minimal_galician".equalsIgnoreCase(language)) {
return new GalicianMinimalStemFilter(tokenStream);
// German stemmers
} else if ("german".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new GermanStemmer());
} else if ("german2".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new German2Stemmer());
} else if ("light_german".equalsIgnoreCase(language) || "lightGerman".equalsIgnoreCase(language)) {
return new GermanLightStemFilter(tokenStream);
} else if ("minimal_german".equalsIgnoreCase(language) || "minimalGerman".equalsIgnoreCase(language)) {
return new GermanMinimalStemFilter(tokenStream);
// German stemmers
} else if ("german".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new GermanStemmer());
} else if ("german2".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new German2Stemmer());
} else if ("light_german".equalsIgnoreCase(language) || "lightGerman".equalsIgnoreCase(language)) {
return new GermanLightStemFilter(tokenStream);
} else if ("minimal_german".equalsIgnoreCase(language) || "minimalGerman".equalsIgnoreCase(language)) {
return new GermanMinimalStemFilter(tokenStream);
} else if ("greek".equalsIgnoreCase(language)) {
return new GreekStemFilter(tokenStream);
} else if ("hindi".equalsIgnoreCase(language)) {
return new HindiStemFilter(tokenStream);
} else if ("greek".equalsIgnoreCase(language)) {
return new GreekStemFilter(tokenStream);
} else if ("hindi".equalsIgnoreCase(language)) {
return new HindiStemFilter(tokenStream);
// Hungarian stemmers
} else if ("hungarian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new HungarianStemmer());
} else if ("light_hungarian".equalsIgnoreCase(language) || "lightHungarian".equalsIgnoreCase(language)) {
return new HungarianLightStemFilter(tokenStream);
// Hungarian stemmers
} else if ("hungarian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new HungarianStemmer());
} else if ("light_hungarian".equalsIgnoreCase(language) || "lightHungarian".equalsIgnoreCase(language)) {
return new HungarianLightStemFilter(tokenStream);
} else if ("indonesian".equalsIgnoreCase(language)) {
return new IndonesianStemFilter(tokenStream);
} else if ("indonesian".equalsIgnoreCase(language)) {
return new IndonesianStemFilter(tokenStream);
// Irish stemmer
} else if ("irish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new IrishStemmer());
// Irish stemmer
} else if ("irish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new IrishStemmer());
// Italian stemmers
} else if ("italian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new ItalianStemmer());
} else if ("light_italian".equalsIgnoreCase(language) || "lightItalian".equalsIgnoreCase(language)) {
return new ItalianLightStemFilter(tokenStream);
// Italian stemmers
} else if ("italian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new ItalianStemmer());
} else if ("light_italian".equalsIgnoreCase(language) || "lightItalian".equalsIgnoreCase(language)) {
return new ItalianLightStemFilter(tokenStream);
} else if ("latvian".equalsIgnoreCase(language)) {
return new LatvianStemFilter(tokenStream);
} else if ("latvian".equalsIgnoreCase(language)) {
return new LatvianStemFilter(tokenStream);
} else if ("lithuanian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new LithuanianStemmer());
} else if ("lithuanian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new LithuanianStemmer());
// Norwegian (Bokmål) stemmers
} else if ("norwegian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new NorwegianStemmer());
} else if ("light_norwegian".equalsIgnoreCase(language) || "lightNorwegian".equalsIgnoreCase(language)) {
return new NorwegianLightStemFilter(tokenStream);
} else if ("minimal_norwegian".equalsIgnoreCase(language) || "minimalNorwegian".equals(language)) {
return new NorwegianMinimalStemFilter(tokenStream);
// Norwegian (Bokmål) stemmers
} else if ("norwegian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new NorwegianStemmer());
} else if ("light_norwegian".equalsIgnoreCase(language) || "lightNorwegian".equalsIgnoreCase(language)) {
return new NorwegianLightStemFilter(tokenStream);
} else if ("minimal_norwegian".equalsIgnoreCase(language) || "minimalNorwegian".equals(language)) {
return new NorwegianMinimalStemFilter(tokenStream);
// Norwegian (Nynorsk) stemmers
} else if ("light_nynorsk".equalsIgnoreCase(language) || "lightNynorsk".equalsIgnoreCase(language)) {
return new NorwegianLightStemFilter(tokenStream, NorwegianLightStemmer.NYNORSK);
} else if ("minimal_nynorsk".equalsIgnoreCase(language) || "minimalNynorsk".equalsIgnoreCase(language)) {
return new NorwegianMinimalStemFilter(tokenStream, NorwegianLightStemmer.NYNORSK);
// Norwegian (Nynorsk) stemmers
} else if ("light_nynorsk".equalsIgnoreCase(language) || "lightNynorsk".equalsIgnoreCase(language)) {
return new NorwegianLightStemFilter(tokenStream, NorwegianLightStemmer.NYNORSK);
} else if ("minimal_nynorsk".equalsIgnoreCase(language) || "minimalNynorsk".equalsIgnoreCase(language)) {
return new NorwegianMinimalStemFilter(tokenStream, NorwegianLightStemmer.NYNORSK);
// Portuguese stemmers
} else if ("portuguese".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new PortugueseStemmer());
} else if ("light_portuguese".equalsIgnoreCase(language) || "lightPortuguese".equalsIgnoreCase(language)) {
return new PortugueseLightStemFilter(tokenStream);
} else if ("minimal_portuguese".equalsIgnoreCase(language) || "minimalPortuguese".equalsIgnoreCase(language)) {
return new PortugueseMinimalStemFilter(tokenStream);
} else if ("portuguese_rslp".equalsIgnoreCase(language)) {
return new PortugueseStemFilter(tokenStream);
// Portuguese stemmers
} else if ("portuguese".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new PortugueseStemmer());
} else if ("light_portuguese".equalsIgnoreCase(language) || "lightPortuguese".equalsIgnoreCase(language)) {
return new PortugueseLightStemFilter(tokenStream);
} else if ("minimal_portuguese".equalsIgnoreCase(language) || "minimalPortuguese".equalsIgnoreCase(language)) {
return new PortugueseMinimalStemFilter(tokenStream);
} else if ("portuguese_rslp".equalsIgnoreCase(language)) {
return new PortugueseStemFilter(tokenStream);
} else if ("romanian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new RomanianStemmer());
} else if ("romanian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new RomanianStemmer());
// Russian stemmers
} else if ("russian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new RussianStemmer());
} else if ("light_russian".equalsIgnoreCase(language) || "lightRussian".equalsIgnoreCase(language)) {
return new RussianLightStemFilter(tokenStream);
// Russian stemmers
} else if ("russian".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new RussianStemmer());
} else if ("light_russian".equalsIgnoreCase(language) || "lightRussian".equalsIgnoreCase(language)) {
return new RussianLightStemFilter(tokenStream);
// Spanish stemmers
} else if ("spanish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new SpanishStemmer());
} else if ("light_spanish".equalsIgnoreCase(language) || "lightSpanish".equalsIgnoreCase(language)) {
return new SpanishLightStemFilter(tokenStream);
// Spanish stemmers
} else if ("spanish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new SpanishStemmer());
} else if ("light_spanish".equalsIgnoreCase(language) || "lightSpanish".equalsIgnoreCase(language)) {
return new SpanishLightStemFilter(tokenStream);
// Sorani Kurdish stemmer
} else if ("sorani".equalsIgnoreCase(language)) {
return new SoraniStemFilter(tokenStream);
// Sorani Kurdish stemmer
} else if ("sorani".equalsIgnoreCase(language)) {
return new SoraniStemFilter(tokenStream);
// Swedish stemmers
} else if ("swedish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new SwedishStemmer());
} else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) {
return new SwedishLightStemFilter(tokenStream);
// Swedish stemmers
} else if ("swedish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new SwedishStemmer());
} else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) {
return new SwedishLightStemFilter(tokenStream);
} else if ("turkish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new TurkishStemmer());
}
} else if ("turkish".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new TurkishStemmer());
}
return new SnowballFilter(tokenStream, language);
}
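
When none of the named cases above match, the factory falls back to Lucene's generic SnowballFilter built from the language string. A minimal stand-alone sketch of that fallback follows; the tokenizer choice, class name, and the "English" language value are illustrative assumptions.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;

import java.io.StringReader;

// Illustrative sketch only; not part of the change above.
public class SnowballFallbackSketch {
    public static TokenStream stem(String text) {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(text));
        // "English" maps to the Snowball EnglishStemmer, the same mechanism the fallback uses.
        return new SnowballFilter(tokenizer, "English");
    }
}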


@ -49,8 +49,7 @@ import java.util.function.Function;
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, env, name, settings);
}
@ -60,9 +59,12 @@ public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
}
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
public TokenFilterFactory getChainAwareTokenFilterFactory(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters);
final SynonymMap synonyms = buildSynonyms(analyzer, getRulesFromSettings(environment));
final String name = name();


@ -64,15 +64,16 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Environment environment;
protected final AnalysisMode analysisMode;
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
this.settings = settings;
if (settings.get("ignore_case") != null) {
DEPRECATION_LOGGER.deprecate("synonym_ignore_case_option",
"The ignore_case option on the synonym_graph filter is deprecated. " +
"Instead, insert a lowercase filter in the filter chain before the synonym_graph filter.");
DEPRECATION_LOGGER.deprecate(
"synonym_ignore_case_option",
"The ignore_case option on the synonym_graph filter is deprecated. "
+ "Instead, insert a lowercase filter in the filter chain before the synonym_graph filter."
);
}
this.expand = settings.getAsBoolean("expand", true);
@ -94,9 +95,12 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
}
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
public TokenFilterFactory getChainAwareTokenFilterFactory(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters);
final SynonymMap synonyms = buildSynonyms(analyzer, getRulesFromSettings(environment));
final String name = name();
@ -126,12 +130,17 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
};
}
Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> tokenFilters, Function<String, TokenFilterFactory> allFilters) {
return new CustomAnalyzer(tokenizer, charFilters.toArray(new CharFilterFactory[0]),
tokenFilters.stream()
.map(TokenFilterFactory::getSynonymFilter)
.toArray(TokenFilterFactory[]::new));
Analyzer buildSynonymAnalyzer(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> tokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
return new CustomAnalyzer(
tokenizer,
charFilters.toArray(new CharFilterFactory[0]),
tokenFilters.stream().map(TokenFilterFactory::getSynonymFilter).toArray(TokenFilterFactory[]::new)
);
}
SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {

View File

@ -45,8 +45,7 @@ public class ThaiAnalyzerProvider extends AbstractIndexAnalyzerProvider<ThaiAnal
ThaiAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new ThaiAnalyzer(
Analysis.parseStopWords(env, settings, ThaiAnalyzer.getDefaultStopSet()));
analyzer = new ThaiAnalyzer(Analysis.parseStopWords(env, settings, ThaiAnalyzer.getDefaultStopSet()));
analyzer.setVersion(version);
}

View File

@ -47,7 +47,7 @@ public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implement
TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
if (settings.get(UPDATE_OFFSETS_KEY) != null) {
throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain");
throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain");
}
}

View File

@ -97,5 +97,3 @@ class UniqueTokenFilter extends TokenFilter {
previous.clear();
}
}

View File

@ -52,5 +52,3 @@ public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
}
}

View File

@ -62,16 +62,14 @@ import static org.opensearch.analysis.common.WordDelimiterTokenFilterFactory.par
public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER =
DeprecationLogger.getLogger(WordDelimiterGraphTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(WordDelimiterGraphTokenFilterFactory.class);
private final byte[] charTypeTable;
private final int flags;
private final CharArraySet protoWords;
private final boolean adjustOffsets;
public WordDelimiterGraphTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
public WordDelimiterGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
// Sample Format for the type table:
@ -123,10 +121,11 @@ public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFac
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
} else {
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
}

View File

@ -66,15 +66,13 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.STEM_
public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER =
DeprecationLogger.getLogger(WordDelimiterTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(WordDelimiterTokenFilterFactory.class);
private final byte[] charTypeTable;
private final int flags;
private final CharArraySet protoWords;
public WordDelimiterTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
public WordDelimiterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
// Sample Format for the type table:
@ -116,20 +114,18 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
@Override
public TokenStream create(TokenStream tokenStream) {
return new WordDelimiterFilter(tokenStream,
charTypeTable,
flags,
protoWords);
return new WordDelimiterFilter(tokenStream, charTypeTable, flags, protoWords);
}
@Override
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
} else {
DEPRECATION_LOGGER.deprecate(
"synonym_tokenfilters",
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
);
return this;
}
}
@ -151,21 +147,16 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
SortedMap<Character, Byte> typeMap = new TreeMap<>();
for (String rule : rules) {
Matcher m = typePattern.matcher(rule);
if (!m.find())
throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
if (!m.find()) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
String lhs = parseString(m.group(1).trim());
Byte rhs = parseType(m.group(2).trim());
if (lhs.length() != 1)
throw new RuntimeException("Invalid Mapping Rule : ["
+ rule + "]. Only a single character is allowed.");
if (rhs == null)
throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
if (lhs.length() != 1) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
if (rhs == null) throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
typeMap.put(lhs.charAt(0), rhs);
}
// ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
byte types[] = new byte[Math.max(
typeMap.lastKey() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.length)];
byte types[] = new byte[Math.max(typeMap.lastKey() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.length)];
for (int i = 0; i < types.length; i++)
types[i] = WordDelimiterIterator.getType(i);
for (Map.Entry<Character, Byte> mapping : typeMap.entrySet())
@ -174,20 +165,13 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
}
private static Byte parseType(String s) {
if (s.equals("LOWER"))
return WordDelimiterFilter.LOWER;
else if (s.equals("UPPER"))
return WordDelimiterFilter.UPPER;
else if (s.equals("ALPHA"))
return WordDelimiterFilter.ALPHA;
else if (s.equals("DIGIT"))
return WordDelimiterFilter.DIGIT;
else if (s.equals("ALPHANUM"))
return WordDelimiterFilter.ALPHANUM;
else if (s.equals("SUBWORD_DELIM"))
return WordDelimiterFilter.SUBWORD_DELIM;
else
return null;
if (s.equals("LOWER")) return WordDelimiterFilter.LOWER;
else if (s.equals("UPPER")) return WordDelimiterFilter.UPPER;
else if (s.equals("ALPHA")) return WordDelimiterFilter.ALPHA;
else if (s.equals("DIGIT")) return WordDelimiterFilter.DIGIT;
else if (s.equals("ALPHANUM")) return WordDelimiterFilter.ALPHANUM;
else if (s.equals("SUBWORD_DELIM")) return WordDelimiterFilter.SUBWORD_DELIM;
else return null;
}
private static String parseString(String s) {
@ -198,8 +182,7 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
while (readPos < len) {
char c = s.charAt(readPos++);
if (c == '\\') {
if (readPos >= len)
throw new RuntimeException("Invalid escaped char in [" + s + "]");
if (readPos >= len) throw new RuntimeException("Invalid escaped char in [" + s + "]");
c = s.charAt(readPos++);
switch (c) {
case '\\':
@ -221,8 +204,7 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
c = '\f';
break;
case 'u':
if (readPos + 3 >= len)
throw new RuntimeException("Invalid escaped char in [" + s + "]");
if (readPos + 3 >= len) throw new RuntimeException("Invalid escaped char in [" + s + "]");
c = (char) Integer.parseInt(s.substring(readPos, readPos + 4), 16);
readPos += 4;
break;

View File

@ -85,8 +85,8 @@ class XLowerCaseTokenizer extends Tokenizer {
assert start == -1;
start = offset + bufferIndex - charCount;
end = start;
} else if (length >= buffer.length-1) { // check if a supplementary could run out of bounds
buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
} else if (length >= buffer.length - 1) { // check if a supplementary could run out of bounds
buffer = termAtt.resizeBuffer(2 + length); // make sure a supplementary fits in the buffer
}
end += charCount;
length += Character.toChars(Character.toLowerCase(c), buffer, length); // buffer it, normalized

View File

@ -47,14 +47,15 @@ import java.io.StringReader;
public class ASCIIFoldingTokenFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testDefault() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_ascii_folding");
String source = "Ansprüche";
String[] expected = new String[]{"Anspruche"};
String[] expected = new String[] { "Anspruche" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -62,15 +63,16 @@ public class ASCIIFoldingTokenFilterFactoryTests extends OpenSearchTokenStreamTe
public void testPreserveOriginal() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
.put("index.analysis.filter.my_ascii_folding.preserve_original", true)
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
.put("index.analysis.filter.my_ascii_folding.preserve_original", true)
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_ascii_folding");
String source = "Ansprüche";
String[] expected = new String[]{"Anspruche", "Ansprüche"};
String[] expected = new String[] { "Anspruche", "Ansprüche" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -78,7 +80,7 @@ public class ASCIIFoldingTokenFilterFactoryTests extends OpenSearchTokenStreamTe
// but the multi-term aware component still emits a single token
tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
expected = new String[]{"Anspruche"};
expected = new String[] { "Anspruche" };
assertTokenStreamContents(tokenFilter.normalize(tokenizer), expected);
}
}

View File

@ -31,7 +31,6 @@
package org.opensearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.opensearch.common.settings.Settings;
@ -57,15 +56,15 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testDefault() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"Power", "Shot", "500", "42", "wi", "fi", "wi",
"fi", "4000", "j", "2", "se", "O", "Neil"};
String[] expected = new String[] { "Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -73,17 +72,17 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testCatenateWords() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[] { "PowerShot", "500", "42", "wifi", "wifi", "4000", "j",
"2", "se", "ONeil" };
String[] expected = new String[] { "PowerShot", "500", "42", "wifi", "wifi", "4000", "j", "2", "se", "ONeil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -91,17 +90,17 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testCatenateNumbers() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_numbers", "true")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[] { "Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000",
"j", "2", "se", "O", "Neil" };
String[] expected = new String[] { "Power", "Shot", "50042", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -109,17 +108,18 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testCatenateAll() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.generate_word_parts", "false")
.put("index.analysis.filter.my_word_delimiter.generate_number_parts", "false")
.put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[]{"PowerShot", "50042", "wifi", "wifi4000", "j2se", "ONeil"};
String[] expected = new String[] { "PowerShot", "50042", "wifi", "wifi4000", "j2se", "ONeil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -127,15 +127,16 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testSplitOnCaseChange() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.split_on_case_change", "false")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.split_on_case_change", "false")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot";
String[] expected = new String[]{"PowerShot"};
String[] expected = new String[] { "PowerShot" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -143,17 +144,36 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testPreserveOriginal() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[] { "PowerShot", "Power", "Shot", "500-42", "500", "42",
"wi-fi", "wi", "fi", "wi-fi-4000", "wi", "fi", "4000", "j2se", "j", "2", "se",
"O'Neil's", "O", "Neil" };
String[] expected = new String[] {
"PowerShot",
"Power",
"Shot",
"500-42",
"500",
"42",
"wi-fi",
"wi",
"fi",
"wi-fi-4000",
"wi",
"fi",
"4000",
"j2se",
"j",
"2",
"se",
"O'Neil's",
"O",
"Neil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -161,16 +181,16 @@ public abstract class BaseWordDelimiterTokenFilterFactoryTestCase extends OpenSe
public void testStemEnglishPossessive() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.stem_english_possessive", "false")
.build(),
new CommonAnalysisPlugin());
Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_word_delimiter.type", type)
.put("index.analysis.filter.my_word_delimiter.stem_english_possessive", "false")
.build(),
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
String[] expected = new String[] { "Power", "Shot", "500", "42", "wi", "fi", "wi", "fi",
"4000", "j", "2", "se", "O", "Neil", "s" };
String[] expected = new String[] { "Power", "Shot", "500", "42", "wi", "fi", "wi", "fi", "4000", "j", "2", "se", "O", "Neil", "s" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);

View File

@ -58,7 +58,7 @@ public class CJKFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testDefault() throws IOException {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("cjk_bigram");
String source = "多くの学生が試験に落ちた。";
String[] expected = new String[]{"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" };
String[] expected = new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -67,7 +67,7 @@ public class CJKFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testNoFlags() throws IOException {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("cjk_no_flags");
String source = "多くの学生が試験に落ちた。";
String[] expected = new String[]{"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" };
String[] expected = new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -76,7 +76,7 @@ public class CJKFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testHanOnly() throws IOException {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("cjk_han_only");
String source = "多くの学生が試験に落ちた。";
String[] expected = new String[]{"", "", "", "学生", "", "試験", "", "", "", "" };
String[] expected = new String[] { "", "", "", "学生", "", "試験", "", "", "", "" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -85,7 +85,7 @@ public class CJKFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testHanUnigramOnly() throws IOException {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("cjk_han_unigram_only");
String source = "多くの学生が試験に落ちた。";
String[] expected = new String[]{"", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" };
String[] expected = new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);

View File

@ -54,23 +54,25 @@ public class CharGroupTokenizerFactoryTests extends OpenSearchTokenStreamTestCas
IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
final String name = "cg";
for (String[] conf : Arrays.asList(
new String[] { "\\v" },
new String[] { "\\u00245" },
new String[] { "commas" },
new String[] { "a", "b", "c", "\\$" })) {
new String[] { "\\v" },
new String[] { "\\u00245" },
new String[] { "commas" },
new String[] { "a", "b", "c", "\\$" }
)) {
final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf).build();
expectThrows(RuntimeException.class, () -> new CharGroupTokenizerFactory(indexProperties, null, name, settings).create());
}
for (String[] conf : Arrays.asList(
new String[0],
new String[] { "\\n" },
new String[] { "\\u0024" },
new String[] { "whitespace" },
new String[] { "a", "b", "c" },
new String[] { "a", "b", "c", "\\r" },
new String[] { "\\r" },
new String[] { "f", "o", "o", "symbol" })) {
new String[0],
new String[] { "\\n" },
new String[] { "\\u0024" },
new String[] { "whitespace" },
new String[] { "a", "b", "c" },
new String[] { "a", "b", "c", "\\r" },
new String[] { "\\r" },
new String[] { "f", "o", "o", "symbol" }
)) {
final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", Arrays.asList(conf)).build();
new CharGroupTokenizerFactory(indexProperties, null, name, settings).create();
// no exception
@ -83,22 +85,21 @@ public class CharGroupTokenizerFactoryTests extends OpenSearchTokenStreamTestCas
IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
final String name = "cg";
String[] conf = new String[] {"-"};
String[] conf = new String[] { "-" };
final Settings defaultLengthSettings = newAnalysisSettingsBuilder()
.putList("tokenize_on_chars", conf)
.build();
final Settings defaultLengthSettings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf).build();
CharTokenizer tokenizer = (CharTokenizer) new CharGroupTokenizerFactory(indexProperties, null, name, defaultLengthSettings)
.create();
String textWithVeryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256).concat("-trailing");
try (Reader reader = new StringReader(textWithVeryLongToken)) {
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[] { textWithVeryLongToken.substring(0, 255),
textWithVeryLongToken.substring(255, 256), "trailing"});
assertTokenStreamContents(
tokenizer,
new String[] { textWithVeryLongToken.substring(0, 255), textWithVeryLongToken.substring(255, 256), "trailing" }
);
}
final Settings analysisSettings = newAnalysisSettingsBuilder()
.putList("tokenize_on_chars", conf)
final Settings analysisSettings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf)
.put("max_token_length", 2)
.build();
tokenizer = (CharTokenizer) new CharGroupTokenizerFactory(indexProperties, null, name, analysisSettings).create();
@ -107,20 +108,22 @@ public class CharGroupTokenizerFactoryTests extends OpenSearchTokenStreamTestCas
assertTokenStreamContents(tokenizer, new String[] { "on", "e", "tw", "o", "th", "re", "e" });
}
final Settings tooLongLengthSettings = newAnalysisSettingsBuilder()
.putList("tokenize_on_chars", conf)
final Settings tooLongLengthSettings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf)
.put("max_token_length", 1024 * 1024 + 1)
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> new CharGroupTokenizerFactory(indexProperties, null, name, tooLongLengthSettings).create());
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> new CharGroupTokenizerFactory(indexProperties, null, name, tooLongLengthSettings).create()
);
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", e.getMessage());
final Settings negativeLengthSettings = newAnalysisSettingsBuilder()
.putList("tokenize_on_chars", conf)
final Settings negativeLengthSettings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf)
.put("max_token_length", -1)
.build();
e = expectThrows(IllegalArgumentException.class,
() -> new CharGroupTokenizerFactory(indexProperties, null, name, negativeLengthSettings).create());
e = expectThrows(
IllegalArgumentException.class,
() -> new CharGroupTokenizerFactory(indexProperties, null, name, negativeLengthSettings).create()
);
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
}
@ -129,9 +132,13 @@ public class CharGroupTokenizerFactoryTests extends OpenSearchTokenStreamTestCas
final String name = "cg";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", "whitespace", ":", "\\u0024").build();
Tokenizer tokenizer = new CharGroupTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings),
null, name, settings).create();
Tokenizer tokenizer = new CharGroupTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
name,
settings
).create();
tokenizer.setReader(new StringReader("foo bar $34 test:test2"));
assertTokenStreamContents(tokenizer, new String[] {"foo", "bar", "34", "test", "test2"});
assertTokenStreamContents(tokenizer, new String[] { "foo", "bar", "34", "test", "test2" });
}
}

View File

@ -161,13 +161,13 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
@Override
protected Map<String, Class<?>> getCharFilters() {
Map<String, Class<?>> filters = new TreeMap<>(super.getCharFilters());
filters.put("htmlstrip", HtmlStripCharFilterFactory.class);
filters.put("mapping", MappingCharFilterFactory.class);
filters.put("htmlstrip", HtmlStripCharFilterFactory.class);
filters.put("mapping", MappingCharFilterFactory.class);
filters.put("patternreplace", PatternReplaceCharFilterFactory.class);
// TODO: these charfilters are not yet exposed: useful?
// handling of zwnj for persian
filters.put("persian", Void.class);
filters.put("persian", Void.class);
return filters;
}
@ -280,12 +280,16 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
}
private void markedTestCase(String name, Map<String, Class<?>> map) {
List<String> unmarked = map.entrySet().stream()
.filter(e -> e.getValue() == MovedToAnalysisCommon.class)
.map(Map.Entry::getKey)
.sorted()
.collect(toList());
assertEquals(name + " marked in AnalysisFactoryTestCase as moved to analysis-common "
+ "but not mapped here", emptyList(), unmarked);
List<String> unmarked = map.entrySet()
.stream()
.filter(e -> e.getValue() == MovedToAnalysisCommon.class)
.map(Map.Entry::getKey)
.sorted()
.collect(toList());
assertEquals(
name + " marked in AnalysisFactoryTestCase as moved to analysis-common " + "but not mapped here",
emptyList(),
unmarked
);
}
}

View File

@ -56,10 +56,16 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
* Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.0.0
*/
public void testNGramDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(), LegacyESVersion.V_6_0_0, VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(),
LegacyESVersion.V_6_0_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)
)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "nGram")
@ -69,15 +75,18 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
}
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
assertWarnings(
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead."
);
}
/**
* Check that the deprecated name "nGram" throws an error since 7.0.0
*/
public void testNGramDeprecationError() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, null))
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
@ -85,10 +94,15 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [ngram] instead.", e.getMessage());
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
);
assertEquals(
"The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [ngram] instead.",
e.getMessage()
);
}
}
@ -96,10 +110,16 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
* Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.0.0
*/
public void testEdgeNGramDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(), LegacyESVersion.V_6_4_0, VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
random(),
LegacyESVersion.V_6_4_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)
)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "edgeNGram")
@ -108,15 +128,18 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
}
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
assertWarnings(
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead."
);
}
/**
* Check that the deprecated name "edgeNGram" throws an error for indices created since 7.0.0
*/
public void testEdgeNGramDeprecationError() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, null))
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
@ -124,10 +147,15 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [edge_ngram] instead.", ex.getMessage());
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin)
);
assertEquals(
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [edge_ngram] instead.",
ex.getMessage()
);
}
}
@ -135,29 +163,43 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
* Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0
*/
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin));
assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter", ex.getMessage());
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> createTestAnalysis(idxSettings, settings, commonAnalysisPlugin)
);
assertEquals(
"[standard_html_strip] analyzer is not supported for new indices, "
+ "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter",
ex.getMessage()
);
}
/**
* Check that the deprecated analyzer name "standard_html_strip" issues a deprecation warning for indices created since 6.5.0 until 7
*/
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_0_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(),
LegacyESVersion.V_6_0_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)
)
)
.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
.build();
@ -168,8 +210,9 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
Analyzer analyzer = analyzers.get("custom_analyzer");
assertNotNull(((NamedAnalyzer) analyzer).analyzer());
assertWarnings(
"Deprecated analyzer [standard_html_strip] used, " +
"replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
"Deprecated analyzer [standard_html_strip] used, "
+ "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter"
);
}
}
@ -177,9 +220,12 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
* Check that the deprecated "nGram" filter logs a warning when the filter is used as a custom filter
*/
public void testnGramFilterInCustomAnalyzerDeprecationError() throws IOException {
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
@ -189,17 +235,22 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
assertWarnings(
"The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead."
);
}
/**
* Check that the deprecated "edgeNGram" filter logs a warning when the filter is used as a custom filter
*/
public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException {
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram")
@ -208,8 +259,10 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
final CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin();
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
assertWarnings(
"The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead."
);
}
/**
@ -218,56 +271,90 @@ public class CommonAnalysisPluginTests extends OpenSearchTestCase {
*/
public void testNGramTokenizerDeprecation() throws IOException {
// tests for prebuilt tokenizer
doTestPrebuiltTokenizerDeprecation("nGram", "ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2), false);
doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2), false);
doTestPrebuiltTokenizerDeprecation(
"nGram",
"ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestPrebuiltTokenizerDeprecation(
"edgeNGram",
"edge_ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestPrebuiltTokenizerDeprecation("nGram", "ngram", LegacyESVersion.V_7_6_0, true);
doTestPrebuiltTokenizerDeprecation("edgeNGram", "edge_ngram", LegacyESVersion.V_7_6_0, true);
// same batch of tests for custom tokenizer definition in the settings
doTestCustomTokenizerDeprecation("nGram", "ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2), false);
doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2), false);
doTestCustomTokenizerDeprecation(
"nGram",
"ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestCustomTokenizerDeprecation(
"edgeNGram",
"edge_ngram",
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, LegacyESVersion.V_7_5_2),
false
);
doTestCustomTokenizerDeprecation("nGram", "ngram", LegacyESVersion.V_7_6_0, true);
doTestCustomTokenizerDeprecation("edgeNGram", "edge_ngram", LegacyESVersion.V_7_6_0, true);
}
public void doTestPrebuiltTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
throws IOException {
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, version).build();
throws IOException {
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, TokenizerFactory> tokenizers = createTestAnalysis(
IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin).tokenizer;
IndexSettingsModule.newIndexSettings("index", settings),
settings,
commonAnalysisPlugin
).tokenizer;
TokenizerFactory tokenizerFactory = tokenizers.get(deprecatedName);
Tokenizer tokenizer = tokenizerFactory.create();
assertNotNull(tokenizer);
if (expectWarning) {
assertWarnings("The [" + deprecatedName + "] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [" + replacement + "] instead.");
assertWarnings(
"The ["
+ deprecatedName
+ "] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to ["
+ replacement
+ "] instead."
);
}
}
}
public void doTestCustomTokenizerDeprecation(String deprecatedName, String replacement, Version version, boolean expectWarning)
throws IOException {
final Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
throws IOException {
final Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
.put("index.analysis.analyzer.custom_analyzer.type", "custom")
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "my_tokenizer")
.put("index.analysis.tokenizer.my_tokenizer.type", deprecatedName)
.build();
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin);
if (expectWarning) {
assertWarnings("The [" + deprecatedName + "] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to [" + replacement + "] instead.");
assertWarnings(
"The ["
+ deprecatedName
+ "] tokenizer name is deprecated and will be removed in a future version. "
+ "Please change the tokenizer name to ["
+ replacement
+ "] instead."
);
}
}
}

View File

@ -53,25 +53,25 @@ import java.nio.file.Path;
public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testDefault() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_default.type", "common_grams")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.common_grams_default.type", "common_grams")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
try {
AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
Assert.fail("[common_words] or [common_words_path] is set");
} catch (IllegalArgumentException e) {
} catch (IOException e) {
} catch (IllegalArgumentException e) {} catch (IOException e) {
fail("expected IAE");
}
}
public void testWithoutCommonWordsMatch() throws IOException {
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_default.type", "common_grams")
.putList("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_default.type", "common_grams")
.putList("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
{
@ -85,11 +85,12 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
}
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_default.type", "common_grams")
.put("index.analysis.filter.common_grams_default.query_mode", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putList("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_default.type", "common_grams")
.put("index.analysis.filter.common_grams_default.query_mode", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putList("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
{
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
@ -104,45 +105,85 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
public void testSettings() throws IOException {
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_1.type", "common_grams")
.put("index.analysis.filter.common_grams_1.ignore_case", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putList("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_1.type", "common_grams")
.put("index.analysis.filter.common_grams_1.ignore_case", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putList("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
String source = "the quick brown is a fox or noT";
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
"a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
String[] expected = new String[] {
"the",
"the_quick",
"quick",
"brown",
"brown_is",
"is",
"is_a",
"a",
"a_fox",
"fox",
"fox_or",
"or",
"or_noT",
"noT" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_2.type", "common_grams")
.put("index.analysis.filter.common_grams_2.ignore_case", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putList("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_2.type", "common_grams")
.put("index.analysis.filter.common_grams_2.ignore_case", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putList("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
String source = "the quick brown is a fox or why noT";
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "" +
"a_fox", "fox", "or", "why", "why_noT", "noT" };
String[] expected = new String[] {
"the",
"the_quick",
"quick",
"brown",
"brown_is",
"is",
"is_a",
"a",
"" + "a_fox",
"fox",
"or",
"why",
"why_noT",
"noT" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_3.type", "common_grams")
.putList("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_3.type", "common_grams")
.putList("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
String source = "the quick brown is a fox Or noT";
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
"a_fox", "fox", "Or", "noT" };
String[] expected = new String[] {
"the",
"the_quick",
"quick",
"brown",
"brown_is",
"is",
"is_a",
"a",
"a_fox",
"fox",
"Or",
"noT" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -152,37 +193,58 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
public void testCommonGramsAnalysis() throws IOException {
String json = "/org/opensearch/analysis/common/commongrams.json";
Settings settings = Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
.build();
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
.build();
{
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
"fox", "fox_or", "or", "not" };
String[] expected = new String[] {
"the",
"quick",
"quick_brown",
"brown",
"brown_is",
"is",
"a",
"a_fox",
"fox",
"fox_or",
"or",
"not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
{
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
"fox", "fox_or", "or", "not" };
String[] expected = new String[] {
"the",
"quick",
"quick_brown",
"brown",
"brown_is",
"is",
"a",
"a_fox",
"fox",
"fox_or",
"or",
"not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
}
public void testQueryModeSettings() throws IOException {
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_1.type", "common_grams")
.put("index.analysis.filter.common_grams_1.query_mode", true)
.putList("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
.put("index.analysis.filter.common_grams_1.ignore_case", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_1.type", "common_grams")
.put("index.analysis.filter.common_grams_1.query_mode", true)
.putList("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
.put("index.analysis.filter.common_grams_1.ignore_case", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
String source = "the quick brown is a fox or noT";
@ -192,12 +254,13 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_2.type", "common_grams")
.put("index.analysis.filter.common_grams_2.query_mode", true)
.putList("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.put("index.analysis.filter.common_grams_2.ignore_case", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_2.type", "common_grams")
.put("index.analysis.filter.common_grams_2.query_mode", true)
.putList("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.put("index.analysis.filter.common_grams_2.ignore_case", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
String source = "the quick brown is a fox or why noT";
@ -207,11 +270,12 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_3.type", "common_grams")
.put("index.analysis.filter.common_grams_3.query_mode", true)
.putList("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_3.type", "common_grams")
.put("index.analysis.filter.common_grams_3.query_mode", true)
.putList("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
String source = "the quick brown is a fox or why noT";
@ -221,11 +285,12 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
{
Settings settings = Settings.builder().put("index.analysis.filter.common_grams_4.type", "common_grams")
.put("index.analysis.filter.common_grams_4.query_mode", true)
.putList("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder()
.put("index.analysis.filter.common_grams_4.type", "common_grams")
.put("index.analysis.filter.common_grams_4.query_mode", true)
.putList("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_4");
String source = "the quick brown is a fox Or noT";
@ -239,20 +304,18 @@ public class CommonGramsTokenFilterFactoryTests extends OpenSearchTokenStreamTes
public void testQueryModeCommonGramsAnalysis() throws IOException {
String json = "/org/opensearch/analysis/common/commongrams_query_mode.json";
Settings settings = Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
.build();
.build();
{
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
{
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings).indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };

View File

@ -72,12 +72,14 @@ public class CompoundAnalysisTests extends OpenSearchTestCase {
}
public void testDictionaryDecompounder() throws Exception {
Settings[] settingsArr = new Settings[]{getJsonSettings(), getYamlSettings()};
Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
for (Settings settings : settingsArr) {
List<String> terms = analyze(settings, "decompoundingAnalyzer", "donaudampfschiff spargelcremesuppe");
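// the dictionary decompounder keeps each original compound token and adds its dictionary sub-words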
MatcherAssert.assertThat(terms.size(), equalTo(8));
MatcherAssert.assertThat(terms,
hasItems("donau", "dampf", "schiff", "donaudampfschiff", "spargel", "creme", "suppe", "spargelcremesuppe"));
MatcherAssert.assertThat(
terms,
hasItems("donau", "dampf", "schiff", "donaudampfschiff", "spargel", "creme", "suppe", "spargelcremesuppe")
);
}
}
@ -87,7 +89,7 @@ public class CompoundAnalysisTests extends OpenSearchTestCase {
IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
TokenStream stream = analyzer.tokenStream("" , text);
TokenStream stream = analyzer.tokenStream("", text);
stream.reset();
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
@ -112,18 +114,18 @@ public class CompoundAnalysisTests extends OpenSearchTestCase {
private Settings getJsonSettings() throws IOException {
String json = "/org/opensearch/analysis/common/test1.json";
return Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
}
private Settings getYamlSettings() throws IOException {
String yaml = "/org/opensearch/analysis/common/test1.yml";
return Settings.builder()
.loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
}
}

View File

@ -91,79 +91,52 @@ public class DisableGraphQueryTests extends OpenSearchSingleNodeTestCase {
.put("index.analysis.analyzer.text_shingle.tokenizer", "whitespace")
.put("index.analysis.analyzer.text_shingle.filter", "lowercase, shingle")
.put("index.analysis.analyzer.text_shingle_unigram.tokenizer", "whitespace")
.put("index.analysis.analyzer.text_shingle_unigram.filter",
"lowercase, shingle_unigram")
.put("index.analysis.analyzer.text_shingle_unigram.filter", "lowercase, shingle_unigram")
.build();
indexService = createIndex("test", settings, "t",
"text_shingle", "type=text,analyzer=text_shingle",
"text_shingle_unigram", "type=text,analyzer=text_shingle_unigram");
indexService = createIndex(
"test",
settings,
"t",
"text_shingle",
"type=text,analyzer=text_shingle",
"text_shingle_unigram",
"type=text,analyzer=text_shingle_unigram"
);
shardContext = indexService.newQueryShardContext(0, null, () -> 0L, null);
// parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
// that ignores position length attribute
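// (the shingle_unigram analyzer emits the unigram and the two-word shingle at the same position, which such parsers treat as synonyms)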
expectedQueryWithUnigram= new BooleanQuery.Builder()
expectedQueryWithUnigram = new BooleanQuery.Builder().add(
new SynonymQuery(new Term("text_shingle_unigram", "foo"), new Term("text_shingle_unigram", "foo bar")),
BooleanClause.Occur.SHOULD
)
.add(
new SynonymQuery(
new Term("text_shingle_unigram", "foo"),
new Term("text_shingle_unigram", "foo bar")
), BooleanClause.Occur.SHOULD)
.add(
new SynonymQuery(
new Term("text_shingle_unigram", "bar"),
new Term("text_shingle_unigram", "bar baz")
), BooleanClause.Occur.SHOULD)
.add(
new TermQuery(
new Term("text_shingle_unigram", "baz")
), BooleanClause.Occur.SHOULD)
new SynonymQuery(new Term("text_shingle_unigram", "bar"), new Term("text_shingle_unigram", "bar baz")),
BooleanClause.Occur.SHOULD
)
.add(new TermQuery(new Term("text_shingle_unigram", "baz")), BooleanClause.Occur.SHOULD)
.build();
// parsed query for "text_shingle_unigram:\"foo bar baz\" with query parsers
// that ignores position length attribute
expectedPhraseQueryWithUnigram = new MultiPhraseQuery.Builder()
.add(
new Term[] {
new Term("text_shingle_unigram", "foo"),
new Term("text_shingle_unigram", "foo bar")
}, 0)
.add(
new Term[] {
new Term("text_shingle_unigram", "bar"),
new Term("text_shingle_unigram", "bar baz")
}, 1)
.add(
new Term[] {
new Term("text_shingle_unigram", "baz"),
}, 2)
expectedPhraseQueryWithUnigram = new MultiPhraseQuery.Builder().add(
new Term[] { new Term("text_shingle_unigram", "foo"), new Term("text_shingle_unigram", "foo bar") },
0
)
.add(new Term[] { new Term("text_shingle_unigram", "bar"), new Term("text_shingle_unigram", "bar baz") }, 1)
.add(new Term[] { new Term("text_shingle_unigram", "baz"), }, 2)
.build();
// parsed query for "text_shingle:(foo bar baz)
expectedQuery = new BooleanQuery.Builder()
.add(
new TermQuery(new Term("text_shingle", "foo bar")),
BooleanClause.Occur.SHOULD
)
.add(
new TermQuery(new Term("text_shingle","bar baz")),
BooleanClause.Occur.SHOULD
)
.add(
new TermQuery(new Term("text_shingle","baz biz")),
BooleanClause.Occur.SHOULD
)
expectedQuery = new BooleanQuery.Builder().add(new TermQuery(new Term("text_shingle", "foo bar")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("text_shingle", "bar baz")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("text_shingle", "baz biz")), BooleanClause.Occur.SHOULD)
.build();
// parsed query for "text_shingle:"foo bar baz"
expectedPhraseQuery = new PhraseQuery.Builder()
.add(
new Term("text_shingle", "foo bar")
)
.add(
new Term("text_shingle","bar baz")
)
.add(
new Term("text_shingle","baz biz")
)
expectedPhraseQuery = new PhraseQuery.Builder().add(new Term("text_shingle", "foo bar"))
.add(new Term("text_shingle", "bar baz"))
.add(new Term("text_shingle", "baz biz"))
.build();
}
@ -176,20 +149,17 @@ public class DisableGraphQueryTests extends OpenSearchSingleNodeTestCase {
}
public void testMatchPhraseQuery() throws IOException {
MatchPhraseQueryBuilder builder =
new MatchPhraseQueryBuilder("text_shingle_unigram", "foo bar baz");
MatchPhraseQueryBuilder builder = new MatchPhraseQueryBuilder("text_shingle_unigram", "foo bar baz");
Query query = builder.toQuery(shardContext);
assertThat(expectedPhraseQueryWithUnigram, equalTo(query));
builder =
new MatchPhraseQueryBuilder("text_shingle", "foo bar baz biz");
builder = new MatchPhraseQueryBuilder("text_shingle", "foo bar baz biz");
query = builder.toQuery(shardContext);
assertThat(expectedPhraseQuery, equalTo(query));
}
public void testMatchQuery() throws IOException {
MatchQueryBuilder builder =
new MatchQueryBuilder("text_shingle_unigram", "foo bar baz");
MatchQueryBuilder builder = new MatchQueryBuilder("text_shingle_unigram", "foo bar baz");
Query query = builder.toQuery(shardContext);
assertThat(expectedQueryWithUnigram, equalTo(query));
@ -199,8 +169,7 @@ public class DisableGraphQueryTests extends OpenSearchSingleNodeTestCase {
}
public void testMultiMatchQuery() throws IOException {
MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("foo bar baz",
"text_shingle_unigram");
MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("foo bar baz", "text_shingle_unigram");
Query query = builder.toQuery(shardContext);
assertThat(expectedQueryWithUnigram, equalTo(query));

View File

@ -52,10 +52,11 @@ public class EdgeNGramTokenFilterFactoryTests extends OpenSearchTokenStreamTestC
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_edge_ngram.type", "edge_ngram")
.build(),
new CommonAnalysisPlugin());
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_edge_ngram");
String source = "foo";
String[] expected = new String[]{"f", "fo"};
String[] expected = new String[] { "f", "fo" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -68,10 +69,11 @@ public class EdgeNGramTokenFilterFactoryTests extends OpenSearchTokenStreamTestC
.put("index.analysis.filter.my_edge_ngram.type", "edge_ngram")
.put("index.analysis.filter.my_edge_ngram.preserve_original", true)
.build(),
new CommonAnalysisPlugin());
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_edge_ngram");
String source = "foo";
String[] expected = new String[]{"f", "fo", "foo"};
String[] expected = new String[] { "f", "fo", "foo" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);

View File

@ -55,39 +55,44 @@ import java.util.Collections;
public class EdgeNGramTokenizerTests extends OpenSearchTokenStreamTestCase {
private IndexAnalyzers buildAnalyzers(Version version, String tokenizer) throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, version)
.put("index.analysis.analyzer.my_analyzer.tokenizer", tokenizer)
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
return new AnalysisModule(TestEnvironment.newEnvironment(settings),
Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings);
return new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(new CommonAnalysisPlugin()))
.getAnalysisRegistry()
.build(idxSettings);
}
public void testPreConfiguredTokenizer() throws IOException {
// Before 7.3 we return ngrams of length 1 only
{
Version version = VersionUtils.randomVersionBetween(random(), LegacyESVersion.fromString("7.0.0"),
VersionUtils.getPreviousVersion(LegacyESVersion.fromString("7.3.0")));
Version version = VersionUtils.randomVersionBetween(
random(),
LegacyESVersion.fromString("7.0.0"),
VersionUtils.getPreviousVersion(LegacyESVersion.fromString("7.3.0"))
);
try (IndexAnalyzers indexAnalyzers = buildAnalyzers(version, "edge_ngram")) {
NamedAnalyzer analyzer = indexAnalyzers.get("my_analyzer");
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "test", new String[]{"t"});
assertAnalyzesTo(analyzer, "test", new String[] { "t" });
}
}
// Check deprecated name as well
{
Version version = VersionUtils.randomVersionBetween(random(), LegacyESVersion.fromString("7.0.0"),
VersionUtils.getPreviousVersion(LegacyESVersion.fromString("7.3.0")));
Version version = VersionUtils.randomVersionBetween(
random(),
LegacyESVersion.fromString("7.0.0"),
VersionUtils.getPreviousVersion(LegacyESVersion.fromString("7.3.0"))
);
try (IndexAnalyzers indexAnalyzers = buildAnalyzers(version, "edgeNGram")) {
NamedAnalyzer analyzer = indexAnalyzers.get("my_analyzer");
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "test", new String[]{"t"});
assertAnalyzesTo(analyzer, "test", new String[] { "t" });
}
}
@ -96,18 +101,21 @@ public class EdgeNGramTokenizerTests extends OpenSearchTokenStreamTestCase {
try (IndexAnalyzers indexAnalyzers = buildAnalyzers(Version.CURRENT, "edge_ngram")) {
NamedAnalyzer analyzer = indexAnalyzers.get("my_analyzer");
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "test", new String[]{"t", "te"});
assertAnalyzesTo(analyzer, "test", new String[] { "t", "te" });
}
}
// Check deprecated name as well, needs version before 8.0 because throws IAE after that
{
try (IndexAnalyzers indexAnalyzers = buildAnalyzers(
try (
IndexAnalyzers indexAnalyzers = buildAnalyzers(
VersionUtils.randomVersionBetween(random(), LegacyESVersion.fromString("7.3.0"), Version.CURRENT),
"edgeNGram")) {
"edgeNGram"
)
) {
NamedAnalyzer analyzer = indexAnalyzers.get("my_analyzer");
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "test", new String[]{"t", "te"});
assertAnalyzesTo(analyzer, "test", new String[] { "t", "te" });
}
}
@ -119,12 +127,19 @@ public class EdgeNGramTokenizerTests extends OpenSearchTokenStreamTestCase {
final String name = "engr";
final Settings indexSettings = newAnalysisSettingsBuilder().put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 2).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.putList("token_chars", "letter", "custom").put("custom_token_chars","_-").build();
Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name,
settings).create();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.putList("token_chars", "letter", "custom")
.put("custom_token_chars", "_-")
.build();
Tokenizer tokenizer = new EdgeNGramTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
name,
settings
).create();
tokenizer.setReader(new StringReader("Abc -gh _jk =lm"));
assertTokenStreamContents(tokenizer, new String[] {"Ab", "Abc", "-g", "-gh", "_j", "_jk", "lm"});
assertTokenStreamContents(tokenizer, new String[] { "Ab", "Abc", "-g", "-gh", "_j", "_jk", "lm" });
}
}

View File

@ -47,8 +47,10 @@ public class ElisionFilterFactoryTests extends OpenSearchTokenStreamTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()));
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())
);
assertEquals("elision filter requires [articles] or [articles_path] setting", e.getMessage());
}

View File

@ -40,34 +40,27 @@ public class FingerprintAnalyzerTests extends OpenSearchTokenStreamTestCase {
public void testFingerprint() throws Exception {
Analyzer a = new FingerprintAnalyzer(CharArraySet.EMPTY_SET, ' ', 255);
assertAnalyzesTo(a, "foo bar@baz Baz $ foo foo FOO. FoO",
new String[]{"bar baz foo"});
assertAnalyzesTo(a, "foo bar@baz Baz $ foo foo FOO. FoO", new String[] { "bar baz foo" });
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new FingerprintAnalyzer(CharArraySet.EMPTY_SET, ' ', 255);
assertAnalyzesTo(a, "foo bar baz Baz foo foo FOO. FoO",
new String[]{"bar baz foo"});
assertAnalyzesTo(a, "xyz XYZ abc 123.2 abc",
new String[]{"123.2 abc xyz"});
assertAnalyzesTo(a, "foo bar baz Baz foo foo FOO. FoO", new String[] { "bar baz foo" });
assertAnalyzesTo(a, "xyz XYZ abc 123.2 abc", new String[] { "123.2 abc xyz" });
}
public void testAsciifolding() throws Exception {
Analyzer a = new FingerprintAnalyzer(CharArraySet.EMPTY_SET, ' ', 255);
assertAnalyzesTo(a, "gödel escher bach",
new String[]{"bach escher godel"});
assertAnalyzesTo(a, "gödel escher bach", new String[] { "bach escher godel" });
assertAnalyzesTo(a, "gödel godel escher bach",
new String[]{"bach escher godel"});
assertAnalyzesTo(a, "gödel godel escher bach", new String[] { "bach escher godel" });
}
public void testLimit() throws Exception {
Analyzer a = new FingerprintAnalyzer(CharArraySet.EMPTY_SET, ' ', 3);
assertAnalyzesTo(a, "e d c b a",
new String[]{});
assertAnalyzesTo(a, "e d c b a", new String[] {});
assertAnalyzesTo(a, "b a",
new String[]{"a b"});
assertAnalyzesTo(a, "b a", new String[] { "a b" });
}
}

View File

@ -54,27 +54,32 @@ public class FlattenGraphTokenFilterFactoryTests extends OpenSearchTokenStreamTe
Settings settings = newAnalysisSettingsBuilder().build();
// "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input:
TokenStream in = new CannedTokenStream(0, 12, new Token[] {
token("wtf", 1, 5, 0, 3),
token("what", 0, 1, 0, 3),
token("wow", 0, 3, 0, 3),
token("the", 1, 1, 0, 3),
token("fudge", 1, 3, 0, 3),
token("that's", 1, 1, 0, 3),
token("funny", 1, 1, 0, 3),
token("happened", 1, 1, 4, 12)
});
TokenStream in = new CannedTokenStream(
0,
12,
new Token[] {
token("wtf", 1, 5, 0, 3),
token("what", 0, 1, 0, 3),
token("wow", 0, 3, 0, 3),
token("the", 1, 1, 0, 3),
token("fudge", 1, 3, 0, 3),
token("that's", 1, 1, 0, 3),
token("funny", 1, 1, 0, 3),
token("happened", 1, 1, 4, 12) }
);
TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in);
// ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
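// the assertion below checks, in order: terms, start offsets, end offsets, position increments, position lengths and the final offset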
assertTokenStreamContents(tokens,
new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
new int[] {0, 0, 0, 0, 0, 0, 0, 4},
new int[] {3, 3, 3, 3, 3, 3, 3, 12},
new int[] {1, 0, 0, 1, 0, 1, 0, 1},
new int[] {3, 1, 1, 1, 1, 1, 1, 1},
12);
assertTokenStreamContents(
tokens,
new String[] { "wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened" },
new int[] { 0, 0, 0, 0, 0, 0, 0, 4 },
new int[] { 3, 3, 3, 3, 3, 3, 3, 12 },
new int[] { 1, 0, 0, 1, 0, 1, 0, 1 },
new int[] { 3, 1, 1, 1, 1, 1, 1, 1 },
12
);
}
private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {

View File

@ -69,26 +69,29 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
}
public void testNgramHighlightingWithBrokenPositions() throws IOException {
assertAcked(prepareCreate("test")
.addMapping("test", jsonBuilder()
.startObject()
.startObject("test")
.startObject("properties")
.startObject("name")
.field("type", "text")
.startObject("fields")
.startObject("autocomplete")
.field("type", "text")
.field("analyzer", "autocomplete")
.field("search_analyzer", "search_autocomplete")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.endObject())
.setSettings(Settings.builder()
assertAcked(
prepareCreate("test").addMapping(
"test",
jsonBuilder().startObject()
.startObject("test")
.startObject("properties")
.startObject("name")
.field("type", "text")
.startObject("fields")
.startObject("autocomplete")
.field("type", "text")
.field("analyzer", "autocomplete")
.field("search_analyzer", "search_autocomplete")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
)
.setSettings(
Settings.builder()
.put(indexSettings())
.put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 19)
.put("analysis.tokenizer.autocomplete.max_gram", 20)
@ -96,15 +99,31 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
.put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
.put("analysis.tokenizer.autocomplete.type", "ngram")
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
.putList("analysis.filter.wordDelimiter.type_table",
"& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",
"? => ALPHANUM", ". => ALPHANUM", "- => ALPHANUM",
"# => ALPHANUM", "% => ALPHANUM", "+ => ALPHANUM",
", => ALPHANUM", "~ => ALPHANUM", ": => ALPHANUM",
"/ => ALPHANUM", "^ => ALPHANUM", "$ => ALPHANUM",
"@ => ALPHANUM", ") => ALPHANUM", "( => ALPHANUM",
"] => ALPHANUM", "[ => ALPHANUM", "} => ALPHANUM",
"{ => ALPHANUM")
.putList(
"analysis.filter.wordDelimiter.type_table",
"& => ALPHANUM",
"| => ALPHANUM",
"! => ALPHANUM",
"? => ALPHANUM",
". => ALPHANUM",
"- => ALPHANUM",
"# => ALPHANUM",
"% => ALPHANUM",
"+ => ALPHANUM",
", => ALPHANUM",
"~ => ALPHANUM",
": => ALPHANUM",
"/ => ALPHANUM",
"^ => ALPHANUM",
"$ => ALPHANUM",
"@ => ALPHANUM",
") => ALPHANUM",
"( => ALPHANUM",
"] => ALPHANUM",
"[ => ALPHANUM",
"} => ALPHANUM",
"{ => ALPHANUM"
)
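// mapping these characters to ALPHANUM keeps the word_delimiter filter from splitting tokens on them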
.put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
.put("analysis.filter.wordDelimiter.generate_word_parts", true)
.put("analysis.filter.wordDelimiter.generate_number_parts", false)
@ -113,19 +132,19 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
.put("analysis.filter.wordDelimiter.catenate_all", false)
.put("analysis.analyzer.autocomplete.tokenizer", "autocomplete")
.putList("analysis.analyzer.autocomplete.filter",
"lowercase", "wordDelimiter")
.putList("analysis.analyzer.autocomplete.filter", "lowercase", "wordDelimiter")
.put("analysis.analyzer.search_autocomplete.tokenizer", "whitespace")
.putList("analysis.analyzer.search_autocomplete.filter",
"lowercase", "wordDelimiter")));
client().prepareIndex("test", "test", "1")
.setSource("name", "ARCOTEL Hotels Deutschland").get();
.putList("analysis.analyzer.search_autocomplete.filter", "lowercase", "wordDelimiter")
)
);
client().prepareIndex("test", "test", "1").setSource("name", "ARCOTEL Hotels Deutschland").get();
refresh();
SearchResponse search = client().prepareSearch("test").setTypes("test")
.setQuery(matchQuery("name.autocomplete", "deut tel").operator(Operator.OR))
.highlighter(new HighlightBuilder().field("name.autocomplete")).get();
assertHighlight(search, 0, "name.autocomplete", 0,
equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
SearchResponse search = client().prepareSearch("test")
.setTypes("test")
.setQuery(matchQuery("name.autocomplete", "deut tel").operator(Operator.OR))
.highlighter(new HighlightBuilder().field("name.autocomplete"))
.get();
assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
}
public void testMultiPhraseCutoff() throws IOException {
@ -133,48 +152,70 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
* MultiPhraseQuery can literally kill an entire node if there are too many terms in the
* query. We cut off and extract terms if there are more than 16 terms in the query
*/
assertAcked(prepareCreate("test")
.addMapping("test", "body", "type=text,analyzer=custom_analyzer,"
+ "search_analyzer=custom_analyzer,term_vector=with_positions_offsets")
assertAcked(
prepareCreate("test").addMapping(
"test",
"body",
"type=text,analyzer=custom_analyzer," + "search_analyzer=custom_analyzer,term_vector=with_positions_offsets"
)
.setSettings(
Settings.builder().put(indexSettings())
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
.put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
.put("analysis.filter.wordDelimiter.generate_word_parts", true)
.put("analysis.filter.wordDelimiter.generate_number_parts", true)
.put("analysis.filter.wordDelimiter.catenate_words", true)
.put("analysis.filter.wordDelimiter.catenate_numbers", true)
.put("analysis.filter.wordDelimiter.catenate_all", false)
.put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
.putList("analysis.analyzer.custom_analyzer.filter",
"lowercase", "wordDelimiter"))
Settings.builder()
.put(indexSettings())
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
.put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
.put("analysis.filter.wordDelimiter.generate_word_parts", true)
.put("analysis.filter.wordDelimiter.generate_number_parts", true)
.put("analysis.filter.wordDelimiter.catenate_words", true)
.put("analysis.filter.wordDelimiter.catenate_numbers", true)
.put("analysis.filter.wordDelimiter.catenate_all", false)
.put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
.putList("analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
)
);
ensureGreen();
client().prepareIndex("test", "test", "1")
.setSource("body", "Test: http://www.facebook.com http://elasticsearch.org "
.setSource(
"body",
"Test: http://www.facebook.com http://elasticsearch.org "
+ "http://xing.com http://cnn.com http://quora.com http://twitter.com this is "
+ "a test for highlighting feature Test: http://www.facebook.com "
+ "http://elasticsearch.org http://xing.com http://cnn.com http://quora.com "
+ "http://twitter.com this is a test for highlighting feature")
+ "http://twitter.com this is a test for highlighting feature"
)
.get();
refresh();
SearchResponse search = client().prepareSearch()
.setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com "))
.highlighter(new HighlightBuilder().field("body").highlighterType("fvh")).get();
.setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com "))
.highlighter(new HighlightBuilder().field("body").highlighterType("fvh"))
.get();
assertHighlight(search, 0, "body", 0, startsWith("<em>Test: http://www.facebook.com</em>"));
search = client()
.prepareSearch()
.setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com "
search = client().prepareSearch()
.setQuery(
matchPhraseQuery(
"body",
"Test: http://www.facebook.com "
+ "http://elasticsearch.org http://xing.com http://cnn.com "
+ "http://quora.com http://twitter.com this is a test for highlighting "
+ "feature Test: http://www.facebook.com http://elasticsearch.org "
+ "http://xing.com http://cnn.com http://quora.com http://twitter.com this "
+ "is a test for highlighting feature"))
.highlighter(new HighlightBuilder().field("body").highlighterType("fvh")).execute().actionGet();
assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: "
+ "<em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> "
+ "<em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
+ "is a test for highlighting feature"
)
)
.highlighter(new HighlightBuilder().field("body").highlighterType("fvh"))
.execute()
.actionGet();
assertHighlight(
search,
0,
"body",
0,
equalTo(
"<em>Test</em>: "
+ "<em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> "
+ "<em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"
)
);
}
public void testSynonyms() throws IOException {
@ -185,36 +226,29 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "fast,quick");
assertAcked(prepareCreate("test").setSettings(builder.build())
.addMapping("type1", "field1",
"type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
"analyzer=standard,index_options=offsets"));
assertAcked(
prepareCreate("test").setSettings(builder.build())
.addMapping(
"type1",
"field1",
"type=text,term_vector=with_positions_offsets,search_analyzer=synonym," + "analyzer=standard,index_options=offsets"
)
);
ensureGreen();
client().prepareIndex("test", "type1", "0").setSource(
"field1", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("test", "type1", "0").setSource("field1", "The quick brown fox jumps over the lazy dog").get();
refresh();
for (String highlighterType : new String[] {"plain", "fvh", "unified"}) {
for (String highlighterType : new String[] { "plain", "fvh", "unified" }) {
logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1");
SearchSourceBuilder source = searchSource()
.query(matchQuery("field1", "quick brown fox").operator(Operator.AND))
.highlighter(
highlight()
.field("field1")
.order("score")
.preTags("<x>")
.postTags("</x>")
.highlighterType(highlighterType));
SearchSourceBuilder source = searchSource().query(matchQuery("field1", "quick brown fox").operator(Operator.AND))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>").highlighterType(highlighterType));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
source = searchSource()
.query(matchQuery("field1", "fast brown fox").operator(Operator.AND))
source = searchSource().query(matchQuery("field1", "fast brown fox").operator(Operator.AND))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
}
}
@ -230,93 +264,139 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
ensureGreen();
client().prepareIndex("first_test_index", "type1", "0").setSource(
"field0", "The quick brown fox jumps over the lazy dog",
"field1", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("first_test_index", "type1", "1").setSource("field1",
"The quick browse button is a fancy thing, right bro?").get();
client().prepareIndex("first_test_index", "type1", "0")
.setSource("field0", "The quick brown fox jumps over the lazy dog", "field1", "The quick brown fox jumps over the lazy dog")
.get();
client().prepareIndex("first_test_index", "type1", "1")
.setSource("field1", "The quick browse button is a fancy thing, right bro?")
.get();
refresh();
logger.info("--> highlighting and searching on field0");
SearchSourceBuilder source = searchSource()
.query(matchPhrasePrefixQuery("field0", "bro"))
SearchSourceBuilder source = searchSource().query(matchPhrasePrefixQuery("field0", "bro"))
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog"));
source = searchSource()
.query(matchPhrasePrefixQuery("field0", "quick bro"))
source = searchSource().query(matchPhrasePrefixQuery("field0", "quick bro"))
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
logger.info("--> highlighting and searching on field1");
source = searchSource()
.query(boolQuery()
.should(matchPhrasePrefixQuery("field1", "test"))
.should(matchPhrasePrefixQuery("field1", "bro"))
)
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
source = searchSource().query(
boolQuery().should(matchPhrasePrefixQuery("field1", "test")).should(matchPhrasePrefixQuery("field1", "bro"))
).highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertThat(searchResponse.getHits().getTotalHits().value, equalTo(2L));
for (int i = 0; i < 2; i++) {
assertHighlight(searchResponse, i, "field1", 0, 1, anyOf(
equalTo("The quick <x>browse</x> button is a fancy thing, right <x>bro</x>?"),
equalTo("The quick <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(
searchResponse,
i,
"field1",
0,
1,
anyOf(
equalTo("The quick <x>browse</x> button is a fancy thing, right <x>bro</x>?"),
equalTo("The quick <x>brown</x> fox jumps over the lazy dog")
)
);
}
source = searchSource()
.query(matchPhrasePrefixQuery("field1", "quick bro"))
source = searchSource().query(matchPhrasePrefixQuery("field1", "quick bro"))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(
searchResponse,
0,
"field1",
0,
1,
anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
)
);
assertHighlight(
searchResponse,
1,
"field1",
0,
1,
anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
)
);
assertAcked(prepareCreate("second_test_index").setSettings(builder.build()).addMapping("doc",
"field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym",
"field3", "type=text,analyzer=synonym"));
assertAcked(
prepareCreate("second_test_index").setSettings(builder.build())
.addMapping(
"doc",
"field4",
"type=text,term_vector=with_positions_offsets,analyzer=synonym",
"field3",
"type=text,analyzer=synonym"
)
);
// with synonyms
client().prepareIndex("second_test_index", "doc", "0").setSource(
"type", "type2",
"field4", "The quick brown fox jumps over the lazy dog",
"field3", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("second_test_index", "doc", "1").setSource(
"type", "type2",
"field4", "The quick browse button is a fancy thing, right bro?").get();
client().prepareIndex("second_test_index", "doc", "2").setSource(
"type", "type2",
"field4", "a quick fast blue car").get();
client().prepareIndex("second_test_index", "doc", "0")
.setSource(
"type",
"type2",
"field4",
"The quick brown fox jumps over the lazy dog",
"field3",
"The quick brown fox jumps over the lazy dog"
)
.get();
client().prepareIndex("second_test_index", "doc", "1")
.setSource("type", "type2", "field4", "The quick browse button is a fancy thing, right bro?")
.get();
client().prepareIndex("second_test_index", "doc", "2").setSource("type", "type2", "field4", "a quick fast blue car").get();
refresh();
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field3", "fast bro"))
source = searchSource().postFilter(termQuery("type", "type2"))
.query(matchPhrasePrefixQuery("field3", "fast bro"))
.highlighter(highlight().field("field3").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field3", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro"))
source = searchSource().postFilter(termQuery("type", "type2"))
.query(matchPhrasePrefixQuery("field4", "the fast bro"))
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(
searchResponse,
0,
"field4",
0,
1,
anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
)
);
assertHighlight(
searchResponse,
1,
"field4",
0,
1,
anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
)
);
logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2"))
@ -324,17 +404,31 @@ public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field4", 0, 1,
anyOf(equalTo("<x>a quick fast blue car</x>"),
equalTo("<x>a</x> <x>quick</x> <x>fast</x> <x>blue</x> <x>car</x>")));
assertHighlight(
searchResponse,
0,
"field4",
0,
1,
anyOf(equalTo("<x>a quick fast blue car</x>"), equalTo("<x>a</x> <x>quick</x> <x>fast</x> <x>blue</x> <x>car</x>"))
);
}
public static XContentBuilder type1TermVectorMapping() throws IOException {
return XContentFactory.jsonBuilder().startObject().startObject("type1")
return XContentFactory.jsonBuilder()
.startObject()
.startObject("type1")
.startObject("properties")
.startObject("field1").field("type", "text").field("term_vector", "with_positions_offsets").endObject()
.startObject("field2").field("type", "text").field("term_vector", "with_positions_offsets").endObject()
.startObject("field1")
.field("type", "text")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject().endObject();
.startObject("field2")
.field("type", "text")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject()
.endObject();
}
}

View File

@ -47,25 +47,29 @@ import java.io.IOException;
import java.io.StringReader;
import java.util.Map;
public class HtmlStripCharFilterFactoryTests extends OpenSearchTestCase {
/**
* Check that the deprecated name "htmlStrip" issues a deprecation warning for indices created since 6.3.0
*/
public void testDeprecationWarning() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_3_0, Version.CURRENT))
.build();
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_3_0, Version.CURRENT)
)
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
Map<String, CharFilterFactory> charFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).charFilter;
CharFilterFactory charFilterFactory = charFilters.get("htmlStrip");
assertNotNull(charFilterFactory.create(new StringReader("input")));
assertWarnings("The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead.");
assertWarnings(
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [html_strip] instead."
);
}
}
@ -73,10 +77,13 @@ public class HtmlStripCharFilterFactoryTests extends OpenSearchTestCase {
* Check that the deprecated name "htmlStrip" does NOT issue a deprecation warning for indices created before 6.3.0
*/
public void testNoDeprecationWarningPre6_3() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_0_0, LegacyESVersion.V_6_2_4))
.build();
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_6_0_0, LegacyESVersion.V_6_2_4)
)
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {

View File

@ -52,51 +52,49 @@ public class KeepFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testLoadWithoutSettings() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(
createTempDir(), RESOURCE, new CommonAnalysisPlugin());
createTempDir(),
RESOURCE,
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("keep");
Assert.assertNull(tokenFilter);
}
public void testLoadOverConfiguredSettings() {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.broken_keep_filter.type", "keep")
.put("index.analysis.filter.broken_keep_filter.keep_words_path", "does/not/exists.txt")
.put("index.analysis.filter.broken_keep_filter.keep_words", "[\"Hello\", \"worlD\"]")
.build();
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.broken_keep_filter.type", "keep")
.put("index.analysis.filter.broken_keep_filter.keep_words_path", "does/not/exists.txt")
.put("index.analysis.filter.broken_keep_filter.keep_words", "[\"Hello\", \"worlD\"]")
.build();
try {
AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
Assert.fail("path and array are configured");
} catch (IllegalArgumentException e) {
} catch (IOException e) {
} catch (IllegalArgumentException e) {} catch (IOException e) {
fail("expected IAE");
}
}
public void testKeepWordsPathSettings() {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.non_broken_keep_filter.type", "keep")
.put("index.analysis.filter.non_broken_keep_filter.keep_words_path", "does/not/exists.txt")
.build();
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.non_broken_keep_filter.type", "keep")
.put("index.analysis.filter.non_broken_keep_filter.keep_words_path", "does/not/exists.txt")
.build();
try {
// test that our non-existent setup is picked up
AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
fail("expected an exception due to non existent keep_words_path");
} catch (IllegalArgumentException e) {
} catch (IOException e) {
} catch (IllegalArgumentException e) {} catch (IOException e) {
fail("expected IAE");
}
settings = Settings.builder().put(settings)
.putList("index.analysis.filter.non_broken_keep_filter.keep_words", "test")
.build();
settings = Settings.builder().put(settings).putList("index.analysis.filter.non_broken_keep_filter.keep_words", "test").build();
try {
// test that our non-existent setup is picked up
AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
fail("expected an exception indicating that you can't use [keep_words_path] with [keep_words] ");
} catch (IllegalArgumentException e) {
} catch (IOException e) {
} catch (IllegalArgumentException e) {} catch (IOException e) {
fail("expected IAE");
}
@ -104,25 +102,31 @@ public class KeepFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testCaseInsensitiveMapping() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(
createTempDir(), RESOURCE, new CommonAnalysisPlugin());
createTempDir(),
RESOURCE,
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keep_filter");
assertThat(tokenFilter, instanceOf(KeepWordFilterFactory.class));
String source = "hello small world";
String[] expected = new String[]{"hello", "world"};
String[] expected = new String[] { "hello", "world" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[]{1, 2});
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] { 1, 2 });
}
public void testCaseSensitiveMapping() throws IOException {
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(
createTempDir(), RESOURCE, new CommonAnalysisPlugin());
createTempDir(),
RESOURCE,
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_case_sensitive_keep_filter");
assertThat(tokenFilter, instanceOf(KeepWordFilterFactory.class));
String source = "Hello small world";
String[] expected = new String[]{"Hello"};
String[] expected = new String[] { "Hello" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[]{1});
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] { 1 });
}
}

View File

@ -51,13 +51,16 @@ public class KeepTypesFilterFactoryTests extends OpenSearchTokenStreamTestCase {
private static final String BASE_SETTING = "index.analysis.filter.keep_numbers";
public void testKeepTypesInclude() throws IOException {
Settings.Builder settingsBuilder = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(BASE_SETTING + ".type", "keep_types")
.putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" });
Settings.Builder settingsBuilder = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(BASE_SETTING + ".type", "keep_types")
.putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" });
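// <NUM> is the type the standard tokenizer assigns to numeric tokens; <SOMETHINGELSE> is a type that never occurs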
// either use default mode or set "include" mode explicitly
if (random().nextBoolean()) {
settingsBuilder.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY,
KeepTypesFilterFactory.KeepTypesMode.INCLUDE);
settingsBuilder.put(
BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY,
KeepTypesFilterFactory.KeepTypesMode.INCLUDE
);
}
Settings settings = settingsBuilder.build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
@ -71,10 +74,12 @@ public class KeepTypesFilterFactoryTests extends OpenSearchTokenStreamTestCase {
}
public void testKeepTypesExclude() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(BASE_SETTING + ".type", "keep_types")
.putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" })
.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, KeepTypesFilterFactory.KeepTypesMode.EXCLUDE).build();
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(BASE_SETTING + ".type", "keep_types")
.putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" })
.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, KeepTypesFilterFactory.KeepTypesMode.EXCLUDE)
.build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("keep_numbers");
assertThat(tokenFilter, instanceOf(KeepTypesFilterFactory.class));
@ -86,12 +91,16 @@ public class KeepTypesFilterFactoryTests extends OpenSearchTokenStreamTestCase {
}
public void testKeepTypesException() throws IOException {
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(BASE_SETTING + ".type", "keep_types")
.putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" })
.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, "bad_parameter").build();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()));
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(BASE_SETTING + ".type", "keep_types")
.putList(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_KEY, new String[] { "<NUM>", "<SOMETHINGELSE>" })
.put(BASE_SETTING + "." + KeepTypesFilterFactory.KEEP_TYPES_MODE_KEY, "bad_parameter")
.build();
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())
);
assertEquals("`keep_types` tokenfilter mode can only be [include] or [exclude] but was [bad_parameter].", ex.getMessage());
}
}

View File

@ -72,8 +72,7 @@ public class KeywordMarkerFilterFactoryTests extends OpenSearchTokenStreamTestCa
assertThat(filter, instanceOf(SetKeywordMarkerFilter.class));
NamedAnalyzer analyzer = analysis.indexAnalyzers.get("my_keyword");
// jogging is not part of the keywords set, so verify that it's the only stemmed word
assertAnalyzesTo(analyzer, "running jogging sleeping",
new String[] { "running", "jog", "sleeping" });
assertAnalyzesTo(analyzer, "running jogging sleeping", new String[] { "running", "jog", "sleeping" });
}
/**
@ -111,9 +110,10 @@ public class KeywordMarkerFilterFactoryTests extends OpenSearchTokenStreamTestCa
.put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()));
assertEquals("cannot specify both `keywords_pattern` and `keywords` or `keywords_path`",
e.getMessage());
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())
);
assertEquals("cannot specify both `keywords_pattern` and `keywords` or `keywords_path`", e.getMessage());
}
}

View File

@ -47,9 +47,9 @@ import java.io.StringReader;
public class LimitTokenCountFilterFactoryTests extends OpenSearchTokenStreamTestCase {
public void testDefault() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.limit_default.type", "limit")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.limit_default.type", "limit")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
{
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_default");
@ -72,11 +72,11 @@ public class LimitTokenCountFilterFactoryTests extends OpenSearchTokenStreamTest
public void testSettings() throws IOException {
{
Settings settings = Settings.builder()
.put("index.analysis.filter.limit_1.type", "limit")
.put("index.analysis.filter.limit_1.max_token_count", 3)
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.limit_1.type", "limit")
.put("index.analysis.filter.limit_1.max_token_count", 3)
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
String source = "the quick brown fox";
@ -87,11 +87,11 @@ public class LimitTokenCountFilterFactoryTests extends OpenSearchTokenStreamTest
}
{
Settings settings = Settings.builder()
.put("index.analysis.filter.limit_1.type", "limit")
.put("index.analysis.filter.limit_1.max_token_count", 3)
.put("index.analysis.filter.limit_1.consume_all_tokens", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.limit_1.type", "limit")
.put("index.analysis.filter.limit_1.max_token_count", 3)
.put("index.analysis.filter.limit_1.consume_all_tokens", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
String source = "the quick brown fox";
@ -103,11 +103,11 @@ public class LimitTokenCountFilterFactoryTests extends OpenSearchTokenStreamTest
{
Settings settings = Settings.builder()
.put("index.analysis.filter.limit_1.type", "limit")
.put("index.analysis.filter.limit_1.max_token_count", 17)
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.limit_1.type", "limit")
.put("index.analysis.filter.limit_1.max_token_count", 17)
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
String source = "the quick brown fox";

View File

@ -51,13 +51,18 @@ public class MassiveWordListTests extends OpenSearchSingleNodeTestCase {
for (int i = 0; i < wordList.length; i++) {
wordList[i] = "hello world";
}
client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", 1)
.put("analysis.analyzer.test_analyzer.type", "custom")
.put("analysis.analyzer.test_analyzer.tokenizer", "standard")
.putList("analysis.analyzer.test_analyzer.filter", "dictionary_decompounder", "lowercase")
.put("analysis.filter.dictionary_decompounder.type", "dictionary_decompounder")
.putList("analysis.filter.dictionary_decompounder.word_list", wordList)
).get();
client().admin()
.indices()
.prepareCreate("test")
.setSettings(
Settings.builder()
.put("index.number_of_shards", 1)
.put("analysis.analyzer.test_analyzer.type", "custom")
.put("analysis.analyzer.test_analyzer.tokenizer", "standard")
.putList("analysis.analyzer.test_analyzer.filter", "dictionary_decompounder", "lowercase")
.put("analysis.filter.dictionary_decompounder.type", "dictionary_decompounder")
.putList("analysis.filter.dictionary_decompounder.word_list", wordList)
)
.get();
}
}

View File

@ -49,9 +49,7 @@ public class MinHashFilterFactoryTests extends OpenSearchTokenStreamTestCase {
int default_hash_count = 1;
int default_bucket_size = 512;
int default_hash_set_size = 1;
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("min_hash");
String source = "the quick brown fox";
@ -60,8 +58,7 @@ public class MinHashFilterFactoryTests extends OpenSearchTokenStreamTestCase {
// with_rotation is true by default, and hash_set_size is 1, so even though the source doesn't
// have enough tokens to fill all the buckets, we still expect 512 tokens.
assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer),
default_hash_count * default_bucket_size * default_hash_set_size);
assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer), default_hash_count * default_bucket_size * default_hash_set_size);
}
public void testSettings() throws IOException {

View File

@ -50,9 +50,7 @@ import java.util.Collections;
public class MultiplexerTokenFilterTests extends OpenSearchTokenStreamTestCase {
public void testMultiplexingFilter() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put("index.analysis.filter.t.type", "truncate")
@ -65,30 +63,27 @@ public class MultiplexerTokenFilterTests extends OpenSearchTokenStreamTestCase {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings),
Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings);
IndexAnalyzers indexAnalyzers = new AnalysisModule(
TestEnvironment.newEnvironment(settings),
Collections.singletonList(new CommonAnalysisPlugin())
).getAnalysisRegistry().build(idxSettings);
try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) {
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "ONe tHree", new String[]{
"ONe", "on", "ONE", "tHree", "th", "THREE"
}, new int[]{
1, 0, 0, 1, 0, 0
});
assertAnalyzesTo(
analyzer,
"ONe tHree",
new String[] { "ONe", "on", "ONE", "tHree", "th", "THREE" },
new int[] { 1, 0, 0, 1, 0, 0 }
);
// Duplicates are removed
assertAnalyzesTo(analyzer, "ONe THREE", new String[]{
"ONe", "on", "ONE", "THREE", "th"
}, new int[]{
1, 0, 0, 1, 0, 0
});
assertAnalyzesTo(analyzer, "ONe THREE", new String[] { "ONe", "on", "ONE", "THREE", "th" }, new int[] { 1, 0, 0, 1, 0, 0 });
}
}
public void testMultiplexingNoOriginal() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put("index.analysis.filter.t.type", "truncate")
@ -102,16 +97,14 @@ public class MultiplexerTokenFilterTests extends OpenSearchTokenStreamTestCase {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings),
Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings);
IndexAnalyzers indexAnalyzers = new AnalysisModule(
TestEnvironment.newEnvironment(settings),
Collections.singletonList(new CommonAnalysisPlugin())
).getAnalysisRegistry().build(idxSettings);
try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) {
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "ONe tHree", new String[]{
"on", "ONE", "th", "THREE"
}, new int[]{
1, 0, 1, 0,
});
assertAnalyzesTo(analyzer, "ONe tHree", new String[] { "on", "ONE", "th", "THREE" }, new int[] { 1, 0, 1, 0, });
}
}

View File

@ -52,10 +52,11 @@ public class NGramTokenFilterFactoryTests extends OpenSearchTokenStreamTestCase
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_ngram.type", "ngram")
.build(),
new CommonAnalysisPlugin());
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_ngram");
String source = "foo";
String[] expected = new String[]{"f", "fo", "o", "oo", "o"};
String[] expected = new String[] { "f", "fo", "o", "oo", "o" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@ -68,10 +69,11 @@ public class NGramTokenFilterFactoryTests extends OpenSearchTokenStreamTestCase
.put("index.analysis.filter.my_ngram.type", "ngram")
.put("index.analysis.filter.my_ngram.preserve_original", true)
.build(),
new CommonAnalysisPlugin());
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_ngram");
String source = "foo";
String[] expected = new String[]{"f", "fo", "o", "oo", "o", "foo"};
String[] expected = new String[] { "f", "fo", "o", "oo", "o", "foo" };
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);

View File

@ -61,30 +61,43 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
final Settings indexSettings = newAnalysisSettingsBuilder().build();
final IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) {
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", tokenChars).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.put("token_chars", tokenChars)
.build();
new NGramTokenizerFactory(indexProperties, null, name, settings).create();
// no exception
}
{
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "DIRECTIONALITY_UNDEFINED").build();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> new NGramTokenizerFactory(indexProperties, null, name, settings).create());
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.put("token_chars", "DIRECTIONALITY_UNDEFINED")
.build();
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> new NGramTokenizerFactory(indexProperties, null, name, settings).create()
);
assertEquals("Unknown token type: 'directionality_undefined'", ex.getMessage().substring(0, 46));
assertTrue(ex.getMessage().contains("custom"));
}
{
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "custom")
.put("custom_token_chars", "_-").build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.put("token_chars", "custom")
.put("custom_token_chars", "_-")
.build();
new NGramTokenizerFactory(indexProperties, null, name, settings).create();
// no exception
}
{
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "custom")
.build();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> new NGramTokenizerFactory(indexProperties, null, name, settings).create());
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.put("token_chars", "custom")
.build();
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> new NGramTokenizerFactory(indexProperties, null, name, settings).create()
);
assertEquals("Token type: 'custom' requires setting `custom_token_chars`", ex.getMessage());
}
}
@ -94,12 +107,14 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 2).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
.putList("token_chars", new String[0]).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 4)
.putList("token_chars", new String[0])
.build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader("1.34"));
assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
assertTokenStreamContents(tokenizer, new String[] { "1.", "1.3", "1.34", ".3", ".34", "34" });
}
public void testCustomTokenChars() throws IOException {
@ -107,12 +122,15 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 2).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.putList("token_chars", "letter", "custom").put("custom_token_chars","_-").build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.putList("token_chars", "letter", "custom")
.put("custom_token_chars", "_-")
.build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader("Abc -gh _jk =lm"));
assertTokenStreamContents(tokenizer, new String[] {"Ab", "Abc", "bc", "-g", "-gh", "gh", "_j", "_jk", "jk", "lm"});
assertTokenStreamContents(tokenizer, new String[] { "Ab", "Abc", "bc", "-g", "-gh", "gh", "_j", "_jk", "jk", "lm" });
}
public void testPreTokenization() throws IOException {
@ -120,19 +138,21 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "letter,digit").build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
assertTokenStreamContents(tokenizer,
new String[] {"Åb", "Åbc", "bc", "", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
assertTokenStreamContents(
tokenizer,
new String[] { "Åb", "Åbc", "bc", "", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f" }
);
settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.put("token_chars", "letter,digit,punctuation,whitespace,symbol")
.build();
tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer,
new String[] {" a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9"});
assertTokenStreamContents(tokenizer, new String[] { " a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9" });
}
public void testPreTokenizationEdge() throws IOException {
@ -141,18 +161,22 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
Tokenizer tokenizer =
new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
Tokenizer tokenizer = new EdgeNGramTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
name,
settings
).create();
tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
assertTokenStreamContents(tokenizer,
new String[] {"Åb", "Åbc", "", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
assertTokenStreamContents(tokenizer, new String[] { "Åb", "Åbc", "", "déf", "g\uD801\uDC00", "g\uD801\uDC00f" });
settings = newAnalysisSettingsBuilder().put("min_gram", 2)
.put("max_gram", 3)
.put("token_chars", "letter,digit,punctuation,whitespace,symbol")
.build();
tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer,
new String[] {" a", " a!"});
assertTokenStreamContents(tokenizer, new String[] { " a", " a!" });
}
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
@ -170,9 +194,12 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetadata.SETTING_VERSION_CREATED, v.id).build();
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
TokenStream edgeNGramTokenFilter =
new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create(tokenizer);
TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
name,
settings
).create(tokenizer);
if (reverse) {
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
} else {
@ -185,7 +212,7 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
* test that throws an error when trying to get a NGramTokenizer where difference between max_gram and min_gram
* is greater than the allowed value of max_ngram_diff
*/
public void testMaxNGramDiffException() throws Exception{
public void testMaxNGramDiffException() throws Exception {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
@ -197,12 +224,19 @@ public class NGramTokenizerFactoryTests extends OpenSearchTokenStreamTestCase {
int max_gram = min_gram + ngramDiff;
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", min_gram).put("max_gram", max_gram).build();
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
new NGramTokenizerFactory(indexProperties, null, name, settings).create());
IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
() -> new NGramTokenizerFactory(indexProperties, null, name, settings).create()
);
assertEquals(
"The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+ maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.",
ex.getMessage());
+ maxAllowedNgramDiff
+ "] but was ["
+ ngramDiff
+ "]. This limit can be set by changing the ["
+ IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey()
+ "] index level setting.",
ex.getMessage()
);
}
}

View File

@ -52,23 +52,20 @@ public class OpenSearchSolrSynonymParserTests extends OpenSearchTokenStreamTestC
public void testLenientParser() throws IOException, ParseException {
OpenSearchSolrSynonymParser parser = new OpenSearchSolrSynonymParser(true, false, true, new StandardAnalyzer());
String rules =
"&,and\n" +
"come,advance,approach\n";
String rules = "&,and\n" + "come,advance,approach\n";
StringReader rulesReader = new StringReader(rules);
parser.parse(rulesReader);
SynonymMap synonymMap = parser.build();
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader("approach quietly then advance & destroy"));
TokenStream ts = new SynonymFilter(tokenizer, synonymMap, false);
assertTokenStreamContents(ts, new String[]{"come", "quietly", "then", "come", "destroy"});
assertTokenStreamContents(ts, new String[] { "come", "quietly", "then", "come", "destroy" });
}
public void testLenientParserWithSomeIncorrectLines() throws IOException, ParseException {
CharArraySet stopSet = new CharArraySet(1, true);
stopSet.add("bar");
OpenSearchSolrSynonymParser parser =
new OpenSearchSolrSynonymParser(true, false, true, new StandardAnalyzer(stopSet));
OpenSearchSolrSynonymParser parser = new OpenSearchSolrSynonymParser(true, false, true, new StandardAnalyzer(stopSet));
String rules = "foo,bar,baz";
StringReader rulesReader = new StringReader(rules);
parser.parse(rulesReader);
@ -76,14 +73,12 @@ public class OpenSearchSolrSynonymParserTests extends OpenSearchTokenStreamTestC
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader("first word is foo, then bar and lastly baz"));
TokenStream ts = new SynonymFilter(new StopFilter(tokenizer, stopSet), synonymMap, false);
assertTokenStreamContents(ts, new String[]{"first", "word", "is", "foo", "then", "and", "lastly", "foo"});
assertTokenStreamContents(ts, new String[] { "first", "word", "is", "foo", "then", "and", "lastly", "foo" });
}
public void testNonLenientParser() {
OpenSearchSolrSynonymParser parser = new OpenSearchSolrSynonymParser(true, false, false, new StandardAnalyzer());
String rules =
"&,and=>and\n" +
"come,advance,approach\n";
String rules = "&,and=>and\n" + "come,advance,approach\n";
StringReader rulesReader = new StringReader(rules);
ParseException ex = expectThrows(ParseException.class, () -> parser.parse(rulesReader));
assertThat(ex.getMessage(), containsString("Invalid synonym rule at line 1"));

View File

@ -52,47 +52,41 @@ public class OpenSearchWordnetSynonymParserTests extends OpenSearchTokenStreamTe
public void testLenientParser() throws IOException, ParseException {
OpenSearchWordnetSynonymParser parser = new OpenSearchWordnetSynonymParser(true, false, true, new StandardAnalyzer());
String rules =
"s(100000001,1,'&',a,1,0).\n" +
"s(100000001,2,'and',a,1,0).\n" +
"s(100000002,1,'come',v,1,0).\n" +
"s(100000002,2,'advance',v,1,0).\n" +
"s(100000002,3,'approach',v,1,0).";
String rules = "s(100000001,1,'&',a,1,0).\n"
+ "s(100000001,2,'and',a,1,0).\n"
+ "s(100000002,1,'come',v,1,0).\n"
+ "s(100000002,2,'advance',v,1,0).\n"
+ "s(100000002,3,'approach',v,1,0).";
StringReader rulesReader = new StringReader(rules);
parser.parse(rulesReader);
SynonymMap synonymMap = parser.build();
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader("approach quietly then advance & destroy"));
TokenStream ts = new SynonymFilter(tokenizer, synonymMap, false);
assertTokenStreamContents(ts, new String[]{"come", "quietly", "then", "come", "destroy"});
assertTokenStreamContents(ts, new String[] { "come", "quietly", "then", "come", "destroy" });
}
public void testLenientParserWithSomeIncorrectLines() throws IOException, ParseException {
CharArraySet stopSet = new CharArraySet(1, true);
stopSet.add("bar");
OpenSearchWordnetSynonymParser parser =
new OpenSearchWordnetSynonymParser(true, false, true, new StandardAnalyzer(stopSet));
String rules =
"s(100000001,1,'foo',v,1,0).\n" +
"s(100000001,2,'bar',v,1,0).\n" +
"s(100000001,3,'baz',v,1,0).";
OpenSearchWordnetSynonymParser parser = new OpenSearchWordnetSynonymParser(true, false, true, new StandardAnalyzer(stopSet));
String rules = "s(100000001,1,'foo',v,1,0).\n" + "s(100000001,2,'bar',v,1,0).\n" + "s(100000001,3,'baz',v,1,0).";
StringReader rulesReader = new StringReader(rules);
parser.parse(rulesReader);
SynonymMap synonymMap = parser.build();
Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader("first word is foo, then bar and lastly baz"));
TokenStream ts = new SynonymFilter(new StopFilter(tokenizer, stopSet), synonymMap, false);
assertTokenStreamContents(ts, new String[]{"first", "word", "is", "foo", "then", "and", "lastly", "foo"});
assertTokenStreamContents(ts, new String[] { "first", "word", "is", "foo", "then", "and", "lastly", "foo" });
}
public void testNonLenientParser() {
OpenSearchWordnetSynonymParser parser = new OpenSearchWordnetSynonymParser(true, false, false, new StandardAnalyzer());
String rules =
"s(100000001,1,'&',a,1,0).\n" +
"s(100000001,2,'and',a,1,0).\n" +
"s(100000002,1,'come',v,1,0).\n" +
"s(100000002,2,'advance',v,1,0).\n" +
"s(100000002,3,'approach',v,1,0).";
String rules = "s(100000001,1,'&',a,1,0).\n"
+ "s(100000001,2,'and',a,1,0).\n"
+ "s(100000002,1,'come',v,1,0).\n"
+ "s(100000002,2,'advance',v,1,0).\n"
+ "s(100000002,3,'approach',v,1,0).";
StringReader rulesReader = new StringReader(rules);
ParseException ex = expectThrows(ParseException.class, () -> parser.parse(rulesReader));
assertThat(ex.getMessage(), containsString("Invalid synonym rule at line 1"));

View File

@ -48,73 +48,105 @@ public class PathHierarchyTokenizerFactoryTests extends OpenSearchTokenStreamTes
public void testDefaults() throws IOException {
final Index index = new Index("test", "_na_");
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", Settings.EMPTY).create();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
Settings.EMPTY
).create();
tokenizer.setReader(new StringReader("/one/two/three"));
assertTokenStreamContents(tokenizer, new String[] {"/one", "/one/two", "/one/two/three"});
assertTokenStreamContents(tokenizer, new String[] { "/one", "/one/two", "/one/two/three" });
}
public void testReverse() throws IOException {
final Index index = new Index("test", "_na_");
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("reverse", true).build();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", settings).create();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
settings
).create();
tokenizer.setReader(new StringReader("/one/two/three"));
assertTokenStreamContents(tokenizer, new String[] {"/one/two/three", "one/two/three", "two/three", "three"});
assertTokenStreamContents(tokenizer, new String[] { "/one/two/three", "one/two/three", "two/three", "three" });
}
public void testDelimiter() throws IOException {
final Index index = new Index("test", "_na_");
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("delimiter", "-").build();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", settings).create();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
settings
).create();
tokenizer.setReader(new StringReader("/one/two/three"));
assertTokenStreamContents(tokenizer, new String[] {"/one/two/three"});
assertTokenStreamContents(tokenizer, new String[] { "/one/two/three" });
tokenizer.setReader(new StringReader("one-two-three"));
assertTokenStreamContents(tokenizer, new String[] {"one", "one-two", "one-two-three"});
assertTokenStreamContents(tokenizer, new String[] { "one", "one-two", "one-two-three" });
}
public void testReplace() throws IOException {
final Index index = new Index("test", "_na_");
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("replacement", "-").build();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", settings).create();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
settings
).create();
tokenizer.setReader(new StringReader("/one/two/three"));
assertTokenStreamContents(tokenizer, new String[] {"-one", "-one-two", "-one-two-three"});
assertTokenStreamContents(tokenizer, new String[] { "-one", "-one-two", "-one-two-three" });
tokenizer.setReader(new StringReader("one-two-three"));
assertTokenStreamContents(tokenizer, new String[] {"one-two-three"});
assertTokenStreamContents(tokenizer, new String[] { "one-two-three" });
}
public void testSkip() throws IOException {
final Index index = new Index("test", "_na_");
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("skip", 2).build();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", settings).create();
Tokenizer tokenizer = new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
settings
).create();
tokenizer.setReader(new StringReader("/one/two/three/four/five"));
assertTokenStreamContents(tokenizer, new String[] {"/three", "/three/four", "/three/four/five"});
assertTokenStreamContents(tokenizer, new String[] { "/three", "/three/four", "/three/four/five" });
}
public void testDelimiterExceptions() {
final Index index = new Index("test", "_na_");
final Settings indexSettings = newAnalysisSettingsBuilder().build();
{
String delimiter = RandomPicks.randomFrom(random(), new String[] {"--", ""});
String delimiter = RandomPicks.randomFrom(random(), new String[] { "--", "" });
Settings settings = newAnalysisSettingsBuilder().put("delimiter", delimiter).build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", settings).create());
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
settings
).create()
);
assertEquals("delimiter must be a one char value", e.getMessage());
}
{
String replacement = RandomPicks.randomFrom(random(), new String[] {"--", ""});
String replacement = RandomPicks.randomFrom(random(), new String[] { "--", "" });
Settings settings = newAnalysisSettingsBuilder().put("replacement", replacement).build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
"path-hierarchy-tokenizer", settings).create());
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> new PathHierarchyTokenizerFactory(
IndexSettingsModule.newIndexSettings(index, indexSettings),
null,
"path-hierarchy-tokenizer",
settings
).create()
);
assertEquals("replacement must be a one char value", e.getMessage());
}
}

View File

@ -46,90 +46,96 @@ import java.util.regex.Pattern;
*/
public class PatternAnalyzerTests extends OpenSearchTokenStreamTestCase {
/**
* Test PatternAnalyzer when it is configured with a non-word pattern.
*/
public void testNonWordPattern() throws IOException {
// Split on non-letter pattern, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\W+"), false, null);
assertAnalyzesTo(a, "The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "The", "quick", "brown", "Fox", "the", "abcd1234", "56", "78", "dc" });
/**
* Test PatternAnalyzer when it is configured with a non-word pattern.
*/
public void testNonWordPattern() throws IOException {
// Split on non-letter pattern, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\W+"), false, null);
assertAnalyzesTo(
a,
"The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "The", "quick", "brown", "Fox", "the", "abcd1234", "56", "78", "dc" }
);
// split on non-letter pattern, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile("\\W+"), true,
EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(b, "The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "quick", "brown", "fox", "abcd1234", "56", "78", "dc" });
}
// split on non-letter pattern, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile("\\W+"), true, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(
b,
"The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "quick", "brown", "fox", "abcd1234", "56", "78", "dc" }
);
}
/**
* Test PatternAnalyzer when it is configured with a whitespace pattern.
* Behavior can be similar to WhitespaceAnalyzer (depending upon options)
*/
public void testWhitespacePattern() throws IOException {
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
assertAnalyzesTo(a, "The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });
/**
* Test PatternAnalyzer when it is configured with a whitespace pattern.
* Behavior can be similar to WhitespaceAnalyzer (depending upon options)
*/
public void testWhitespacePattern() throws IOException {
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
assertAnalyzesTo(
a,
"The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." }
);
// Split on whitespace patterns, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile("\\s+"), true,
EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(b, "The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
}
// Split on whitespace patterns, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile("\\s+"), true, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(
b,
"The quick brown Fox,the abcd1234 (56.78) dc.",
new String[] { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." }
);
}
/**
* Test PatternAnalyzer when it is configured with a custom pattern. In this
* case, text is tokenized on the comma ","
*/
public void testCustomPattern() throws IOException {
// Split on comma, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile(","), false, null);
assertAnalyzesTo(a, "Here,Are,some,Comma,separated,words,",
new String[] { "Here", "Are", "some", "Comma", "separated", "words" });
/**
* Test PatternAnalyzer when it is configured with a custom pattern. In this
* case, text is tokenized on the comma ","
*/
public void testCustomPattern() throws IOException {
// Split on comma, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile(","), false, null);
assertAnalyzesTo(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here", "Are", "some", "Comma", "separated", "words" });
// split on comma, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile(","), true,
EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(b, "Here,Are,some,Comma,separated,words,",
new String[] { "here", "some", "comma", "separated", "words" });
}
// split on comma, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile(","), true, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(b, "Here,Are,some,Comma,separated,words,", new String[] { "here", "some", "comma", "separated", "words" });
}
/**
* Test PatternAnalyzer against a large document.
*/
public void testHugeDocument() throws IOException {
StringBuilder document = new StringBuilder();
// 5000 a's
char largeWord[] = new char[5000];
Arrays.fill(largeWord, 'a');
document.append(largeWord);
/**
* Test PatternAnalyzer against a large document.
*/
public void testHugeDocument() throws IOException {
StringBuilder document = new StringBuilder();
// 5000 a's
char largeWord[] = new char[5000];
Arrays.fill(largeWord, 'a');
document.append(largeWord);
// a space
document.append(' ');
// a space
document.append(' ');
// 2000 b's
char largeWord2[] = new char[2000];
Arrays.fill(largeWord2, 'b');
document.append(largeWord2);
// 2000 b's
char largeWord2[] = new char[2000];
Arrays.fill(largeWord2, 'b');
document.append(largeWord2);
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
assertAnalyzesTo(a, document.toString(),
new String[] { new String(largeWord), new String(largeWord2) });
}
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
assertAnalyzesTo(a, document.toString(), new String[] { new String(largeWord), new String(largeWord2) });
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
Analyzer a = new PatternAnalyzer(Pattern.compile(","), true, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER);
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
Analyzer a = new PatternAnalyzer(Pattern.compile(","), true, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
}
public void testNormalize() {
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
assertEquals(new BytesRef("FooBar"), a.normalize("dummy", "FooBar"));
a = new PatternAnalyzer(Pattern.compile("\\s+"), true, null);
assertEquals(new BytesRef("foobar"), a.normalize("dummy", "FooBar"));
}
public void testNormalize() {
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
assertEquals(new BytesRef("FooBar"), a.normalize("dummy", "FooBar"));
a = new PatternAnalyzer(Pattern.compile("\\s+"), true, null);
assertEquals(new BytesRef("foobar"), a.normalize("dummy", "FooBar"));
}
}

View File

@ -49,31 +49,35 @@ public class PatternCaptureTokenFilterTests extends OpenSearchTokenStreamTestCas
public void testPatternCaptureTokenFilter() throws Exception {
String json = "/org/opensearch/analysis/common/pattern_capture.json";
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.build();
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
NamedAnalyzer analyzer1 = indexAnalyzers.get("single");
assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[]{"foobarbaz","foobar","foo"});
assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[] { "foobarbaz", "foobar", "foo" });
NamedAnalyzer analyzer2 = indexAnalyzers.get("multi");
assertTokenStreamContents(analyzer2.tokenStream("test", "abc123def"), new String[]{"abc123def","abc","123","def"});
assertTokenStreamContents(analyzer2.tokenStream("test", "abc123def"), new String[] { "abc123def", "abc", "123", "def" });
NamedAnalyzer analyzer3 = indexAnalyzers.get("preserve");
assertTokenStreamContents(analyzer3.tokenStream("test", "foobarbaz"), new String[]{"foobar","foo"});
assertTokenStreamContents(analyzer3.tokenStream("test", "foobarbaz"), new String[] { "foobar", "foo" });
}
public void testNoPatterns() {
try {
new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null,
"pattern_capture", Settings.builder().put("pattern", "foobar").build());
fail ("Expected IllegalArgumentException");
new PatternCaptureGroupTokenFilterFactory(
IndexSettingsModule.newIndexSettings("test", Settings.EMPTY),
null,
"pattern_capture",
Settings.builder().put("pattern", "foobar").build()
);
fail("Expected IllegalArgumentException");
} catch (IllegalArgumentException e) {
assertThat(e.getMessage(), containsString("required setting 'patterns' is missing"));
}

View File

@ -53,9 +53,7 @@ import java.util.Collections;
public class PredicateTokenScriptFilterTests extends OpenSearchTokenStreamTestCase {
public void testSimpleFilter() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put("index.analysis.filter.f.type", "predicate_token_filter")
@ -74,7 +72,7 @@ public class PredicateTokenScriptFilterTests extends OpenSearchTokenStreamTestCa
};
@SuppressWarnings("unchecked")
ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()){
ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {
@Override
public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
assertEquals(context, AnalysisPredicateScript.CONTEXT);
@ -85,16 +83,13 @@ public class PredicateTokenScriptFilterTests extends OpenSearchTokenStreamTestCa
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null);
AnalysisModule module
= new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
AnalysisModule module = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);
try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "Oh what a wonderful thing to be", new String[]{
"Oh", "what", "to", "be"
});
assertAnalyzesTo(analyzer, "Oh what a wonderful thing to be", new String[] { "Oh", "what", "to", "be" });
}
}

View File

@ -64,11 +64,7 @@ public class RemoveDuplicatesFilterFactoryTests extends OpenSearchTokenStreamTes
new Token("d", 1, 4, 5)
);
assertTokenStreamContents(tokenFilter.create(cts), new String[]{
"a", "b", "c", "d"
}, new int[]{
1, 1, 0, 1
});
assertTokenStreamContents(tokenFilter.create(cts), new String[] { "a", "b", "c", "d" }, new int[] { 1, 1, 0, 1 });
}
}

View File

@ -52,9 +52,7 @@ import java.util.Collections;
public class ScriptedConditionTokenFilterTests extends OpenSearchTokenStreamTestCase {
public void testSimpleCondition() throws Exception {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.put("index.analysis.filter.cond.type", "condition")
@ -74,7 +72,7 @@ public class ScriptedConditionTokenFilterTests extends OpenSearchTokenStreamTest
};
@SuppressWarnings("unchecked")
ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()){
ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {
@Override
public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
assertEquals(context, AnalysisPredicateScript.CONTEXT);
@ -85,16 +83,13 @@ public class ScriptedConditionTokenFilterTests extends OpenSearchTokenStreamTest
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
plugin.createComponents(null, null, null, null, scriptService, null, null, null, null, null, null);
AnalysisModule module
= new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
AnalysisModule module = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));
IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);
try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
assertNotNull(analyzer);
assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
"Vorsprung", "Durch", "TECHNIK"
});
assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[] { "Vorsprung", "Durch", "TECHNIK" });
}
}

View File

@ -52,7 +52,8 @@ public class ShingleTokenFilterTests extends OpenSearchTokenStreamTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
.build(),
new CommonAnalysisPlugin());
new CommonAnalysisPlugin()
);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("shingle");
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("this is a test"));

View File

@ -38,35 +38,29 @@ import org.opensearch.test.OpenSearchTokenStreamTestCase;
public class SnowballAnalyzerTests extends OpenSearchTokenStreamTestCase {
public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
}
public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
assertAnalyzesTo(a, "he abhorred accents", new String[] { "he", "abhor", "accent" });
}
public void testStopwords() throws Exception {
Analyzer a = new SnowballAnalyzer("English",
EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(a, "the quick brown fox jumped",
new String[]{"quick", "brown", "fox", "jump"});
}
public void testStopwords() throws Exception {
Analyzer a = new SnowballAnalyzer("English", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
assertAnalyzesTo(a, "the quick brown fox jumped", new String[] { "quick", "brown", "fox", "jump" });
}
/**
* Test turkish lowercasing
*/
public void testTurkish() throws Exception {
Analyzer a = new SnowballAnalyzer("Turkish");
/**
* Test turkish lowercasing
*/
public void testTurkish() throws Exception {
Analyzer a = new SnowballAnalyzer("Turkish");
assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
}
assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
assertAnalyzesTo(a, "she abhorred him",
new String[]{"she", "abhor", "him"});
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
assertAnalyzesTo(a, "he abhorred accents", new String[] { "he", "abhor", "accent" });
assertAnalyzesTo(a, "she abhorred him", new String[] { "she", "abhor", "him" });
}
}

View File

@ -59,7 +59,8 @@ public class StemmerOverrideTokenFilterFactoryTests extends OpenSearchTokenStrea
.putList("index.analysis.filter.my_stemmer_override.rules", rules)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build(),
new CommonAnalysisPlugin());
new CommonAnalysisPlugin()
);
return analysis.tokenFilter.get("my_stemmer_override");
}
@ -75,19 +76,18 @@ public class StemmerOverrideTokenFilterFactoryTests extends OpenSearchTokenStrea
"=>a", // no keys
"a,=>b" // empty key
)) {
expectThrows(RuntimeException.class, String.format(
Locale.ROOT, "Should fail for invalid rule: '%s'", rule
), () -> create(rule));
expectThrows(
RuntimeException.class,
String.format(Locale.ROOT, "Should fail for invalid rule: '%s'", rule),
() -> create(rule)
);
}
}
public void testRulesOk() throws IOException {
TokenFilterFactory tokenFilterFactory = create(
"a => 1",
"b,c => 2"
);
TokenFilterFactory tokenFilterFactory = create("a => 1", "b,c => 2");
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("a b c"));
assertTokenStreamContents(tokenFilterFactory.create(tokenizer), new String[]{"1", "2", "2"});
assertTokenStreamContents(tokenFilterFactory.create(tokenizer), new String[] { "1", "2", "2" });
}
}

View File

@ -63,13 +63,13 @@ public class StemmerTokenFilterFactoryTests extends OpenSearchTokenStreamTestCas
for (int i = 0; i < iters; i++) {
Version v = VersionUtils.randomVersion(random());
Settings settings = Settings.builder()
.put("index.analysis.filter.my_english.type", "stemmer")
.put("index.analysis.filter.my_english.language", "english")
.put("index.analysis.analyzer.my_english.tokenizer","whitespace")
.put("index.analysis.analyzer.my_english.filter","my_english")
.put(SETTING_VERSION_CREATED,v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.my_english.type", "stemmer")
.put("index.analysis.filter.my_english.language", "english")
.put("index.analysis.analyzer.my_english.tokenizer", "whitespace")
.put("index.analysis.analyzer.my_english.filter", "my_english")
.put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_english");
@ -80,7 +80,7 @@ public class StemmerTokenFilterFactoryTests extends OpenSearchTokenStreamTestCas
IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
NamedAnalyzer analyzer = indexAnalyzers.get("my_english");
assertThat(create, instanceOf(PorterStemFilter.class));
assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"});
assertAnalyzesTo(analyzer, "consolingly", new String[] { "consolingli" });
}
}
@ -90,13 +90,13 @@ public class StemmerTokenFilterFactoryTests extends OpenSearchTokenStreamTestCas
Version v = VersionUtils.randomVersion(random());
Settings settings = Settings.builder()
.put("index.analysis.filter.my_porter2.type", "stemmer")
.put("index.analysis.filter.my_porter2.language", "porter2")
.put("index.analysis.analyzer.my_porter2.tokenizer","whitespace")
.put("index.analysis.analyzer.my_porter2.filter","my_porter2")
.put(SETTING_VERSION_CREATED,v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
.put("index.analysis.filter.my_porter2.type", "stemmer")
.put("index.analysis.filter.my_porter2.language", "porter2")
.put("index.analysis.analyzer.my_porter2.tokenizer", "whitespace")
.put("index.analysis.analyzer.my_porter2.filter", "my_porter2")
.put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_porter2");
@ -107,18 +107,23 @@ public class StemmerTokenFilterFactoryTests extends OpenSearchTokenStreamTestCas
IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
NamedAnalyzer analyzer = indexAnalyzers.get("my_porter2");
assertThat(create, instanceOf(SnowballFilter.class));
assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"});
assertAnalyzesTo(analyzer, "possibly", new String[] { "possibl" });
}
}
public void testMultipleLanguagesThrowsException() throws IOException {
Version v = VersionUtils.randomVersion(random());
Settings settings = Settings.builder().put("index.analysis.filter.my_english.type", "stemmer")
.putList("index.analysis.filter.my_english.language", "english", "light_english").put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings settings = Settings.builder()
.put("index.analysis.filter.my_english.type", "stemmer")
.putList("index.analysis.filter.my_english.language", "english", "light_english")
.put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN));
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN)
);
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
}
}

View File

@ -80,10 +80,11 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
Files.copy(synonymsWordnet, config.resolve("synonyms_wordnet.txt"));
String json = "/org/opensearch/analysis/common/synonyms.json";
Settings settings = Settings.builder().
loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(Environment.PATH_HOME_SETTING.getKey(), home)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
Settings settings = Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json), false)
.put(Environment.PATH_HOME_SETTING.getKey(), home)
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
@ -110,7 +111,7 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
.put("index.analysis.filter.stop_within_synonym.type", "stop")
.putList("index.analysis.filter.stop_within_synonym.stopwords", "foobar", "opensearch")
.put("index.analysis.analyzer.synonymAnalyzerWithStopSynonymBeforeSynonym.tokenizer", "whitespace")
.putList("index.analysis.analyzer.synonymAnalyzerWithStopSynonymBeforeSynonym.filter", "stop_within_synonym","synonym")
.putList("index.analysis.analyzer.synonymAnalyzerWithStopSynonymBeforeSynonym.filter", "stop_within_synonym", "synonym")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try {
@ -131,7 +132,7 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
.put("index.analysis.filter.stop_within_synonym.type", "stop")
.putList("index.analysis.filter.stop_within_synonym.stopwords", "foobar", "opensearch")
.put("index.analysis.analyzer.synonymAnalyzerExpandWithStopBeforeSynonym.tokenizer", "whitespace")
.putList("index.analysis.analyzer.synonymAnalyzerExpandWithStopBeforeSynonym.filter", "stop_within_synonym","synonym_expand")
.putList("index.analysis.analyzer.synonymAnalyzerExpandWithStopBeforeSynonym.filter", "stop_within_synonym", "synonym_expand")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
try {
@ -159,9 +160,12 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("synonymAnalyzer"), "Some developers are odd",
new String[]{ "some", "developers", "develop", "programm", "are", "odd" },
new int[]{ 1, 1, 0, 0, 1, 1 });
BaseTokenStreamTestCase.assertAnalyzesTo(
indexAnalyzers.get("synonymAnalyzer"),
"Some developers are odd",
new String[] { "some", "developers", "develop", "programm", "are", "odd" },
new int[] { 1, 1, 0, 0, 1, 1 }
);
}
public void testAsciiFoldingFilterForSynonyms() throws IOException {
@ -176,9 +180,12 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("synonymAnalyzer"), "høj",
new String[]{ "hoj", "height" },
new int[]{ 1, 0 });
BaseTokenStreamTestCase.assertAnalyzesTo(
indexAnalyzers.get("synonymAnalyzer"),
"høj",
new String[] { "hoj", "height" },
new int[] { 1, 0 }
);
}
public void testPreconfigured() throws IOException {
@ -193,9 +200,12 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("my_analyzer"), "würst",
new String[]{ "wurst", "sausage"},
new int[]{ 1, 0 });
BaseTokenStreamTestCase.assertAnalyzesTo(
indexAnalyzers.get("my_analyzer"),
"würst",
new String[] { "wurst", "sausage" },
new int[] { 1, 0 }
);
}
public void testChainedSynonymFilters() throws IOException {
@ -212,15 +222,21 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
BaseTokenStreamTestCase.assertAnalyzesTo(indexAnalyzers.get("syn"), "term1",
new String[]{ "term1", "term3", "term2" }, new int[]{ 1, 0, 0 });
BaseTokenStreamTestCase.assertAnalyzesTo(
indexAnalyzers.get("syn"),
"term1",
new String[] { "term1", "term3", "term2" },
new int[] { 1, 0, 0 }
);
}
public void testShingleFilters() {
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("path.home", createTempDir().toString())
.put("index.analysis.filter.synonyms.type", "synonym")
.putList("index.analysis.filter.synonyms.synonyms", "programmer, developer")
@ -230,9 +246,10 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
expectThrows(IllegalArgumentException.class, () -> {
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
});
expectThrows(
IllegalArgumentException.class,
() -> { indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers; }
);
}
@ -246,9 +263,7 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
String[] bypassingFactories = new String[]{
"dictionary_decompounder"
};
String[] bypassingFactories = new String[] { "dictionary_decompounder" };
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
for (String factory : bypassingFactories) {
@ -265,14 +280,25 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
}
public void testPreconfiguredTokenFilters() throws IOException {
Set<String> disallowedFilters = new HashSet<>(Arrays.asList(
"common_grams", "edge_ngram", "edgeNGram", "keyword_repeat", "ngram", "nGram",
"shingle", "word_delimiter", "word_delimiter_graph"
));
Set<String> disallowedFilters = new HashSet<>(
Arrays.asList(
"common_grams",
"edge_ngram",
"edgeNGram",
"keyword_repeat",
"ngram",
"nGram",
"shingle",
"word_delimiter",
"word_delimiter_graph"
)
);
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("path.home", createTempDir().toString())
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@ -281,23 +307,26 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
for (PreConfiguredTokenFilter tf : plugin.getPreConfiguredTokenFilters()) {
if (disallowedFilters.contains(tf.getName())) {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
"Expected exception for factory " + tf.getName(), () -> {
tf.get(idxSettings, null, tf.getName(), settings).getSynonymFilter();
});
assertEquals(tf.getName(), "Token filter [" + tf.getName()
+ "] cannot be used to parse synonyms",
e.getMessage());
}
else {
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
"Expected exception for factory " + tf.getName(),
() -> { tf.get(idxSettings, null, tf.getName(), settings).getSynonymFilter(); }
);
assertEquals(tf.getName(), "Token filter [" + tf.getName() + "] cannot be used to parse synonyms", e.getMessage());
} else {
tf.get(idxSettings, null, tf.getName(), settings).getSynonymFilter();
}
}
Settings settings2 = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED,
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(), LegacyESVersion.V_6_0_0, VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
random(),
LegacyESVersion.V_6_0_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)
)
)
.put("path.home", createTempDir().toString())
.putList("common_words", "a", "b")
.put("output_unigrams", "true")
@ -309,8 +338,7 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
if (disallowedFilters.contains(tf.getName())) {
tf.get(idxSettings2, null, tf.getName(), settings2).getSynonymFilter();
expectedWarnings.add("Token filter [" + tf.getName() + "] will not be usable to parse synonyms after v7.0");
}
else {
} else {
tf.get(idxSettings2, null, tf.getName(), settings2).getSynonymFilter();
}
}
@ -320,8 +348,10 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
public void testDisallowedTokenFilters() throws IOException {
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
)
.put("path.home", createTempDir().toString())
.putList("common_words", "a", "b")
.put("output_unigrams", "true")
@ -329,29 +359,39 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
String[] disallowedFactories = new String[]{
"multiplexer", "cjk_bigram", "common_grams", "ngram", "edge_ngram",
"word_delimiter", "word_delimiter_graph", "fingerprint"
};
String[] disallowedFactories = new String[] {
"multiplexer",
"cjk_bigram",
"common_grams",
"ngram",
"edge_ngram",
"word_delimiter",
"word_delimiter_graph",
"fingerprint" };
for (String factory : disallowedFactories) {
TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
"Expected IllegalArgumentException for factory " + factory,
() -> stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null));
() -> stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null)
);
assertEquals(factory, "Token filter [" + factory
+ "] cannot be used to parse synonyms",
e.getMessage());
assertEquals(factory, "Token filter [" + factory + "] cannot be used to parse synonyms", e.getMessage());
}
settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED,
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(), LegacyESVersion.V_6_0_0, VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
random(),
LegacyESVersion.V_6_0_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)
)
)
.put("path.home", createTempDir().toString())
.putList("common_words", "a", "b")
.put("output_unigrams", "true")
@ -365,16 +405,20 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null);
expectedWarnings.add("Token filter [" + factory
+ "] will not be usable to parse synonyms after v7.0");
expectedWarnings.add("Token filter [" + factory + "] will not be usable to parse synonyms after v7.0");
}
assertWarnings(expectedWarnings.toArray(new String[0]));
settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED,
.put(
IndexMetadata.SETTING_VERSION_CREATED,
VersionUtils.randomVersionBetween(
random(), LegacyESVersion.V_6_0_0, VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
random(),
LegacyESVersion.V_6_0_0,
VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)
)
)
.put("path.home", createTempDir().toString())
.put("preserve_original", "false")
.build();
@ -383,11 +427,12 @@ public class SynonymsAnalysisTests extends OpenSearchTestCase {
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null));
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null)
);
assertEquals("Token filter [multiplexer] cannot be used to parse synonyms unless [preserve_original] is [true]",
e.getMessage());
assertEquals("Token filter [multiplexer] cannot be used to parse synonyms unless [preserve_original] is [true]", e.getMessage());
}
Some files were not shown because too many files have changed in this diff.