mirror of
synced 2025-03-24 17:09:48 +00:00
Enabling spotless, disabling checkstyle check on plugins (#1488)
* Enabling spotless, disabling checkstyle on below modules :plugins:mapper-annotated-text :plugins:mapper-murmur3 :plugins:mapper-size :plugins:repository-azure :plugins:repository-gcs :plugins:repository-hdfs :plugins:repository-s3 :plugins:store-smb :plugins:transport-nio :qa:die-with-dignity Signed-off-by: Himanshu Setia <setiah@amazon.com> * Enabling spotless for more plugins Signed-off-by: Himanshu Setia <setiah@amazon.com> * Fixing error in merge conflict Signed-off-by: Himanshu Setia <setiah@amazon.com>
This commit is contained in:
@ -27,11 +27,14 @@
<suppress files="libs" checks="." />
<!-- Excludes checkstyle run on modules module -->
<suppress files="modules" checks="." />
<!-- Excludes checkstyle run on plugins module -->
<suppress files="plugins" checks="." />
<!-- Excludes checkstyle run on below qa module -->
<suppress files="qa[/\\]die-with-dignity" checks="." />
<!-- Excludes checkstyle run on test module -->
<suppress files="test" checks="." />
<!-- Excludes checkstyle run on rest-api-spec module -->
<suppress files="rest-api-spec" checks="." />
Truly temporary suppression of snippets included in
documentation that are so wide that they scroll.
@ -56,29 +56,7 @@ import org.opensearch.gradle.BuildPlugin
// Do not add new sub-projects here!
def projectPathsToExclude = [
def projectPathsToExclude = []
subprojects {
plugins.withType(BuildPlugin).whenPluginAdded {
@ -73,10 +73,10 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
String[] equivalent = {"I WİLL USE TURKİSH CASING", "ı will use turkish casıng"};
String[] equivalent = { "I WİLL USE TURKİSH CASING", "ı will use turkish casıng" };
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -85,27 +85,26 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("language", "tr")
.field("strength", "primary")
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
// both values should collate to same value
client().prepareIndex(index, type, "1")
.setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2")
.setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
// searching for either of the terms should return both results since they collate to the same value
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
SearchResponse response = client().search(request).actionGet();
@ -118,10 +117,10 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
String[] equivalent = {"a", "C", "a", "B"};
String[] equivalent = { "a", "C", "a", "B" };
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -129,28 +128,28 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("type", "icu_collation_keyword")
.field("language", "en")
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
// everything should be indexed fine, no exceptions
client().prepareIndex(index, type, "1")
.setSource("{\"id\":\"1\", \"collate\":[\"" + equivalent[0] + "\", \"" + equivalent[1] + "\"]}", XContentType.JSON),
client().prepareIndex(index, type, "2")
.setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[2] + "\"}", XContentType.JSON)
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[2] + "\"}", XContentType.JSON)
// using sort mode = max, values B and C will be used for the sort
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", "a"))
// if mode is max we use c and b as sort values, if mode is min we use "a" for both
.sort("id", SortOrder.DESC) // will be ignored
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", "a"))
// if mode is max we use c and b as sort values, if mode is min we use "a" for both
.sort("id", SortOrder.DESC) // will be ignored
SearchResponse response = client().search(request).actionGet();
@ -159,15 +158,14 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
assertOrderedSearchHits(response, "1", "2");
// same thing, using different sort mode that will use a for both docs
request = new SearchRequest()
request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", "a"))
// if mode is max we use c and b as sort values, if mode is min we use "a" for both
.sort("id", SortOrder.DESC) // will NOT be ignored and will determine order
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", "a"))
// if mode is max we use c and b as sort values, if mode is min we use "a" for both
.sort("id", SortOrder.DESC) // will NOT be ignored and will determine order
response = client().search(request).actionGet();
@ -183,10 +181,10 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
String[] equivalent = {"I W\u0049\u0307LL USE TURKİSH CASING", "ı will use turkish casıng"};
String[] equivalent = { "I W\u0049\u0307LL USE TURKİSH CASING", "ı will use turkish casıng" };
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -196,26 +194,25 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("strength", "primary")
.field("decomposition", "canonical")
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1")
.setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2")
.setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
// searching for either of the terms should return both results since they collate to the same value
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
SearchResponse response = client().search(request).actionGet();
@ -231,10 +228,10 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
String[] equivalent = {"TESTING", "testing"};
String[] equivalent = { "TESTING", "testing" };
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -244,25 +241,24 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("strength", "secondary")
.field("decomposition", "no")
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1")
.setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2")
.setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
SearchResponse response = client().search(request).actionGet();
@ -279,10 +275,10 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
String[] equivalent = {"foo-bar", "foo bar"};
String[] equivalent = { "foo-bar", "foo bar" };
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -292,23 +288,24 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("strength", "primary")
.field("alternate", "shifted")
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
SearchResponse response = client().search(request).actionGet();
@ -325,8 +322,8 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -338,23 +335,26 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("variable_top", " ")
.field("index", false)
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"foo bar\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"foobar\"}", XContentType.JSON),
client().prepareIndex(index, type, "3").setSource("{\"id\":\"3\",\"collate\":\"foo-bar\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.sort("collate", SortOrder.ASC)
.sort("id", SortOrder.ASC) // secondary sort should kick in on docs 1 and 3 because same value collate value
new SearchSourceBuilder().fetchSource(false).sort("collate", SortOrder.ASC).sort("id", SortOrder.ASC) // secondary sort
// should kick in on
// docs 1 and 3
// because same value
// collate value
SearchResponse response = client().search(request).actionGet();
@ -371,30 +371,28 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "icu_collation_keyword")
.field("language", "en")
.field("numeric", true)
.field("index", false)
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"foobar-10\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"foobar-9\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.sort("collate", SortOrder.ASC)
.source(new SearchSourceBuilder().fetchSource(false).sort("collate", SortOrder.ASC));
SearchResponse response = client().search(request).actionGet();
@ -410,8 +408,8 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -422,25 +420,22 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("case_level", true)
.field("index", false)
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"résumé\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"Resume\"}", XContentType.JSON),
client().prepareIndex(index, type, "3").setSource("{\"id\":\"3\",\"collate\":\"resume\"}", XContentType.JSON),
client().prepareIndex(index, type, "4").setSource("{\"id\":\"4\",\"collate\":\"Résumé\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.sort("collate", SortOrder.ASC)
.sort("id", SortOrder.DESC)
.source(new SearchSourceBuilder().fetchSource(false).sort("collate", SortOrder.ASC).sort("id", SortOrder.DESC));
SearchResponse response = client().search(request).actionGet();
@ -456,8 +451,8 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String index = "foo";
String type = "mytype";
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "icu_collation_keyword")
.field("language", "en")
@ -465,22 +460,20 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("case_first", "upper")
.field("index", false)
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"resume\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"Resume\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.sort("collate", SortOrder.ASC)
.source(new SearchSourceBuilder().fetchSource(false).sort("collate", SortOrder.ASC));
SearchResponse response = client().search(request).actionGet();
@ -500,18 +493,15 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
String type = "mytype";
RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
String DIN5007_2_tailorings =
"& ae , a\u0308 & AE , A\u0308" +
"& oe , o\u0308 & OE , O\u0308" +
"& ue , u\u0308 & UE , u\u0308";
String DIN5007_2_tailorings = "& ae , a\u0308 & AE , A\u0308" + "& oe , o\u0308 & OE , O\u0308" + "& ue , u\u0308 & UE , u\u0308";
RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
String tailoredRules = tailoredCollator.getRules();
String[] equivalent = {"Töne", "Toene"};
String[] equivalent = { "Töne", "Toene" };
XContentBuilder builder = jsonBuilder()
XContentBuilder builder = jsonBuilder().startObject()
.field("type", "keyword")
@ -520,23 +510,24 @@ public class ICUCollationKeywordFieldMapperIT extends OpenSearchIntegTestCase {
.field("rules", tailoredRules)
.field("strength", "primary")
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
client().prepareIndex(index, type, "1").setSource("{\"id\":\"1\",\"collate\":\"" + equivalent[0] + "\"}", XContentType.JSON),
client().prepareIndex(index, type, "2").setSource("{\"id\":\"2\",\"collate\":\"" + equivalent[1] + "\"}", XContentType.JSON)
SearchRequest request = new SearchRequest()
SearchRequest request = new SearchRequest().indices(index)
.source(new SearchSourceBuilder()
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("collate", SortOrder.ASC)
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
new SearchSourceBuilder().fetchSource(false)
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equivalent[0] : equivalent[1]))
.sort("collate", SortOrder.ASC)
.sort("id", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
SearchResponse response = client().search(request).actionGet();
@ -81,42 +81,40 @@ import java.io.IOException;
public final class ICUCollationKeyFilter extends TokenFilter {
private Collator collator = null;
private RawCollationKey reusableKey = new RawCollationKey();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private Collator collator = null;
private RawCollationKey reusableKey = new RawCollationKey();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
* @param input Source token stream
* @param collator CollationKey generator
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
// clone the collator: see http://userguide.icu-project.org/collation/architecture
try {
this.collator = (Collator) collator.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
* @param input Source token stream
* @param collator CollationKey generator
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
// clone the collator: see http://userguide.icu-project.org/collation/architecture
try {
this.collator = (Collator) collator.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] termBuffer = termAtt.buffer();
String termText = new String(termBuffer, 0, termAtt.length());
collator.getRawCollationKey(termText, reusableKey);
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
reusableKey.bytes, 0, reusableKey.size);
if (encodedLength > termBuffer.length) {
IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
termAtt.buffer(), 0, encodedLength);
return true;
} else {
return false;
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] termBuffer = termAtt.buffer();
String termText = new String(termBuffer, 0, termAtt.length());
collator.getRawCollationKey(termText, reusableKey);
int encodedLength = IndexableBinaryStringTools.getEncodedLength(reusableKey.bytes, 0, reusableKey.size);
if (encodedLength > termBuffer.length) {
IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size, termAtt.buffer(), 0, encodedLength);
return true;
} else {
return false;
@ -53,11 +53,15 @@ public class IcuAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analyzer>
String method = settings.get("method", "nfkc_cf");
String mode = settings.get("mode", "compose");
if (!"compose".equals(mode) && !"decompose".equals(mode)) {
throw new IllegalArgumentException("Unknown mode [" + mode + "] in analyzer [" + name +
"], expected one of [compose, decompose]");
throw new IllegalArgumentException(
"Unknown mode [" + mode + "] in analyzer [" + name + "], expected one of [compose, decompose]"
Normalizer2 normalizer = Normalizer2.getInstance(
null, method, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
"compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE
this.normalizer = IcuNormalizerTokenFilterFactory.wrapWithUnicodeSetFilter(indexSettings, normalizer, settings);
@ -40,7 +40,6 @@ import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
* Uses the {@link org.apache.lucene.analysis.icu.ICUFoldingFilter}.
* Applies foldings from UTR#30 Character Foldings.
@ -57,7 +56,10 @@ import org.opensearch.index.IndexSettings;
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
/** Store here the same Normalizer used by the lucene ICUFoldingFilter */
private static final Normalizer2 ICU_FOLDING_NORMALIZER = Normalizer2.getInstance(
ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"), "utr30", Normalizer2.Mode.COMPOSE);
private final Normalizer2 normalizer;
@ -32,7 +32,6 @@
package org.opensearch.index.analysis;
import com.ibm.icu.text.Normalizer2;
import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
@ -42,7 +41,6 @@ import org.opensearch.index.IndexSettings;
import java.io.Reader;
* Uses the {@link org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter} to normalize characters.
* <p>The {@code name} can be used to provide the type of normalization to perform.</p>
@ -61,7 +59,10 @@ public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory im
mode = "compose";
Normalizer2 normalizer = Normalizer2.getInstance(
null, method, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
"compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE
this.normalizer = IcuNormalizerTokenFilterFactory.wrapWithUnicodeSetFilter(indexSettings, normalizer, settings);
@ -42,7 +42,6 @@ import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
* Uses the {@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to normalize tokens.
* <p>The {@code name} can be used to provide the type of normalization to perform.</p>
@ -50,8 +49,7 @@ import org.opensearch.index.IndexSettings;
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private static final DeprecationLogger deprecationLogger =
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(IcuNormalizerTokenFilterFactory.class);
private final Normalizer2 normalizer;
@ -67,14 +65,14 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, normalizer);
static Normalizer2 wrapWithUnicodeSetFilter(final IndexSettings indexSettings,
final Normalizer2 normalizer,
final Settings settings) {
static Normalizer2 wrapWithUnicodeSetFilter(final IndexSettings indexSettings, final Normalizer2 normalizer, final Settings settings) {
String unicodeSetFilter = settings.get("unicodeSetFilter");
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
if (unicodeSetFilter != null) {
"[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]");
"[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
} else {
unicodeSetFilter = settings.get("unicode_set_filter");
@ -68,7 +68,7 @@ public class IcuTokenizerFactory extends AbstractTokenizerFactory {
public Tokenizer create() {
if (config == null) {
return new ICUTokenizer();
} else {
return new ICUTokenizer(config);
@ -117,14 +117,11 @@ public class IcuTokenizerFactory extends AbstractTokenizerFactory {
//parse a single RBBi rule file
// parse a single RBBi rule file
private BreakIterator parseRules(String filename, Environment env) throws IOException {
final Path path = env.configFile().resolve(filename);
String rules = Files.readAllLines(path)
.filter((v) -> v.startsWith("#") == false)
String rules = Files.readAllLines(path).stream().filter((v) -> v.startsWith("#") == false).collect(Collectors.joining("\n"));
return new RuleBasedBreakIterator(rules.toString());
@ -56,201 +56,193 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
public final class IndexableBinaryStringTools {
private static final CodingCase[] CODING_CASES = {
// CodingCase(int initialShift, int finalShift)
new CodingCase( 7, 1 ),
// CodingCase(int initialShift, int middleShift, int finalShift)
new CodingCase(14, 6, 2),
new CodingCase(13, 5, 3),
new CodingCase(12, 4, 4),
new CodingCase(11, 3, 5),
new CodingCase(10, 2, 6),
new CodingCase( 9, 1, 7),
new CodingCase( 8, 0 )
private static final CodingCase[] CODING_CASES = {
// CodingCase(int initialShift, int finalShift)
new CodingCase(7, 1),
// CodingCase(int initialShift, int middleShift, int finalShift)
new CodingCase(14, 6, 2),
new CodingCase(13, 5, 3),
new CodingCase(12, 4, 4),
new CodingCase(11, 3, 5),
new CodingCase(10, 2, 6),
new CodingCase(9, 1, 7),
new CodingCase(8, 0) };
// Export only static methods
private IndexableBinaryStringTools() {}
// Export only static methods
private IndexableBinaryStringTools() {}
* Returns the number of chars required to encode the given bytes.
* @param inputArray byte sequence to be encoded
* @param inputOffset initial offset into inputArray
* @param inputLength number of bytes in inputArray
* @return The number of chars required to encode the number of bytes.
public static int getEncodedLength(byte[] inputArray, int inputOffset,
int inputLength) {
// Use long for intermediaries to protect against overflow
return (int)((8L * inputLength + 14L) / 15L) + 1;
* Returns the number of bytes required to decode the given char sequence.
* @param encoded char sequence to be decoded
* @param offset initial offset
* @param length number of characters
* @return The number of bytes required to decode the given char sequence
public static int getDecodedLength(char[] encoded, int offset, int length) {
final int numChars = length - 1;
if (numChars <= 0) {
return 0;
} else {
// Use long for intermediaries to protect against overflow
final long numFullBytesInFinalChar = encoded[offset + length - 1];
final long numEncodedChars = numChars - 1;
return (int)((numEncodedChars * 15L + 7L) / 8L + numFullBytesInFinalChar);
* Returns the number of chars required to encode the given bytes.
* @param inputArray byte sequence to be encoded
* @param inputOffset initial offset into inputArray
* @param inputLength number of bytes in inputArray
* @return The number of chars required to encode the number of bytes.
public static int getEncodedLength(byte[] inputArray, int inputOffset, int inputLength) {
// Use long for intermediaries to protect against overflow
return (int) ((8L * inputLength + 14L) / 15L) + 1;
* Encodes the input byte sequence into the output char sequence. Before
* calling this method, ensure that the output array has sufficient
* capacity by calling {@link #getEncodedLength(byte[], int, int)}.
* @param inputArray byte sequence to be encoded
* @param inputOffset initial offset into inputArray
* @param inputLength number of bytes in inputArray
* @param outputArray char sequence to store encoded result
* @param outputOffset initial offset into outputArray
* @param outputLength length of output, must be getEncodedLength
public static void encode(byte[] inputArray, int inputOffset,
int inputLength, char[] outputArray, int outputOffset, int outputLength) {
assert (outputLength == getEncodedLength(inputArray, inputOffset,
if (inputLength > 0) {
int inputByteNum = inputOffset;
int caseNum = 0;
int outputCharNum = outputOffset;
CodingCase codingCase;
for (; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength; ++outputCharNum) {
codingCase = CODING_CASES[caseNum];
if (2 == codingCase.numBytes) {
outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
} else { // numBytes is 3
outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
+ (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
* Returns the number of bytes required to decode the given char sequence.
* @param encoded char sequence to be decoded
* @param offset initial offset
* @param length number of characters
* @return The number of bytes required to decode the given char sequence
public static int getDecodedLength(char[] encoded, int offset, int length) {
final int numChars = length - 1;
if (numChars <= 0) {
return 0;
} else {
// Use long for intermediaries to protect against overflow
final long numFullBytesInFinalChar = encoded[offset + length - 1];
final long numEncodedChars = numChars - 1;
return (int) ((numEncodedChars * 15L + 7L) / 8L + numFullBytesInFinalChar);
inputByteNum += codingCase.advanceBytes;
if (++caseNum == CODING_CASES.length) {
caseNum = 0;
// Produce final char (if any) and trailing count chars.
codingCase = CODING_CASES[caseNum];
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
outputArray[outputCharNum++] = (char) (
( ((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
) & (short) 0x7FFF);
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = (char) 1;
} else if (inputByteNum < inputLength) {
outputArray[outputCharNum++] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = caseNum == 0 ? (char) 1 : (char) 0;
} else { // No left over bits - last char is completely filled.
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = (char) 1;
* Decodes the input char sequence into the output byte sequence. Before
* calling this method, ensure that the output array has sufficient capacity
* by calling {@link #getDecodedLength(char[], int, int)}.
* @param inputArray char sequence to be decoded
* @param inputOffset initial offset into inputArray
* @param inputLength number of chars in inputArray
* @param outputArray byte sequence to store encoded result
* @param outputOffset initial offset into outputArray
* @param outputLength length of output, must be
* getDecodedLength(inputArray, inputOffset, inputLength)
public static void decode(char[] inputArray, int inputOffset,
int inputLength, byte[] outputArray, int outputOffset, int outputLength) {
assert (outputLength == getDecodedLength(inputArray, inputOffset,
final int numInputChars = inputLength - 1;
final int numOutputBytes = outputLength;
* Encodes the input byte sequence into the output char sequence. Before
* calling this method, ensure that the output array has sufficient
* capacity by calling {@link #getEncodedLength(byte[], int, int)}.
* @param inputArray byte sequence to be encoded
* @param inputOffset initial offset into inputArray
* @param inputLength number of bytes in inputArray
* @param outputArray char sequence to store encoded result
* @param outputOffset initial offset into outputArray
* @param outputLength length of output, must be getEncodedLength
public static void encode(byte[] inputArray, int inputOffset, int inputLength, char[] outputArray, int outputOffset, int outputLength) {
assert (outputLength == getEncodedLength(inputArray, inputOffset, inputLength));
if (inputLength > 0) {
int inputByteNum = inputOffset;
int caseNum = 0;
int outputCharNum = outputOffset;
CodingCase codingCase;
for (; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength; ++outputCharNum) {
codingCase = CODING_CASES[caseNum];
if (2 == codingCase.numBytes) {
outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
} else { // numBytes is 3
outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift) + (((inputArray[inputByteNum + 2] & 0xFF)
>>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
inputByteNum += codingCase.advanceBytes;
if (++caseNum == CODING_CASES.length) {
caseNum = 0;
// Produce final char (if any) and trailing count chars.
codingCase = CODING_CASES[caseNum];
if (numOutputBytes > 0) {
int caseNum = 0;
int outputByteNum = outputOffset;
int inputCharNum = inputOffset;
short inputChar;
CodingCase codingCase;
for (; inputCharNum < numInputChars - 1; ++inputCharNum) {
codingCase = CODING_CASES[caseNum];
inputChar = (short) inputArray[inputCharNum];
if (2 == codingCase.numBytes) {
if (0 == caseNum) {
outputArray[outputByteNum] = (byte) (inputChar >>> codingCase.initialShift);
} else {
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = (char) 1;
} else if (inputByteNum < inputLength) {
outputArray[outputCharNum++] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = caseNum == 0 ? (char) 1 : (char) 0;
} else { // No left over bits - last char is completely filled.
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = (char) 1;
* Decodes the input char sequence into the output byte sequence. Before
* calling this method, ensure that the output array has sufficient capacity
* by calling {@link #getDecodedLength(char[], int, int)}.
* @param inputArray char sequence to be decoded
* @param inputOffset initial offset into inputArray
* @param inputLength number of chars in inputArray
* @param outputArray byte sequence to store decoded result
* @param outputOffset initial offset into outputArray
* @param outputLength length of output, must be
* getDecodedLength(inputArray, inputOffset, inputLength)
public static void decode(char[] inputArray, int inputOffset, int inputLength, byte[] outputArray, int outputOffset, int outputLength) {
assert (outputLength == getDecodedLength(inputArray, inputOffset, inputLength));
final int numInputChars = inputLength - 1;
final int numOutputBytes = outputLength;
if (numOutputBytes > 0) {
int caseNum = 0;
int outputByteNum = outputOffset;
int inputCharNum = inputOffset;
short inputChar;
CodingCase codingCase;
for (; inputCharNum < numInputChars - 1; ++inputCharNum) {
codingCase = CODING_CASES[caseNum];
inputChar = (short) inputArray[inputCharNum];
if (2 == codingCase.numBytes) {
if (0 == caseNum) {
outputArray[outputByteNum] = (byte) (inputChar >>> codingCase.initialShift);
} else {
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
} else { // numBytes is 3
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
outputByteNum += codingCase.advanceBytes;
if (++caseNum == CODING_CASES.length) {
caseNum = 0;
// Handle final char
inputChar = (short) inputArray[inputCharNum];
codingCase = CODING_CASES[caseNum];
if (0 == caseNum) {
outputArray[outputByteNum] = 0;
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
} else { // numBytes is 3
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
final int bytesLeft = numOutputBytes - outputByteNum;
if (bytesLeft > 1) {
if (2 == codingCase.numBytes) {
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) >>> codingCase.finalShift);
} else { // numBytes is 3
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
if (bytesLeft > 2) {
outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
outputByteNum += codingCase.advanceBytes;
if (++caseNum == CODING_CASES.length) {
caseNum = 0;
// Handle final char
inputChar = (short) inputArray[inputCharNum];
codingCase = CODING_CASES[caseNum];
if (0 == caseNum) {
outputArray[outputByteNum] = 0;
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
final int bytesLeft = numOutputBytes - outputByteNum;
if (bytesLeft > 1) {
if (2 == codingCase.numBytes) {
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) >>> codingCase.finalShift);
} else { // numBytes is 3
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
if (bytesLeft > 2) {
outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
static class CodingCase {
int numBytes, initialShift, middleShift, finalShift, advanceBytes = 2;
short middleMask, finalMask;
CodingCase(int initialShift, int middleShift, int finalShift) {
this.numBytes = 3;
this.initialShift = initialShift;
this.middleShift = middleShift;
this.finalShift = finalShift;
this.finalMask = (short)((short)0xFF >>> finalShift);
this.middleMask = (short)((short)0xFF << middleShift);
CodingCase(int initialShift, int finalShift) {
this.numBytes = 2;
this.initialShift = initialShift;
this.finalShift = finalShift;
this.finalMask = (short)((short)0xFF >>> finalShift);
if (finalShift != 0) {
advanceBytes = 1;
static class CodingCase {
int numBytes, initialShift, middleShift, finalShift, advanceBytes = 2;
short middleMask, finalMask;
CodingCase(int initialShift, int middleShift, int finalShift) {
this.numBytes = 3;
this.initialShift = initialShift;
this.middleShift = middleShift;
this.finalShift = finalShift;
this.finalMask = (short) ((short) 0xFF >>> finalShift);
this.middleMask = (short) ((short) 0xFF << middleShift);
CodingCase(int initialShift, int finalShift) {
this.numBytes = 2;
this.initialShift = initialShift;
this.finalShift = finalShift;
this.finalMask = (short) ((short) 0xFF >>> finalShift);
if (finalShift != 0) {
advanceBytes = 1;
@ -89,8 +89,16 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
private final String nullValue;
private final int ignoreAbove;
public CollationFieldType(String name, boolean isSearchable, boolean isStored, boolean hasDocValues,
Collator collator, String nullValue, int ignoreAbove, Map<String, String> meta) {
public CollationFieldType(
String name,
boolean isSearchable,
boolean isStored,
boolean hasDocValues,
Collator collator,
String nullValue,
int ignoreAbove,
Map<String, String> meta
) {
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
this.collator = collator;
@ -153,28 +161,41 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
boolean transpositions, QueryShardContext context) {
public Query fuzzyQuery(
Object value,
Fuzziness fuzziness,
int prefixLength,
int maxExpansions,
boolean transpositions,
QueryShardContext context
) {
throw new UnsupportedOperationException("[fuzzy] queries are not supported on [" + CONTENT_TYPE + "] fields.");
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method,
boolean caseInsensitive, QueryShardContext context) {
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) {
throw new UnsupportedOperationException("[prefix] queries are not supported on [" + CONTENT_TYPE + "] fields.");
public Query wildcardQuery(String value,
@Nullable MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
QueryShardContext context) {
public Query wildcardQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
QueryShardContext context
) {
throw new UnsupportedOperationException("[wildcard] queries are not supported on [" + CONTENT_TYPE + "] fields.");
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
public Query regexpQuery(
String value,
int syntaxFlags,
int matchFlags,
int maxDeterminizedStates,
MultiTermQuery.RewriteMethod method,
QueryShardContext context
) {
throw new UnsupportedOperationException("[regexp] queries are not supported on [" + CONTENT_TYPE + "] fields.");
@ -185,8 +206,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
public void writeTo(StreamOutput out) {
public void writeTo(StreamOutput out) {}
public String format(BytesRef value) {
@ -236,8 +256,9 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
public Builder indexOptions(IndexOptions indexOptions) {
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) {
throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support positions, got [index_options]="
+ indexOptionToString(indexOptions));
throw new IllegalArgumentException(
"The [" + CONTENT_TYPE + "] field does not support positions, got [index_options]=" + indexOptionToString(indexOptions)
return super.indexOptions(indexOptions);
@ -463,22 +484,47 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
public ICUCollationKeywordFieldMapper build(BuilderContext context) {
final Collator collator = buildCollator();
CollationFieldType ft
= new CollationFieldType(buildFullName(context), indexed, fieldType.stored(),
hasDocValues, collator, nullValue, ignoreAbove, meta);
return new ICUCollationKeywordFieldMapper(name, fieldType, ft,
multiFieldsBuilder.build(this, context), copyTo, rules, language, country, variant, strength, decomposition,
alternate, caseLevel, caseFirst, numeric, variableTop, hiraganaQuaternaryMode, ignoreAbove, collator, nullValue);
CollationFieldType ft = new CollationFieldType(
return new ICUCollationKeywordFieldMapper(
multiFieldsBuilder.build(this, context),
public static class TypeParser implements Mapper.TypeParser {
public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserContext parserContext)
throws MapperParsingException {
public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
Builder builder = new Builder(name);
TypeParsers.parseField(builder, name, node, parserContext);
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext(); ) {
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next();
String fieldName = entry.getKey();
Object fieldNode = entry.getValue();
@ -571,12 +617,28 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
private final Collator collator;
private final String nullValue;
protected ICUCollationKeywordFieldMapper(String simpleName, FieldType fieldType, MappedFieldType mappedFieldType,
MultiFields multiFields, CopyTo copyTo, String rules, String language,
String country, String variant,
String strength, String decomposition, String alternate, boolean caseLevel, String caseFirst,
boolean numeric, String variableTop, boolean hiraganaQuaternaryMode,
int ignoreAbove, Collator collator, String nullValue) {
protected ICUCollationKeywordFieldMapper(
String simpleName,
FieldType fieldType,
MappedFieldType mappedFieldType,
MultiFields multiFields,
CopyTo copyTo,
String rules,
String language,
String country,
String variant,
String strength,
String decomposition,
String alternate,
boolean caseLevel,
String caseFirst,
boolean numeric,
String variableTop,
boolean hiraganaQuaternaryMode,
int ignoreAbove,
Collator collator,
String nullValue
) {
super(simpleName, fieldType, mappedFieldType, multiFields, copyTo);
assert collator.isFrozen();
this.rules = rules;
@ -49,58 +49,51 @@ public class IcuAnalyzerTests extends BaseTokenStreamTestCase {
public void testMixedAlphabetTokenization() throws IOException {
Settings settings = Settings.builder()
Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
String input = "안녕은하철도999극장판2.1981년8월8일.일본개봉작1999년재더빙video판";
AnalysisICUPlugin plugin = new AnalysisICUPlugin();
Analyzer analyzer = plugin.getAnalyzers().get("icu_analyzer").get(idxSettings, null, "icu", settings).get();
assertAnalyzesTo(analyzer, input,
new String[]{"안녕은하철도", "999", "극장판", "2.1981", "년", "8", "월", "8", "일", "일본개봉작", "1999", "년재더빙", "video", "판"});
new String[] { "안녕은하철도", "999", "극장판", "2.1981", "년", "8", "월", "8", "일", "일본개봉작", "1999", "년재더빙", "video", "판" }
public void testMiddleDots() throws IOException {
Settings settings = Settings.builder()
Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
String input = "경승지·산악·협곡·해협·곶·심연·폭포·호수·급류";
Analyzer analyzer = new IcuAnalyzerProvider(idxSettings, null, "icu", settings).get();
assertAnalyzesTo(analyzer, input,
new String[]{"경승지", "산악", "협곡", "해협", "곶", "심연", "폭포", "호수", "급류"});
assertAnalyzesTo(analyzer, input, new String[] { "경승지", "산악", "협곡", "해협", "곶", "심연", "폭포", "호수", "급류" });
public void testUnicodeNumericCharacters() throws IOException {
Settings settings = Settings.builder()
Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
String input = "① ② ③ ⑴ ⑵ ⑶ ¼ ⅓ ⅜ ¹ ² ³ ₁ ₂ ₃";
Analyzer analyzer = new IcuAnalyzerProvider(idxSettings, null, "icu", settings).get();
assertAnalyzesTo(analyzer, input,
new String[]{"1", "2", "3", "1", "2", "3", "1/4", "1/3", "3/8", "1", "2", "3", "1", "2", "3"});
assertAnalyzesTo(analyzer, input, new String[] { "1", "2", "3", "1", "2", "3", "1/4", "1/3", "3/8", "1", "2", "3", "1", "2", "3" });
public void testBadSettings() {
Settings settings = Settings.builder()
.put("mode", "wrong")
Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).put("mode", "wrong").build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
new IcuAnalyzerProvider(idxSettings, null, "icu", settings);
IllegalArgumentException e = expectThrows(
() -> { new IcuAnalyzerProvider(idxSettings, null, "icu", settings); }
assertThat(e.getMessage(), containsString("Unknown mode [wrong] in analyzer [icu], expected one of [compose, decompose]"));
@ -60,7 +60,7 @@ public class IcuTokenizerFactoryTests extends OpenSearchTestCase {
Reader reader = new StringReader("向日葵, one-two");
assertTokenStreamContents(tokenizer, new String[]{"向日葵", "one", "two"});
assertTokenStreamContents(tokenizer, new String[] { "向日葵", "one", "two" });
public void testIcuCustomizeRuleFile() throws IOException {
@ -69,13 +69,28 @@ public class IcuTokenizerFactoryTests extends OpenSearchTestCase {
// test the tokenizer with single rule file
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("user_rule_tokenizer");
ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create();
Reader reader = new StringReader
("One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish.");
Reader reader = new StringReader("One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish.");
new String[]{"One-two", "punch", "Brang", "not", "brung-it",
"This", "one", "not", "that", "one", "is", "the", "right", "one", "ish"});
new String[] {
"ish" }
public void testMultipleIcuCustomizeRuleFiles() throws IOException {
@ -84,17 +99,15 @@ public class IcuTokenizerFactoryTests extends OpenSearchTestCase {
// test the tokenizer with two rule files
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("multi_rule_tokenizer");
ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create();
StringReader reader = new StringReader
("Some English. Немного русский. ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English.");
StringReader reader = new StringReader("Some English. Немного русский. ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English.");
assertTokenStreamContents(tokenizer, new String[]{"Some", "English",
"Немного русский. ",
"ข้อความภาษาไทยเล็ก ๆ น้อย ๆ ",
"More", "English"});
new String[] { "Some", "English", "Немного русский. ", "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ ", "More", "English" }
private static TestAnalysis createTestAnalysis() throws IOException {
InputStream keywords = IcuTokenizerFactoryTests.class.getResourceAsStream("KeywordTokenizer.rbbi");
InputStream latin = IcuTokenizerFactoryTests.class.getResourceAsStream("Latin-dont-break-on-hyphens.rbbi");
@ -46,215 +46,223 @@ import java.util.Locale;
* @deprecated Remove when IndexableBinaryStringTools is removed.
@Listeners({ ReproduceInfoPrinter.class })
@TimeoutSuite(millis = TimeUnits.HOUR)
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose")
public class IndexableBinaryStringToolsTests extends LuceneTestCase {
private static int NUM_RANDOM_TESTS;
private static int MAX_RANDOM_BINARY_LENGTH;
private static final String LINE_SEPARATOR = System.lineSeparator();
private static int NUM_RANDOM_TESTS;
private static int MAX_RANDOM_BINARY_LENGTH;
private static final String LINE_SEPARATOR = System.lineSeparator();
public static void beforeClass() throws Exception {
NUM_RANDOM_TESTS = atLeast(200);
public void testSingleBinaryRoundTrip() {
byte[] binary = new byte[] { (byte) 0x23, (byte) 0x98, (byte) 0x13,
(byte) 0xE4, (byte) 0x76, (byte) 0x41, (byte) 0xB2, (byte) 0xC9,
(byte) 0x7F, (byte) 0x0A, (byte) 0xA6, (byte) 0xD8 };
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
char encoded[] = new char[encodedLen];
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
byte decoded[] = new byte[decodedLen];
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
assertEquals("Round trip decode/decode returned different results:"
+ LINE_SEPARATOR + "original: "
+ binaryDump(binary, binary.length)
+ LINE_SEPARATOR + " encoded: "
+ charArrayDump(encoded, encoded.length)
+ LINE_SEPARATOR + " decoded: "
+ binaryDump(decoded, decoded.length),
binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
public void testEncodedSortability() {
byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH];
char[] encoded1 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH];
char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH];
char[] encoded2 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
int numBytes1 = random().nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
for (int byteNum = 0; byteNum < numBytes1; ++byteNum) {
int randomInt = random().nextInt(0x100);
originalArray1[byteNum] = (byte) randomInt;
originalString1[byteNum] = (char) randomInt;
int numBytes2 = random().nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
for (int byteNum = 0; byteNum < numBytes2; ++byteNum) {
int randomInt = random().nextInt(0x100);
original2[byteNum] = (byte) randomInt;
originalString2[byteNum] = (char) randomInt;
int originalComparison = new String(originalString1, 0, numBytes1)
.compareTo(new String(originalString2, 0, numBytes2));
originalComparison = originalComparison < 0 ? -1
: originalComparison > 0 ? 1 : 0;
int encodedLen1 = IndexableBinaryStringTools.getEncodedLength(
originalArray1, 0, numBytes1);
if (encodedLen1 > encoded1.length)
encoded1 = new char[ArrayUtil.oversize(encodedLen1, Character.BYTES)];
IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1,
0, encodedLen1);
int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2,
0, numBytes2);
if (encodedLen2 > encoded2.length)
encoded2 = new char[ArrayUtil.oversize(encodedLen2, Character.BYTES)];
IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0,
int encodedComparison = new String(encoded1, 0, encodedLen1)
.compareTo(new String(encoded2, 0, encodedLen2));
encodedComparison = encodedComparison < 0 ? -1
: encodedComparison > 0 ? 1 : 0;
assertEquals("Test #" + (testNum + 1)
+ ": Original bytes and encoded chars compare differently:"
+ LINE_SEPARATOR + " binary 1: "
+ binaryDump(originalArray1, numBytes1)
+ LINE_SEPARATOR + " binary 2: "
+ binaryDump(original2, numBytes2)
+ LINE_SEPARATOR + "encoded 1: "
+ charArrayDump(encoded1, encodedLen1)
+ LINE_SEPARATOR + "encoded 2: "
+ charArrayDump(encoded2, encodedLen2)
+ LINE_SEPARATOR, originalComparison,
public static void beforeClass() throws Exception {
NUM_RANDOM_TESTS = atLeast(200);
public void testEmptyInput() {
byte[] binary = new byte[0];
public void testSingleBinaryRoundTrip() {
byte[] binary = new byte[] {
(byte) 0x23,
(byte) 0x98,
(byte) 0x13,
(byte) 0xE4,
(byte) 0x76,
(byte) 0x41,
(byte) 0xB2,
(byte) 0xC9,
(byte) 0x7F,
(byte) 0x0A,
(byte) 0xA6,
(byte) 0xD8 };
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
char[] encoded = new char[encodedLen];
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, binary.length);
char encoded[] = new char[encodedLen];
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0, encoded.length);
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
byte[] decoded = new byte[decodedLen];
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, encoded.length);
byte decoded[] = new byte[decodedLen];
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, decoded.length);
assertEquals("decoded empty input was not empty", decoded.length, 0);
public void testAllNullInput() {
byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
char encoded[] = new char[encodedLen];
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
byte[] decoded = new byte[decodedLen];
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
assertEquals("Round trip decode/decode returned different results:"
+ LINE_SEPARATOR + " original: "
+ binaryDump(binary, binary.length)
+ LINE_SEPARATOR + "decodedBuf: "
+ binaryDump(decoded, decoded.length),
binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
public void testRandomBinaryRoundTrip() {
byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
char[] encoded = new char[MAX_RANDOM_BINARY_LENGTH * 10];
byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH];
for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
int numBytes = random().nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
for (int byteNum = 0; byteNum < numBytes; ++byteNum) {
binary[byteNum] = (byte) random().nextInt(0x100);
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
if (encoded.length < encodedLen)
encoded = new char[ArrayUtil.oversize(encodedLen, Character.BYTES)];
IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0,
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
IndexableBinaryStringTools.decode(encoded, 0, encodedLen, decoded, 0,
assertEquals("Test #" + (testNum + 1)
+ ": Round trip decode/decode returned different results:"
+ LINE_SEPARATOR + " original: "
+ binaryDump(binary, numBytes) + LINE_SEPARATOR
+ "encodedBuf: " + charArrayDump(encoded, encodedLen)
+ LINE_SEPARATOR + "decodedBuf: "
+ binaryDump(decoded, decodedLen), binaryDump(binary, numBytes),
binaryDump(decoded, decodedLen));
"Round trip decode/decode returned different results:"
+ "original: "
+ binaryDump(binary, binary.length)
+ " encoded: "
+ charArrayDump(encoded, encoded.length)
+ " decoded: "
+ binaryDump(decoded, decoded.length),
binaryDump(binary, binary.length),
binaryDump(decoded, decoded.length)
public String binaryDump(byte[] binary, int numBytes) {
StringBuilder buf = new StringBuilder();
for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
String hex = Integer.toHexString(binary[byteNum] & 0xFF);
if (hex.length() == 1) {
if (byteNum < numBytes - 1) {
buf.append(' ');
return buf.toString();
public void testEncodedSortability() {
byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH];
char[] encoded1 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH];
char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH];
char[] encoded2 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
public String charArrayDump(char[] charArray, int numBytes) {
StringBuilder buf = new StringBuilder();
for (int charNum = 0 ; charNum < numBytes ; ++charNum) {
String hex = Integer.toHexString(charArray[charNum]);
for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) {
if (charNum < numBytes - 1) {
buf.append(' ');
for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
int numBytes1 = random().nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
for (int byteNum = 0; byteNum < numBytes1; ++byteNum) {
int randomInt = random().nextInt(0x100);
originalArray1[byteNum] = (byte) randomInt;
originalString1[byteNum] = (char) randomInt;
int numBytes2 = random().nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
for (int byteNum = 0; byteNum < numBytes2; ++byteNum) {
int randomInt = random().nextInt(0x100);
original2[byteNum] = (byte) randomInt;
originalString2[byteNum] = (char) randomInt;
int originalComparison = new String(originalString1, 0, numBytes1).compareTo(new String(originalString2, 0, numBytes2));
originalComparison = originalComparison < 0 ? -1 : originalComparison > 0 ? 1 : 0;
int encodedLen1 = IndexableBinaryStringTools.getEncodedLength(originalArray1, 0, numBytes1);
if (encodedLen1 > encoded1.length) encoded1 = new char[ArrayUtil.oversize(encodedLen1, Character.BYTES)];
IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1, 0, encodedLen1);
int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2, 0, numBytes2);
if (encodedLen2 > encoded2.length) encoded2 = new char[ArrayUtil.oversize(encodedLen2, Character.BYTES)];
IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0, encodedLen2);
int encodedComparison = new String(encoded1, 0, encodedLen1).compareTo(new String(encoded2, 0, encodedLen2));
encodedComparison = encodedComparison < 0 ? -1 : encodedComparison > 0 ? 1 : 0;
"Test #"
+ (testNum + 1)
+ ": Original bytes and encoded chars compare differently:"
+ " binary 1: "
+ binaryDump(originalArray1, numBytes1)
+ " binary 2: "
+ binaryDump(original2, numBytes2)
+ "encoded 1: "
+ charArrayDump(encoded1, encodedLen1)
+ "encoded 2: "
+ charArrayDump(encoded2, encodedLen2)
public void testEmptyInput() {
byte[] binary = new byte[0];
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, binary.length);
char[] encoded = new char[encodedLen];
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0, encoded.length);
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, encoded.length);
byte[] decoded = new byte[decodedLen];
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, decoded.length);
assertEquals("decoded empty input was not empty", decoded.length, 0);
public void testAllNullInput() {
byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, binary.length);
char encoded[] = new char[encodedLen];
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0, encoded.length);
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, encoded.length);
byte[] decoded = new byte[decodedLen];
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, decoded.length);
"Round trip decode/decode returned different results:"
+ " original: "
+ binaryDump(binary, binary.length)
+ "decodedBuf: "
+ binaryDump(decoded, decoded.length),
binaryDump(binary, binary.length),
binaryDump(decoded, decoded.length)
public void testRandomBinaryRoundTrip() {
byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
char[] encoded = new char[MAX_RANDOM_BINARY_LENGTH * 10];
byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH];
for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
int numBytes = random().nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
for (int byteNum = 0; byteNum < numBytes; ++byteNum) {
binary[byteNum] = (byte) random().nextInt(0x100);
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, numBytes);
if (encoded.length < encodedLen) encoded = new char[ArrayUtil.oversize(encodedLen, Character.BYTES)];
IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0, encodedLen);
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, encodedLen);
IndexableBinaryStringTools.decode(encoded, 0, encodedLen, decoded, 0, decodedLen);
"Test #"
+ (testNum + 1)
+ ": Round trip decode/decode returned different results:"
+ " original: "
+ binaryDump(binary, numBytes)
+ "encodedBuf: "
+ charArrayDump(encoded, encodedLen)
+ "decodedBuf: "
+ binaryDump(decoded, decodedLen),
binaryDump(binary, numBytes),
binaryDump(decoded, decodedLen)
public String binaryDump(byte[] binary, int numBytes) {
StringBuilder buf = new StringBuilder();
for (int byteNum = 0; byteNum < numBytes; ++byteNum) {
String hex = Integer.toHexString(binary[byteNum] & 0xFF);
if (hex.length() == 1) {
if (byteNum < numBytes - 1) {
buf.append(' ');
return buf.toString();
public String charArrayDump(char[] charArray, int numBytes) {
StringBuilder buf = new StringBuilder();
for (int charNum = 0; charNum < numBytes; ++charNum) {
String hex = Integer.toHexString(charArray[charNum]);
for (int digit = 0; digit < 4 - hex.length(); ++digit) {
if (charNum < numBytes - 1) {
buf.append(' ');
return buf.toString();
return buf.toString();
@ -56,14 +56,15 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testDefaultUsage() throws Exception {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.strength", "primary")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "FOO", "foo");
* Turkish has some funny casing.
* This test shows how you can solve this kind of thing easily with collation.
@ -72,10 +73,10 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testBasicUsage() throws Exception {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "tr")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "tr")
.put("index.analysis.filter.myCollator.strength", "primary")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -87,11 +88,11 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testNormalization() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "tr")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.decomposition", "canonical")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "tr")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.decomposition", "canonical")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -103,11 +104,11 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testSecondaryStrength() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "secondary")
.put("index.analysis.filter.myCollator.decomposition", "no")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "secondary")
.put("index.analysis.filter.myCollator.decomposition", "no")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -120,11 +121,11 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testIgnorePunctuation() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.alternate", "shifted")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.alternate", "shifted")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -137,12 +138,12 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testIgnoreWhitespace() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.alternate", "shifted")
.put("index.analysis.filter.myCollator.variableTop", " ")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.alternate", "shifted")
.put("index.analysis.filter.myCollator.variableTop", " ")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -157,10 +158,10 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testNumerics() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.numeric", "true")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.numeric", "true")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -173,11 +174,11 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testIgnoreAccentsButNotCase() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.caseLevel", "true")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.caseLevel", "true")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -193,11 +194,11 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testUpperCaseFirst() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "tertiary")
.put("index.analysis.filter.myCollator.caseFirst", "upper")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.language", "en")
.put("index.analysis.filter.myCollator.strength", "tertiary")
.put("index.analysis.filter.myCollator.caseFirst", "upper")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
@ -213,25 +214,22 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testCustomRules() throws Exception {
RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
String DIN5007_2_tailorings =
"& ae , a\u0308 & AE , A\u0308"+
"& oe , o\u0308 & OE , O\u0308"+
"& ue , u\u0308 & UE , u\u0308";
String DIN5007_2_tailorings = "& ae , a\u0308 & AE , A\u0308" + "& oe , o\u0308 & OE , O\u0308" + "& ue , u\u0308 & UE , u\u0308";
RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
String tailoredRules = tailoredCollator.getRules();
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.rules", tailoredRules)
.put("index.analysis.filter.myCollator.strength", "primary")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.rules", tailoredRules)
.put("index.analysis.filter.myCollator.strength", "primary")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollatesToSame(filterFactory, "Töne", "Toene");
* Test basic custom rules (should not interfere with reading rules list
* in IcuCollationTokenFilterFactory and throw InvalidPathException on
@ -239,16 +237,15 @@ public class SimpleIcuCollationTokenFilterTests extends OpenSearchTestCase {
public void testBasicCustomRules() throws Exception {
Settings settings = Settings.builder()
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.rules", "&a < g")
.put("index.analysis.filter.myCollator.type", "icu_collation")
.put("index.analysis.filter.myCollator.rules", "&a < g")
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myCollator");
assertCollation(filterFactory, "green", "bird", -1);
private void assertCollatesToSame(TokenFilterFactory factory, String string1, String string2) throws IOException {
assertCollation(factory, string1, string2, 0);
@ -41,15 +41,12 @@ import org.opensearch.test.OpenSearchTestCase;
import java.io.StringReader;
* Test
public class SimpleIcuNormalizerCharFilterTests extends OpenSearchTestCase {
public void testDefaultSetting() throws Exception {
Settings settings = Settings.builder()
.put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer")
Settings settings = Settings.builder().put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer").build();
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
CharFilterFactory charFilterFactory = analysis.charFilter.get("myNormalizerChar");
@ -50,7 +50,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class CollationFieldTypeTests extends FieldTypeTestCase{
public class CollationFieldTypeTests extends FieldTypeTestCase {
private static final Collator DEFAULT_COLLATOR = Collator.getInstance(ULocale.ROOT).freeze();
@ -61,10 +61,19 @@ public class CollationFieldTypeTests extends FieldTypeTestCase{
public void testIsFieldWithinQuery() throws IOException {
CollationFieldType ft = createFieldType();
// current impl ignores args and should always return INTERSECTS
assertEquals(Relation.INTERSECTS, ft.isFieldWithinQuery(null,
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
randomBoolean(), randomBoolean(), null, null, null));
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
public void testTermQuery() {
@ -79,8 +88,7 @@ public class CollationFieldTypeTests extends FieldTypeTestCase{
assertEquals(new TermQuery(new Term("field", expected)), ft.termQuery("I WİLL USE TURKİSH CASING", null));
MappedFieldType unsearchable = new CollationFieldType("field", false, collator);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> unsearchable.termQuery("bar", null));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
@ -95,40 +103,49 @@ public class CollationFieldTypeTests extends FieldTypeTestCase{
terms.add(new BytesRef(fooKey.bytes, 0, fooKey.size));
terms.add(new BytesRef(barKey.bytes, 0, barKey.size));
assertEquals(new TermInSetQuery("field", terms),
ft.termsQuery(Arrays.asList("foo", "bar"), null));
assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null));
MappedFieldType unsearchable = new CollationFieldType("field", false, collator);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null));
IllegalArgumentException e = expectThrows(
() -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null)
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
public void testRegexpQuery() {
MappedFieldType ft = createFieldType();
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
() -> ft.regexpQuery("foo.*", 0, 0, 10, null, randomMockShardContext()));
UnsupportedOperationException e = expectThrows(
() -> ft.regexpQuery("foo.*", 0, 0, 10, null, randomMockShardContext())
assertEquals("[regexp] queries are not supported on [icu_collation_keyword] fields.", e.getMessage());
public void testFuzzyQuery() {
MappedFieldType ft = createFieldType();
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, randomMockShardContext()));
UnsupportedOperationException e = expectThrows(
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, randomMockShardContext())
assertEquals("[fuzzy] queries are not supported on [icu_collation_keyword] fields.", e.getMessage());
public void testPrefixQuery() {
MappedFieldType ft = createFieldType();
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
() -> ft.prefixQuery("prefix", null, randomMockShardContext()));
UnsupportedOperationException e = expectThrows(
() -> ft.prefixQuery("prefix", null, randomMockShardContext())
assertEquals("[prefix] queries are not supported on [icu_collation_keyword] fields.", e.getMessage());
public void testWildcardQuery() {
MappedFieldType ft = createFieldType();
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
() -> ft.wildcardQuery("foo*", null, randomMockShardContext()));
UnsupportedOperationException e = expectThrows(
() -> ft.wildcardQuery("foo*", null, randomMockShardContext())
assertEquals("[wildcard] queries are not supported on [icu_collation_keyword] fields.", e.getMessage());
@ -137,19 +154,30 @@ public class CollationFieldTypeTests extends FieldTypeTestCase{
RawCollationKey aKey = DEFAULT_COLLATOR.getRawCollationKey("a", null);
RawCollationKey bKey = DEFAULT_COLLATOR.getRawCollationKey("b", null);
TermRangeQuery expected = new TermRangeQuery("field", new BytesRef(aKey.bytes, 0, aKey.size),
new BytesRef(bKey.bytes, 0, bKey.size), false, false);
TermRangeQuery expected = new TermRangeQuery(
new BytesRef(aKey.bytes, 0, aKey.size),
new BytesRef(bKey.bytes, 0, bKey.size),
assertEquals(expected, ft.rangeQuery("a", "b", false, false, null, null, null, MOCK_QSC));
OpenSearchException ee = expectThrows(OpenSearchException.class,
() -> ft.rangeQuery("a", "b", true, true, null, null, null, MOCK_QSC_DISALLOW_EXPENSIVE));
assertEquals("[range] queries on [text] or [keyword] fields cannot be executed when " +
"'search.allow_expensive_queries' is set to false.", ee.getMessage());
OpenSearchException ee = expectThrows(
() -> ft.rangeQuery("a", "b", true, true, null, null, null, MOCK_QSC_DISALLOW_EXPENSIVE)
"[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.",
MappedFieldType unsearchable = new CollationFieldType("field", false, DEFAULT_COLLATOR);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> unsearchable.rangeQuery("a", "b", false, false, null, null, null, MOCK_QSC));
IllegalArgumentException e = expectThrows(
() -> unsearchable.rangeQuery("a", "b", false, false, null, null, null, MOCK_QSC)
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
@ -130,8 +130,7 @@ public class ICUCollationKeywordFieldMapperTests extends FieldMapperTestCase2<IC
assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field"));
mapper = createDocumentMapper(fieldMapping(b -> b.field("type", FIELD_TYPE).field("null_value", "1234")));
doc = mapper.parse(source(b -> {
doc = mapper.parse(source(b -> {}));
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
@ -229,8 +228,10 @@ public class ICUCollationKeywordFieldMapperTests extends FieldMapperTestCase2<IC
assertEquals(IndexOptions.DOCS_AND_FREQS, fields[0].fieldType().indexOptions());
for (String indexOptions : Arrays.asList("positions", "offsets")) {
Exception e = expectThrows(MapperParsingException.class,
() -> createDocumentMapper(fieldMapping(b -> b.field("type", FIELD_TYPE).field("index_options", indexOptions))));
Exception e = expectThrows(
() -> createDocumentMapper(fieldMapping(b -> b.field("type", FIELD_TYPE).field("index_options", indexOptions)))
containsString("The [" + FIELD_TYPE + "] field does not support positions, got [index_options]=" + indexOptions)
@ -289,7 +290,6 @@ public class ICUCollationKeywordFieldMapperTests extends FieldMapperTestCase2<IC
assertThat(e.getMessage(), containsString("mapper [field] has different [collator]"));
public void testIgnoreAbove() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", FIELD_TYPE).field("ignore_above", 5)));
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "elk")));
@ -49,15 +49,13 @@ public class ICUCollationKeywordFieldTypeTests extends FieldTypeTestCase {
assertEquals(Collections.singletonList("42"), fetchSourceValue(mapper.fieldType(), 42L));
assertEquals(Collections.singletonList("true"), fetchSourceValue(mapper.fieldType(), true));
ICUCollationKeywordFieldMapper ignoreAboveMapper = new ICUCollationKeywordFieldMapper.Builder("field")
ICUCollationKeywordFieldMapper ignoreAboveMapper = new ICUCollationKeywordFieldMapper.Builder("field").ignoreAbove(4)
assertEquals(Collections.emptyList(), fetchSourceValue(ignoreAboveMapper.fieldType(), "value"));
assertEquals(Collections.singletonList("42"), fetchSourceValue(ignoreAboveMapper.fieldType(), 42L));
assertEquals(Collections.singletonList("true"), fetchSourceValue(ignoreAboveMapper.fieldType(), true));
ICUCollationKeywordFieldMapper nullValueMapper = new ICUCollationKeywordFieldMapper.Builder("field")
ICUCollationKeywordFieldMapper nullValueMapper = new ICUCollationKeywordFieldMapper.Builder("field").nullValue("NULL")
assertEquals(Collections.singletonList("NULL"), fetchSourceValue(nullValueMapper.fieldType(), null));
@ -49,4 +49,3 @@ public class IcuClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestCase
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -32,7 +32,6 @@
package org.opensearch.index.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -47,7 +46,7 @@ import java.util.Set;
import static java.util.Collections.singletonMap;
public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{
public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory {
private static final Map<String, Set<?>> NAMED_STOP_WORDS = singletonMap("_japanese_", JapaneseAnalyzer.getDefaultStopSet());
private final CharArraySet stopWords;
@ -60,8 +59,14 @@ public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{
super(indexSettings, name, settings);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
this.stopWords = Analysis.parseWords(env, settings, "stopwords",
JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
this.stopWords = Analysis.parseWords(
@ -59,5 +59,4 @@ public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider<Japa
return this.analyzer;
@ -76,8 +76,9 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
public static UserDictionary getUserDictionary(Environment env, Settings settings) {
if (settings.get(USER_DICT_PATH_OPTION) != null && settings.get(USER_DICT_RULES_OPTION) != null) {
throw new IllegalArgumentException("It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" +
" with [" + USER_DICT_RULES_OPTION + "]");
throw new IllegalArgumentException(
"It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" + " with [" + USER_DICT_RULES_OPTION + "]"
try {
List<String> ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, false);
@ -91,11 +92,12 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
if (line.startsWith("#") == false) {
String[] values = CSVUtil.parse(line);
if (dup.add(values[0]) == false) {
throw new IllegalArgumentException("Found duplicate term [" + values[0] + "] in user dictionary " +
"at line [" + lineNum + "]");
throw new IllegalArgumentException(
"Found duplicate term [" + values[0] + "] in user dictionary " + "at line [" + lineNum + "]"
++ lineNum;
StringBuilder sb = new StringBuilder();
for (String line : ruleList) {
@ -93,7 +93,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
assertThat(analyzer.analyzer(), instanceOf(CustomAnalyzer.class));
assertThat(analyzer.analyzer().tokenStream(null, new StringReader("")), instanceOf(JapaneseTokenizer.class));
CharFilterFactory charFilterFactory = analysis.charFilter.get("kuromoji_iteration_mark");
CharFilterFactory charFilterFactory = analysis.charFilter.get("kuromoji_iteration_mark");
assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class));
@ -103,7 +103,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_pos");
assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class));
String source = "私は制限スピードを超える。";
String[] expected = new String[]{"私", "は", "制限", "スピード", "を"};
String[] expected = new String[] { "私", "は", "制限", "スピード", "を" };
Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
tokenizer.setReader(new StringReader(source));
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
@ -116,7 +116,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class));
String source = "寿司がおいしいね";
String[] expected_tokens = new String[]{"寿司", "おいしい"};
String[] expected_tokens = new String[] { "寿司", "おいしい" };
Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
tokenizer.setReader(new StringReader(source));
@ -129,7 +129,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_rf");
assertThat(tokenFilter, instanceOf(KuromojiReadingFormFilterFactory.class));
String source = "今夜はロバート先生と話した";
String[] expected_tokens_romaji = new String[]{"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"};
String[] expected_tokens_romaji = new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" };
Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
tokenizer.setReader(new StringReader(source));
@ -138,7 +138,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
tokenizer.setReader(new StringReader(source));
String[] expected_tokens_katakana = new String[]{"コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ"};
String[] expected_tokens_katakana = new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" };
tokenFilter = analysis.tokenFilter.get("kuromoji_readingform");
assertThat(tokenFilter, instanceOf(KuromojiReadingFormFilterFactory.class));
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
@ -156,7 +156,21 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
// パーティー should be stemmed by default
// (min len) コピー should not be stemmed
String[] expected_tokens_katakana = new String[] {
"明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
"た" };
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
tokenFilter = analysis.tokenFilter.get("kuromoji_ks");
@ -167,7 +181,21 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
// パーティー should not be stemmed since min len == 6
// コピー should not be stemmed
expected_tokens_katakana = new String[] {
"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
"た" };
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
@ -209,7 +237,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("ja_stop");
assertThat(tokenFilter, instanceOf(JapaneseStopTokenFilterFactory.class));
String source = "私は制限スピードを超える。";
String[] expected = new String[]{"私", "制限", "超える"};
String[] expected = new String[] { "私", "制限", "超える" };
Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
tokenizer.setReader(new StringReader(source));
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
@ -233,8 +261,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
return createTestAnalysis(new Index("test", "_na_"), nodeSettings, settings, new AnalysisKuromojiPlugin());
public static void assertSimpleTSOutput(TokenStream stream,
String[] expected) throws IOException {
public static void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
assertThat(termAttr, notNullValue());
@ -246,8 +273,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
assertThat("not all tokens produced", i, equalTo(expected.length));
private void assertCharFilterEquals(Reader filtered,
String expected) throws IOException {
private void assertCharFilterEquals(Reader filtered, String expected) throws IOException {
String actual = readFully(filtered);
assertThat(actual, equalTo(expected));
@ -255,8 +281,8 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
private String readFully(Reader reader) throws IOException {
StringBuilder buffer = new StringBuilder();
int ch;
while((ch = reader.read()) != -1){
while ((ch = reader.read()) != -1) {
buffer.append((char) ch);
return buffer.toString();
@ -265,7 +291,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TestAnalysis analysis = createTestAnalysis();
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_user_dict");
String source = "私は制限スピードを超える。";
String[] expected = new String[]{"私", "は", "制限スピード", "を", "超える"};
String[] expected = new String[] { "私", "は", "制限スピード", "を", "超える" };
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(new StringReader(source));
@ -283,7 +309,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TestAnalysis analysis = createTestAnalysis();
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_cost");
String source = "鳩山積み";
String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"};
String[] expected = new String[] { "鳩", "鳩山", "山積み", "積み" };
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(new StringReader(source));
@ -294,7 +320,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TestAnalysis analysis = createTestAnalysis();
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_examples");
String source = "鳩山積み";
String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"};
String[] expected = new String[] { "鳩", "鳩山", "山積み", "積み" };
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(new StringReader(source));
@ -305,7 +331,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TestAnalysis analysis = createTestAnalysis();
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_both");
String source = "鳩山積み";
String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"};
String[] expected = new String[] { "鳩", "鳩山", "山積み", "積み" };
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(new StringReader(source));
@ -318,7 +344,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_number");
assertThat(tokenFilter, instanceOf(KuromojiNumberFilterFactory.class));
String source = "本日十万二千五百円のワインを買った";
String[] expected = new String[]{"本日", "102500", "円", "の", "ワイン", "を", "買っ", "た"};
String[] expected = new String[] { "本日", "102500", "円", "の", "ワイン", "を", "買っ", "た" };
Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
tokenizer.setReader(new StringReader(source));
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
@ -332,11 +358,11 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
try (TokenStream stream = analyzer.tokenStream("", "制限スピード")) {
assertTokenStreamContents(stream, new String[]{"制限スピード"});
assertTokenStreamContents(stream, new String[] { "制限スピード" });
try (TokenStream stream = analyzer.tokenStream("", "c++world")) {
assertTokenStreamContents(stream, new String[]{"c++", "world"});
assertTokenStreamContents(stream, new String[] { "c++", "world" });
@ -347,15 +373,22 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
.putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++,c++,w,w")
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
assertThat(exc.getMessage(), containsString("It is not allowed to use [user_dictionary] in conjunction " +
"with [user_dictionary_rules]"));
containsString("It is not allowed to use [user_dictionary] in conjunction " + "with [user_dictionary_rules]")
public void testKuromojiAnalyzerDuplicateUserDictRule() throws Exception {
Settings settings = Settings.builder()
.put("index.analysis.analyzer.my_analyzer.type", "kuromoji")
"c++,c++,w,w", "#comment", "制限スピード,制限スピード,セイゲンスピード,テスト名詞", "制限スピード,制限スピード,セイゲンスピード,テスト名詞")
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
assertThat(exc.getMessage(), containsString("[制限スピード] in user dictionary at line [3]"));
@ -365,7 +398,7 @@ public class KuromojiAnalysisTests extends OpenSearchTestCase {
TestAnalysis analysis = createTestAnalysis();
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_discard_compound_token");
String source = "株式会社";
String[] expected = new String[] {"株式", "会社"};
String[] expected = new String[] { "株式", "会社" };
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(new StringReader(source));
@ -49,4 +49,3 @@ public class KuromojiClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTest
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -45,7 +45,6 @@ import org.opensearch.index.IndexSettings;
import static org.opensearch.index.analysis.NoriPartOfSpeechStopFilterFactory.resolvePOSList;
public class NoriAnalyzerProvider extends AbstractIndexAnalyzerProvider<KoreanAnalyzer> {
private final KoreanAnalyzer analyzer;
@ -63,5 +62,4 @@ public class NoriAnalyzerProvider extends AbstractIndexAnalyzerProvider<KoreanAn
return analyzer;
@ -57,7 +57,6 @@ public class NoriPartOfSpeechStopFilterFactory extends AbstractTokenFilterFactor
return new KoreanPartOfSpeechStopFilter(tokenStream, stopTags);
static Set<POS.Tag> resolvePOSList(List<String> tagList) {
Set<POS.Tag> stopTags = new HashSet<>();
for (String tag : tagList) {
@ -63,8 +63,9 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory {
public static UserDictionary getUserDictionary(Environment env, Settings settings) {
if (settings.get(USER_DICT_PATH_OPTION) != null && settings.get(USER_DICT_RULES_OPTION) != null) {
throw new IllegalArgumentException("It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" +
" with [" + USER_DICT_RULES_OPTION + "]");
throw new IllegalArgumentException(
"It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" + " with [" + USER_DICT_RULES_OPTION + "]"
List<String> ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, true);
StringBuilder sb = new StringBuilder();
@ -92,8 +93,13 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory {
public Tokenizer create() {
return new KoreanTokenizer(KoreanTokenizer.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, decompoundMode, false,
return new KoreanTokenizer(
@ -83,12 +83,12 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
try (TokenStream stream = analyzer.tokenStream("", "여섯 용이" )) {
assertTokenStreamContents(stream, new String[] {"용", "이"});
try (TokenStream stream = analyzer.tokenStream("", "여섯 용이")) {
assertTokenStreamContents(stream, new String[] { "용", "이" });
try (TokenStream stream = analyzer.tokenStream("", "가늠표")) {
assertTokenStreamContents(stream, new String[] {"가늠표", "가늠", "표"});
assertTokenStreamContents(stream, new String[] { "가늠표", "가늠", "표" });
@ -100,11 +100,11 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
try (TokenStream stream = analyzer.tokenStream("", "세종시")) {
assertTokenStreamContents(stream, new String[]{"세종", "시"});
assertTokenStreamContents(stream, new String[] { "세종", "시" });
try (TokenStream stream = analyzer.tokenStream("", "c++world")) {
assertTokenStreamContents(stream, new String[]{"c++", "world"});
assertTokenStreamContents(stream, new String[] { "c++", "world" });
@ -115,12 +115,12 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
try (TokenStream stream = analyzer.tokenStream("", "세종시" )) {
assertTokenStreamContents(stream, new String[] {"세종", "시"});
try (TokenStream stream = analyzer.tokenStream("", "세종시")) {
assertTokenStreamContents(stream, new String[] { "세종", "시" });
try (TokenStream stream = analyzer.tokenStream("", "c++world")) {
assertTokenStreamContents(stream, new String[] {"c++", "world"});
assertTokenStreamContents(stream, new String[] { "c++", "world" });
@ -131,8 +131,10 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
.putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++", "C샤프", "세종", "세종시 세종 시")
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
assertThat(exc.getMessage(), containsString("It is not allowed to use [user_dictionary] in conjunction " +
"with [user_dictionary_rules]"));
containsString("It is not allowed to use [user_dictionary] in conjunction " + "with [user_dictionary_rules]")
public void testNoriTokenizer() throws Exception {
@ -143,12 +145,12 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").create();
tokenizer.setReader(new StringReader("뿌리가 깊은 나무"));
assertTokenStreamContents(tokenizer, new String[] {"뿌리", "가", "깊", "은", "나무"});
assertTokenStreamContents(tokenizer, new String[] { "뿌리", "가", "깊", "은", "나무" });
tokenizer.setReader(new StringReader("가늠표"));
assertTokenStreamContents(tokenizer, new String[] {"가늠표", "가늠", "표"});
assertTokenStreamContents(tokenizer, new String[] { "가늠표", "가늠", "표" });
// discard_punctuation default(true)
tokenizer.setReader(new StringReader("3.2개"));
assertTokenStreamContents(tokenizer, new String[] {"3", "2", "개"});
assertTokenStreamContents(tokenizer, new String[] { "3", "2", "개" });
public void testNoriTokenizerDiscardPunctuationOptionTrue() throws Exception {
@ -156,7 +158,7 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").create();
tokenizer.setReader(new StringReader("3.2개"));
assertTokenStreamContents(tokenizer, new String[] {"3", "2", "개"});
assertTokenStreamContents(tokenizer, new String[] { "3", "2", "개" });
public void testNoriTokenizerDiscardPunctuationOptionFalse() throws Exception {
@ -164,15 +166,14 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
TestAnalysis analysis = createTestAnalysis(settings);
Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").create();
tokenizer.setReader(new StringReader("3.2개"));
assertTokenStreamContents(tokenizer, new String[] {"3", ".", "2", "개"});
assertTokenStreamContents(tokenizer, new String[] { "3", ".", "2", "개" });
public void testNoriTokenizerInvalidDiscardPunctuationOption() {
String wrongOption = "wrong";
Settings settings = createDiscardPunctuationOption(wrongOption);
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings));
assertThat(exc.getMessage(), containsString("Failed to parse value [" + wrongOption
+ "] as only [true] or [false] are allowed."));
assertThat(exc.getMessage(), containsString("Failed to parse value [" + wrongOption + "] as only [true] or [false] are allowed."));
public void testNoriPartOfSpeech() throws IOException {
@ -185,7 +186,7 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
Tokenizer tokenizer = new KoreanTokenizer();
tokenizer.setReader(new StringReader("여섯 용이"));
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] {"용", "이"});
assertTokenStreamContents(stream, new String[] { "용", "이" });
public void testNoriReadingForm() throws IOException {
@ -199,7 +200,7 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
Tokenizer tokenizer = new KoreanTokenizer();
tokenizer.setReader(new StringReader("鄕歌"));
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] {"향가"});
assertTokenStreamContents(stream, new String[] { "향가" });
public void testNoriNumber() throws IOException {
@ -213,7 +214,7 @@ public class NoriAnalysisTests extends OpenSearchTokenStreamTestCase {
Tokenizer tokenizer = new KoreanTokenizer();
tokenizer.setReader(new StringReader("오늘 십만이천오백원짜리 와인 구입"));
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] {"오늘", "102500", "원", "짜리", "와인", "구입"});
assertTokenStreamContents(stream, new String[] { "오늘", "102500", "원", "짜리", "와인", "구입" });
private Settings createDiscardPunctuationOption(String option) {
@ -49,4 +49,3 @@ public class NoriClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestCase
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -62,8 +62,7 @@ import java.util.List;
public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(PhoneticTokenFilterFactory.class);
private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(PhoneticTokenFilterFactory.class);
private final Encoder encoder;
private final boolean replace;
@ -101,38 +100,38 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
this.encoder = null;
this.maxcodelength = settings.getAsInt("max_code_len", 4);
} else if ("bm".equalsIgnoreCase(encodername)
|| "beider_morse".equalsIgnoreCase(encodername)
|| "beidermorse".equalsIgnoreCase(encodername)) {
this.encoder = null;
this.languageset = settings.getAsList("languageset");
String ruleType = settings.get("rule_type", "approx");
if ("approx".equalsIgnoreCase(ruleType)) {
ruletype = RuleType.APPROX;
} else if ("exact".equalsIgnoreCase(ruleType)) {
ruletype = RuleType.EXACT;
|| "beider_morse".equalsIgnoreCase(encodername)
|| "beidermorse".equalsIgnoreCase(encodername)) {
this.encoder = null;
this.languageset = settings.getAsList("languageset");
String ruleType = settings.get("rule_type", "approx");
if ("approx".equalsIgnoreCase(ruleType)) {
ruletype = RuleType.APPROX;
} else if ("exact".equalsIgnoreCase(ruleType)) {
ruletype = RuleType.EXACT;
} else {
throw new IllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder");
String nameType = settings.get("name_type", "generic");
if ("GENERIC".equalsIgnoreCase(nameType)) {
nametype = NameType.GENERIC;
} else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
nametype = NameType.ASHKENAZI;
} else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
nametype = NameType.SEPHARDIC;
} else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
this.encoder = new KoelnerPhonetik();
} else if ("haasephonetik".equalsIgnoreCase(encodername)) {
this.encoder = new HaasePhonetik();
} else if ("nysiis".equalsIgnoreCase(encodername)) {
this.encoder = new Nysiis();
} else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
this.encoder = null;
this.isDaitchMokotoff = true;
} else {
throw new IllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder");
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
String nameType = settings.get("name_type", "generic");
if ("GENERIC".equalsIgnoreCase(nameType)) {
nametype = NameType.GENERIC;
} else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
nametype = NameType.ASHKENAZI;
} else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
nametype = NameType.SEPHARDIC;
} else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
this.encoder = new KoelnerPhonetik();
} else if ("haasephonetik".equalsIgnoreCase(encodername)) {
this.encoder = new HaasePhonetik();
} else if ("nysiis".equalsIgnoreCase(encodername)) {
this.encoder = new Nysiis();
} else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
this.encoder = null;
this.isDaitchMokotoff = true;
} else {
throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
@ -161,10 +160,11 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenFilterFactory getSynonymFilter() {
if (indexSettings.getIndexVersionCreated().onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
else {
DEPRECATION_LOGGER.deprecate("synonym_tokenfilters", "Token filter [" + name()
+ "] will not be usable to parse synonyms after v7.0");
} else {
"Token filter [" + name() + "] will not be usable to parse synonyms after v7.0"
return this;
@ -45,15 +45,25 @@ package org.opensearch.index.analysis.phonetic;
* nach: Martin Wilz, Aspekte der Kodierung phonetischer Ähnlichkeiten
* in deutschen Eigennamen, Magisterarbeit.
* http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf
* @author <a href="mailto:joergprante@gmail.com">Jörg Prante</a>
public class HaasePhonetik extends KoelnerPhonetik {
private static final String[] HAASE_VARIATIONS_PATTERNS = {"OWN", "RB", "WSK", "A$", "O$", "SCH",
"GLI", "EAU$", "^CH", "AUX", "EUX", "ILLE"};
private static final String[] HAASE_VARIATIONS_REPLACEMENTS = {"AUN", "RW", "RSK", "AR", "OW", "CH",
"LI", "O", "SCH", "O", "O", "I"};
private static final String[] HAASE_VARIATIONS_PATTERNS = {
"ILLE" };
private static final String[] HAASE_VARIATIONS_REPLACEMENTS = { "AUN", "RW", "RSK", "AR", "OW", "CH", "LI", "O", "SCH", "O", "O", "I" };
protected String[] getPatterns() {
@ -58,18 +58,14 @@ import org.apache.commons.codec.StringEncoder;
public class KoelnerPhonetik implements StringEncoder {
private static final String[] POSTEL_VARIATIONS_PATTERNS = {"AUN", "OWN", "RB", "RW", "WSK", "RSK"};
private static final String[] POSTEL_VARIATIONS_REPLACEMENTS = {"OWN", "AUN", "RW", "RB", "RSK", "WSK"};
private static final String[] POSTEL_VARIATIONS_PATTERNS = { "AUN", "OWN", "RB", "RW", "WSK", "RSK" };
private static final String[] POSTEL_VARIATIONS_REPLACEMENTS = { "OWN", "AUN", "RW", "RB", "RSK", "WSK" };
private Pattern[] variationsPatterns;
private boolean primary = false;
private final Set<Character> csz = new HashSet<>(Arrays.asList(
'C', 'S', 'Z'));
private final Set<Character> ckq = new HashSet<>(Arrays.asList(
'C', 'K', 'Q'));
private final Set<Character> aouhkxq = new HashSet<>(Arrays.asList(
'A', 'O', 'U', 'H', 'K', 'X', 'Q'));
private final Set<Character> ahkloqrux = new HashSet<>(Arrays.asList(
'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'));
private final Set<Character> csz = new HashSet<>(Arrays.asList('C', 'S', 'Z'));
private final Set<Character> ckq = new HashSet<>(Arrays.asList('C', 'K', 'Q'));
private final Set<Character> aouhkxq = new HashSet<>(Arrays.asList('A', 'O', 'U', 'H', 'K', 'X', 'Q'));
private final Set<Character> ahkloqrux = new HashSet<>(Arrays.asList('A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'));
* Constructor for Kölner Phonetik
@ -132,7 +128,6 @@ public class KoelnerPhonetik implements StringEncoder {
return sb.toString();
private void init() {
this.variationsPatterns = new Pattern[getPatterns().length];
for (int i = 0; i < getPatterns().length; i++) {
@ -85,15 +85,15 @@ import java.util.regex.Pattern;
public class Nysiis implements StringEncoder {
private static final char[] CHARS_A = new char[]{'A'};
private static final char[] CHARS_AF = new char[]{'A', 'F'};
private static final char[] CHARS_C = new char[]{'C'};
private static final char[] CHARS_FF = new char[]{'F', 'F'};
private static final char[] CHARS_G = new char[]{'G'};
private static final char[] CHARS_N = new char[]{'N'};
private static final char[] CHARS_NN = new char[]{'N', 'N'};
private static final char[] CHARS_S = new char[]{'S'};
private static final char[] CHARS_SSS = new char[]{'S', 'S', 'S'};
private static final char[] CHARS_A = new char[] { 'A' };
private static final char[] CHARS_AF = new char[] { 'A', 'F' };
private static final char[] CHARS_C = new char[] { 'C' };
private static final char[] CHARS_FF = new char[] { 'F', 'F' };
private static final char[] CHARS_G = new char[] { 'G' };
private static final char[] CHARS_N = new char[] { 'N' };
private static final char[] CHARS_NN = new char[] { 'N', 'N' };
private static final char[] CHARS_S = new char[] { 'S' };
private static final char[] CHARS_SSS = new char[] { 'S', 'S', 'S' };
private static final Pattern PAT_MAC = Pattern.compile("^MAC");
private static final Pattern PAT_KN = Pattern.compile("^KN");
private static final Pattern PAT_K = Pattern.compile("^K");
@ -166,16 +166,17 @@ public class Nysiis implements StringEncoder {
// 5. H -> If previous or next is a non vowel, previous.
if (curr == 'H' && (!isVowel(prev) || !isVowel(next))) {
return new char[]{prev};
return new char[] { prev };
// 6. W -> If previous is vowel, previous.
if (curr == 'W' && isVowel(prev)) {
return new char[]{prev};
return new char[] { prev };
return new char[]{curr};
return new char[] { curr };
* Indicates the strict mode.
@ -48,4 +48,3 @@ public class AnalysisPhoneticPlugin extends Plugin implements AnalysisPlugin {
return singletonMap("phonetic", PhoneticTokenFilterFactory::new);
@ -65,20 +65,27 @@ public class AnalysisPhoneticFactoryTests extends AnalysisFactoryTestCase {
AnalysisPhoneticPlugin plugin = new AnalysisPhoneticPlugin();
Settings settings = Settings.builder()
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT))
VersionUtils.randomVersionBetween(random(), LegacyESVersion.V_7_0_0, Version.CURRENT)
.put("path.home", createTempDir().toString())
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
TokenFilterFactory tff
= plugin.getTokenFilters().get("phonetic").get(idxSettings, null, "phonetic", settings);
TokenFilterFactory tff = plugin.getTokenFilters().get("phonetic").get(idxSettings, null, "phonetic", settings);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, tff::getSynonymFilter);
assertEquals("Token filter [phonetic] cannot be used to parse synonyms", e.getMessage());
settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(),
LegacyESVersion.V_6_0_0, VersionUtils.getPreviousVersion(LegacyESVersion.V_7_0_0)))
.put("path.home", createTempDir().toString())
idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
@ -57,9 +57,10 @@ public class SimplePhoneticAnalysisTests extends OpenSearchTestCase {
public void setup() throws IOException {
String yaml = "/org/opensearch/index/analysis/phonetic-1.yml";
Settings settings = Settings.builder().loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
Settings settings = Settings.builder()
.loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
this.analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisPhoneticPlugin());
@ -72,9 +73,35 @@ public class SimplePhoneticAnalysisTests extends OpenSearchTestCase {
TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilter");
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("ABADIAS"));
String[] expected = new String[] { "abYdias", "abYdios", "abadia", "abadiaS", "abadias", "abadio", "abadioS", "abadios", "abodia",
"abodiaS", "abodias", "abodio", "abodioS", "abodios", "avadias", "avadios", "avodias", "avodios", "obadia", "obadiaS",
"obadias", "obadio", "obadioS", "obadios", "obodia", "obodiaS", "obodias", "obodioS" };
String[] expected = new String[] {
"obodioS" };
BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
@ -82,8 +109,23 @@ public class SimplePhoneticAnalysisTests extends OpenSearchTestCase {
TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilterfrench");
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("Rimbault"));
String[] expected = new String[] { "rimbD", "rimbDlt", "rimba", "rimbalt", "rimbo", "rimbolt", "rimbu", "rimbult", "rmbD", "rmbDlt",
"rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult" };
String[] expected = new String[] {
"rmbult" };
BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
@ -49,4 +49,3 @@ public class PhoneticClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTest
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -58,8 +58,14 @@ public class SmartChineseStopTokenFilterFactory extends AbstractTokenFilterFacto
super(indexSettings, name, settings);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
this.stopWords = Analysis.parseWords(env, settings, "stopwords",
SmartChineseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
this.stopWords = Analysis.parseWords(
@ -42,6 +42,7 @@ public class AnalysisSmartChineseFactoryTests extends AnalysisFactoryTestCase {
public AnalysisSmartChineseFactoryTests() {
super(new AnalysisSmartChinesePlugin());
protected Map<String, Class<?>> getTokenizers() {
Map<String, Class<?>> tokenizers = new HashMap<>(super.getTokenizers());
@ -44,8 +44,7 @@ import static org.hamcrest.Matchers.instanceOf;
public class SimpleSmartChineseAnalysisTests extends OpenSearchTestCase {
public void testDefaultsIcuAnalysis() throws IOException {
final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY,
new AnalysisSmartChinesePlugin());
final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisSmartChinesePlugin());
TokenizerFactory tokenizerFactory = analysis.tokenizer.get("smartcn_tokenizer");
MatcherAssert.assertThat(tokenizerFactory, instanceOf(SmartChineseTokenizerTokenizerFactory.class));
@ -49,4 +49,3 @@ public class SmartCNClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestC
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -41,16 +41,14 @@ import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory {
public PolishStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@Override public TokenStream create(TokenStream tokenStream) {
public TokenStream create(TokenStream tokenStream) {
return new StempelFilter(tokenStream, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
@ -32,7 +32,6 @@
package org.opensearch.index.analysis.pl;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -62,8 +61,7 @@ public class PolishStopTokenFilterFactory extends AbstractTokenFilterFactory {
super(indexSettings, name, settings);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
this.stopWords = Analysis.parseWords(env, settings, "stopwords",
PolishAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
this.stopWords = Analysis.parseWords(env, settings, "stopwords", PolishAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
@ -64,7 +64,7 @@ public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
public void testThreadSafety() throws IOException {
// TODO: is this the right boilerplate? I forked this out of TransportAnalyzeAction.java:
// TODO: is this the right boilerplate? I forked this out of TransportAnalyzeAction.java:
Settings settings = Settings.builder()
// for _na_
@ -47,8 +47,7 @@ import static org.hamcrest.Matchers.instanceOf;
public class PolishAnalysisTests extends OpenSearchTestCase {
public void testDefaultsPolishAnalysis() throws IOException {
final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY,
new AnalysisStempelPlugin());
final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisStempelPlugin());
TokenFilterFactory tokenizerFactory = analysis.tokenFilter.get("polish_stem");
MatcherAssert.assertThat(tokenizerFactory, instanceOf(PolishStemTokenFilterFactory.class));
@ -59,9 +59,7 @@ public class SimplePolishTokenFilterTests extends OpenSearchTestCase {
private void testToken(String source, String expected) throws IOException {
Index index = new Index("test", "_na_");
Settings settings = Settings.builder()
.put("index.analysis.filter.myStemmer.type", "polish_stem")
Settings settings = Settings.builder().put("index.analysis.filter.myStemmer.type", "polish_stem").build();
TestAnalysis analysis = createTestAnalysis(index, settings, new AnalysisStempelPlugin());
TokenFilterFactory filterFactory = analysis.tokenFilter.get("myStemmer");
@ -49,4 +49,3 @@ public class StempelClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestC
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -56,5 +56,4 @@ public class UkrainianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Ukr
return this.analyzer;
@ -47,8 +47,7 @@ import static org.hamcrest.Matchers.instanceOf;
public class UkrainianAnalysisTests extends OpenSearchTestCase {
public void testDefaultsUkranianAnalysis() throws IOException {
final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY,
new AnalysisUkrainianPlugin());
final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisUkrainianPlugin());
Analyzer analyzer = analysis.indexAnalyzers.get("ukrainian").analyzer();
MatcherAssert.assertThat(analyzer, instanceOf(UkrainianMorfologikAnalyzer.class));
@ -49,4 +49,3 @@ public class UkrainianClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTes
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -171,10 +171,12 @@ public abstract class AbstractAzureComputeServiceTestCase extends OpenSearchInte
* network addresses for Azure instances running on the same host but different ports.
protected AzureSeedHostsProvider createSeedHostsProvider(final Settings settings,
final AzureComputeService azureComputeService,
final TransportService transportService,
final NetworkService networkService) {
protected AzureSeedHostsProvider createSeedHostsProvider(
final Settings settings,
final AzureComputeService azureComputeService,
final TransportService transportService,
final NetworkService networkService
) {
return new AzureSeedHostsProvider(settings, azureComputeService, transportService, networkService) {
protected String resolveInstanceAddress(final HostType hostType, final RoleInstance instance) {
@ -129,24 +129,31 @@ public class AzureDiscoveryClusterFormationTests extends OpenSearchIntegTestCase
} catch (IOException e) {
throw new RuntimeException(e);
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
return Settings.builder()
.put(DiscoveryModule.DISCOVERY_SEED_PROVIDERS_SETTING.getKey(), AzureDiscoveryPlugin.AZURE)
.put(Environment.PATH_LOGS_SETTING.getKey(), resolve)
.put(TransportSettings.PORT.getKey(), 0)
.put(Node.WRITE_PORTS_FILE_SETTING.getKey(), "true")
.put(AzureComputeService.Management.ENDPOINT_SETTING.getKey(), "https://" + InetAddress.getLoopbackAddress().getHostAddress() +
":" + httpsServer.getAddress().getPort())
"https://" + InetAddress.getLoopbackAddress().getHostAddress() + ":" + httpsServer.getAddress().getPort()
.put(AzureComputeService.Management.KEYSTORE_PATH_SETTING.getKey(), keyStoreFile.toAbsolutePath())
.put(AzureComputeService.Management.KEYSTORE_PASSWORD_SETTING.getKey(), "keypass")
.put(AzureComputeService.Management.KEYSTORE_TYPE_SETTING.getKey(), "jks")
.put(AzureComputeService.Management.SERVICE_NAME_SETTING.getKey(), "myservice")
.put(AzureComputeService.Management.SUBSCRIPTION_ID_SETTING.getKey(), "subscription")
.put(AzureComputeService.Discovery.DEPLOYMENT_NAME_SETTING.getKey(), "mydeployment")
.put(AzureComputeService.Discovery.ENDPOINT_NAME_SETTING.getKey(), "myendpoint")
@ -293,9 +300,9 @@ public class AzureDiscoveryClusterFormationTests extends OpenSearchIntegTestCase
} else if (JavaVersion.current().compareTo(JavaVersion.parse("12")) < 0) {
return "TLSv1.2";
} else {
JavaVersion full =
(PrivilegedAction<JavaVersion>) () -> JavaVersion.parse(System.getProperty("java.version")));
JavaVersion full = AccessController.doPrivileged(
(PrivilegedAction<JavaVersion>) () -> JavaVersion.parse(System.getProperty("java.version"))
if (full.compareTo(JavaVersion.parse("12.0.1")) < 0) {
return "TLSv1.2";
@ -40,16 +40,13 @@ import org.opensearch.test.OpenSearchIntegTestCase;
import static org.hamcrest.Matchers.containsString;
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST,
numDataNodes = 0,
transportClientRatio = 0.0,
numClientNodes = 0)
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0.0, numClientNodes = 0)
public class AzureSimpleTests extends AbstractAzureComputeServiceTestCase {
public void testOneNodeShouldRunUsingPrivateIp() {
Settings.Builder settings = Settings.builder()
.put(Management.SERVICE_NAME_SETTING.getKey(), "dummy")
.put(Discovery.HOST_TYPE_SETTING.getKey(), "private_ip");
.put(Management.SERVICE_NAME_SETTING.getKey(), "dummy")
.put(Discovery.HOST_TYPE_SETTING.getKey(), "private_ip");
final String node1 = internalCluster().startNode(settings);
@ -61,8 +58,8 @@ public class AzureSimpleTests extends AbstractAzureComputeServiceTestCase {
public void testOneNodeShouldRunUsingPublicIp() {
Settings.Builder settings = Settings.builder()
.put(Management.SERVICE_NAME_SETTING.getKey(), "dummy")
.put(Discovery.HOST_TYPE_SETTING.getKey(), "public_ip");
.put(Management.SERVICE_NAME_SETTING.getKey(), "dummy")
.put(Discovery.HOST_TYPE_SETTING.getKey(), "public_ip");
final String node1 = internalCluster().startNode(settings);
@ -74,8 +71,8 @@ public class AzureSimpleTests extends AbstractAzureComputeServiceTestCase {
public void testOneNodeShouldRunUsingWrongSettings() {
Settings.Builder settings = Settings.builder()
.put(Management.SERVICE_NAME_SETTING.getKey(), "dummy")
.put(Discovery.HOST_TYPE_SETTING.getKey(), "do_not_exist");
.put(Management.SERVICE_NAME_SETTING.getKey(), "dummy")
.put(Discovery.HOST_TYPE_SETTING.getKey(), "do_not_exist");
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> internalCluster().startNode(settings));
assertThat(e.getMessage(), containsString("invalid value for host type [do_not_exist]"));
@ -38,15 +38,11 @@ import org.opensearch.cloud.azure.classic.management.AzureComputeService.Managem
import org.opensearch.common.settings.Settings;
import org.opensearch.test.OpenSearchIntegTestCase;
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST,
numDataNodes = 0,
transportClientRatio = 0.0,
numClientNodes = 0)
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0.0, numClientNodes = 0)
public class AzureTwoStartedNodesTests extends AbstractAzureComputeServiceTestCase {
public void testTwoNodesShouldRunUsingPrivateOrPublicIp() {
final String hostType =
final String hostType = randomFrom(org.opensearch.discovery.azure.classic.AzureSeedHostsProvider.HostType.values()).getType();
logger.info("--> using azure host type " + hostType);
final Settings settings = Settings.builder()
@ -47,44 +47,78 @@ import java.util.function.Function;
public interface AzureComputeService {
final class Management {
public static final Setting<String> SUBSCRIPTION_ID_SETTING =
Setting.simpleString("cloud.azure.management.subscription.id", Property.NodeScope, Property.Filtered);
public static final Setting<String> SERVICE_NAME_SETTING =
Setting.simpleString("cloud.azure.management.cloud.service.name", Property.NodeScope);
public static final Setting<String> SUBSCRIPTION_ID_SETTING = Setting.simpleString(
public static final Setting<String> SERVICE_NAME_SETTING = Setting.simpleString(
// Keystore settings
public static final Setting<String> KEYSTORE_PATH_SETTING =
Setting.simpleString("cloud.azure.management.keystore.path", Property.NodeScope, Property.Filtered);
public static final Setting<String> KEYSTORE_PASSWORD_SETTING =
Setting.simpleString("cloud.azure.management.keystore.password", Property.NodeScope,
public static final Setting<KeyStoreType> KEYSTORE_TYPE_SETTING =
new Setting<>("cloud.azure.management.keystore.type", KeyStoreType.pkcs12.name(), KeyStoreType::fromString,
Property.NodeScope, Property.Filtered);
public static final Setting<String> KEYSTORE_PATH_SETTING = Setting.simpleString(
public static final Setting<String> KEYSTORE_PASSWORD_SETTING = Setting.simpleString(
public static final Setting<KeyStoreType> KEYSTORE_TYPE_SETTING = new Setting<>(
// so that it can be overridden for tests
public static final Setting<URI> ENDPOINT_SETTING = new Setting<URI>("cloud.azure.management.endpoint",
"https://management.core.windows.net/", s -> {
public static final Setting<URI> ENDPOINT_SETTING = new Setting<URI>(
s -> {
try {
return new URI(s);
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}, Property.NodeScope);
final class Discovery {
public static final Setting<TimeValue> REFRESH_SETTING =
Setting.positiveTimeSetting("discovery.azure.refresh_interval", TimeValue.timeValueSeconds(0), Property.NodeScope);
public static final Setting<AzureSeedHostsProvider.HostType> HOST_TYPE_SETTING =
new Setting<>("discovery.azure.host.type", AzureSeedHostsProvider.HostType.PRIVATE_IP.name(),
AzureSeedHostsProvider.HostType::fromString, Property.NodeScope);
public static final Setting<String> ENDPOINT_NAME_SETTING = new Setting<>("discovery.azure.endpoint.name", "opensearch",
Function.identity(), Property.NodeScope);
public static final Setting<String> DEPLOYMENT_NAME_SETTING = Setting.simpleString("discovery.azure.deployment.name",
public static final Setting<Deployment> DEPLOYMENT_SLOT_SETTING = new Setting<>("discovery.azure.deployment.slot",
Deployment.PRODUCTION.name(), Deployment::fromString, Property.NodeScope);
public static final Setting<TimeValue> REFRESH_SETTING = Setting.positiveTimeSetting(
public static final Setting<AzureSeedHostsProvider.HostType> HOST_TYPE_SETTING = new Setting<>(
public static final Setting<String> ENDPOINT_NAME_SETTING = new Setting<>(
public static final Setting<String> DEPLOYMENT_NAME_SETTING = Setting.simpleString(
public static final Setting<Deployment> DEPLOYMENT_SLOT_SETTING = new Setting<>(
HostedServiceGetDetailedResponse getServiceDetails();
@ -56,11 +56,9 @@ import org.opensearch.common.component.AbstractLifecycleComponent;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Settings;
public class AzureComputeServiceImpl extends AbstractLifecycleComponent
implements AzureComputeService {
public class AzureComputeServiceImpl extends AbstractLifecycleComponent implements AzureComputeService {
private static final Logger logger = LogManager.getLogger(AzureComputeServiceImpl.class);
private final ComputeManagementClient client;
private final String serviceName;
@ -89,8 +87,15 @@ public class AzureComputeServiceImpl extends AbstractLifecycleComponent
Configuration configuration = new Configuration(builder);
configuration.setProperty(Configuration.PROPERTY_LOG_HTTP_REQUESTS, logger.isTraceEnabled());
Configuration managementConfig = ManagementConfiguration.configure(null, configuration,
Management.ENDPOINT_SETTING.get(settings), subscriptionId, keystorePath, keystorePassword, keystoreType);
Configuration managementConfig = ManagementConfiguration.configure(
logger.debug("creating new Azure client for [{}], [{}]", subscriptionId, serviceName);
client = ComputeManagementService.create(managementConfig);
@ -111,20 +116,20 @@ public class AzureComputeServiceImpl extends AbstractLifecycleComponent
public HostedServiceGetDetailedResponse getServiceDetails() {
try {
return AccessController.doPrivileged((PrivilegedExceptionAction<HostedServiceGetDetailedResponse>)
() -> client.getHostedServicesOperations().getDetailed(serviceName));
return AccessController.doPrivileged(
(PrivilegedExceptionAction<HostedServiceGetDetailedResponse>) () -> client.getHostedServicesOperations()
} catch (PrivilegedActionException e) {
throw new AzureServiceRemoteException("can not get list of azure nodes", e.getCause());
protected void doStart() throws OpenSearchException {
protected void doStart() throws OpenSearchException {}
protected void doStop() throws OpenSearchException {
protected void doStop() throws OpenSearchException {}
protected void doClose() throws OpenSearchException {
@ -61,17 +61,17 @@ import java.util.ArrayList;
import java.util.List;
public class AzureSeedHostsProvider implements SeedHostsProvider {
private static final Logger logger = LogManager.getLogger(AzureSeedHostsProvider.class);
public enum HostType {
private String type ;
private String type;
HostType(String type) {
this.type = type ;
this.type = type;
public String getType() {
@ -123,8 +123,12 @@ public class AzureSeedHostsProvider implements SeedHostsProvider {
private final String deploymentName;
private final DeploymentSlot deploymentSlot;
public AzureSeedHostsProvider(Settings settings, AzureComputeService azureComputeService,
TransportService transportService, NetworkService networkService) {
public AzureSeedHostsProvider(
Settings settings,
AzureComputeService azureComputeService,
TransportService transportService,
NetworkService networkService
) {
this.settings = settings;
this.azureComputeService = azureComputeService;
this.transportService = transportService;
@ -152,8 +156,8 @@ public class AzureSeedHostsProvider implements SeedHostsProvider {
public List<TransportAddress> getSeedAddresses(HostsResolver hostsResolver) {
if (refreshInterval.millis() != 0) {
if (dynamicHosts != null &&
(refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
if (dynamicHosts != null
&& (refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
logger.trace("using cache to retrieve node list");
return dynamicHosts;
@ -179,7 +183,8 @@ public class AzureSeedHostsProvider implements SeedHostsProvider {
InetAddress ipAddress = null;
try {
ipAddress = networkService.resolvePublishHostAddresses(
logger.trace("ip of current node: [{}]", ipAddress);
} catch (IOException e) {
// We can't find the publish host address... Hmmm. Too bad :-(
@ -189,24 +194,26 @@ public class AzureSeedHostsProvider implements SeedHostsProvider {
for (HostedServiceGetDetailedResponse.Deployment deployment : detailed.getDeployments()) {
// We check the deployment slot
if (deployment.getDeploymentSlot() != deploymentSlot) {
logger.debug("current deployment slot [{}] for [{}] is different from [{}]. skipping...",
deployment.getDeploymentSlot(), deployment.getName(), deploymentSlot);
"current deployment slot [{}] for [{}] is different from [{}]. skipping...",
// If provided, we check the deployment name
if (Strings.hasLength(deploymentName) && !deploymentName.equals(deployment.getName())) {
logger.debug("current deployment name [{}] different from [{}]. skipping...",
deployment.getName(), deploymentName);
logger.debug("current deployment name [{}] different from [{}]. skipping...", deployment.getName(), deploymentName);
// We check current deployment status
if (deployment.getStatus() != DeploymentStatus.Starting &&
deployment.getStatus() != DeploymentStatus.Deploying &&
deployment.getStatus() != DeploymentStatus.Running) {
logger.debug("[{}] status is [{}]. skipping...",
deployment.getName(), deployment.getStatus());
if (deployment.getStatus() != DeploymentStatus.Starting
&& deployment.getStatus() != DeploymentStatus.Deploying
&& deployment.getStatus() != DeploymentStatus.Running) {
logger.debug("[{}] status is [{}]. skipping...", deployment.getName(), deployment.getStatus());
@ -71,31 +71,36 @@ public class AzureDiscoveryPlugin extends Plugin implements DiscoveryPlugin {
public Map<String, Supplier<SeedHostsProvider>> getSeedHostProviders(TransportService transportService,
NetworkService networkService) {
return Collections.singletonMap(AZURE,
() -> createSeedHostsProvider(settings, createComputeService(), transportService, networkService));
public Map<String, Supplier<SeedHostsProvider>> getSeedHostProviders(TransportService transportService, NetworkService networkService) {
return Collections.singletonMap(
() -> createSeedHostsProvider(settings, createComputeService(), transportService, networkService)
// Used for testing
protected AzureSeedHostsProvider createSeedHostsProvider(final Settings settings,
final AzureComputeService azureComputeService,
final TransportService transportService,
final NetworkService networkService) {
protected AzureSeedHostsProvider createSeedHostsProvider(
final Settings settings,
final AzureComputeService azureComputeService,
final TransportService transportService,
final NetworkService networkService
) {
return new AzureSeedHostsProvider(settings, azureComputeService, transportService, networkService);
public List<Setting<?>> getSettings() {
return Arrays.asList(AzureComputeService.Discovery.REFRESH_SETTING,
return Arrays.asList(
@ -49,4 +49,3 @@ public class DiscoveryAzureClassicClientYamlTestSuiteIT extends OpenSearchClient
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -114,32 +114,41 @@ public class AmazonEC2Fixture extends AbstractHttpFixture {
return new Response(RestStatus.OK.getStatus(), TEXT_PLAIN_CONTENT_TYPE, "".getBytes(UTF_8));
if (instanceProfile &&
"/latest/meta-data/iam/security-credentials/".equals(request.getPath()) &&
HttpGet.METHOD_NAME.equals(request.getMethod())) {
if (instanceProfile
&& "/latest/meta-data/iam/security-credentials/".equals(request.getPath())
&& HttpGet.METHOD_NAME.equals(request.getMethod())) {
final Map<String, String> headers = new HashMap<>(contentType("text/plain"));
return new Response(RestStatus.OK.getStatus(), headers, "my_iam_profile".getBytes(UTF_8));
if (instanceProfile && "/latest/api/token".equals(request.getPath())
&& HttpPut.METHOD_NAME.equals(request.getMethod())) {
if (instanceProfile && "/latest/api/token".equals(request.getPath()) && HttpPut.METHOD_NAME.equals(request.getMethod())) {
// TODO: Implement IMDSv2 behavior here. For now this just returns a 403 which makes the SDK fall back to IMDSv1
// which is implemented in this fixture
// which is implemented in this fixture
return new Response(RestStatus.FORBIDDEN.getStatus(), TEXT_PLAIN_CONTENT_TYPE, EMPTY_BYTE);
if ((containerCredentials &&
"/ecs_credentials_endpoint".equals(request.getPath()) &&
HttpGet.METHOD_NAME.equals(request.getMethod())) ||
("/latest/meta-data/iam/security-credentials/my_iam_profile".equals(request.getPath()) &&
HttpGet.METHOD_NAME.equals(request.getMethod()))) {
if ((containerCredentials
&& "/ecs_credentials_endpoint".equals(request.getPath())
&& HttpGet.METHOD_NAME.equals(request.getMethod()))
|| ("/latest/meta-data/iam/security-credentials/my_iam_profile".equals(request.getPath())
&& HttpGet.METHOD_NAME.equals(request.getMethod()))) {
final Date expiration = new Date(new Date().getTime() + TimeUnit.DAYS.toMillis(1));
final String response = "{"
+ "\"AccessKeyId\": \"" + "ec2_integration_test_access_key" + "\","
+ "\"Expiration\": \"" + DateUtils.formatISO8601Date(expiration) + "\","
+ "\"RoleArn\": \"" + "test" + "\","
+ "\"SecretAccessKey\": \"" + "test" + "\","
+ "\"Token\": \"" + "test" + "\""
+ "\"AccessKeyId\": \""
+ "ec2_integration_test_access_key"
+ "\","
+ "\"Expiration\": \""
+ DateUtils.formatISO8601Date(expiration)
+ "\","
+ "\"RoleArn\": \""
+ "test"
+ "\","
+ "\"SecretAccessKey\": \""
+ "test"
+ "\","
+ "\"Token\": \""
+ "test"
+ "\""
+ "}";
final Map<String, String> headers = new HashMap<>(contentType("application/json"));
@ -56,9 +56,9 @@ public abstract class AbstractAwsTestCase extends OpenSearchIntegTestCase {
protected Settings nodeSettings(int nodeOrdinal) {
Settings.Builder settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir());
Settings.Builder settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir());
// if explicit, just load it and don't load from env
try {
@ -70,7 +70,8 @@ public abstract class AbstractAwsTestCase extends OpenSearchIntegTestCase {
} else {
throw new IllegalStateException(
"to run integration tests, you need to set -Dtests.thirdparty=true and -Dtests.config=/path/to/opensearch.yml");
"to run integration tests, you need to set -Dtests.thirdparty=true and -Dtests.config=/path/to/opensearch.yml"
} catch (SettingsException exception) {
throw new IllegalStateException("your test configuration file is incorrect: " + System.getProperty("tests.config"), exception);
@ -32,7 +32,6 @@
package org.opensearch.discovery.ec2;
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
import org.opensearch.common.UUIDs;
import org.opensearch.common.settings.Settings;
@ -50,17 +49,17 @@ import static org.hamcrest.CoreMatchers.is;
@ClusterScope(scope = Scope.TEST, numDataNodes = 0, numClientNodes = 0, transportClientRatio = 0.0)
public class Ec2DiscoveryUpdateSettingsTests extends AbstractAwsTestCase {
public void testMinimumMasterNodesStart() {
Settings nodeSettings = Settings.builder()
.put(DiscoveryModule.DISCOVERY_SEED_PROVIDERS_SETTING.getKey(), "ec2")
Settings nodeSettings = Settings.builder().put(DiscoveryModule.DISCOVERY_SEED_PROVIDERS_SETTING.getKey(), "ec2").build();
// We try to update a setting now
final String expectedValue = UUIDs.randomBase64UUID(random());
final String settingName = "cluster.routing.allocation.exclude.any_attribute";
final ClusterUpdateSettingsResponse response = client().admin().cluster().prepareUpdateSettings()
.setPersistentSettings(Settings.builder().put(settingName, expectedValue))
final ClusterUpdateSettingsResponse response = client().admin()
.setPersistentSettings(Settings.builder().put(settingName, expectedValue))
final String value = response.getPersistentSettings().get(settingName);
assertThat(value, is(expectedValue));
@ -102,8 +102,14 @@ class AwsEc2SeedHostsProvider implements SeedHostsProvider {
if (logger.isDebugEnabled()) {
logger.debug("using host_type [{}], tags [{}], groups [{}] with any_group [{}], availability_zones [{}]", hostType, tags,
groups, bindAnyGroup, availabilityZones);
"using host_type [{}], tags [{}], groups [{}] with any_group [{}], availability_zones [{}]",
@ -144,18 +150,25 @@ class AwsEc2SeedHostsProvider implements SeedHostsProvider {
if (bindAnyGroup) {
// We check if we can find at least one group name or one group id in groups.
if (disjoint(securityGroupNames, groups)
&& disjoint(securityGroupIds, groups)) {
logger.trace("filtering out instance {} based on groups {}, not part of {}", instance.getInstanceId(),
instanceSecurityGroups, groups);
if (disjoint(securityGroupNames, groups) && disjoint(securityGroupIds, groups)) {
"filtering out instance {} based on groups {}, not part of {}",
// continue to the next instance
} else {
// We need to match all group names or group ids, otherwise we ignore this instance
if (!(securityGroupNames.containsAll(groups) || securityGroupIds.containsAll(groups))) {
logger.trace("filtering out instance {} based on groups {}, does not include all of {}",
instance.getInstanceId(), instanceSecurityGroups, groups);
"filtering out instance {} based on groups {}, does not include all of {}",
// continue to the next instance
@ -195,8 +208,13 @@ class AwsEc2SeedHostsProvider implements SeedHostsProvider {
} catch (final Exception e) {
final String finalAddress = address;
() -> new ParameterizedMessage("failed to add {}, address {}", instance.getInstanceId(), finalAddress), e);
(Supplier<?>) () -> new ParameterizedMessage(
"failed to add {}, address {}",
} else {
logger.trace("not adding {}, address is null, host_type {}", instance.getInstanceId(), hostType);
@ -210,23 +228,18 @@ class AwsEc2SeedHostsProvider implements SeedHostsProvider {
private DescribeInstancesRequest buildDescribeInstancesRequest() {
final DescribeInstancesRequest describeInstancesRequest = new DescribeInstancesRequest()
new Filter("instance-state-name").withValues("running", "pending")
final DescribeInstancesRequest describeInstancesRequest = new DescribeInstancesRequest().withFilters(
new Filter("instance-state-name").withValues("running", "pending")
for (final Map.Entry<String, List<String>> tagFilter : tags.entrySet()) {
// for a given tag key, OR relationship for multiple different values
new Filter("tag:" + tagFilter.getKey()).withValues(tagFilter.getValue())
describeInstancesRequest.withFilters(new Filter("tag:" + tagFilter.getKey()).withValues(tagFilter.getValue()));
if (!availabilityZones.isEmpty()) {
// OR relationship amongst multiple values of the availability-zone filter
new Filter("availability-zone").withValues(availabilityZones)
describeInstancesRequest.withFilters(new Filter("availability-zone").withValues(availabilityZones));
return describeInstancesRequest;
@ -235,7 +248,7 @@ class AwsEc2SeedHostsProvider implements SeedHostsProvider {
private final class TransportAddressesCache extends SingleObjectCache<List<TransportAddress>> {
protected TransportAddressesCache(TimeValue refreshInterval) {
super(refreshInterval, new ArrayList<>());
super(refreshInterval, new ArrayList<>());
@ -59,8 +59,12 @@ interface AwsEc2Service extends Closeable {
* XXXX refers to a name of a tag configured for all EC2 instances. Instances which don't
* have this tag set will be ignored by the discovery process. Defaults to private_ip.
Setting<String> HOST_TYPE_SETTING =
new Setting<>("discovery.ec2.host_type", HostType.PRIVATE_IP, Function.identity(), Property.NodeScope);
Setting<String> HOST_TYPE_SETTING = new Setting<>(
* discovery.ec2.any_group: If set to false, will require all security groups to be present for the instance to be used for the
* discovery. Defaults to true.
@ -70,19 +74,30 @@ interface AwsEc2Service extends Closeable {
* discovery.ec2.groups: Either a comma separated list or array based list of (security) groups. Only instances with the provided
* security groups will be used in the cluster discovery. (NOTE: You could provide either group NAME or group ID.)
Setting<List<String>> GROUPS_SETTING = Setting.listSetting("discovery.ec2.groups", new ArrayList<>(), s -> s.toString(),
Setting<List<String>> GROUPS_SETTING = Setting.listSetting(
new ArrayList<>(),
s -> s.toString(),
* discovery.ec2.availability_zones: Either a comma separated list or array based list of availability zones. Only instances within
* the provided availability zones will be used in the cluster discovery.
Setting<List<String>> AVAILABILITY_ZONES_SETTING = Setting.listSetting("discovery.ec2.availability_zones", Collections.emptyList(),
s -> s.toString(), Property.NodeScope);
Setting<List<String>> AVAILABILITY_ZONES_SETTING = Setting.listSetting(
s -> s.toString(),
* discovery.ec2.node_cache_time: How long the list of hosts is cached to prevent further requests to the AWS API. Defaults to 10s.
Setting<TimeValue> NODE_CACHE_TIME_SETTING = Setting.timeSetting("discovery.ec2.node_cache_time", TimeValue.timeValueSeconds(10),
Setting<TimeValue> NODE_CACHE_TIME_SETTING = Setting.timeSetting(
* discovery.ec2.tag.*: The ec2 discovery can filter machines to include in the cluster based on tags (and not just groups).
@ -90,8 +105,10 @@ interface AwsEc2Service extends Closeable {
* instances with a tag key set to stage, and a value of dev. Several tags set will require all of those tags to be set for the
* instance to be included.
Setting.AffixSetting<List<String>> TAG_SETTING = Setting.prefixKeySetting("discovery.ec2.tag.",
key -> Setting.listSetting(key, Collections.emptyList(), Function.identity(), Property.NodeScope));
Setting.AffixSetting<List<String>> TAG_SETTING = Setting.prefixKeySetting(
key -> Setting.listSetting(key, Collections.emptyList(), Function.identity(), Property.NodeScope)
* Builds then caches an {@code AmazonEC2} client using the current client
@ -53,8 +53,7 @@ class AwsEc2ServiceImpl implements AwsEc2Service {
private static final Logger logger = LogManager.getLogger(AwsEc2ServiceImpl.class);
private final AtomicReference<LazyInitializable<AmazonEc2Reference, OpenSearchException>> lazyClientReference =
new AtomicReference<>();
private final AtomicReference<LazyInitializable<AmazonEc2Reference, OpenSearchException>> lazyClientReference = new AtomicReference<>();
private AmazonEC2 buildClient(Ec2ClientSettings clientSettings) {
final AWSCredentialsProvider credentials = buildCredentials(logger, clientSettings);
@ -64,7 +63,8 @@ class AwsEc2ServiceImpl implements AwsEc2Service {
// proxy for testing
AmazonEC2 buildClient(AWSCredentialsProvider credentials, ClientConfiguration configuration, String endpoint) {
final AmazonEC2ClientBuilder builder = AmazonEC2ClientBuilder.standard().withCredentials(credentials)
final AmazonEC2ClientBuilder builder = AmazonEC2ClientBuilder.standard()
if (Strings.hasText(endpoint)) {
logger.debug("using explicit ec2 endpoint [{}]", endpoint);
@ -122,8 +122,10 @@ class AwsEc2ServiceImpl implements AwsEc2Service {
public void refreshAndClearCache(Ec2ClientSettings clientSettings) {
final LazyInitializable<AmazonEc2Reference, OpenSearchException> newClient = new LazyInitializable<>(
() -> new AmazonEc2Reference(buildClient(clientSettings)), clientReference -> clientReference.incRef(),
clientReference -> clientReference.decRef());
() -> new AmazonEc2Reference(buildClient(clientSettings)),
clientReference -> clientReference.incRef(),
clientReference -> clientReference.decRef()
final LazyInitializable<AmazonEc2Reference, OpenSearchException> oldClient = this.lazyClientReference.getAndSet(newClient);
if (oldClient != null) {
@ -71,12 +71,20 @@ final class Ec2ClientSettings {
static final Setting<Integer> PROXY_PORT_SETTING = Setting.intSetting("discovery.ec2.proxy.port", 80, 0, 1 << 16, Property.NodeScope);
/** An override for the ec2 endpoint to connect to. */
static final Setting<String> ENDPOINT_SETTING = new Setting<>("discovery.ec2.endpoint", "", s -> s.toLowerCase(Locale.ROOT),
static final Setting<String> ENDPOINT_SETTING = new Setting<>(
s -> s.toLowerCase(Locale.ROOT),
/** The protocol to use to connect to ec2. */
static final Setting<Protocol> PROTOCOL_SETTING = new Setting<>("discovery.ec2.protocol", "https",
s -> Protocol.valueOf(s.toUpperCase(Locale.ROOT)), Property.NodeScope);
static final Setting<Protocol> PROTOCOL_SETTING = new Setting<>(
s -> Protocol.valueOf(s.toUpperCase(Locale.ROOT)),
/** The username of a proxy to connect to ec2 through. */
static final Setting<SecureString> PROXY_USERNAME_SETTING = SecureSetting.secureString("discovery.ec2.proxy.username", null);
@ -85,8 +93,11 @@ final class Ec2ClientSettings {
static final Setting<SecureString> PROXY_PASSWORD_SETTING = SecureSetting.secureString("discovery.ec2.proxy.password", null);
/** The socket timeout for connecting to ec2. */
static final Setting<TimeValue> READ_TIMEOUT_SETTING = Setting.timeSetting("discovery.ec2.read_timeout",
TimeValue.timeValueMillis(ClientConfiguration.DEFAULT_SOCKET_TIMEOUT), Property.NodeScope);
static final Setting<TimeValue> READ_TIMEOUT_SETTING = Setting.timeSetting(
private static final Logger logger = LogManager.getLogger(Ec2ClientSettings.class);
@ -122,8 +133,16 @@ final class Ec2ClientSettings {
/** The read timeout for the ec2 client. */
final int readTimeoutMillis;
protected Ec2ClientSettings(AWSCredentials credentials, String endpoint, Protocol protocol, String proxyHost, int proxyPort,
String proxyUsername, String proxyPassword, int readTimeoutMillis) {
protected Ec2ClientSettings(
AWSCredentials credentials,
String endpoint,
Protocol protocol,
String proxyHost,
int proxyPort,
String proxyUsername,
String proxyPassword,
int readTimeoutMillis
) {
this.credentials = credentials;
this.endpoint = endpoint;
this.protocol = protocol;
@ -135,27 +154,39 @@ final class Ec2ClientSettings {
static AWSCredentials loadCredentials(Settings settings) {
try (SecureString key = ACCESS_KEY_SETTING.get(settings);
SecureString secret = SECRET_KEY_SETTING.get(settings);
SecureString sessionToken = SESSION_TOKEN_SETTING.get(settings)) {
try (
SecureString key = ACCESS_KEY_SETTING.get(settings);
SecureString secret = SECRET_KEY_SETTING.get(settings);
SecureString sessionToken = SESSION_TOKEN_SETTING.get(settings)
) {
if (key.length() == 0 && secret.length() == 0) {
if (sessionToken.length() > 0) {
throw new SettingsException("Setting [{}] is set but [{}] and [{}] are not",
throw new SettingsException(
"Setting [{}] is set but [{}] and [{}] are not",
logger.debug("Using either environment variables, system properties or instance profile credentials");
return null;
} else {
if (key.length() == 0) {
"Setting [{}] is set but [{}] is not, which will be unsupported in future",
if (secret.length() == 0) {
"Setting [{}] is set but [{}] is not, which will be unsupported in future",
final AWSCredentials credentials;
@ -175,8 +206,10 @@ final class Ec2ClientSettings {
/** Parse settings for a single client. */
static Ec2ClientSettings getClientSettings(Settings settings) {
final AWSCredentials credentials = loadCredentials(settings);
try (SecureString proxyUsername = PROXY_USERNAME_SETTING.get(settings);
SecureString proxyPassword = PROXY_PASSWORD_SETTING.get(settings)) {
try (
SecureString proxyUsername = PROXY_USERNAME_SETTING.get(settings);
SecureString proxyPassword = PROXY_PASSWORD_SETTING.get(settings)
) {
return new Ec2ClientSettings(
@ -185,7 +218,8 @@ final class Ec2ClientSettings {
(int) READ_TIMEOUT_SETTING.get(settings).millis()
@ -109,33 +109,33 @@ public class Ec2DiscoveryPlugin extends Plugin implements DiscoveryPlugin, Reloa
public Map<String, Supplier<SeedHostsProvider>> getSeedHostProviders(TransportService transportService,
NetworkService networkService) {
public Map<String, Supplier<SeedHostsProvider>> getSeedHostProviders(TransportService transportService, NetworkService networkService) {
return Collections.singletonMap(EC2, () -> new AwsEc2SeedHostsProvider(settings, transportService, ec2Service));
public List<Setting<?>> getSettings() {
return Arrays.asList(
// Register EC2 discovery settings: discovery.ec2
// Register cloud node settings: cloud.node
// Register EC2 discovery settings: discovery.ec2
// Register cloud node settings: cloud.node
@ -169,8 +169,10 @@ public class Ec2DiscoveryPlugin extends Plugin implements DiscoveryPlugin, Reloa
throw new UncheckedIOException(e);
try (InputStream in = SocketAccess.doPrivilegedIOException(urlConnection::getInputStream);
BufferedReader urlReader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
try (
InputStream in = SocketAccess.doPrivilegedIOException(urlConnection::getInputStream);
BufferedReader urlReader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))
) {
final String metadataResult = urlReader.readLine();
if ((metadataResult == null) || (metadataResult.length() == 0)) {
@ -67,7 +67,7 @@ import java.nio.charset.StandardCharsets;
* @author Paul_Loy (keteracel)
class Ec2NameResolver implements CustomNameResolver {
private static final Logger logger = LogManager.getLogger(Ec2NameResolver.class);
@ -129,7 +129,7 @@ class Ec2NameResolver implements CustomNameResolver {
public InetAddress[] resolveDefault() {
return null; // using this, one has to explicitly specify _ec2_ in network setting
// return resolve(Ec2HostnameType.DEFAULT, false);
// return resolve(Ec2HostnameType.DEFAULT, false);
@ -49,8 +49,10 @@ import static org.hamcrest.Matchers.is;
public class AwsEc2ServiceImplTests extends OpenSearchTestCase {
public void testAWSCredentialsWithSystemProviders() {
final AWSCredentialsProvider credentialsProvider = AwsEc2ServiceImpl.buildCredentials(logger,
final AWSCredentialsProvider credentialsProvider = AwsEc2ServiceImpl.buildCredentials(
assertThat(credentialsProvider, instanceOf(DefaultAWSCredentialsProviderChain.class));
@ -58,8 +60,10 @@ public class AwsEc2ServiceImplTests extends OpenSearchTestCase {
final MockSecureSettings secureSettings = new MockSecureSettings();
secureSettings.setString("discovery.ec2.access_key", "aws_key");
secureSettings.setString("discovery.ec2.secret_key", "aws_secret");
final AWSCredentials credentials = AwsEc2ServiceImpl.buildCredentials(logger,
final AWSCredentials credentials = AwsEc2ServiceImpl.buildCredentials(
assertThat(credentials.getAWSAccessKeyId(), is("aws_key"));
assertThat(credentials.getAWSSecretKey(), is("aws_secret"));
@ -69,8 +73,10 @@ public class AwsEc2ServiceImplTests extends OpenSearchTestCase {
secureSettings.setString("discovery.ec2.access_key", "aws_key");
secureSettings.setString("discovery.ec2.secret_key", "aws_secret");
secureSettings.setString("discovery.ec2.session_token", "aws_session_token");
final BasicSessionCredentials credentials = (BasicSessionCredentials) AwsEc2ServiceImpl.buildCredentials(logger,
final BasicSessionCredentials credentials = (BasicSessionCredentials) AwsEc2ServiceImpl.buildCredentials(
assertThat(credentials.getAWSAccessKeyId(), is("aws_key"));
assertThat(credentials.getAWSSecretKey(), is("aws_secret"));
assertThat(credentials.getSessionToken(), is("aws_session_token"));
@ -79,37 +85,51 @@ public class AwsEc2ServiceImplTests extends OpenSearchTestCase {
public void testDeprecationOfLoneAccessKey() {
final MockSecureSettings secureSettings = new MockSecureSettings();
secureSettings.setString("discovery.ec2.access_key", "aws_key");
final AWSCredentials credentials = AwsEc2ServiceImpl.buildCredentials(logger,
final AWSCredentials credentials = AwsEc2ServiceImpl.buildCredentials(
assertThat(credentials.getAWSAccessKeyId(), is("aws_key"));
assertThat(credentials.getAWSSecretKey(), is(""));
assertSettingDeprecationsAndWarnings(new String[]{},
"Setting [discovery.ec2.access_key] is set but [discovery.ec2.secret_key] is not, which will be unsupported in future");
new String[] {},
"Setting [discovery.ec2.access_key] is set but [discovery.ec2.secret_key] is not, which will be unsupported in future"
public void testDeprecationOfLoneSecretKey() {
final MockSecureSettings secureSettings = new MockSecureSettings();
secureSettings.setString("discovery.ec2.secret_key", "aws_secret");
final AWSCredentials credentials = AwsEc2ServiceImpl.buildCredentials(logger,
final AWSCredentials credentials = AwsEc2ServiceImpl.buildCredentials(
assertThat(credentials.getAWSAccessKeyId(), is(""));
assertThat(credentials.getAWSSecretKey(), is("aws_secret"));
assertSettingDeprecationsAndWarnings(new String[]{},
"Setting [discovery.ec2.secret_key] is set but [discovery.ec2.access_key] is not, which will be unsupported in future");
new String[] {},
"Setting [discovery.ec2.secret_key] is set but [discovery.ec2.access_key] is not, which will be unsupported in future"
public void testRejectionOfLoneSessionToken() {
final MockSecureSettings secureSettings = new MockSecureSettings();
secureSettings.setString("discovery.ec2.session_token", "aws_session_token");
SettingsException e = expectThrows(SettingsException.class, () -> AwsEc2ServiceImpl.buildCredentials(logger,
assertThat(e.getMessage(), is(
"Setting [discovery.ec2.session_token] is set but [discovery.ec2.access_key] and [discovery.ec2.secret_key] are not"));
SettingsException e = expectThrows(
() -> AwsEc2ServiceImpl.buildCredentials(
is("Setting [discovery.ec2.session_token] is set but [discovery.ec2.access_key] and [discovery.ec2.secret_key] are not")
public void testAWSDefaultConfiguration() {
launchAWSConfigurationTest(Settings.EMPTY, Protocol.HTTPS, null, -1, null, null,
launchAWSConfigurationTest(Settings.EMPTY, Protocol.HTTPS, null, -1, null, null, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT);
public void testAWSConfigurationWithAwsSettings() {
@ -126,15 +146,19 @@ public class AwsEc2ServiceImplTests extends OpenSearchTestCase {
launchAWSConfigurationTest(settings, Protocol.HTTP, "aws_proxy_host", 8080, "aws_proxy_username", "aws_proxy_password", 10000);
protected void launchAWSConfigurationTest(Settings settings,
Protocol expectedProtocol,
String expectedProxyHost,
int expectedProxyPort,
String expectedProxyUsername,
String expectedProxyPassword,
int expectedReadTimeout) {
final ClientConfiguration configuration = AwsEc2ServiceImpl.buildConfiguration(logger,
protected void launchAWSConfigurationTest(
Settings settings,
Protocol expectedProtocol,
String expectedProxyHost,
int expectedProxyPort,
String expectedProxyUsername,
String expectedProxyPassword,
int expectedReadTimeout
) {
final ClientConfiguration configuration = AwsEc2ServiceImpl.buildConfiguration(
assertThat(configuration.getResponseMetadataCacheSize(), is(0));
assertThat(configuration.getProtocol(), is(expectedProtocol));
@ -68,9 +68,21 @@ public class EC2RetriesTests extends AbstractEC2MockAPITestCase {
protected MockTransportService createTransportService() {
return new MockTransportService(Settings.EMPTY, new MockNioTransport(Settings.EMPTY, Version.CURRENT, threadPool, networkService,
PageCacheRecycler.NON_RECYCLING_INSTANCE, new NamedWriteableRegistry(Collections.emptyList()),
new NoneCircuitBreakerService()), threadPool, TransportService.NOOP_TRANSPORT_INTERCEPTOR, null);
return new MockTransportService(
new MockNioTransport(
new NamedWriteableRegistry(Collections.emptyList()),
new NoneCircuitBreakerService()
public void testEC2DiscoveryRetriesOnRateLimiting() throws IOException {
@ -88,8 +100,10 @@ public class EC2RetriesTests extends AbstractEC2MockAPITestCase {
if (auth == null || auth.contains(accessKey) == false) {
throw new IllegalArgumentException("wrong access key: " + auth);
if (failedRequests.compute(exchange.getRequestHeaders().getFirst("Amz-sdk-invocation-id"),
(requestId, count) -> (count == null ? 0 : count) + 1) < maxRetries) {
if (failedRequests.compute(
(requestId, count) -> (count == null ? 0 : count) + 1
) < maxRetries) {
exchange.sendResponseHeaders(HttpStatus.SC_SERVICE_UNAVAILABLE, -1);
@ -97,8 +111,9 @@ public class EC2RetriesTests extends AbstractEC2MockAPITestCase {
byte[] responseBody = null;
for (NameValuePair parse : URLEncodedUtils.parse(request, UTF_8)) {
if ("Action".equals(parse.getName())) {
responseBody = generateDescribeInstancesResponse(hosts.stream().map(
address -> new Instance().withPublicIpAddress(address)).collect(Collectors.toList()));
responseBody = generateDescribeInstancesResponse(
hosts.stream().map(address -> new Instance().withPublicIpAddress(address)).collect(Collectors.toList())
@ -56,9 +56,7 @@ import static org.hamcrest.Matchers.is;
public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
private Settings getNodeAttributes(Settings settings, String url) {
final Settings realSettings = Settings.builder()
.put(AwsEc2Service.AUTO_ATTRIBUTE_SETTING.getKey(), true)
final Settings realSettings = Settings.builder().put(AwsEc2Service.AUTO_ATTRIBUTE_SETTING.getKey(), true).put(settings).build();
return Ec2DiscoveryPlugin.getAvailabilityZoneNodeAttributes(realSettings, url);
@ -72,8 +70,7 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
public void testNodeAttributesDisabled() {
final Settings settings = Settings.builder()
.put(AwsEc2Service.AUTO_ATTRIBUTE_SETTING.getKey(), false).build();
final Settings settings = Settings.builder().put(AwsEc2Service.AUTO_ATTRIBUTE_SETTING.getKey(), false).build();
assertNodeAttributes(settings, "bogus", null);
@ -84,9 +81,7 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
public void testNodeAttributesBogusUrl() {
final UncheckedIOException e = expectThrows(UncheckedIOException.class, () ->
getNodeAttributes(Settings.EMPTY, "bogus")
final UncheckedIOException e = expectThrows(UncheckedIOException.class, () -> getNodeAttributes(Settings.EMPTY, "bogus"));
final String msg = e.getCause().getMessage();
assertTrue(msg, msg.contains("no protocol: bogus"));
@ -94,8 +89,9 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
public void testNodeAttributesEmpty() throws Exception {
final Path zoneUrl = createTempFile();
final IllegalStateException e = expectThrows(IllegalStateException.class, () ->
getNodeAttributes(Settings.EMPTY, zoneUrl.toUri().toURL().toString())
final IllegalStateException e = expectThrows(
() -> getNodeAttributes(Settings.EMPTY, zoneUrl.toUri().toURL().toString())
assertTrue(e.getMessage(), e.getMessage().contains("no ec2 metadata returned"));
@ -131,11 +127,11 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
mockSecure1.setString(Ec2ClientSettings.PROXY_USERNAME_SETTING.getKey(), "proxy_username_1");
mockSecure1.setString(Ec2ClientSettings.PROXY_PASSWORD_SETTING.getKey(), "proxy_password_1");
final Settings settings1 = Settings.builder()
.put(Ec2ClientSettings.PROXY_HOST_SETTING.getKey(), "proxy_host_1")
.put(Ec2ClientSettings.PROXY_PORT_SETTING.getKey(), 881)
.put(Ec2ClientSettings.ENDPOINT_SETTING.getKey(), "ec2_endpoint_1")
.put(Ec2ClientSettings.PROXY_HOST_SETTING.getKey(), "proxy_host_1")
.put(Ec2ClientSettings.PROXY_PORT_SETTING.getKey(), 881)
.put(Ec2ClientSettings.ENDPOINT_SETTING.getKey(), "ec2_endpoint_1")
final MockSecureSettings mockSecure2 = new MockSecureSettings();
mockSecure2.setString(Ec2ClientSettings.ACCESS_KEY_SETTING.getKey(), "ec2_access_2");
mockSecure2.setString(Ec2ClientSettings.SECRET_KEY_SETTING.getKey(), "ec2_secret_2");
@ -146,11 +142,11 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
mockSecure2.setString(Ec2ClientSettings.PROXY_USERNAME_SETTING.getKey(), "proxy_username_2");
mockSecure2.setString(Ec2ClientSettings.PROXY_PASSWORD_SETTING.getKey(), "proxy_password_2");
final Settings settings2 = Settings.builder()
.put(Ec2ClientSettings.PROXY_HOST_SETTING.getKey(), "proxy_host_2")
.put(Ec2ClientSettings.PROXY_PORT_SETTING.getKey(), 882)
.put(Ec2ClientSettings.ENDPOINT_SETTING.getKey(), "ec2_endpoint_2")
.put(Ec2ClientSettings.PROXY_HOST_SETTING.getKey(), "proxy_host_2")
.put(Ec2ClientSettings.PROXY_PORT_SETTING.getKey(), 882)
.put(Ec2ClientSettings.ENDPOINT_SETTING.getKey(), "ec2_endpoint_2")
try (Ec2DiscoveryPluginMock plugin = new Ec2DiscoveryPluginMock(settings1)) {
try (AmazonEc2Reference clientReference = plugin.ec2Service.client()) {
@ -159,7 +155,7 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
assertThat(credentials.getAWSSecretKey(), is("ec2_secret_1"));
if (mockSecure1HasSessionToken) {
assertThat(credentials, instanceOf(BasicSessionCredentials.class));
assertThat(((BasicSessionCredentials)credentials).getSessionToken(), is("ec2_session_token_1"));
assertThat(((BasicSessionCredentials) credentials).getSessionToken(), is("ec2_session_token_1"));
} else {
assertThat(credentials, instanceOf(BasicAWSCredentials.class));
@ -176,7 +172,7 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
final AWSCredentials credentials = ((AmazonEC2Mock) clientReference.client()).credentials.getCredentials();
if (mockSecure1HasSessionToken) {
assertThat(credentials, instanceOf(BasicSessionCredentials.class));
assertThat(((BasicSessionCredentials)credentials).getSessionToken(), is("ec2_session_token_1"));
assertThat(((BasicSessionCredentials) credentials).getSessionToken(), is("ec2_session_token_1"));
} else {
assertThat(credentials, instanceOf(BasicAWSCredentials.class));
@ -193,7 +189,7 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
assertThat(credentials.getAWSSecretKey(), is("ec2_secret_2"));
if (mockSecure2HasSessionToken) {
assertThat(credentials, instanceOf(BasicSessionCredentials.class));
assertThat(((BasicSessionCredentials)credentials).getSessionToken(), is("ec2_session_token_2"));
assertThat(((BasicSessionCredentials) credentials).getSessionToken(), is("ec2_session_token_2"));
} else {
assertThat(credentials, instanceOf(BasicAWSCredentials.class));
@ -211,8 +207,7 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
Ec2DiscoveryPluginMock(Settings settings) {
super(settings, new AwsEc2ServiceImpl() {
AmazonEC2 buildClient(AWSCredentialsProvider credentials, ClientConfiguration configuration,
String endpoint) {
AmazonEC2 buildClient(AWSCredentialsProvider credentials, ClientConfiguration configuration, String endpoint) {
return new AmazonEC2Mock(credentials, configuration, endpoint);
@ -232,7 +227,6 @@ public class Ec2DiscoveryPluginTests extends OpenSearchTestCase {
public void shutdown() {
public void shutdown() {}
@ -84,13 +84,19 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
private Map<String, TransportAddress> poorMansDNS = new ConcurrentHashMap<>();
protected MockTransportService createTransportService() {
final Transport transport = new MockNioTransport(Settings.EMPTY, Version.CURRENT, threadPool,
new NetworkService(Collections.emptyList()), PageCacheRecycler.NON_RECYCLING_INSTANCE, writableRegistry(),
new NoneCircuitBreakerService()) {
final Transport transport = new MockNioTransport(
new NetworkService(Collections.emptyList()),
new NoneCircuitBreakerService()
) {
public TransportAddress[] addressesFromString(String address) {
// we just need to ensure we don't resolve DNS here
return new TransportAddress[] {poorMansDNS.getOrDefault(address, buildNewFakeTransportAddress())};
return new TransportAddress[] { poorMansDNS.getOrDefault(address, buildNewFakeTransportAddress()) };
return new MockTransportService(Settings.EMPTY, transport, threadPool, TransportService.NOOP_TRANSPORT_INTERCEPTOR, null);
@ -116,21 +122,20 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
// Simulate an EC2 DescribeInstancesResponse
final Map<String, List<String>> tagsIncluded = new HashMap<>();
final String[] params = request.split("&");
Arrays.stream(params).filter(entry -> entry.startsWith("Filter.") && entry.contains("=tag%3A"))
.forEach(entry -> {
final int startIndex = "Filter.".length();
final int filterId = Integer.parseInt(entry.substring(startIndex, entry.indexOf(".", startIndex)));
tagsIncluded.put(entry.substring(entry.indexOf("=tag%3A") + "=tag%3A".length()),
.filter(param -> param.startsWith("Filter." + filterId + ".Value."))
.map(param -> param.substring(param.indexOf("=") + 1))
Arrays.stream(params).filter(entry -> entry.startsWith("Filter.") && entry.contains("=tag%3A")).forEach(entry -> {
final int startIndex = "Filter.".length();
final int filterId = Integer.parseInt(entry.substring(startIndex, entry.indexOf(".", startIndex)));
entry.substring(entry.indexOf("=tag%3A") + "=tag%3A".length()),
.filter(param -> param.startsWith("Filter." + filterId + ".Value."))
.map(param -> param.substring(param.indexOf("=") + 1))
final List<Instance> instances = IntStream.range(1, nodes + 1).mapToObj(node -> {
final String instanceId = "node" + node;
final Instance instance = new Instance()
final Instance instance = new Instance().withInstanceId(instanceId)
.withState(new InstanceState().withName(InstanceStateName.Running))
.withPrivateDnsName(PREFIX_PRIVATE_DNS + instanceId + SUFFIX_PRIVATE_DNS)
.withPublicDnsName(PREFIX_PUBLIC_DNS + instanceId + SUFFIX_PUBLIC_DNS)
@ -140,12 +145,19 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
instance.setTags(tagsList.get(node - 1));
return instance;
}).filter(instance ->
tagsIncluded.entrySet().stream().allMatch(entry -> instance.getTags().stream()
.filter(t -> t.getKey().equals(entry.getKey()))
instance -> tagsIncluded.entrySet()
entry -> instance.getTags()
.filter(t -> t.getKey().equals(entry.getKey()))
for (NameValuePair parse : URLEncodedUtils.parse(request, UTF_8)) {
if ("Action".equals(parse.getName())) {
@ -171,8 +183,7 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
public void testDefaultSettings() throws InterruptedException {
int nodes = randomInt(10);
Settings nodeSettings = Settings.builder()
Settings nodeSettings = Settings.builder().build();
List<TransportAddress> discoveryNodes = buildDynamicHosts(nodeSettings, nodes);
assertThat(discoveryNodes, hasSize(nodes));
@ -180,11 +191,9 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
public void testPrivateIp() throws InterruptedException {
int nodes = randomInt(10);
for (int i = 0; i < nodes; i++) {
poorMansDNS.put(PREFIX_PRIVATE_IP + (i+1), buildNewFakeTransportAddress());
poorMansDNS.put(PREFIX_PRIVATE_IP + (i + 1), buildNewFakeTransportAddress());
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "private_ip")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "private_ip").build();
List<TransportAddress> transportAddresses = buildDynamicHosts(nodeSettings, nodes);
assertThat(transportAddresses, hasSize(nodes));
// Check that each resolved address is the expected one
@ -198,11 +207,9 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
public void testPublicIp() throws InterruptedException {
int nodes = randomInt(10);
for (int i = 0; i < nodes; i++) {
poorMansDNS.put(PREFIX_PUBLIC_IP + (i+1), buildNewFakeTransportAddress());
poorMansDNS.put(PREFIX_PUBLIC_IP + (i + 1), buildNewFakeTransportAddress());
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "public_ip")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "public_ip").build();
List<TransportAddress> dynamicHosts = buildDynamicHosts(nodeSettings, nodes);
assertThat(dynamicHosts, hasSize(nodes));
// Check that each resolved address is the expected one
@ -216,21 +223,17 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
public void testPrivateDns() throws InterruptedException {
int nodes = randomInt(10);
for (int i = 0; i < nodes; i++) {
String instanceId = "node" + (i+1);
poorMansDNS.put(PREFIX_PRIVATE_DNS + instanceId +
SUFFIX_PRIVATE_DNS, buildNewFakeTransportAddress());
String instanceId = "node" + (i + 1);
poorMansDNS.put(PREFIX_PRIVATE_DNS + instanceId + SUFFIX_PRIVATE_DNS, buildNewFakeTransportAddress());
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "private_dns")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "private_dns").build();
List<TransportAddress> dynamicHosts = buildDynamicHosts(nodeSettings, nodes);
assertThat(dynamicHosts, hasSize(nodes));
// Check that each resolved address is the expected one
int node = 1;
for (TransportAddress address : dynamicHosts) {
String instanceId = "node" + node++;
TransportAddress expected = poorMansDNS.get(
TransportAddress expected = poorMansDNS.get(PREFIX_PRIVATE_DNS + instanceId + SUFFIX_PRIVATE_DNS);
assertEquals(address, expected);
@ -238,41 +241,31 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
public void testPublicDns() throws InterruptedException {
int nodes = randomInt(10);
for (int i = 0; i < nodes; i++) {
String instanceId = "node" + (i+1);
poorMansDNS.put(PREFIX_PUBLIC_DNS + instanceId
+ SUFFIX_PUBLIC_DNS, buildNewFakeTransportAddress());
String instanceId = "node" + (i + 1);
poorMansDNS.put(PREFIX_PUBLIC_DNS + instanceId + SUFFIX_PUBLIC_DNS, buildNewFakeTransportAddress());
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "public_dns")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "public_dns").build();
List<TransportAddress> dynamicHosts = buildDynamicHosts(nodeSettings, nodes);
assertThat(dynamicHosts, hasSize(nodes));
// Check that each resolved address is the expected one
int node = 1;
for (TransportAddress address : dynamicHosts) {
String instanceId = "node" + node++;
TransportAddress expected = poorMansDNS.get(
TransportAddress expected = poorMansDNS.get(PREFIX_PUBLIC_DNS + instanceId + SUFFIX_PUBLIC_DNS);
assertEquals(address, expected);
public void testInvalidHostType() throws InterruptedException {
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "does_not_exist")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "does_not_exist").build();
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> {
buildDynamicHosts(nodeSettings, 1);
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> { buildDynamicHosts(nodeSettings, 1); });
assertThat(exception.getMessage(), containsString("does_not_exist is unknown for discovery.ec2.host_type"));
public void testFilterByTags() throws InterruptedException {
int nodes = randomIntBetween(5, 10);
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.TAG_SETTING.getKey() + "stage", "prod")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.TAG_SETTING.getKey() + "stage", "prod").build();
int prodInstances = 0;
List<List<Tag>> tagsList = new ArrayList<>();
@ -295,9 +288,7 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
public void testFilterByMultipleTags() throws InterruptedException {
int nodes = randomIntBetween(5, 10);
Settings nodeSettings = Settings.builder()
.putList(AwsEc2Service.TAG_SETTING.getKey() + "stage", "prod", "preprod")
Settings nodeSettings = Settings.builder().putList(AwsEc2Service.TAG_SETTING.getKey() + "stage", "prod", "preprod").build();
int prodInstances = 0;
List<List<Tag>> tagsList = new ArrayList<>();
@ -334,9 +325,7 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
poorMansDNS.put("node" + (node + 1), new TransportAddress(InetAddress.getByName(addresses[node]), 9300));
Settings nodeSettings = Settings.builder()
.put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "tag:foo")
Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "tag:foo").build();
List<List<Tag>> tagsList = new ArrayList<>();
@ -358,6 +347,7 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase {
abstract static class DummyEc2SeedHostsProvider extends AwsEc2SeedHostsProvider {
public int fetchCount = 0;
DummyEc2SeedHostsProvider(Settings settings, TransportService transportService, AwsEc2Service service) {
super(settings, transportService, service);
@ -75,7 +75,7 @@ public class Ec2NetworkTests extends OpenSearchTestCase {
public static void startHttp() throws Exception {
httpServer = HttpServer.create(new InetSocketAddress(InetAddress.getLoopbackAddress().getHostAddress(), 0), 0);
BiConsumer<String, String> registerContext = (path, v) ->{
BiConsumer<String, String> registerContext = (path, v) -> {
final byte[] message = v.getBytes(UTF_8);
httpServer.createContext(path, (s) -> {
s.sendResponseHeaders(RestStatus.OK.getStatus(), message.length);
@ -84,10 +84,10 @@ public class Ec2NetworkTests extends OpenSearchTestCase {
registerContext.accept("/latest/meta-data/local-ipv4", "");
registerContext.accept("/latest/meta-data/public-ipv4", "");
registerContext.accept("/latest/meta-data/public-hostname", "");
registerContext.accept("/latest/meta-data/local-hostname", "");
@ -95,8 +95,12 @@ public class Ec2NetworkTests extends OpenSearchTestCase {
public void setup() {
// redirect EC2 metadata service to httpServer
AccessController.doPrivileged((PrivilegedAction<String>) () -> System.setProperty(EC2_METADATA_SERVICE_OVERRIDE_SYSTEM_PROPERTY,
"http://" + httpServer.getAddress().getHostName() + ":" + httpServer.getAddress().getPort()));
(PrivilegedAction<String>) () -> System.setProperty(
"http://" + httpServer.getAddress().getHostName() + ":" + httpServer.getAddress().getPort()
@ -117,14 +121,17 @@ public class Ec2NetworkTests extends OpenSearchTestCase {
public void testNetworkHostUnableToResolveEc2() {
// redirect EC2 metadata service to unknown location
AccessController.doPrivileged((PrivilegedAction<String>) () -> System.setProperty(EC2_METADATA_SERVICE_OVERRIDE_SYSTEM_PROPERTY,
(PrivilegedAction<String>) () -> System.setProperty(EC2_METADATA_SERVICE_OVERRIDE_SYSTEM_PROPERTY, "")
try {
resolveEc2("_ec2_", (InetAddress[]) null);
} catch (IOException e) {
equalTo("IOException caught when fetching InetAddress from []"));
equalTo("IOException caught when fetching InetAddress from []")
@ -170,15 +177,14 @@ public class Ec2NetworkTests extends OpenSearchTestCase {
resolveEc2("_ec2:publicDns_", InetAddress.getByName(""));
private InetAddress[] resolveEc2(String host, InetAddress ... expected) throws IOException {
Settings nodeSettings = Settings.builder()
.put("network.host", host)
private InetAddress[] resolveEc2(String host, InetAddress... expected) throws IOException {
Settings nodeSettings = Settings.builder().put("network.host", host).build();
NetworkService networkService = new NetworkService(Collections.singletonList(new Ec2NameResolver()));
InetAddress[] addresses = networkService.resolveBindHostAddresses(
if (expected == null) {
fail("We should get an IOException, resolved addressed:" + Arrays.toString(addresses));
@ -49,4 +49,3 @@ public class CloudAwsClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTest
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -127,54 +127,66 @@ public class GCEFixture extends AbstractHttpFixture {
// https://cloud.google.com/compute/docs/storing-retrieving-metadata
handlers.insert(nonAuthPath(HttpGet.METHOD_NAME, "/computeMetadata/v1/project/project-id"),
request -> simpleValue.apply(PROJECT_ID));
handlers.insert(nonAuthPath(HttpGet.METHOD_NAME, "/computeMetadata/v1/project/attributes/google-compute-default-zone"),
request -> simpleValue.apply(ZONE));
nonAuthPath(HttpGet.METHOD_NAME, "/computeMetadata/v1/project/project-id"),
request -> simpleValue.apply(PROJECT_ID)
nonAuthPath(HttpGet.METHOD_NAME, "/computeMetadata/v1/project/attributes/google-compute-default-zone"),
request -> simpleValue.apply(ZONE)
// https://cloud.google.com/compute/docs/access/create-enable-service-accounts-for-instances
handlers.insert(nonAuthPath(HttpGet.METHOD_NAME, "/computeMetadata/v1/instance/service-accounts/default/token"),
request -> jsonValue.apply(Strings.toString(jsonBuilder()
.field("access_token", TOKEN)
.field("expires_in", TimeUnit.HOURS.toSeconds(1))
.field("token_type", TOKEN_TYPE)
nonAuthPath(HttpGet.METHOD_NAME, "/computeMetadata/v1/instance/service-accounts/default/token"),
request -> jsonValue.apply(
.field("access_token", TOKEN)
.field("expires_in", TimeUnit.HOURS.toSeconds(1))
.field("token_type", TOKEN_TYPE)
// https://cloud.google.com/compute/docs/reference/rest/v1/instances
handlers.insert(authPath(HttpGet.METHOD_NAME, "/compute/v1/projects/{project}/zones/{zone}/instances"),
request -> {
final List<Map<String, Object>> items = new ArrayList<>();
int count = 0;
for (String address : Files.readAllLines(nodes)) {
items.add(MapBuilder.<String, Object>newMapBuilder()
handlers.insert(authPath(HttpGet.METHOD_NAME, "/compute/v1/projects/{project}/zones/{zone}/instances"), request -> {
final List<Map<String, Object>> items = new ArrayList<>();
int count = 0;
for (String address : Files.readAllLines(nodes)) {
MapBuilder.<String, Object>newMapBuilder()
.put("id", Long.toString(9309873766405L + count))
.put("description", "ES node" + count)
.put("name", "test" + count)
.put("kind", "compute#instance")
.put("machineType", "n1-standard-1")
Collections.singletonList(MapBuilder.<String, Object>newMapBuilder()
.put("accessConfigs", Collections.emptyList())
.put("name", "nic0")
.put("network", "default")
.put("networkIP", address)
MapBuilder.<String, Object>newMapBuilder()
.put("accessConfigs", Collections.emptyList())
.put("name", "nic0")
.put("network", "default")
.put("networkIP", address)
.put("status", "RUNNING")
.put("zone", ZONE)
final String json = Strings.toString(jsonBuilder()
.field("id", "test-instances")
.field("items", items)
final String json = Strings.toString(
jsonBuilder().startObject().field("id", "test-instances").field("items", items).endObject()
final byte[] responseAsBytes = json.getBytes(StandardCharsets.UTF_8);
final Map<String, String> headers = new HashMap<>(JSON_CONTENT_TYPE);
return new Response(RestStatus.OK.getStatus(), headers, responseAsBytes);
final byte[] responseAsBytes = json.getBytes(StandardCharsets.UTF_8);
final Map<String, String> headers = new HashMap<>(JSON_CONTENT_TYPE);
return new Response(RestStatus.OK.getStatus(), headers, responseAsBytes);
return handlers;
@ -201,22 +213,29 @@ public class GCEFixture extends AbstractHttpFixture {
private static Response newError(final RestStatus status, final String code, final String message) throws IOException {
final String response = Strings.toString(jsonBuilder()
.field("error", MapBuilder.<String, Object>newMapBuilder()
.put("errors", Collections.singletonList(
final String response = Strings.toString(
MapBuilder.<String, Object>newMapBuilder()
.put("domain", "global")
.put("reason", "required")
.put("message", message)
.put("locationType", "header")
.put("location", code)
.put("code", status.getStatus())
.put("message", message)
MapBuilder.<String, Object>newMapBuilder()
.put("domain", "global")
.put("reason", "required")
.put("message", message)
.put("locationType", "header")
.put("location", code)
.put("code", status.getStatus())
.put("message", message)
return new Response(status.getStatus(), JSON_CONTENT_TYPE, response.getBytes(UTF_8));
@ -77,10 +77,10 @@ public class GceDiscoverTests extends OpenSearchIntegTestCase {
protected Settings nodeSettings(int nodeOrdinal) {
return Settings.builder()
.put("cloud.gce.project_id", "test")
.put("cloud.gce.zone", "test")
.put("cloud.gce.project_id", "test")
.put("cloud.gce.zone", "test")
@ -89,22 +89,26 @@ public class GceDiscoverTests extends OpenSearchIntegTestCase {
final String masterNode = internalCluster().startMasterOnlyNode();
ClusterStateResponse clusterStateResponse = client(masterNode).admin().cluster().prepareState()
ClusterStateResponse clusterStateResponse = client(masterNode).admin()
// start another node
final String secondNode = internalCluster().startNode();
clusterStateResponse = client(secondNode).admin().cluster().prepareState()
clusterStateResponse = client(secondNode).admin()
// wait for the cluster to form
@ -187,8 +191,7 @@ public class GceDiscoverTests extends OpenSearchIntegTestCase {
public void close() throws IOException {
public void close() throws IOException {}
@ -60,15 +60,22 @@ public interface GceInstancesService extends Closeable {
* cloud.gce.zone: Google Compute Engine zones
Setting<List<String>> ZONE_SETTING =
Setting.listSetting("cloud.gce.zone", Collections.emptyList(), Function.identity(), Property.NodeScope);
Setting<List<String>> ZONE_SETTING = Setting.listSetting(
* cloud.gce.refresh_interval: How long the list of hosts is cached to prevent further requests to the GCE API. 0 disables caching.
* A negative value will cause infinite caching. Defaults to 0s.
Setting<TimeValue> REFRESH_SETTING =
Setting.timeSetting("cloud.gce.refresh_interval", TimeValue.timeValueSeconds(0), Property.NodeScope);
Setting<TimeValue> REFRESH_SETTING = Setting.timeSetting(
* cloud.gce.retry: Should we retry calling GCE API in case of error? Defaults to true.
@ -80,8 +87,7 @@ public interface GceInstancesService extends Closeable {
* It's a total time since the initial call is made.
* A negative value will retry indefinitely. Defaults to `-1s` (retry indefinitely).
Setting<TimeValue> MAX_WAIT_SETTING =
Setting.timeSetting("cloud.gce.max_wait", TimeValue.timeValueSeconds(-1), Property.NodeScope);
Setting<TimeValue> MAX_WAIT_SETTING = Setting.timeSetting("cloud.gce.max_wait", TimeValue.timeValueSeconds(-1), Property.NodeScope);
* Return a collection of running instances within the same GCE project
@ -67,14 +67,21 @@ import java.util.List;
import java.util.function.Function;
public class GceInstancesServiceImpl implements GceInstancesService {
private static final Logger logger = LogManager.getLogger(GceInstancesServiceImpl.class);
// all settings just used for testing - not registered by default
public static final Setting<Boolean> GCE_VALIDATE_CERTIFICATES =
Setting.boolSetting("cloud.gce.validate_certificates", true, Property.NodeScope);
public static final Setting<String> GCE_ROOT_URL =
new Setting<>("cloud.gce.root_url", "https://www.googleapis.com", Function.identity(), Property.NodeScope);
public static final Setting<Boolean> GCE_VALIDATE_CERTIFICATES = Setting.boolSetting(
public static final Setting<String> GCE_ROOT_URL = new Setting<>(
private final String project;
private final List<String> zones;
@ -91,8 +98,9 @@ public class GceInstancesServiceImpl implements GceInstancesService {
return list.execute();
// assist type inference
return instanceList.isEmpty() || instanceList.getItems() == null ?
Collections.<Instance>emptyList() : instanceList.getItems();
return instanceList.isEmpty() || instanceList.getItems() == null
? Collections.<Instance>emptyList()
: instanceList.getItems();
} catch (IOException e) {
logger.warn((Supplier<?>) () -> new ParameterizedMessage("Problem fetching instance list for zone {}", zoneId), e);
logger.debug("Full exception:", e);
@ -151,8 +159,9 @@ public class GceInstancesServiceImpl implements GceInstancesService {
try {
final String defaultZone =
final String defaultZone = getAppEngineValueFromMetadataServer(
return Collections.singletonList(defaultZone);
} catch (Exception e) {
logger.warn("unable to resolve default zone from metadata server for GCE discovery service", e);
@ -194,8 +203,7 @@ public class GceInstancesServiceImpl implements GceInstancesService {
public synchronized Compute client() {
if (refreshInterval != null && refreshInterval.millis() != 0) {
if (client != null &&
(refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
if (client != null && (refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
if (logger.isTraceEnabled()) logger.trace("using cache to retrieve client");
return client;
@ -207,13 +215,13 @@ public class GceInstancesServiceImpl implements GceInstancesService {
logger.info("starting GCE discovery service");
// Forcing Google Token API URL as set in GCE SDK to
// http://metadata/computeMetadata/v1/instance/service-accounts/default/token
// http://metadata/computeMetadata/v1/instance/service-accounts/default/token
// See https://developers.google.com/compute/docs/metadata#metadataserver
String tokenServerEncodedUrl = GceMetadataService.GCE_HOST.get(settings) +
ComputeCredential credential = new ComputeCredential.Builder(getGceHttpTransport(), gceJsonFactory)
String tokenServerEncodedUrl = GceMetadataService.GCE_HOST.get(settings)
+ "/computeMetadata/v1/instance/service-accounts/default/token";
ComputeCredential credential = new ComputeCredential.Builder(getGceHttpTransport(), gceJsonFactory).setTokenServerEncodedUrl(
// hack around code messiness in GCE code
// TODO: get this fixed
@ -224,7 +232,6 @@ public class GceInstancesServiceImpl implements GceInstancesService {
refreshInterval = TimeValue.timeValueSeconds(credential.getExpiresInSeconds() - 1);
Compute.Builder builder = new Compute.Builder(getGceHttpTransport(), gceJsonFactory, null).setApplicationName(VERSION)
@ -54,11 +54,15 @@ public class GceMetadataService extends AbstractLifecycleComponent {
private static final Logger logger = LogManager.getLogger(GceMetadataService.class);
// Forcing Google Token API URL as set in GCE SDK to
// http://metadata/computeMetadata/v1/instance/service-accounts/default/token
// http://metadata/computeMetadata/v1/instance/service-accounts/default/token
// See https://developers.google.com/compute/docs/metadata#metadataserver
// all settings just used for testing - not registered by default
public static final Setting<String> GCE_HOST =
new Setting<>("cloud.gce.host", "http://metadata.google.internal", Function.identity(), Setting.Property.NodeScope);
public static final Setting<String> GCE_HOST = new Setting<>(
private final Settings settings;
@ -78,7 +82,7 @@ public class GceMetadataService extends AbstractLifecycleComponent {
public String metadata(String metadataPath) throws IOException, URISyntaxException {
// Forcing Google Token API URL as set in GCE SDK to
// http://metadata/computeMetadata/v1/instance/service-accounts/default/token
// http://metadata/computeMetadata/v1/instance/service-accounts/default/token
// See https://developers.google.com/compute/docs/metadata#metadataserver
final URI urlMetadataNetwork = new URI(GCE_HOST.get(settings)).resolve("/computeMetadata/v1/instance/").resolve(metadataPath);
logger.debug("get metadata from [{}]", urlMetadataNetwork);
@ -91,11 +95,9 @@ public class GceMetadataService extends AbstractLifecycleComponent {
// This is needed to query meta data: https://cloud.google.com/compute/docs/metadata
headers.put("Metadata-Flavor", "Google");
HttpResponse response = Access.doPrivilegedIOException(() ->
HttpResponse response = Access.doPrivilegedIOException(
() -> getGceHttpTransport().createRequestFactory().buildGetRequest(genericUrl).setHeaders(headers).execute()
String metadata = response.parseAsString();
logger.debug("metadata found [{}]", metadata);
return metadata;
@ -111,8 +111,12 @@ public class GceNameResolver implements CustomNameResolver {
// We replace network placeholder with network interface value
gceMetadataPath = Strings.replace(GceAddressResolverType.PRIVATE_IP.gceName, "{{network}}", network);
} else {
throw new IllegalArgumentException("[" + value + "] is not one of the supported GCE network.host setting. " +
"Expecting _gce_, _gce:privateIp:X_, _gce:hostname_");
throw new IllegalArgumentException(
+ value
+ "] is not one of the supported GCE network.host setting. "
+ "Expecting _gce_, _gce:privateIp:X_, _gce:hostname_"
try {
@ -65,8 +65,7 @@ public final class Access {
public static <T> T doPrivilegedIOException(final PrivilegedExceptionAction<T> operation)
throws IOException {
public static <T> T doPrivilegedIOException(final PrivilegedExceptionAction<T> operation) throws IOException {
try {
return AccessController.doPrivileged(operation);
@ -68,8 +68,12 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
* discovery.gce.tags: The gce discovery can filter machines to include in the cluster based on tags.
public static final Setting<List<String>> TAGS_SETTING =
Setting.listSetting("discovery.gce.tags", emptyList(), Function.identity(), Property.NodeScope);
public static final Setting<List<String>> TAGS_SETTING = Setting.listSetting(
static final class Status {
private static final String TERMINATED = "TERMINATED";
@ -88,9 +92,12 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
private long lastRefresh;
private List<TransportAddress> cachedDynamicHosts;
public GceSeedHostsProvider(Settings settings, GceInstancesService gceInstancesService,
TransportService transportService,
NetworkService networkService) {
public GceSeedHostsProvider(
Settings settings,
GceInstancesService gceInstancesService,
TransportService transportService,
NetworkService networkService
) {
this.settings = settings;
this.gceInstancesService = gceInstancesService;
this.transportService = transportService;
@ -114,14 +121,19 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
public List<TransportAddress> getSeedAddresses(HostsResolver hostsResolver) {
// We check that needed properties have been set
if (this.project == null || this.project.isEmpty() || this.zones == null || this.zones.isEmpty()) {
throw new IllegalArgumentException("one or more gce discovery settings are missing. " +
"Check opensearch.yml file. Should have [" + GceInstancesService.PROJECT_SETTING.getKey() +
"] and [" + GceInstancesService.ZONE_SETTING.getKey() + "].");
throw new IllegalArgumentException(
"one or more gce discovery settings are missing. "
+ "Check opensearch.yml file. Should have ["
+ GceInstancesService.PROJECT_SETTING.getKey()
+ "] and ["
+ GceInstancesService.ZONE_SETTING.getKey()
+ "]."
if (refreshInterval.millis() != 0) {
if (cachedDynamicHosts != null &&
(refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
if (cachedDynamicHosts != null
&& (refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
if (logger.isTraceEnabled()) logger.trace("using cache to retrieve node list");
return cachedDynamicHosts;
@ -133,7 +145,8 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
String ipAddress = null;
try {
InetAddress inetAddress = networkService.resolvePublishHostAddresses(
if (inetAddress != null) {
ipAddress = NetworkAddress.format(inetAddress);
@ -168,8 +181,10 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
boolean filterByTag = false;
if (tags.isEmpty() == false) {
logger.trace("start filtering instance {} with tags {}.", name, tags);
if (instance.getTags() == null || instance.getTags().isEmpty()
|| instance.getTags().getItems() == null || instance.getTags().getItems().isEmpty()) {
if (instance.getTags() == null
|| instance.getTags().isEmpty()
|| instance.getTags().getItems() == null
|| instance.getTags().getItems().isEmpty()) {
// If this instance has no tags, we filter it out
logger.trace("no tags for this instance but we asked for tags. {} won't be part of the cluster.", name);
filterByTag = true;
@ -192,8 +207,12 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
if (filterByTag) {
logger.trace("filtering out instance {} based tags {}, not part of {}", name, tags,
instance.getTags() == null || instance.getTags().getItems() == null ? "" : instance.getTags());
"filtering out instance {} based tags {}, not part of {}",
instance.getTags() == null || instance.getTags().getItems() == null ? "" : instance.getTags()
} else {
logger.trace("instance {} with tags {} is added to discovery", name, tags);
@ -249,8 +268,14 @@ public class GceSeedHostsProvider implements SeedHostsProvider {
TransportAddress[] addresses = transportService.addressesFromString(address);
for (TransportAddress transportAddress : addresses) {
logger.trace("adding {}, type {}, address {}, transport_address {}, status {}", name, type,
ip_private, transportAddress, status);
"adding {}, type {}, address {}, transport_address {}, status {}",
@ -73,8 +73,7 @@ public class RetryHttpInitializerWrapper implements HttpRequestInitializer {
// Use only for testing.
Credential wrappedCredential, Sleeper sleeper, TimeValue maxWait) {
RetryHttpInitializerWrapper(Credential wrappedCredential, Sleeper sleeper, TimeValue maxWait) {
this.wrappedCredential = Objects.requireNonNull(wrappedCredential);
this.sleeper = sleeper;
this.maxWait = maxWait;
@ -88,45 +87,35 @@ public class RetryHttpInitializerWrapper implements HttpRequestInitializer {
public void initialize(HttpRequest httpRequest) {
final HttpUnsuccessfulResponseHandler backoffHandler =
new HttpBackOffUnsuccessfulResponseHandler(
new ExponentialBackOff.Builder()
.setMaxElapsedTimeMillis(((int) maxWait.getMillis()))
final HttpUnsuccessfulResponseHandler backoffHandler = new HttpBackOffUnsuccessfulResponseHandler(
new ExponentialBackOff.Builder().setMaxElapsedTimeMillis(((int) maxWait.getMillis())).build()
new HttpUnsuccessfulResponseHandler() {
int retry = 0;
httpRequest.setUnsuccessfulResponseHandler(new HttpUnsuccessfulResponseHandler() {
int retry = 0;
public boolean handleResponse(HttpRequest request, HttpResponse response, boolean supportsRetry) throws IOException {
if (wrappedCredential.handleResponse(
request, response, supportsRetry)) {
// If credential decides it can handle it,
// the return code or message indicated
// something specific to authentication,
// and no backoff is desired.
return true;
} else if (backoffHandler.handleResponse(
request, response, supportsRetry)) {
// Otherwise, we defer to the judgement of
// our internal backoff handler.
logger.debug("Retrying [{}] times : [{}]", retry, request.getUrl());
return true;
} else {
return false;
public boolean handleResponse(HttpRequest request, HttpResponse response, boolean supportsRetry) throws IOException {
if (wrappedCredential.handleResponse(request, response, supportsRetry)) {
// If credential decides it can handle it,
// the return code or message indicated
// something specific to authentication,
// and no backoff is desired.
return true;
} else if (backoffHandler.handleResponse(request, response, supportsRetry)) {
// Otherwise, we defer to the judgement of
// our internal backoff handler.
logger.debug("Retrying [{}] times : [{}]", retry, request.getUrl());
return true;
} else {
return false;
new HttpBackOffIOExceptionHandler(
new ExponentialBackOff.Builder()
.setMaxElapsedTimeMillis(((int) maxWait.getMillis()))
new HttpBackOffIOExceptionHandler(new ExponentialBackOff.Builder().setMaxElapsedTimeMillis(((int) maxWait.getMillis())).build())
@ -65,8 +65,9 @@ import java.util.function.Supplier;
public class GceDiscoveryPlugin extends Plugin implements DiscoveryPlugin, Closeable {
/** Determines whether settings that reroute GCE calls should be allowed (for testing purposes only). */
private static final boolean ALLOW_REROUTE_GCE_SETTINGS =
Booleans.parseBoolean(System.getProperty("opensearch.allow_reroute_gce_settings", "false"));
private static final boolean ALLOW_REROUTE_GCE_SETTINGS = Booleans.parseBoolean(
System.getProperty("opensearch.allow_reroute_gce_settings", "false")
public static final String GCE = "gce";
protected final Settings settings;
@ -83,7 +84,7 @@ public class GceDiscoveryPlugin extends Plugin implements DiscoveryPlugin, Close
* our plugin permissions don't allow core to "reach through" plugins to
* change the permission. Because that'd be silly.
Access.doPrivilegedVoid( () -> ClassInfo.of(HttpHeaders.class, true));
Access.doPrivilegedVoid(() -> ClassInfo.of(HttpHeaders.class, true));
public GceDiscoveryPlugin(Settings settings) {
@ -97,8 +98,7 @@ public class GceDiscoveryPlugin extends Plugin implements DiscoveryPlugin, Close
public Map<String, Supplier<SeedHostsProvider>> getSeedHostProviders(TransportService transportService,
NetworkService networkService) {
public Map<String, Supplier<SeedHostsProvider>> getSeedHostProviders(TransportService transportService, NetworkService networkService) {
return Collections.singletonMap(GCE, () -> {
return new GceSeedHostsProvider(settings, gceInstancesService.get(), transportService, networkService);
@ -121,7 +121,8 @@ public class GceDiscoveryPlugin extends Plugin implements DiscoveryPlugin, Close
@ -131,8 +132,6 @@ public class GceDiscoveryPlugin extends Plugin implements DiscoveryPlugin, Close
return Collections.unmodifiableList(settings);
public void close() throws IOException {
@ -120,8 +120,12 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
protected List<TransportAddress> buildDynamicNodes(GceInstancesServiceImpl gceInstancesService, Settings nodeSettings) {
GceSeedHostsProvider provider = new GceSeedHostsProvider(nodeSettings, gceInstancesService,
transportService, new NetworkService(Collections.emptyList()));
GceSeedHostsProvider provider = new GceSeedHostsProvider(
new NetworkService(Collections.emptyList())
List<TransportAddress> dynamicHosts = provider.getSeedAddresses(null);
logger.info("--> addresses found: {}", dynamicHosts);
@ -130,9 +134,9 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testNodesWithDifferentTagsAndNoTagSet() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(2));
@ -140,10 +144,10 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testNodesWithDifferentTagsAndOneTagSet() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(1));
@ -151,10 +155,10 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testNodesWithDifferentTagsAndTwoTagSet() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch", "dev")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch", "dev")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(1));
@ -162,9 +166,9 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testNodesWithSameTagsAndNoTagSet() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(2));
@ -172,10 +176,10 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testNodesWithSameTagsAndOneTagSet() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(2));
@ -183,10 +187,10 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testNodesWithSameTagsAndTwoTagsSet() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch", "dev")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.put(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "europe-west1-b")
.putList(GceSeedHostsProvider.TAGS_SETTING.getKey(), "opensearch", "dev")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(2));
@ -194,9 +198,9 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testMultipleZonesAndTwoNodesInSameZone() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.putList(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "us-central1-a", "europe-west1-b")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.putList(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "us-central1-a", "europe-west1-b")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(2));
@ -204,9 +208,9 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testMultipleZonesAndTwoNodesInDifferentZones() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.putList(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "us-central1-a", "europe-west1-b")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.putList(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "us-central1-a", "europe-west1-b")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(2));
@ -217,9 +221,9 @@ public class GceDiscoveryTests extends OpenSearchTestCase {
public void testZeroNode43() {
Settings nodeSettings = Settings.builder()
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.putList(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "us-central1-a", "us-central1-b")
.put(GceInstancesServiceImpl.PROJECT_SETTING.getKey(), projectName)
.putList(GceInstancesServiceImpl.ZONE_SETTING.getKey(), "us-central1-a", "us-central1-b")
mock = new GceInstancesServiceMock(nodeSettings);
List<TransportAddress> dynamicHosts = buildDynamicNodes(mock, nodeSettings);
assertThat(dynamicHosts, hasSize(0));
@ -94,8 +94,10 @@ public class GceNetworkTests extends OpenSearchTestCase {
* network.host: _local_
public void networkHostCoreLocal() throws IOException {
resolveGce("_local_", new NetworkService(Collections.emptyList())
.resolveBindHostAddresses(new String[] { NetworkService.DEFAULT_NETWORK_HOST }));
new NetworkService(Collections.emptyList()).resolveBindHostAddresses(new String[] { NetworkService.DEFAULT_NETWORK_HOST })
@ -105,7 +107,7 @@ public class GceNetworkTests extends OpenSearchTestCase {
* @throws IOException Well... If something goes wrong :)
private void resolveGce(String gceNetworkSetting, InetAddress expected) throws IOException {
resolveGce(gceNetworkSetting, expected == null ? null : new InetAddress [] { expected });
resolveGce(gceNetworkSetting, expected == null ? null : new InetAddress[] { expected });
@ -115,15 +117,14 @@ public class GceNetworkTests extends OpenSearchTestCase {
* @throws IOException Well... If something goes wrong :)
private void resolveGce(String gceNetworkSetting, InetAddress[] expected) throws IOException {
Settings nodeSettings = Settings.builder()
.put("network.host", gceNetworkSetting)
Settings nodeSettings = Settings.builder().put("network.host", gceNetworkSetting).build();
GceMetadataServiceMock mock = new GceMetadataServiceMock(nodeSettings);
NetworkService networkService = new NetworkService(Collections.singletonList(new GceNameResolver(mock)));
try {
InetAddress[] addresses = networkService.resolveBindHostAddresses(
if (expected == null) {
fail("We should get a IllegalArgumentException when setting network.host: _gce:doesnotexist_");
@ -107,24 +107,24 @@ public class RetryHttpInitializerWrapperTests extends OpenSearchTestCase {
public void testSimpleRetry() throws Exception {
FailThenSuccessBackoffTransport fakeTransport =
new FailThenSuccessBackoffTransport(HttpStatusCodes.STATUS_CODE_SERVER_ERROR, 3);
FailThenSuccessBackoffTransport fakeTransport = new FailThenSuccessBackoffTransport(HttpStatusCodes.STATUS_CODE_SERVER_ERROR, 3);
MockGoogleCredential credential = RetryHttpInitializerWrapper.newMockCredentialBuilder()
MockGoogleCredential credential = RetryHttpInitializerWrapper.newMockCredentialBuilder().build();
MockSleeper mockSleeper = new MockSleeper();
RetryHttpInitializerWrapper retryHttpInitializerWrapper = new RetryHttpInitializerWrapper(credential, mockSleeper,
RetryHttpInitializerWrapper retryHttpInitializerWrapper = new RetryHttpInitializerWrapper(
Compute client = new Compute.Builder(fakeTransport, new JacksonFactory(), null)
Compute client = new Compute.Builder(fakeTransport, new JacksonFactory(), null).setHttpRequestInitializer(
// TODO (URL) replace w/ opensearch url
HttpRequest request = client.getRequestFactory().buildRequest(
"Get", new GenericUrl("https://github.com/opensearch-project/OpenSearch"), null);
HttpRequest request = client.getRequestFactory()
.buildRequest("Get", new GenericUrl("https://github.com/opensearch-project/OpenSearch"), null);
HttpResponse response = request.execute();
assertThat(mockSleeper.getCount(), equalTo(3));
@ -135,11 +135,12 @@ public class RetryHttpInitializerWrapperTests extends OpenSearchTestCase {
TimeValue maxWaitTime = TimeValue.timeValueMillis(10);
int maxRetryTimes = 50;
FailThenSuccessBackoffTransport fakeTransport =
new FailThenSuccessBackoffTransport(HttpStatusCodes.STATUS_CODE_SERVER_ERROR, maxRetryTimes);
FailThenSuccessBackoffTransport fakeTransport = new FailThenSuccessBackoffTransport(
JsonFactory jsonFactory = new JacksonFactory();
MockGoogleCredential credential = RetryHttpInitializerWrapper.newMockCredentialBuilder()
MockGoogleCredential credential = RetryHttpInitializerWrapper.newMockCredentialBuilder().build();
MockSleeper oneTimeSleeper = new MockSleeper() {
@ -151,14 +152,13 @@ public class RetryHttpInitializerWrapperTests extends OpenSearchTestCase {
RetryHttpInitializerWrapper retryHttpInitializerWrapper = new RetryHttpInitializerWrapper(credential, oneTimeSleeper, maxWaitTime);
Compute client = new Compute.Builder(fakeTransport, jsonFactory, null)
Compute client = new Compute.Builder(fakeTransport, jsonFactory, null).setHttpRequestInitializer(retryHttpInitializerWrapper)
// TODO (URL) replace w/ opensearch URL
HttpRequest request1 = client.getRequestFactory().buildRequest("Get", new GenericUrl(
"https://github.com/opensearch-project/OpenSearch"), null);
HttpRequest request1 = client.getRequestFactory()
.buildRequest("Get", new GenericUrl("https://github.com/opensearch-project/OpenSearch"), null);
try {
fail("Request should fail if wait too long");
@ -170,23 +170,27 @@ public class RetryHttpInitializerWrapperTests extends OpenSearchTestCase {
public void testIOExceptionRetry() throws Exception {
FailThenSuccessBackoffTransport fakeTransport =
new FailThenSuccessBackoffTransport(HttpStatusCodes.STATUS_CODE_SERVER_ERROR, 1, true);
FailThenSuccessBackoffTransport fakeTransport = new FailThenSuccessBackoffTransport(
MockGoogleCredential credential = RetryHttpInitializerWrapper.newMockCredentialBuilder()
MockGoogleCredential credential = RetryHttpInitializerWrapper.newMockCredentialBuilder().build();
MockSleeper mockSleeper = new MockSleeper();
RetryHttpInitializerWrapper retryHttpInitializerWrapper = new RetryHttpInitializerWrapper(credential, mockSleeper,
RetryHttpInitializerWrapper retryHttpInitializerWrapper = new RetryHttpInitializerWrapper(
Compute client = new Compute.Builder(fakeTransport, new JacksonFactory(), null)
Compute client = new Compute.Builder(fakeTransport, new JacksonFactory(), null).setHttpRequestInitializer(
// TODO (URL) replace w/ opensearch URL
HttpRequest request = client.getRequestFactory().buildRequest("Get", new GenericUrl(
"https://github.com/opensearch-project/OpenSearch"), null);
HttpRequest request = client.getRequestFactory()
.buildRequest("Get", new GenericUrl("https://github.com/opensearch-project/OpenSearch"), null);
HttpResponse response = request.execute();
assertThat(mockSleeper.getCount(), equalTo(1));
@ -49,4 +49,3 @@ public class DiscoveryGceClientYamlTestSuiteIT extends OpenSearchClientYamlSuite
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -71,8 +71,16 @@ public final class AttachmentProcessor extends AbstractProcessor {
private final boolean ignoreMissing;
private final String indexedCharsField;
AttachmentProcessor(String tag, String description, String field, String targetField, Set<Property> properties,
int indexedChars, boolean ignoreMissing, String indexedCharsField) {
String tag,
String description,
String field,
String targetField,
Set<Property> properties,
int indexedChars,
boolean ignoreMissing,
String indexedCharsField
) {
super(tag, description);
this.field = field;
this.targetField = targetField;
@ -208,8 +216,12 @@ public final class AttachmentProcessor extends AbstractProcessor {
static final Set<Property> DEFAULT_PROPERTIES = EnumSet.allOf(Property.class);
public AttachmentProcessor create(Map<String, Processor.Factory> registry, String processorTag,
String description, Map<String, Object> config) throws Exception {
public AttachmentProcessor create(
Map<String, Processor.Factory> registry,
String processorTag,
String description,
Map<String, Object> config
) throws Exception {
String field = readStringProperty(TYPE, processorTag, config, "field");
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "attachment");
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
@ -224,16 +236,28 @@ public final class AttachmentProcessor extends AbstractProcessor {
try {
} catch (Exception e) {
throw newConfigurationException(TYPE, processorTag, "properties", "illegal field option [" +
fieldName + "]. valid values are " + Arrays.toString(Property.values()));
throw newConfigurationException(
"illegal field option [" + fieldName + "]. valid values are " + Arrays.toString(Property.values())
} else {
return new AttachmentProcessor(processorTag, description, field, targetField, properties, indexedChars, ignoreMissing,
return new AttachmentProcessor(
@ -77,15 +77,17 @@ import java.util.Set;
final class TikaImpl {
/** Exclude some formats */
private static final Set<MediaType> EXCLUDES = new HashSet<>(Arrays.asList(
private static final Set<MediaType> EXCLUDES = new HashSet<>(
/** subset of parsers for types we support */
private static final Parser PARSERS[] = new Parser[] {
@ -100,8 +102,7 @@ final class TikaImpl {
new org.apache.tika.parser.odf.OpenDocumentParser(),
new org.apache.tika.parser.iwork.IWorkPackageParser(),
new org.apache.tika.parser.xml.DcXMLParser(),
new org.apache.tika.parser.epub.EpubParser(),
new org.apache.tika.parser.epub.EpubParser(), };
/** autodetector based on this subset */
private static final AutoDetectParser PARSER_INSTANCE = new AutoDetectParser(PARSERS);
@ -117,8 +118,10 @@ final class TikaImpl {
try {
return AccessController.doPrivileged((PrivilegedExceptionAction<String>)
() -> TIKA_INSTANCE.parseToString(new ByteArrayInputStream(content), metadata, limit), RESTRICTED_CONTEXT);
return AccessController.doPrivileged(
(PrivilegedExceptionAction<String>) () -> TIKA_INSTANCE.parseToString(new ByteArrayInputStream(content), metadata, limit),
} catch (PrivilegedActionException e) {
// checked exception from tika: unbox it
Throwable cause = e.getCause();
@ -135,9 +138,7 @@ final class TikaImpl {
// apply additional containment for parsers, this is intersected with the current permissions
// it's hairy, but worth it so we don't have some XML flaw reading random crap from the FS
private static final AccessControlContext RESTRICTED_CONTEXT = new AccessControlContext(
new ProtectionDomain[] {
new ProtectionDomain(null, getRestrictedPermissions())
new ProtectionDomain[] { new ProtectionDomain(null, getRestrictedPermissions()) }
// compute some minimal permissions for parsers. they only get r/w access to the java temp directory,
@ -155,7 +156,7 @@ final class TikaImpl {
addReadPermissions(perms, JarHell.parseClassPath());
// plugin jars
if (TikaImpl.class.getClassLoader() instanceof URLClassLoader) {
URL[] urls = ((URLClassLoader)TikaImpl.class.getClassLoader()).getURLs();
URL[] urls = ((URLClassLoader) TikaImpl.class.getClassLoader()).getURLs();
Set<URL> set = new LinkedHashSet<>(Arrays.asList(urls));
if (set.size() != urls.length) {
throw new AssertionError("duplicate jars: " + Arrays.toString(urls));
@ -163,8 +164,13 @@ final class TikaImpl {
addReadPermissions(perms, set);
// jvm's java.io.tmpdir (needs read/write)
FilePermissionUtils.addDirectoryPath(perms, "java.io.tmpdir", PathUtils.get(System.getProperty("java.io.tmpdir")),
"read,readlink,write,delete", false);
} catch (IOException e) {
throw new UncheckedIOException(e);
@ -69,8 +69,16 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void createStandardProcessor() {
processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field",
"target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false, null);
processor = new AttachmentProcessor(
public void testEnglishTextDocument() throws Exception {
@ -84,9 +92,10 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testHtmlDocumentWithRandomFields() throws Exception {
//date is not present in the html doc
ArrayList<AttachmentProcessor.Property> fieldsList = new ArrayList<>(EnumSet.complementOf(EnumSet.of
// date is not present in the html doc
ArrayList<AttachmentProcessor.Property> fieldsList = new ArrayList<>(
Set<AttachmentProcessor.Property> selectedProperties = new HashSet<>();
int numFields = randomIntBetween(1, fieldsList.size());
@ -102,8 +111,16 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
if (randomBoolean()) {
processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field",
"target_field", selectedProperties, 10000, false, null);
processor = new AttachmentProcessor(
Map<String, Object> attachmentData = parseDocument("htmlWithEmptyDateMeta.html", processor);
assertThat(attachmentData.keySet(), hasSize(selectedFieldNames.length));
@ -133,49 +150,51 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testWordDocument() throws Exception {
Map<String, Object> attachmentData = parseDocument("issue-104.docx", processor);
assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type",
assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type", "content_length"));
assertThat(attachmentData.get("content"), is(notNullValue()));
assertThat(attachmentData.get("language"), is("en"));
assertThat(attachmentData.get("date"), is("2012-10-12T11:17:00Z"));
assertThat(attachmentData.get("author"), is("Windows User"));
assertThat(attachmentData.get("content_length"), is(notNullValue()));
public void testWordDocumentWithVisioSchema() throws Exception {
Map<String, Object> attachmentData = parseDocument("issue-22077.docx", processor);
assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type",
assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type", "content_length"));
assertThat(attachmentData.get("content").toString(), containsString("Table of Contents"));
assertThat(attachmentData.get("language"), is("en"));
assertThat(attachmentData.get("date"), is("2015-01-06T18:07:00Z"));
assertThat(attachmentData.get("author"), is(notNullValue()));
assertThat(attachmentData.get("content_length"), is(notNullValue()));
public void testLegacyWordDocumentWithVisioSchema() throws Exception {
Map<String, Object> attachmentData = parseDocument("issue-22077.doc", processor);
assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type",
assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type", "content_length"));
assertThat(attachmentData.get("content").toString(), containsString("Table of Contents"));
assertThat(attachmentData.get("language"), is("en"));
assertThat(attachmentData.get("date"), is("2016-12-16T15:04:00Z"));
assertThat(attachmentData.get("author"), is(notNullValue()));
assertThat(attachmentData.get("content_length"), is(notNullValue()));
assertThat(attachmentData.get("content_type").toString(), is("application/msword"));
public void testPdf() throws Exception {
Map<String, Object> attachmentData = parseDocument("test.pdf", processor);
is("This is a test, with umlauts, from München\n\nAlso contains newlines for testing.\n\nAnd one more."));
is("This is a test, with umlauts, from München\n\nAlso contains newlines for testing.\n\nAnd one more.")
assertThat(attachmentData.get("content_type").toString(), is("application/pdf"));
assertThat(attachmentData.get("content_length"), is(notNullValue()));
@ -195,8 +214,10 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testHtmlDocument() throws Exception {
Map<String, Object> attachmentData = parseDocument("htmlWithEmptyDateMeta.html", processor);
assertThat(attachmentData.keySet(), containsInAnyOrder("language", "content", "author", "keywords", "title", "content_type",
containsInAnyOrder("language", "content", "author", "keywords", "title", "content_type", "content_length")
assertThat(attachmentData.get("language"), is("en"));
assertThat(attachmentData.get("content"), is(notNullValue()));
assertThat(attachmentData.get("content_length"), is(notNullValue()));
@ -216,8 +237,10 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testEpubDocument() throws Exception {
Map<String, Object> attachmentData = parseDocument("testEPUB.epub", processor);
assertThat(attachmentData.keySet(), containsInAnyOrder("language", "content", "author", "title", "content_type", "content_length",
"date", "keywords"));
containsInAnyOrder("language", "content", "author", "title", "content_type", "content_length", "date", "keywords")
assertThat(attachmentData.get("content_type").toString(), containsString("application/epub+zip"));
@ -259,8 +282,10 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testNullValueWithIgnoreMissing() throws Exception {
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
Collections.singletonMap("source_field", null));
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(
Collections.singletonMap("source_field", null)
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
Processor processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", "randomTarget", null, 10, true, null);
@ -276,8 +301,10 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testNullWithoutIgnoreMissing() throws Exception {
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
Collections.singletonMap("source_field", null));
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(
Collections.singletonMap("source_field", null)
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
Processor processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", "randomTarget", null, 10, false, null);
Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
@ -311,8 +338,16 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
public void testIndexedChars() throws Exception {
processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field",
"target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 19, false, null);
processor = new AttachmentProcessor(
Map<String, Object> attachmentData = parseDocument("text-in-english.txt", processor);
@ -322,8 +357,16 @@ public class AttachmentProcessorTests extends OpenSearchTestCase {
assertThat(attachmentData.get("content_type").toString(), containsString("text/plain"));
assertThat(attachmentData.get("content_length"), is(19L));
processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field",
"target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 19, false, "max_length");
processor = new AttachmentProcessor(
attachmentData = parseDocument("text-in-english.txt", processor);
@ -65,8 +65,8 @@ public class TikaDocTests extends OpenSearchTestCase {
try (DirectoryStream<Path> stream = Files.newDirectoryStream(tmp)) {
for (Path doc : stream) {
logger.debug("parsing: {}", doc);
logger.debug("parsing: {}", doc);
@ -36,8 +36,8 @@ import org.opensearch.test.OpenSearchTestCase;
public class TikaImplTests extends OpenSearchTestCase {
public void testTikaLoads() throws Exception {
public void testTikaLoads() throws Exception {
@ -49,4 +49,3 @@ public class IngestAttachmentClientYamlTestSuiteIT extends OpenSearchClientYamlS
return OpenSearchClientYamlSuiteTestCase.createParameters();
@ -106,16 +106,14 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
protected void registerParameters(ParameterChecker checker) throws IOException {
checker.registerUpdateCheck(b -> {
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()));
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()));
checker.registerUpdateCheck(b -> {
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
b.field("search_quote_analyzer", "keyword");
m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name()));
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
b.field("search_quote_analyzer", "keyword");
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name()));
checker.registerConflictCheck("store", b -> b.field("store", true));
checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs"));
@ -126,26 +124,20 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
checker.registerConflictCheck("position_increment_gap", b -> b.field("position_increment_gap", 10));
// norms can be set from true to false, but not vice versa
fieldMapping(b -> {
b.field("type", "annotated_text");
b.field("norms", false);
fieldMapping(b -> {
b.field("type", "annotated_text");
b.field("norms", true);
b -> {
b.field("type", "annotated_text");
b.field("norms", true);
b -> {
b.field("type", "annotated_text");
b.field("norms", false);
m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())
checker.registerConflictCheck("norms", fieldMapping(b -> {
b.field("type", "annotated_text");
b.field("norms", false);
}), fieldMapping(b -> {
b.field("type", "annotated_text");
b.field("norms", true);
checker.registerUpdateCheck(b -> {
b.field("type", "annotated_text");
b.field("norms", true);
}, b -> {
b.field("type", "annotated_text");
b.field("norms", false);
}, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms()));
checker.registerUpdateCheck(b -> b.field("boost", 2.0), m -> assertEquals(m.fieldType().boost(), 2.0, 0));
@ -221,7 +213,6 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.nextPosition());
assertTrue(terms.seekExact(new BytesRef("hush")));
postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
@ -270,8 +261,7 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
String text = "the quick [brown](Color) fox jumped over the lazy dog";
ParsedDocument doc
= mapperService.documentMapper().parse(source(b -> b.field("field", text)));
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", text)));
withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
LeafReader leaf = reader.leaves().get(0).reader();
@ -282,7 +272,7 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
while ((term = iterator.next()) != null) {
//Check we have both text and annotation tokens
// Check we have both text and annotation tokens
@ -406,62 +396,92 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
public void testSearchAnalyzerSerialization() throws IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
String mapping = Strings.toString(
.field("type", "annotated_text")
.field("analyzer", "standard")
.field("search_analyzer", "keyword")
.field("type", "annotated_text")
.field("analyzer", "standard")
.field("search_analyzer", "keyword")
DocumentMapper mapper = createDocumentMapper("_doc", mapping);
assertEquals(mapping, mapper.mappingSource().toString());
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default index analyzer
mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
mapping = Strings.toString(
.field("type", "annotated_text")
.field("analyzer", "default")
.field("search_analyzer", "keyword")
.field("type", "annotated_text")
.field("analyzer", "default")
.field("search_analyzer", "keyword")
mapper = createDocumentMapper("_doc", mapping);
assertEquals(mapping, mapper.mappingSource().toString());
assertEquals(mapping, mapper.mappingSource().toString());
mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
.field("type", "annotated_text")
.field("analyzer", "keyword")
mapping = Strings.toString(
.field("type", "annotated_text")
.field("analyzer", "keyword")
mapper = createDocumentMapper("_doc", mapping);
assertEquals(mapping, mapper.mappingSource().toString());
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default search analyzer
mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
.field("type", "annotated_text")
.field("analyzer", "keyword")
.field("search_analyzer", "default")
mapping = Strings.toString(
.field("type", "annotated_text")
.field("analyzer", "keyword")
.field("search_analyzer", "default")
mapper = createDocumentMapper("_doc", mapping);
assertEquals(mapping, mapper.mappingSource().toString());
assertEquals(mapping, mapper.mappingSource().toString());
mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
.field("type", "annotated_text")
.field("analyzer", "keyword")
mapping = Strings.toString(
.field("type", "annotated_text")
.field("analyzer", "keyword")
mapper = createDocumentMapper("_doc", mapping);
XContentBuilder builder = XContentFactory.jsonBuilder();
@ -476,32 +496,44 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
public void testSearchQuoteAnalyzerSerialization() throws IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
String mapping = Strings.toString(
.field("analyzer", "standard")
.field("search_analyzer", "standard")
.field("search_quote_analyzer", "keyword")
.field("type", "annotated_text")
.field("analyzer", "standard")
.field("search_analyzer", "standard")
.field("search_quote_analyzer", "keyword")
DocumentMapper mapper = createDocumentMapper("_doc", mapping);
assertEquals(mapping, mapper.mappingSource().toString());
assertEquals(mapping, mapper.mappingSource().toString());
// special case: default index/search analyzer
mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("_doc")
mapping = Strings.toString(
.field("type", "annotated_text")
.field("analyzer", "default")
.field("search_analyzer", "default")
.field("search_quote_analyzer", "keyword")
.field("type", "annotated_text")
.field("analyzer", "default")
.field("search_analyzer", "default")
.field("search_quote_analyzer", "keyword")
mapper = createDocumentMapper("_doc", mapping);
assertEquals(mapping, mapper.mappingSource().toString());
assertEquals(mapping, mapper.mappingSource().toString());
public void testTermVectors() throws IOException {
@ -578,8 +610,7 @@ public class AnnotatedTextFieldMapperTests extends MapperTestCase {
b.field("index_options", indexOptions);
b.field("position_increment_gap", 0);
containsString("Cannot set position_increment_gap on field [field] without positions enabled"));
assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled"));
@ -90,7 +90,7 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
private static Builder builder(FieldMapper in) {
return ((AnnotatedTextFieldMapper)in).builder;
return ((AnnotatedTextFieldMapper) in).builder;
public static class Builder extends ParametrizedFieldMapper.Builder {
@ -98,20 +98,22 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private final Parameter<Boolean> store = Parameter.storeParam(m -> builder(m).store.getValue(), false);
final TextParams.Analyzers analyzers;
final Parameter<SimilarityProvider> similarity
= TextParams.similarity(m -> builder(m).similarity.getValue());
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
final Parameter<String> indexOptions = TextParams.indexOptions(m -> builder(m).indexOptions.getValue());
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());
final Parameter<Integer> positionIncrementGap = Parameter.intParam("position_increment_gap", false,
m -> builder(m).positionIncrementGap.getValue(), POSITION_INCREMENT_GAP_USE_ANALYZER)
.setValidator(v -> {
throw new MapperParsingException("[positions_increment_gap] must be positive, got [" + v + "]");
final Parameter<Integer> positionIncrementGap = Parameter.intParam(
m -> builder(m).positionIncrementGap.getValue(),
).setValidator(v -> {
throw new MapperParsingException("[positions_increment_gap] must be positive, got [" + v + "]");
private final Parameter<Float> boost = Parameter.boostParam();
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
@ -123,14 +125,23 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
protected List<Parameter<?>> getParameters() {
return Arrays.asList(store, indexOptions, norms, termVectors, similarity,
analyzers.indexAnalyzer, analyzers.searchAnalyzer, analyzers.searchQuoteAnalyzer, positionIncrementGap,
boost, meta);
return Arrays.asList(
private NamedAnalyzer wrapAnalyzer(NamedAnalyzer in, int positionIncrementGap) {
return new NamedAnalyzer(in.name(), AnalyzerScope.INDEX,
new AnnotationAnalyzerWrapper(in.analyzer()), positionIncrementGap);
return new NamedAnalyzer(in.name(), AnalyzerScope.INDEX, new AnnotationAnalyzerWrapper(in.analyzer()), positionIncrementGap);
private AnnotatedTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
@ -139,8 +150,9 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
posGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
} else {
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name()
+ "] without positions enabled");
throw new IllegalArgumentException(
"Cannot set position_increment_gap on field [" + name() + "] without positions enabled"
posGap = positionIncrementGap.get();
@ -148,12 +160,9 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
wrapAnalyzer(analyzers.getSearchAnalyzer(), posGap),
wrapAnalyzer(analyzers.getSearchQuoteAnalyzer(), posGap));
AnnotatedTextFieldType ft = new AnnotatedTextFieldType(
wrapAnalyzer(analyzers.getSearchQuoteAnalyzer(), posGap)
AnnotatedTextFieldType ft = new AnnotatedTextFieldType(buildFullName(context), store.getValue(), tsi, meta.getValue());
ft.setIndexAnalyzer(wrapAnalyzer(analyzers.getIndexAnalyzer(), posGap));
return ft;
@ -162,12 +171,17 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
public AnnotatedTextFieldMapper build(BuilderContext context) {
FieldType fieldType = TextParams.buildFieldType(() -> true, store, indexOptions, norms, termVectors);
if (fieldType.indexOptions() == IndexOptions.NONE ) {
if (fieldType.indexOptions() == IndexOptions.NONE) {
throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
return new AnnotatedTextFieldMapper(
name, fieldType, buildFieldType(fieldType, context),
multiFieldsBuilder.build(this, context), copyTo.build(), this);
buildFieldType(fieldType, context),
multiFieldsBuilder.build(this, context),
@ -183,16 +197,16 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
List<AnnotationToken> annotations;
// Format is markdown-like syntax for URLs eg:
// "New mayor is [John Smith](type=person&value=John%20Smith) "
// "New mayor is [John Smith](type=person&value=John%20Smith) "
static Pattern markdownPattern = Pattern.compile("\\[([^]\\[]*)]\\(([^)(]*)\\)");
public static AnnotatedText parse (String textPlusMarkup) {
List<AnnotationToken> annotations =new ArrayList<>();
public static AnnotatedText parse(String textPlusMarkup) {
List<AnnotationToken> annotations = new ArrayList<>();
Matcher m = markdownPattern.matcher(textPlusMarkup);
int lastPos = 0;
StringBuilder sb = new StringBuilder();
if(m.start() > lastPos){
while (m.find()) {
if (m.start() > lastPos) {
sb.append(textPlusMarkup, lastPos, m.start());
@ -210,9 +224,9 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
throw new OpenSearchParseException("key=value pairs are not supported in annotations");
if (kv.length == 1) {
//Check "=" sign wasn't in the pair string
// Check "=" sign wasn't in the pair string
if (kv[0].length() == pair.length()) {
//untyped value
// untyped value
value = URLDecoder.decode(kv[0], "UTF-8");
@ -224,7 +238,7 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
if(lastPos < textPlusMarkup.length()){
if (lastPos < textPlusMarkup.length()) {
return new AnnotatedText(sb.toString(), textPlusMarkup, annotations);
@ -241,19 +255,22 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
public final int endOffset;
public final String value;
public AnnotationToken(int offset, int endOffset, String value) {
this.offset = offset;
this.endOffset = endOffset;
this.value = value;
public String toString() {
return value +" ("+offset+" - "+endOffset+")";
return value + " (" + offset + " - " + endOffset + ")";
public boolean intersects(int start, int end) {
return (start <= offset && end >= offset) || (start <= endOffset && end >= endOffset)
|| (start >= offset && end <= endOffset);
return (start <= offset && end >= offset)
|| (start <= endOffset && end >= endOffset)
|| (start >= offset && end <= endOffset);
@ -268,29 +285,27 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
AnnotationToken other = (AnnotationToken) obj;
return Objects.equals(endOffset, other.endOffset) && Objects.equals(offset, other.offset)
&& Objects.equals(value, other.value);
return Objects.equals(endOffset, other.endOffset)
&& Objects.equals(offset, other.offset)
&& Objects.equals(value, other.value);
public String toString() {
StringBuilder sb = new StringBuilder();
annotations.forEach(a -> {
return sb.toString();
StringBuilder sb = new StringBuilder();
annotations.forEach(a -> {
return sb.toString();
public int numAnnotations() {
@ -311,14 +326,14 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private final Analyzer delegate;
private AnnotatedText[] annotations;
public AnnotatedHighlighterAnalyzer(Analyzer delegate){
public AnnotatedHighlighterAnalyzer(Analyzer delegate) {
this.delegate = delegate;
public Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
return delegate;
public void setAnnotations(AnnotatedText[] annotations) {
@ -344,13 +359,13 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private final Analyzer delegate;
public AnnotationAnalyzerWrapper(Analyzer delegate) {
this.delegate = delegate;
this.delegate = delegate;
public Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
return delegate;
@ -383,7 +398,6 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
public static final class AnnotationsInjector extends TokenFilter {
private AnnotatedText annotatedText;
@ -400,17 +414,17 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public AnnotationsInjector(TokenStream in) {
public void setAnnotations(AnnotatedText annotatedText) {
this.annotatedText = annotatedText;
currentAnnotationIndex = 0;
if(annotatedText!=null && annotatedText.numAnnotations()>0){
nextAnnotationForInjection = annotatedText.getAnnotation(0);
} else {
nextAnnotationForInjection = null;
this.annotatedText = annotatedText;
currentAnnotationIndex = 0;
if (annotatedText != null && annotatedText.numAnnotations() > 0) {
nextAnnotationForInjection = annotatedText.getAnnotation(0);
} else {
nextAnnotationForInjection = null;
@ -423,17 +437,17 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
// Abstracts if we are pulling from some pre-cached buffer of
// text tokens or directly from the wrapped TokenStream
private boolean internalNextToken() throws IOException{
if (pendingStatePos < pendingStates.size()){
private boolean internalNextToken() throws IOException {
if (pendingStatePos < pendingStates.size()) {
pendingStatePos ++;
if(pendingStatePos >=pendingStates.size()){
pendingStatePos =0;
if (pendingStatePos >= pendingStates.size()) {
pendingStatePos = 0;
return true;
if(inputExhausted) {
if (inputExhausted) {
return false;
return input.incrementToken();
@ -458,7 +472,7 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
// Buffer up all the other tokens spanned by this annotation to determine length.
if (input.incrementToken()) {
if (textOffsetAtt.endOffset() <= nextAnnotationForInjection.endOffset
&& textOffsetAtt.startOffset() < nextAnnotationForInjection.endOffset) {
&& textOffsetAtt.startOffset() < nextAnnotationForInjection.endOffset) {
annotationPosLen += posAtt.getPositionIncrement();
@ -479,7 +493,7 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private void setType() {
//Default annotation type - in future AnnotationTokens may contain custom type info
// Default annotation type - in future AnnotationTokens may contain custom type info
@ -494,22 +508,20 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
final AnnotatedText.AnnotationToken firstAnnotationAtThisPos = nextAnnotationForInjection;
while (nextAnnotationForInjection != null && nextAnnotationForInjection.offset == annotationOffset) {
termAtt.copyBuffer(nextAnnotationForInjection.value.toCharArray(), 0, nextAnnotationForInjection.value.length());
if (nextAnnotationForInjection == firstAnnotationAtThisPos) {
//Put at the head of the queue of tokens to be emitted
// Put at the head of the queue of tokens to be emitted
pendingStates.add(0, captureState());
} else {
//Put after the head of the queue of tokens to be emitted
// Put after the head of the queue of tokens to be emitted
pendingStates.add(1, captureState());
// Flag the inject annotation as null to prevent re-injection.
if (currentAnnotationIndex < annotatedText.numAnnotations()) {
@ -522,7 +534,7 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {
@ -543,8 +555,14 @@ public class AnnotatedTextFieldMapper extends ParametrizedFieldMapper {
private final FieldType fieldType;
private final Builder builder;
protected AnnotatedTextFieldMapper(String simpleName, FieldType fieldType, AnnotatedTextFieldType mappedFieldType,
MultiFields multiFields, CopyTo copyTo, Builder builder) {
protected AnnotatedTextFieldMapper(
String simpleName,
FieldType fieldType,
AnnotatedTextFieldType mappedFieldType,
MultiFields multiFields,
CopyTo copyTo,
Builder builder
) {
super(simpleName, mappedFieldType, multiFields, copyTo);
assert fieldType.tokenized();
this.fieldType = fieldType;
@ -52,7 +52,6 @@ import java.util.List;
public class AnnotatedPassageFormatter extends PassageFormatter {
public static final String SEARCH_HIT_TYPE = "_hit_term";
private final Encoder encoder;
AnnotatedText[] annotations;
@ -70,72 +69,79 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
int lastMarkupEnd = -1;
public void addUnlessOverlapping(Markup newMarkup) {
// Fast exit.
if(newMarkup.start > lastMarkupEnd) {
if (newMarkup.start > lastMarkupEnd) {
lastMarkupEnd = newMarkup.end;
lastMarkupEnd = newMarkup.end;
// Check to see if this new markup overlaps with any prior
int index=0;
for (Markup existingMarkup: markups) {
if(existingMarkup.samePosition(newMarkup)) {
int index = 0;
for (Markup existingMarkup : markups) {
if (existingMarkup.samePosition(newMarkup)) {
if(existingMarkup.overlaps(newMarkup)) {
if (existingMarkup.overlaps(newMarkup)) {
// existing markup wins - we throw away the new markup that would span this position
// markup list is in start offset order so we can insert at this position then shift others right
if(existingMarkup.isAfter(newMarkup)) {
// markup list is in start offset order so we can insert at this position then shift others right
if (existingMarkup.isAfter(newMarkup)) {
markups.add(index, newMarkup);
lastMarkupEnd = newMarkup.end;
lastMarkupEnd = newMarkup.end;
static class Markup {
int start;
int end;
String metadata;
Markup(int start, int end, String metadata) {
this.start = start;
this.end = end;
this.metadata = metadata;
boolean isAfter(Markup other) {
return start > other.end;
void merge(Markup newMarkup) {
// metadata is key1=value&key2=value&.... syntax used for urls
// metadata is key1=value&key2=value&.... syntax used for urls
assert samePosition(newMarkup);
metadata += "&" + newMarkup.metadata;
boolean samePosition(Markup other) {
return this.start == other.start && this.end == other.end;
boolean overlaps(Markup other) {
return (start<=other.start && end >= other.start)
|| (start <= other.end && end >=other.end)
|| (start>=other.start && end<=other.end);
return (start <= other.start && end >= other.start)
|| (start <= other.end && end >= other.end)
|| (start >= other.start && end <= other.end);
public String toString() {
return "Markup [start=" + start + ", end=" + end + ", metadata=" + metadata + "]";
// Merge original annotations and search hits into a single set of markups for each passage
static MarkupPassage mergeAnnotations(AnnotationToken [] annotations, Passage passage){
static MarkupPassage mergeAnnotations(AnnotationToken[] annotations, Passage passage) {
try {
MarkupPassage markupPassage = new MarkupPassage();
@ -144,28 +150,31 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
int start = passage.getMatchStarts()[i];
int end = passage.getMatchEnds()[i];
String searchTerm = passage.getMatchTerms()[i].utf8ToString();
Markup markup = new Markup(start, end, SEARCH_HIT_TYPE+"="+URLEncoder.encode(searchTerm, StandardCharsets.UTF_8.name()));
Markup markup = new Markup(
SEARCH_HIT_TYPE + "=" + URLEncoder.encode(searchTerm, StandardCharsets.UTF_8.name())
// Now add original text's annotations - ignoring any that might conflict with the search hits markup.
for (AnnotationToken token: annotations) {
for (AnnotationToken token : annotations) {
int start = token.offset;
int end = token.endOffset;
if(start >= passage.getStartOffset() && end<=passage.getEndOffset()) {
if (start >= passage.getStartOffset() && end <= passage.getEndOffset()) {
String escapedValue = URLEncoder.encode(token.value, StandardCharsets.UTF_8.name());
Markup markup = new Markup(start, end, escapedValue);
return markupPassage;
} catch (UnsupportedEncodingException e) {
// We should always have UTF-8 support
throw new IllegalStateException(e);
public Snippet[] format(Passage[] passages, String content) {
@ -174,13 +183,12 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
int pos;
int j = 0;
for (Passage passage : passages) {
AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(),
AnnotationToken[] annotations = getIntersectingAnnotations(passage.getStartOffset(), passage.getEndOffset());
MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
StringBuilder sb = new StringBuilder();
pos = passage.getStartOffset();
for(Markup markup: mergedMarkup.markups) {
pos = passage.getStartOffset();
for (Markup markup : mergedMarkup.markups) {
int start = markup.start;
int end = markup.end;
// its possible to have overlapping terms
@ -190,7 +198,7 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
if (end > pos) {
append(sb, content, Math.max(pos, start), end);
@ -199,38 +207,38 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
//we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
// we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
} else if (sb.charAt(sb.length() - 1) == HighlightUtils.NULL_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
//and we trim the snippets too
// and we trim the snippets too
snippets[j++] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0);
return snippets;
public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
int fieldValueOffset =0;
int fieldValueOffset = 0;
for (AnnotatedText fieldValueAnnotations : this.annotations) {
//This is called from a highlighter where all of the field values are concatenated
// so each annotation offset will need to be adjusted so that it takes into account
// This is called from a highlighter where all of the field values are concatenated
// so each annotation offset will need to be adjusted so that it takes into account
// the previous values AND the MULTIVAL delimiter
for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
.add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset +
fieldValueOffset, token.value));
new AnnotationToken(token.offset + fieldValueOffset, token.endOffset + fieldValueOffset, token.value)
//add 1 for the fieldvalue separator character
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
// add 1 for the fieldvalue separator character
fieldValueOffset += fieldValueAnnotations.textMinusMarkup.length() + 1;
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
private void append(StringBuilder dest, String content, int start, int end) {
dest.append(encoder.encodeText(content.substring(start, end)));
@ -56,9 +56,9 @@ public class AnnotatedTextFieldTypeTests extends FieldTypeTestCase {
public void testFetchSourceValue() throws IOException {
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", createDefaultIndexAnalyzers())
.build(new Mapper.BuilderContext(Settings.EMPTY, new ContentPath()))
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", createDefaultIndexAnalyzers()).build(
new Mapper.BuilderContext(Settings.EMPTY, new ContentPath())
assertEquals(Collections.singletonList("value"), fetchSourceValue(fieldType, "value"));
assertEquals(Collections.singletonList("42"), fetchSourceValue(fieldType, 42L));
@ -54,33 +54,35 @@ public class AnnotatedTextParsingTests extends OpenSearchTestCase {
public void testSingleValueMarkup() {
checkParsing("foo [bar](Y)", "foo bar", new AnnotationToken(4,7,"Y"));
checkParsing("foo [bar](Y)", "foo bar", new AnnotationToken(4, 7, "Y"));
public void testMultiValueMarkup() {
checkParsing("foo [bar](Y&B)", "foo bar", new AnnotationToken(4,7,"Y"),
new AnnotationToken(4,7,"B"));
checkParsing("foo [bar](Y&B)", "foo bar", new AnnotationToken(4, 7, "Y"), new AnnotationToken(4, 7, "B"));
public void testBlankTextAnnotation() {
checkParsing("It sounded like this:[](theSoundOfOneHandClapping)", "It sounded like this:",
new AnnotationToken(21,21,"theSoundOfOneHandClapping"));
"It sounded like this:[](theSoundOfOneHandClapping)",
"It sounded like this:",
new AnnotationToken(21, 21, "theSoundOfOneHandClapping")
public void testMissingBracket() {
checkParsing("[foo](MissingEndBracket bar",
"[foo](MissingEndBracket bar", new AnnotationToken[0]);
checkParsing("[foo](MissingEndBracket bar", "[foo](MissingEndBracket bar", new AnnotationToken[0]);
public void testAnnotationWithType() {
Exception expectedException = expectThrows(OpenSearchParseException.class,
() -> checkParsing("foo [bar](type=foo) baz", "foo bar baz", new AnnotationToken(4,7, "noType")));
assertThat(expectedException.getMessage(), equalTo("key=value pairs are not supported in annotations"));
Exception expectedException = expectThrows(
() -> checkParsing("foo [bar](type=foo) baz", "foo bar baz", new AnnotationToken(4, 7, "noType"))
assertThat(expectedException.getMessage(), equalTo("key=value pairs are not supported in annotations"));
public void testMissingValue() {
checkParsing("[foo]() bar", "foo bar", new AnnotationToken[0]);
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user