Analysis: Wrap at 140 columns (#34494)

Applies our standard column width to all analysis plugins.
This commit is contained in:
Nik Everett 2018-10-17 16:17:25 -04:00 committed by GitHub
parent 1452d55155
commit a45626deb5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 26 additions and 18 deletions

View File

@ -658,12 +658,6 @@
<suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]versioning[/\\]SimpleVersioningIT.java" checks="LineLength" /> <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]versioning[/\\]SimpleVersioningIT.java" checks="LineLength" />
<suppress files="modules[/\\]lang-painless[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]painless[/\\]ContextExampleTests.java" checks="LineLength" /> <suppress files="modules[/\\]lang-painless[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]painless[/\\]ContextExampleTests.java" checks="LineLength" />
<suppress files="modules[/\\]reindex[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]reindex[/\\]TransportUpdateByQueryAction.java" checks="LineLength" /> <suppress files="modules[/\\]reindex[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]reindex[/\\]TransportUpdateByQueryAction.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IcuCollationTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IcuFoldingTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IndexableBinaryStringTools.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-kuromoji[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]JapaneseStopTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-kuromoji[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]KuromojiAnalysisTests.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-phonetic[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PhoneticTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AbstractAwsTestCase.java" checks="LineLength" /> <suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AbstractAwsTestCase.java" checks="LineLength" />
<suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AmazonEC2Mock.java" checks="LineLength" /> <suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AmazonEC2Mock.java" checks="LineLength" />
<suppress files="plugins[/\\]mapper-murmur3[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]murmur3[/\\]Murmur3FieldMapper.java" checks="LineLength" /> <suppress files="plugins[/\\]mapper-murmur3[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]murmur3[/\\]Murmur3FieldMapper.java" checks="LineLength" />

View File

@ -36,12 +36,16 @@ import com.ibm.icu.util.ULocale;
/** /**
* An ICU based collation token filter. There are two ways to configure collation: * An ICU based collation token filter. There are two ways to configure collation:
* <p>The first is simply specifying the locale (defaults to the default locale). The {@code language} * <p>The first is simply specifying the locale (defaults to the default
* parameter is the lowercase two-letter ISO-639 code. An additional {@code country} and {@code variant} * locale). The {@code language} parameter is the lowercase two-letter
* ISO-639 code. An additional {@code country} and {@code variant}
* can be provided. * can be provided.
* <p>The second option is to specify collation rules as defined in the <a href="http://www.icu-project.org/userguide/Collate_Customization.html"> * <p>The second option is to specify collation rules as defined in the
* Collation customization</a> chapter in icu docs. The {@code rules} parameter can either embed the rules definition * <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
* in the settings or refer to an external location (preferably located under the {@code config} location, relative to it). * Collation customization</a> chapter in icu docs. The {@code rules}
* parameter can either embed the rules definition
* in the settings or refer to an external location (preferably located under
* the {@code config} location, relative to it).
*/ */
public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory { public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {

View File

@ -32,10 +32,12 @@ import org.elasticsearch.index.IndexSettings;
* Uses the {@link org.apache.lucene.analysis.icu.ICUFoldingFilter}. * Uses the {@link org.apache.lucene.analysis.icu.ICUFoldingFilter}.
* Applies foldings from UTR#30 Character Foldings. * Applies foldings from UTR#30 Character Foldings.
* <p> * <p>
* Can be filtered to handle certain characters in a specified way (see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html) * Can be filtered to handle certain characters in a specified way
* (see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html)
* E.g. national chars that should be retained (filter : "[^åäöÅÄÖ]"). * E.g. national chars that should be retained (filter : "[^åäöÅÄÖ]").
* *
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UnicodeSet for filtering. * <p>The {@code unicodeSetFilter} attribute can be used to provide the
* UnicodeSet for filtering.
* *
* @author kimchy (shay.banon) * @author kimchy (shay.banon)
*/ */

View File

@ -133,7 +133,10 @@ public final class IndexableBinaryStringTools {
codingCase = CODING_CASES[caseNum]; codingCase = CODING_CASES[caseNum];
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3 if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF); outputArray[outputCharNum++] = (char) (
( ((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
) & (short) 0x7FFF);
// Add trailing char containing the number of full bytes in final char // Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = (char) 1; outputArray[outputCharNum++] = (char) 1;
} else if (inputByteNum < inputLength) { } else if (inputByteNum < inputLength) {

View File

@ -47,7 +47,8 @@ public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{
super(indexSettings, name, settings); super(indexSettings, name, settings);
this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.removeTrailing = settings.getAsBoolean("remove_trailing", true); this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase); this.stopWords = Analysis.parseWords(env, settings, "stopwords",
JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
} }
@Override @Override

View File

@ -139,7 +139,8 @@ public class KuromojiAnalysisTests extends ESTestCase {
// パーティー should be stemmed by default // パーティー should be stemmed by default
// (min len) コピー should not be stemmed // (min len) コピー should not be stemmed
String[] expected_tokens_katakana = new String[]{"明後日", "パーティ", "", "行く", "予定", "", "ある", "図書館", "", "資料", "", "コピー", "", "まし", ""}; String[] expected_tokens_katakana = new String[] {
"明後日", "パーティ", "", "行く", "予定", "", "ある", "図書館", "", "資料", "", "コピー", "", "まし", ""};
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
tokenFilter = analysis.tokenFilter.get("kuromoji_ks"); tokenFilter = analysis.tokenFilter.get("kuromoji_ks");
@ -149,7 +150,8 @@ public class KuromojiAnalysisTests extends ESTestCase {
// パーティー should not be stemmed since min len == 6 // パーティー should not be stemmed since min len == 6
// コピー should not be stemmed // コピー should not be stemmed
expected_tokens_katakana = new String[]{"明後日", "パーティー", "", "行く", "予定", "", "ある", "図書館", "", "資料", "", "コピー", "", "まし", ""}; expected_tokens_katakana = new String[] {
"明後日", "パーティー", "", "行く", "予定", "", "ある", "図書館", "", "資料", "", "コピー", "", "まし", ""};
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
} }

View File

@ -82,7 +82,9 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
} else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) {
this.encoder = null; this.encoder = null;
this.maxcodelength = settings.getAsInt("max_code_len", 4); this.maxcodelength = settings.getAsInt("max_code_len", 4);
} else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { } else if ("bm".equalsIgnoreCase(encodername)
|| "beider_morse".equalsIgnoreCase(encodername)
|| "beidermorse".equalsIgnoreCase(encodername)) {
this.encoder = null; this.encoder = null;
this.languageset = settings.getAsList("languageset"); this.languageset = settings.getAsList("languageset");
String ruleType = settings.get("rule_type", "approx"); String ruleType = settings.get("rule_type", "approx");