Analysis: Wrap at 140 columns (#34494)
Applies our standard column width to all analysis plugins.
This commit is contained in:
parent
1452d55155
commit
a45626deb5
|
@ -658,12 +658,6 @@
|
|||
<suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]versioning[/\\]SimpleVersioningIT.java" checks="LineLength" />
|
||||
<suppress files="modules[/\\]lang-painless[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]painless[/\\]ContextExampleTests.java" checks="LineLength" />
|
||||
<suppress files="modules[/\\]reindex[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]reindex[/\\]TransportUpdateByQueryAction.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IcuCollationTokenFilterFactory.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IcuFoldingTokenFilterFactory.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IndexableBinaryStringTools.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]analysis-kuromoji[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]JapaneseStopTokenFilterFactory.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]analysis-kuromoji[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]KuromojiAnalysisTests.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]analysis-phonetic[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PhoneticTokenFilterFactory.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AbstractAwsTestCase.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AmazonEC2Mock.java" checks="LineLength" />
|
||||
<suppress files="plugins[/\\]mapper-murmur3[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]murmur3[/\\]Murmur3FieldMapper.java" checks="LineLength" />
|
||||
|
|
|
@ -36,12 +36,16 @@ import com.ibm.icu.util.ULocale;
|
|||
|
||||
/**
|
||||
* An ICU based collation token filter. There are two ways to configure collation:
|
||||
* <p>The first is simply specifying the locale (defaults to the default locale). The {@code language}
|
||||
* parameter is the lowercase two-letter ISO-639 code. An additional {@code country} and {@code variant}
|
||||
* <p>The first is simply specifying the locale (defaults to the default
|
||||
* locale). The {@code language} parameter is the lowercase two-letter
|
||||
* ISO-639 code. An additional {@code country} and {@code variant}
|
||||
* can be provided.
|
||||
* <p>The second option is to specify collation rules as defined in the <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
|
||||
* Collation customization</a> chapter in icu docs. The {@code rules} parameter can either embed the rules definition
|
||||
* in the settings or refer to an external location (preferable located under the {@code config} location, relative to it).
|
||||
* <p>The second option is to specify collation rules as defined in the
|
||||
* <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
|
||||
* Collation customization</a> chapter in icu docs. The {@code rules}
|
||||
* parameter can either embed the rules definition
|
||||
* in the settings or refer to an external location (preferably located under
|
||||
* the {@code config} location, relative to it).
|
||||
*/
|
||||
public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
|
|
|
@ -32,10 +32,12 @@ import org.elasticsearch.index.IndexSettings;
|
|||
* Uses the {@link org.apache.lucene.analysis.icu.ICUFoldingFilter}.
|
||||
* Applies foldings from UTR#30 Character Foldings.
|
||||
* <p>
|
||||
* Can be filtered to handle certain characters in a specified way (see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html)
|
||||
* Can be filtered to handle certain characters in a specified way
|
||||
* (see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html)
|
||||
* E.g. national chars that should be retained (filter : "[^åäöÅÄÖ]").
|
||||
*
|
||||
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.
|
||||
* <p>The {@code unicodeSetFilter} attribute can be used to provide the
|
||||
* UnicodeSet for filtering.
|
||||
*
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
|
|
@ -133,7 +133,10 @@ public final class IndexableBinaryStringTools {
|
|||
codingCase = CODING_CASES[caseNum];
|
||||
|
||||
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
|
||||
outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
|
||||
outputArray[outputCharNum++] = (char) (
|
||||
( ((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
|
||||
) & (short) 0x7FFF);
|
||||
// Add trailing char containing the number of full bytes in final char
|
||||
outputArray[outputCharNum++] = (char) 1;
|
||||
} else if (inputByteNum < inputLength) {
|
||||
|
|
|
@ -47,7 +47,8 @@ public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{
|
|||
super(indexSettings, name, settings);
|
||||
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
|
||||
this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
|
||||
this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
|
||||
this.stopWords = Analysis.parseWords(env, settings, "stopwords",
|
||||
JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -139,7 +139,8 @@ public class KuromojiAnalysisTests extends ESTestCase {
|
|||
|
||||
// パーティー should be stemmed by default
|
||||
// (min len) コピー should not be stemmed
|
||||
String[] expected_tokens_katakana = new String[]{"明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
|
||||
String[] expected_tokens_katakana = new String[] {
|
||||
"明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
|
||||
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
|
||||
|
||||
tokenFilter = analysis.tokenFilter.get("kuromoji_ks");
|
||||
|
@ -149,7 +150,8 @@ public class KuromojiAnalysisTests extends ESTestCase {
|
|||
|
||||
// パーティー should not be stemmed since min len == 6
|
||||
// コピー should not be stemmed
|
||||
expected_tokens_katakana = new String[]{"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
|
||||
expected_tokens_katakana = new String[] {
|
||||
"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
|
||||
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
|
||||
}
|
||||
|
||||
|
|
|
@ -82,7 +82,9 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
} else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) {
|
||||
this.encoder = null;
|
||||
this.maxcodelength = settings.getAsInt("max_code_len", 4);
|
||||
} else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) {
|
||||
} else if ("bm".equalsIgnoreCase(encodername)
|
||||
|| "beider_morse".equalsIgnoreCase(encodername)
|
||||
|| "beidermorse".equalsIgnoreCase(encodername)) {
|
||||
this.encoder = null;
|
||||
this.languageset = settings.getAsList("languageset");
|
||||
String ruleType = settings.get("rule_type", "approx");
|
||||
|
|
Loading…
Reference in New Issue