mirror of https://github.com/apache/lucene.git
LUCENE-8948: Change 'name' argument in ICU factories to 'form'.
This commit is contained in:
parent
9546d8612c
commit
407ba89aad
|
@ -30,6 +30,9 @@ API Changes
|
|||
* LUCENE-8909: The deprecated IndexWriter#getFieldNames() method has been removed.
|
||||
(Adrien Grand, Munendra S N)
|
||||
|
||||
* LUCENE-8948: Change "name" argument in ICU factories to "form". Here, "form" is
|
||||
named after "Unicode Normalization Form". (Tomoko Uchida)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-8757: When provided with an ExecutorService to run queries across
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
|
|||
* <p>
|
||||
* Supports the following attributes:
|
||||
* <ul>
|
||||
* <li>name: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
|
||||
* <li>form: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
|
||||
* one of 'nfc', 'nfkc', 'nfkc_cf'. Default is nfkc_cf.
|
||||
* <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc
|
||||
* or nfkc, to get nfd or nfkd, respectively.
|
||||
|
@ -55,10 +55,10 @@ public class ICUNormalizer2CharFilterFactory extends CharFilterFactory {
|
|||
/** Creates a new ICUNormalizer2CharFilterFactory */
|
||||
public ICUNormalizer2CharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String name = get(args, "name", "nfkc_cf");
|
||||
String form = get(args, "form", "nfkc_cf");
|
||||
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
|
||||
Normalizer2 normalizer = Normalizer2.getInstance
|
||||
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
|
||||
(null, form, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
|
||||
|
||||
String filter = get(args, "filter");
|
||||
if (filter != null) {
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <p>
|
||||
* Supports the following attributes:
|
||||
* <ul>
|
||||
* <li>name: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
|
||||
* <li>form: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
|
||||
* one of 'nfc', 'nfkc', 'nfkc_cf'. Default is nfkc_cf.
|
||||
* <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc
|
||||
* or nfkc, to get nfd or nfkd, respectively.
|
||||
|
@ -54,10 +54,10 @@ public class ICUNormalizer2FilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new ICUNormalizer2FilterFactory */
|
||||
public ICUNormalizer2FilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String name = get(args, "name", "nfkc_cf");
|
||||
String form = get(args, "form", "nfkc_cf");
|
||||
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
|
||||
Normalizer2 normalizer = Normalizer2.getInstance
|
||||
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
|
||||
(null, form, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
|
||||
|
||||
String filter = get(args, "filter");
|
||||
if (filter != null) {
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu;
|
|||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -35,7 +36,18 @@ public class TestICUNormalizer2CharFilterFactory extends BaseTokenStreamTestCase
|
|||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
|
||||
}
|
||||
|
||||
|
||||
/** Test nfkc form */
|
||||
public void testFormArgument() throws Exception {
|
||||
Reader reader = new StringReader("This is a Test");
|
||||
Map<String, String> args = new HashMap<>();
|
||||
args.put("form", "nfkc");
|
||||
ICUNormalizer2CharFilterFactory factory = new ICUNormalizer2CharFilterFactory(args);
|
||||
reader = factory.create(reader);
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
assertTokenStreamContents(stream, new String[] { "This", "is", "a", "Test" });
|
||||
}
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu;
|
|||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -35,6 +36,17 @@ public class TestICUNormalizer2FilterFactory extends BaseTokenStreamTestCase {
|
|||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
|
||||
}
|
||||
|
||||
/** Test nfkc form */
|
||||
public void testFormArgument() throws Exception {
|
||||
Reader reader = new StringReader("This is a Test");
|
||||
Map<String, String> args = new HashMap<>();
|
||||
args.put("form", "nfkc");
|
||||
ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory(args);
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "This", "is", "a", "Test" });
|
||||
}
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue