LUCENE-8948: Change 'name' argument in ICU factories to 'form'.

This commit is contained in:
Tomoko Uchida 2019-08-11 14:33:31 +09:00
parent 9546d8612c
commit 407ba89aad
5 changed files with 34 additions and 7 deletions

View File

@ -30,6 +30,9 @@ API Changes
* LUCENE-8909: The deprecated IndexWriter#getFieldNames() method has been removed.
(Adrien Grand, Munendra S N)
* LUCENE-8948: Change "name" argument in ICU factories to "form". Here, "form" is
named after "Unicode Normalization Form". (Tomoko Uchida)
Improvements
* LUCENE-8757: When provided with an ExecutorService to run queries across

View File

@ -31,7 +31,7 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
* <p>
* Supports the following attributes:
* <ul>
* <li>name: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
* <li>form: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
* one of 'nfc','nfkc', 'nfkc_cf'. Default is nfkc_cf.
* <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc
* or nfkc, to get nfd or nfkd, respectively.
@ -55,10 +55,10 @@ public class ICUNormalizer2CharFilterFactory extends CharFilterFactory {
/** Creates a new ICUNormalizer2CharFilterFactory */
public ICUNormalizer2CharFilterFactory(Map<String,String> args) {
super(args);
String name = get(args, "name", "nfkc_cf");
String form = get(args, "form", "nfkc_cf");
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
Normalizer2 normalizer = Normalizer2.getInstance
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
(null, form, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
String filter = get(args, "filter");
if (filter != null) {

View File

@ -31,7 +31,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <p>
* Supports the following attributes:
* <ul>
* <li>name: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
* <li>form: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
* one of 'nfc','nfkc', 'nfkc_cf'. Default is nfkc_cf.
* <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc
* or nfkc, to get nfd or nfkd, respectively.
@ -54,10 +54,10 @@ public class ICUNormalizer2FilterFactory extends TokenFilterFactory {
/** Creates a new ICUNormalizer2FilterFactory */
public ICUNormalizer2FilterFactory(Map<String,String> args) {
super(args);
String name = get(args, "name", "nfkc_cf");
String form = get(args, "form", "nfkc_cf");
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
Normalizer2 normalizer = Normalizer2.getInstance
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
(null, form, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
String filter = get(args, "filter");
if (filter != null) {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
@ -35,7 +36,18 @@ public class TestICUNormalizer2CharFilterFactory extends BaseTokenStreamTestCase
TokenStream stream = whitespaceMockTokenizer(reader);
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
}
/**
 * Test that the "form" argument selects the normalization form. With "nfkc" the fullwidth
 * letters of the last word are expected to be mapped to plain ASCII while the original
 * letter case is kept (unlike the default nfkc_cf, which also case-folds).
 */
public void testFormArgument() throws Exception {
  // The last word is spelled with fullwidth Latin letters (U+FF34 U+FF45 U+FF53 U+FF54).
  // Unicode escapes are used so the input survives any source-encoding mishap; without
  // this word the stream would only produce three tokens and the assertion below fails.
  Reader reader = new StringReader("This is a \uFF34\uFF45\uFF53\uFF54");
  Map<String, String> args = new HashMap<>();
  args.put("form", "nfkc");
  ICUNormalizer2CharFilterFactory factory = new ICUNormalizer2CharFilterFactory(args);
  // Wrap the raw reader so normalization happens before tokenization.
  reader = factory.create(reader);
  TokenStream stream = whitespaceMockTokenizer(reader);
  assertTokenStreamContents(stream, new String[] { "This", "is", "a", "Test" });
}
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
@ -35,6 +36,17 @@ public class TestICUNormalizer2FilterFactory extends BaseTokenStreamTestCase {
stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
}
/**
 * Test that the "form" argument selects the normalization form. With "nfkc" the fullwidth
 * letters of the last token are expected to be mapped to plain ASCII while the original
 * letter case is kept (unlike the default nfkc_cf, which also case-folds).
 */
public void testFormArgument() throws Exception {
  // The last word is spelled with fullwidth Latin letters (U+FF34 U+FF45 U+FF53 U+FF54).
  // Unicode escapes are used so the input survives any source-encoding mishap; without
  // this word the stream would only produce three tokens and the assertion below fails.
  Reader reader = new StringReader("This is a \uFF34\uFF45\uFF53\uFF54");
  Map<String, String> args = new HashMap<>();
  args.put("form", "nfkc");
  ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory(args);
  TokenStream stream = whitespaceMockTokenizer(reader);
  // Apply the normalizing filter on top of the whitespace-tokenized stream.
  stream = factory.create(stream);
  assertTokenStreamContents(stream, new String[] { "This", "is", "a", "Test" });
}
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {