LUCENE-8948: Change 'name' argument in ICU factories to 'form'.

This commit is contained in:
Tomoko Uchida 2019-08-11 14:33:31 +09:00
parent 9546d8612c
commit 407ba89aad
5 changed files with 34 additions and 7 deletions

View File

@ -30,6 +30,9 @@ API Changes
* LUCENE-8909: The deprecated IndexWriter#getFieldNames() method has been removed. * LUCENE-8909: The deprecated IndexWriter#getFieldNames() method has been removed.
(Adrien Grand, Munendra S N) (Adrien Grand, Munendra S N)
* LUCENE-8948: Change "name" argument in ICU factories to "form". Here, "form" is
named after "Unicode Normalization Form". (Tomoko Uchida)
Improvements Improvements
* LUCENE-8757: When provided with an ExecutorService to run queries across * LUCENE-8757: When provided with an ExecutorService to run queries across

View File

@ -31,7 +31,7 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
* <p> * <p>
* Supports the following attributes: * Supports the following attributes:
* <ul> * <ul>
* <li>name: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>, * <li>form: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
* one of 'nfc','nfkc', 'nfkc_cf'. Default is nfkc_cf. * one of 'nfc','nfkc', 'nfkc_cf'. Default is nfkc_cf.
* <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc * <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc
* or nfkc, to get nfd or nfkd, respectively. * or nfkc, to get nfd or nfkd, respectively.
@ -55,10 +55,10 @@ public class ICUNormalizer2CharFilterFactory extends CharFilterFactory {
/** Creates a new ICUNormalizer2CharFilterFactory */ /** Creates a new ICUNormalizer2CharFilterFactory */
public ICUNormalizer2CharFilterFactory(Map<String,String> args) { public ICUNormalizer2CharFilterFactory(Map<String,String> args) {
super(args); super(args);
String name = get(args, "name", "nfkc_cf"); String form = get(args, "form", "nfkc_cf");
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose"); String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
Normalizer2 normalizer = Normalizer2.getInstance Normalizer2 normalizer = Normalizer2.getInstance
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE); (null, form, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
String filter = get(args, "filter"); String filter = get(args, "filter");
if (filter != null) { if (filter != null) {

View File

@ -31,7 +31,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <p> * <p>
* Supports the following attributes: * Supports the following attributes:
* <ul> * <ul>
* <li>name: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>, * <li>form: A <a href="http://unicode.org/reports/tr15/">Unicode Normalization Form</a>,
* one of 'nfc','nfkc', 'nfkc_cf'. Default is nfkc_cf. * one of 'nfc','nfkc', 'nfkc_cf'. Default is nfkc_cf.
* <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc * <li>mode: Either 'compose' or 'decompose'. Default is compose. Use "decompose" with nfc
* or nfkc, to get nfd or nfkd, respectively. * or nfkc, to get nfd or nfkd, respectively.
@ -54,10 +54,10 @@ public class ICUNormalizer2FilterFactory extends TokenFilterFactory {
/** Creates a new ICUNormalizer2FilterFactory */ /** Creates a new ICUNormalizer2FilterFactory */
public ICUNormalizer2FilterFactory(Map<String,String> args) { public ICUNormalizer2FilterFactory(Map<String,String> args) {
super(args); super(args);
String name = get(args, "name", "nfkc_cf"); String form = get(args, "form", "nfkc_cf");
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose"); String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
Normalizer2 normalizer = Normalizer2.getInstance Normalizer2 normalizer = Normalizer2.getInstance
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE); (null, form, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
String filter = get(args, "filter"); String filter = get(args, "filter");
if (filter != null) { if (filter != null) {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -36,6 +37,17 @@ public class TestICUNormalizer2CharFilterFactory extends BaseTokenStreamTestCase
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" }); assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
} }
/**
 * Test that the "form" argument selects the normalization form.
 * <p>
 * The input ends with the word "Test" written in fullwidth Latin letters.
 * With form=nfkc those letters are compatibility-normalized to plain ASCII,
 * and the expected tokens keep their capitalization.
 */
public void testFormArgument() throws Exception {
  // Fullwidth "Test" (U+FF34 U+FF45 U+FF53 U+FF54), spelled as Unicode escapes so the
  // non-ASCII input cannot be silently dropped by encoding-lossy tooling.
  Reader reader = new StringReader("This is a \uFF34\uFF45\uFF53\uFF54");
  // NOTE(review): a mutable HashMap is kept on purpose; the factory presumably
  // consumes entries from the map it is given -- confirm before switching to Map.of.
  Map<String, String> args = new HashMap<>();
  args.put("form", "nfkc");
  ICUNormalizer2CharFilterFactory factory = new ICUNormalizer2CharFilterFactory(args);
  // Wrap the raw reader so normalization happens before tokenization.
  reader = factory.create(reader);
  TokenStream stream = whitespaceMockTokenizer(reader);
  assertTokenStreamContents(stream, new String[] { "This", "is", "a", "Test" });
}
/** Test that bogus arguments result in exception */ /** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception { public void testBogusArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -36,6 +37,17 @@ public class TestICUNormalizer2FilterFactory extends BaseTokenStreamTestCase {
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" }); assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
} }
/**
 * Test that the "form" argument selects the normalization form.
 * <p>
 * The input ends with the word "Test" written in fullwidth Latin letters.
 * With form=nfkc the token filter compatibility-normalizes those letters to
 * plain ASCII, and the expected tokens keep their capitalization.
 */
public void testFormArgument() throws Exception {
  // Fullwidth "Test" (U+FF34 U+FF45 U+FF53 U+FF54), spelled as Unicode escapes so the
  // non-ASCII input cannot be silently dropped by encoding-lossy tooling.
  Reader reader = new StringReader("This is a \uFF34\uFF45\uFF53\uFF54");
  // NOTE(review): a mutable HashMap is kept on purpose; the factory presumably
  // consumes entries from the map it is given -- confirm before switching to Map.of.
  Map<String, String> args = new HashMap<>();
  args.put("form", "nfkc");
  ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory(args);
  // Tokenize first, then normalize each token through the filter under test.
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = factory.create(stream);
  assertTokenStreamContents(stream, new String[] { "This", "is", "a", "Test" });
}
/** Test that bogus arguments result in exception */ /** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception { public void testBogusArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {