mirror of https://github.com/apache/lucene.git
LUCENE-10558: Implement URL ctor to support classpath/module usage in Kuromoji and Nori dictionaries (main branch) (#871)
This commit is contained in:
parent
5f832c64bf
commit
8aa4a56491
|
@ -73,6 +73,11 @@ API Changes
|
||||||
taxoEpoch decide. Add a test case that demonstrates the inconsistencies caused when you reuse taxoArrays on older
|
taxoEpoch decide. Add a test case that demonstrates the inconsistencies caused when you reuse taxoArrays on older
|
||||||
checkpoints. (Gautam Worah)
|
checkpoints. (Gautam Worah)
|
||||||
|
|
||||||
|
* LUCENE-10558: Add new constructors to Kuromoji and Nori dictionary classes to support classpath /
|
||||||
|
module system usage. It is now possible to use JDK's Class/ClassLoader/Module#getResource(...) apis
|
||||||
|
and pass their returned URL to dictionary constructors to load resources from Classpath or Module
|
||||||
|
resources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
@ -169,6 +174,10 @@ Bug Fixes
|
||||||
|
|
||||||
* LUCENE-10552: KnnVectorQuery has incorrect equals/ hashCode. (Lu Xugang)
|
* LUCENE-10552: KnnVectorQuery has incorrect equals/ hashCode. (Lu Xugang)
|
||||||
|
|
||||||
|
* LUCENE-10558: Restore behaviour of deprecated Kuromoji and Nori dictionary constructors for
|
||||||
|
custom dictionary support. Please also use new URL-based constructors for classpath/module
|
||||||
|
system resources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -64,6 +64,19 @@ the [Log4j JDK Logging Adapter](https://logging.apache.org/log4j/2.x/log4j-jul/i
|
||||||
in combination with the corresponding system property:
|
in combination with the corresponding system property:
|
||||||
`java.util.logging.manager=org.apache.logging.log4j.jul.LogManager`.
|
`java.util.logging.manager=org.apache.logging.log4j.jul.LogManager`.
|
||||||
|
|
||||||
|
### Kuromoji and Nori analysis component constructors for custom dictionaries
|
||||||
|
|
||||||
|
The Kuromoji and Nori analysis modules had some way to customize the backing dictionaries
|
||||||
|
by passing a path to file or classpath resources using some inconsistently implemented
|
||||||
|
APIs. This was buggy from the beginning, but some users made use of it. Due to the move to the Java
|
||||||
|
module system, the resource lookup on the classpath in particular stopped working correctly.
|
||||||
|
The Lucene team therefore implemented new APIs to create dictionary implementations
|
||||||
|
with custom data files. Unfortunately there were some shortcomings in the 9.1 version,
|
||||||
|
including when using the now-deprecated constructors, so users are advised to upgrade to
|
||||||
|
Lucene 9.2 or stay with 9.0.
|
||||||
|
|
||||||
|
See LUCENE-10558 for more details and workarounds.
|
||||||
|
|
||||||
## Migration from Lucene 8.x to Lucene 9.0
|
## Migration from Lucene 8.x to Lucene 9.0
|
||||||
|
|
||||||
### Rename of binary artifacts from '**-analyzers-**' to '**-analysis-**' (LUCENE-9562)
|
### Rename of binary artifacts from '**-analyzers-**' to '**-analysis-**' (LUCENE-9562)
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ja.dict;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import org.apache.lucene.util.IOSupplier;
|
import org.apache.lucene.util.IOSupplier;
|
||||||
|
@ -36,6 +37,17 @@ public final class ConnectionCosts extends org.apache.lucene.analysis.morph.Conn
|
||||||
this(() -> Files.newInputStream(connectionCostsFile));
|
this(() -> Files.newInputStream(connectionCostsFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link
|
||||||
|
* ClassLoader#getResource(String)}).
|
||||||
|
*
|
||||||
|
* @param connectionCostsUrl where to load connection costs resource
|
||||||
|
* @throws IOException if resource was not found or broken
|
||||||
|
*/
|
||||||
|
public ConnectionCosts(URL connectionCostsUrl) throws IOException {
|
||||||
|
this(() -> connectionCostsUrl.openStream());
|
||||||
|
}
|
||||||
|
|
||||||
private ConnectionCosts() throws IOException {
|
private ConnectionCosts() throws IOException {
|
||||||
this(ConnectionCosts::getClassResource);
|
this(ConnectionCosts::getClassResource);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ja.dict;
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
||||||
|
@ -58,6 +59,25 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
||||||
() -> Files.newInputStream(fstFile));
|
() -> Files.newInputStream(fstFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with
|
||||||
|
* {@link ClassLoader#getResource(String)}).
|
||||||
|
*
|
||||||
|
* @param targetMapUrl where to load target map resource
|
||||||
|
* @param posDictUrl where to load POS dictionary resource
|
||||||
|
* @param dictUrl where to load dictionary entries resource
|
||||||
|
* @param fstUrl where to load encoded FST data resource
|
||||||
|
* @throws IOException if resource was not found or broken
|
||||||
|
*/
|
||||||
|
public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
|
||||||
|
throws IOException {
|
||||||
|
this(
|
||||||
|
() -> targetMapUrl.openStream(),
|
||||||
|
() -> posDictUrl.openStream(),
|
||||||
|
() -> dictUrl.openStream(),
|
||||||
|
() -> fstUrl.openStream());
|
||||||
|
}
|
||||||
|
|
||||||
private TokenInfoDictionary() throws IOException {
|
private TokenInfoDictionary() throws IOException {
|
||||||
this(
|
this(
|
||||||
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
|
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ja.dict;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
||||||
|
@ -45,6 +46,20 @@ public final class UnknownDictionary extends BinaryDictionary<UnknownMorphData>
|
||||||
() -> Files.newInputStream(dictFile));
|
() -> Files.newInputStream(dictFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with
|
||||||
|
* {@link ClassLoader#getResource(String)}).
|
||||||
|
*
|
||||||
|
* @param targetMapUrl where to load target map resource
|
||||||
|
* @param posDictUrl where to load POS dictionary resource
|
||||||
|
* @param dictUrl where to load dictionary entries resource
|
||||||
|
* @throws IOException if resource was not found or broken
|
||||||
|
*/
|
||||||
|
public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException {
|
||||||
|
this(
|
||||||
|
() -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream());
|
||||||
|
}
|
||||||
|
|
||||||
private UnknownDictionary() throws IOException {
|
private UnknownDictionary() throws IOException {
|
||||||
this(
|
this(
|
||||||
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
|
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.junit.Before;
|
||||||
public class TestExternalDictionary extends LuceneTestCase {
|
public class TestExternalDictionary extends LuceneTestCase {
|
||||||
|
|
||||||
private Path dir;
|
private Path dir;
|
||||||
|
private ClassLoader loader = getClass().getClassLoader();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Before
|
@Before
|
||||||
|
@ -97,4 +98,32 @@ public class TestExternalDictionary extends LuceneTestCase {
|
||||||
new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
|
new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
|
||||||
assertEquals(1, cc.get(0, 1));
|
assertEquals(1, cc.get(0, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testLoadExternalUrlTokenInfoDictionary() throws Exception {
|
||||||
|
String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
|
||||||
|
TokenInfoDictionary dict =
|
||||||
|
new TokenInfoDictionary(
|
||||||
|
loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX));
|
||||||
|
assertNotNull(dict.getFST());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLoadExternalUrlUnknownDictionary() throws Exception {
|
||||||
|
String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
|
||||||
|
UnknownDictionary dict =
|
||||||
|
new UnknownDictionary(
|
||||||
|
loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX));
|
||||||
|
assertNotNull(dict.getCharacterDefinition());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLoadExternalUrlConnectionCosts() throws Exception {
|
||||||
|
String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
|
||||||
|
ConnectionCosts cc =
|
||||||
|
new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
|
||||||
|
assertEquals(1, cc.get(0, 1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ko.dict;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import org.apache.lucene.util.IOSupplier;
|
import org.apache.lucene.util.IOSupplier;
|
||||||
|
@ -36,6 +37,17 @@ public final class ConnectionCosts extends org.apache.lucene.analysis.morph.Conn
|
||||||
this(() -> Files.newInputStream(connectionCostsFile));
|
this(() -> Files.newInputStream(connectionCostsFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link
|
||||||
|
* ClassLoader#getResource(String)}).
|
||||||
|
*
|
||||||
|
* @param connectionCostsUrl where to load connection costs resource
|
||||||
|
* @throws IOException if resource was not found or broken
|
||||||
|
*/
|
||||||
|
public ConnectionCosts(URL connectionCostsUrl) throws IOException {
|
||||||
|
this(() -> connectionCostsUrl.openStream());
|
||||||
|
}
|
||||||
|
|
||||||
private ConnectionCosts() throws IOException {
|
private ConnectionCosts() throws IOException {
|
||||||
this(ConnectionCosts::getClassResource);
|
this(ConnectionCosts::getClassResource);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ko.dict;
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
||||||
|
@ -66,6 +67,25 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
||||||
() -> Files.newInputStream(fstFile));
|
() -> Files.newInputStream(fstFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with
|
||||||
|
* {@link ClassLoader#getResource(String)}).
|
||||||
|
*
|
||||||
|
* @param targetMapUrl where to load target map resource
|
||||||
|
* @param posDictUrl where to load POS dictionary resource
|
||||||
|
* @param dictUrl where to load dictionary entries resource
|
||||||
|
* @param fstUrl where to load encoded FST data resource
|
||||||
|
* @throws IOException if resource was not found or broken
|
||||||
|
*/
|
||||||
|
public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl)
|
||||||
|
throws IOException {
|
||||||
|
this(
|
||||||
|
() -> targetMapUrl.openStream(),
|
||||||
|
() -> posDictUrl.openStream(),
|
||||||
|
() -> dictUrl.openStream(),
|
||||||
|
() -> fstUrl.openStream());
|
||||||
|
}
|
||||||
|
|
||||||
private TokenInfoDictionary(
|
private TokenInfoDictionary(
|
||||||
IOSupplier<InputStream> targetMapResource,
|
IOSupplier<InputStream> targetMapResource,
|
||||||
IOSupplier<InputStream> posResource,
|
IOSupplier<InputStream> posResource,
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.ko.dict;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
import org.apache.lucene.analysis.morph.BinaryDictionary;
|
||||||
|
@ -44,6 +45,20 @@ public final class UnknownDictionary extends BinaryDictionary<UnknownMorphData>
|
||||||
() -> Files.newInputStream(dictFile));
|
() -> Files.newInputStream(dictFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with
|
||||||
|
* {@link ClassLoader#getResource(String)}).
|
||||||
|
*
|
||||||
|
* @param targetMapUrl where to load target map resource
|
||||||
|
* @param posDictUrl where to load POS dictionary resource
|
||||||
|
* @param dictUrl where to load dictionary entries resource
|
||||||
|
* @throws IOException if resource was not found or broken
|
||||||
|
*/
|
||||||
|
public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException {
|
||||||
|
this(
|
||||||
|
() -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream());
|
||||||
|
}
|
||||||
|
|
||||||
private UnknownDictionary() throws IOException {
|
private UnknownDictionary() throws IOException {
|
||||||
this(
|
this(
|
||||||
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
|
() -> getClassResource(TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.junit.Before;
|
||||||
public class TestExternalDictionary extends LuceneTestCase {
|
public class TestExternalDictionary extends LuceneTestCase {
|
||||||
|
|
||||||
private Path dir;
|
private Path dir;
|
||||||
|
private ClassLoader loader = getClass().getClassLoader();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Before
|
@Before
|
||||||
|
@ -97,4 +98,32 @@ public class TestExternalDictionary extends LuceneTestCase {
|
||||||
new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
|
new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
|
||||||
assertEquals(0, cc.get(1, 1));
|
assertEquals(0, cc.get(1, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testLoadExternalUrlTokenInfoDictionary() throws Exception {
|
||||||
|
String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/');
|
||||||
|
TokenInfoDictionary dict =
|
||||||
|
new TokenInfoDictionary(
|
||||||
|
loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX));
|
||||||
|
assertNotNull(dict.getFST());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLoadExternalUrlUnknownDictionary() throws Exception {
|
||||||
|
String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/');
|
||||||
|
UnknownDictionary dict =
|
||||||
|
new UnknownDictionary(
|
||||||
|
loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX),
|
||||||
|
loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX));
|
||||||
|
assertNotNull(dict.getCharacterDefinition());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLoadExternalUrlConnectionCosts() throws Exception {
|
||||||
|
String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/');
|
||||||
|
ConnectionCosts cc =
|
||||||
|
new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX));
|
||||||
|
assertEquals(0, cc.get(1, 1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue