LUCENE-10348: Make stopwords resources from analyzers modules visible to ClasspathResourceLoader and ModuleResourceLoader (#581)

This commit is contained in:
Uwe Schindler 2022-01-04 15:05:29 +01:00
parent 837e163eee
commit d17c54b5ff
7 changed files with 107 additions and 4 deletions

View File

@ -89,6 +89,61 @@ module org.apache.lucene.analysis.common {
exports org.tartarus.snowball.ext;
exports org.tartarus.snowball;
opens org.apache.lucene.analysis.ar to
org.apache.lucene.core;
opens org.apache.lucene.analysis.bg to
org.apache.lucene.core;
opens org.apache.lucene.analysis.bn to
org.apache.lucene.core;
opens org.apache.lucene.analysis.br to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ca to
org.apache.lucene.core;
opens org.apache.lucene.analysis.cjk to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ckb to
org.apache.lucene.core;
opens org.apache.lucene.analysis.cz to
org.apache.lucene.core;
opens org.apache.lucene.analysis.el to
org.apache.lucene.core;
opens org.apache.lucene.analysis.et to
org.apache.lucene.core;
opens org.apache.lucene.analysis.eu to
org.apache.lucene.core;
opens org.apache.lucene.analysis.fa to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ga to
org.apache.lucene.core;
opens org.apache.lucene.analysis.gl to
org.apache.lucene.core;
opens org.apache.lucene.analysis.hi to
org.apache.lucene.core;
opens org.apache.lucene.analysis.hy to
org.apache.lucene.core;
opens org.apache.lucene.analysis.id to
org.apache.lucene.core;
opens org.apache.lucene.analysis.lt to
org.apache.lucene.core;
opens org.apache.lucene.analysis.lv to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ne to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ro to
org.apache.lucene.core;
opens org.apache.lucene.analysis.snowball to
org.apache.lucene.core;
opens org.apache.lucene.analysis.sr to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ta to
org.apache.lucene.core;
opens org.apache.lucene.analysis.te to
org.apache.lucene.core;
opens org.apache.lucene.analysis.th to
org.apache.lucene.core;
opens org.apache.lucene.analysis.tr to
org.apache.lucene.core;
provides org.apache.lucene.analysis.CharFilterFactory with
org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory,
org.apache.lucene.analysis.charfilter.MappingCharFilterFactory,

View File

@ -26,6 +26,9 @@ module org.apache.lucene.analysis.icu {
exports org.apache.lucene.analysis.icu.segmentation;
exports org.apache.lucene.analysis.icu.tokenattributes;
opens org.apache.lucene.analysis.icu.segmentation to
org.apache.lucene.core;
provides org.apache.lucene.analysis.CharFilterFactory with
org.apache.lucene.analysis.icu.ICUNormalizer2CharFilterFactory;
provides org.apache.lucene.analysis.TokenizerFactory with

View File

@ -26,6 +26,11 @@ module org.apache.lucene.analysis.kuromoji {
exports org.apache.lucene.analysis.ja.tokenattributes;
exports org.apache.lucene.analysis.ja.util;
opens org.apache.lucene.analysis.ja to
org.apache.lucene.core;
opens org.apache.lucene.analysis.ja.completion to
org.apache.lucene.core;
provides org.apache.lucene.analysis.CharFilterFactory with
org.apache.lucene.analysis.ja.JapaneseIterationMarkCharFilterFactory;
provides org.apache.lucene.analysis.TokenizerFactory with

View File

@ -27,6 +27,9 @@ module org.apache.lucene.analysis.morfologik {
exports org.apache.lucene.analysis.morfologik;
exports org.apache.lucene.analysis.uk;
opens org.apache.lucene.analysis.uk to
org.apache.lucene.core;
provides org.apache.lucene.analysis.TokenFilterFactory with
org.apache.lucene.analysis.morfologik.MorfologikFilterFactory;
}

View File

@ -23,6 +23,9 @@ module org.apache.lucene.analysis.smartcn {
exports org.apache.lucene.analysis.cn.smart;
exports org.apache.lucene.analysis.cn.smart.hhmm;
opens org.apache.lucene.analysis.cn.smart to
org.apache.lucene.core;
provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.cn.smart.HMMChineseTokenizerFactory;
}

View File

@ -24,6 +24,9 @@ module org.apache.lucene.analysis.stempel {
exports org.apache.lucene.analysis.stempel;
exports org.egothor.stemmer;
opens org.apache.lucene.analysis.pl to
org.apache.lucene.core;
provides org.apache.lucene.analysis.TokenFilterFactory with
org.apache.lucene.analysis.stempel.StempelPolishStemFilterFactory;
}

View File

@ -32,6 +32,7 @@ import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@ -262,9 +263,9 @@ public class TestModularLayer {
* module layer.
*/
@Test
public void testAllOpenPackagesInSync() throws IOException {
public void testAllExportedPackagesInSync() throws IOException {
for (var module : allCoreModules) {
Set<String> jarPackages = getJarPackages(module);
Set<String> jarPackages = getJarPackages(module, entry -> true);
Set<ModuleDescriptor.Exports> moduleExports = new HashSet<>(module.descriptor().exports());
if (module.descriptor().name().equals("org.apache.lucene.luke")) {
@ -307,7 +308,36 @@ public class TestModularLayer {
}
}
private Set<String> getJarPackages(ModuleReference module) throws IOException {
/**
 * Verifies, for every analysis module, that the packages it opens are exactly the packages of its
 * JAR that contain resource files ({@code *.txt} or {@code *.brk}), and that each of those
 * packages is opened to Lucene Core only.
 */
@Test
public void testAllOpenAnalysisPackagesInSync() throws IOException {
  // We only collect resources from the JAR file which are:
  // - stopword files (*.txt)
  // - ICU break iterator rules (*.brk)
  // The predicate does not depend on the module, so it is built once up front.
  Predicate<String> isResourceEntry = Pattern.compile("/[^/]+\\.(txt|brk)$").asPredicate();

  for (var module : allCoreModules) {
    String moduleName = module.descriptor().name();
    if (!moduleName.startsWith("org.apache.lucene.analysis.")) {
      continue; // at the moment we only want to open resources inside analysis packages
    }

    // Packages (dot-separated) that hold at least one matching resource entry.
    Set<String> packagesWithResources = getJarPackages(module, isResourceEntry);
    Set<ModuleDescriptor.Opens> declaredOpens = module.descriptor().opens();

    Assertions.assertThat(declaredOpens)
        .as("Open packages in module: " + moduleName)
        .allSatisfy(
            opens -> {
              // Every "opens" directive must be qualified, with core as its single target.
              Assertions.assertThat(opens.targets())
                  .as("Opens should only be targeted to Lucene Core.")
                  .containsExactly("org.apache.lucene.core");
            })
        // The opened package names must match the resource packages exactly (order is irrelevant).
        .map(ModuleDescriptor.Opens::source)
        .containsExactlyInAnyOrderElementsOf(packagesWithResources);
  }
}
private Set<String> getJarPackages(ModuleReference module, Predicate<String> entryFilter)
throws IOException {
try (ModuleReader reader = module.open()) {
return reader
.list()
@ -315,7 +345,8 @@ public class TestModularLayer {
entry ->
!entry.startsWith("META-INF/")
&& !entry.equals("module-info.class")
&& !entry.endsWith("/"))
&& !entry.endsWith("/")
&& entryFilter.test(entry))
.map(entry -> entry.replaceAll("/[^/]+$", ""))
.map(entry -> entry.replace('/', '.'))
.collect(Collectors.toCollection(TreeSet::new));