LUCENE-10192: drop jars from binary distribution and an aggregate merge of related minor tasks.

This commit is contained in:
Dawid Weiss 2021-10-31 10:50:11 +01:00
commit 1d152c5f67
15 changed files with 313 additions and 397 deletions

View File

@ -84,15 +84,8 @@ def getGitRev():
status = os.popen('git status').read().strip()
if 'nothing to commit, working directory clean' not in status and 'nothing to commit, working tree clean' not in status:
raise RuntimeError('git clone is dirty:\n\n%s' % status)
branch = os.popen('git rev-parse --abbrev-ref HEAD').read().strip()
command = 'git log origin/%s..' % branch
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if len(stdout.strip()) > 0:
raise RuntimeError('There are unpushed commits - "%s" output is:\n\n%s' % (command, stdout.decode('utf-8')))
if len(stderr.strip()) > 0:
raise RuntimeError('Command "%s" failed:\n\n%s' % (command, stderr.decode('utf-8')))
if 'Your branch is ahead of' in status:
raise RuntimeError('Your local branch is ahead of the remote? git status says:\n%s' % status)
print(' git clone is clean')
else:
print(' Ignoring dirty git clone due to dev-mode')

View File

@ -574,7 +574,6 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
# raise RuntimeError('lucene: file "%s" is missing from artifact %s' % (fileName, artifact))
# in_root_folder.remove(fileName)
# TODO: clean this up to not be a list of modules that we must maintain
expected_folders = ['analysis', 'backward-codecs', 'benchmark', 'classification', 'codecs', 'core',
'demo', 'expressions', 'facet', 'grouping', 'highlighter', 'join',
'luke', 'memory', 'misc', 'monitor', 'queries', 'queryparser', 'replicator',
@ -589,8 +588,7 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
if len(in_lucene_folder) > 0:
raise RuntimeError('lucene: unexpected files/dirs in artifact %s lucene/ folder: %s' % (artifact, in_lucene_folder))
else:
is_in_list(in_root_folder, expected_folders)
is_in_list(in_root_folder, ['docs'])
is_in_list(in_root_folder, ['bin', 'docs', 'licenses', 'modules', 'modules-test-framework', 'modules-thirdparty'])
if len(in_root_folder) > 0:
raise RuntimeError('lucene: unexpected files/dirs in artifact %s: %s' % (artifact, in_root_folder))
@ -648,21 +646,26 @@ def testDemo(run_java, isSrc, version, jdk):
print(' test demo with %s...' % jdk)
sep = ';' if cygwin else ':'
if isSrc:
# For source release, use the classpath for each module.
classPath = ['lucene/core/build/libs/lucene-core-%s.jar' % version,
'lucene/demo/build/libs/lucene-demo-%s.jar' % version,
'lucene/analysis/common/build/libs/lucene-analyzers-common-%s.jar' % version,
'lucene/queryparser/build/libs/lucene-queryparser-%s.jar' % version]
cp = sep.join(classPath)
docsDir = 'lucene/core/src'
checkIndexCmd = 'java -ea -cp "%s" org.apache.lucene.index.CheckIndex index' % cp
indexFilesCmd = 'java -cp "%s" -Dsmoketester=true org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir)
searchFilesCmd = 'java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp
else:
classPath = ['core/lucene-core-%s.jar' % version,
'demo/lucene-demo-%s.jar' % version,
'analysis/common/lucene-analyzers-common-%s.jar' % version,
'queryparser/lucene-queryparser-%s.jar' % version]
cp = sep.join(classPath)
# For binary release, set up classpath as modules.
cp = "--module-path modules"
docsDir = 'docs'
run_java('java -cp "%s" -Dsmoketester=true org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir), 'index.log')
run_java('java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp, 'search.log')
checkIndexCmd = 'java -ea %s --module lucene.core/org.apache.lucene.index.CheckIndex index' % cp
indexFilesCmd = 'java -Dsmoketester=true %s --module lucene.demo/org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir)
searchFilesCmd = 'java %s --module lucene.demo/org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp
run_java(indexFilesCmd, 'index.log')
run_java(searchFilesCmd, 'search.log')
reMatchingDocs = re.compile('(\d+) total matching documents')
m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
if m is None:
@ -672,8 +675,9 @@ def testDemo(run_java, isSrc, version, jdk):
if numHits < 100:
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
print(' got %d hits for query "lucene"' % numHits)
print(' checkindex with %s...' % jdk)
run_java('java -ea -cp "%s" org.apache.lucene.index.CheckIndex index' % cp, 'checkindex.log')
run_java(checkIndexCmd, 'checkindex.log')
s = open('checkindex.log').read()
m = re.search(r'^\s+version=(.*?)$', s, re.MULTILINE)
if m is None:

View File

@ -45,6 +45,7 @@ def licenseTypes = [
"MIT" : [name: "Massachusetts Institute of Tech. License", noticeOptional: true],
"MPL" : [name: "Mozilla Public License", noticeOptional: true /* NOT SURE on the required notice */],
"PD" : [name: "Public Domain", noticeOptional: true],
"PDDL" : [name: "Public Domain Dedication and License", noticeOptional: true],
"SUN" : [name: "Sun Open Source License", noticeOptional: true],
"COMPOUND": [name: "Compound license (details in NOTICE file)."],
]
@ -311,7 +312,7 @@ configure(project(":lucene")) {
// Used by Luke.
"elegant-icon-font-*",
// glove knn dictionary.
"pddl-10.txt",
"glove-LICENSE-PDDL.txt",
]
}

View File

@ -18,9 +18,8 @@
/**
* KnnVector example code.
*
* <p>The vector dictionary used in the demo is taken from the GloVe project hosted at
* https://nlp.stanford.edu/projects/glove, whose data is in the public domain, as described by
* http://opendatacommons.org/licenses/pddl/1.0, available in the Lucene distribution as
* lucene/licenses/pddl-10.txt.
* <p>The vector dictionary used in the demo is taken from the <a
* href="https://nlp.stanford.edu/projects/glove">GloVe</a> project, whose data is in the <a
* href="http://opendatacommons.org/licenses/pddl/1.0">public domain</a>.
*/
package org.apache.lucene.demo.knn;

View File

@ -1,73 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Configure binary distribution content defaults for each subproject.
configure(rootProject.ext.mavenProjects) {
plugins.withType(JavaPlugin) {
ext {
binaryArtifactsDir = file("${buildDir}/binaryArtifacts")
}
configurations {
// All of the module's "binary" artifacts that should go to the release.
binaryArtifacts
// Legacy binary distribution contains Lucene artifacts (JARs) and their dependencies
// under 'lib/'. We have to go through some hoops to split the artifact from all of its
// transitive dependencies and separate their output location.
transitiveDependencies {
// All of runtime elements
extendsFrom runtimeElements
// Exclude cross-module dependencies
exclude group: "org.apache.lucene"
// Exclude the following from all projects, if they appear.
exclude group: "commons-logging"
exclude group: "org.slf4j"
}
}
task assembleBinaryArtifacts(type: Sync) {
from(tasks.findByName("jar"))
from(projectDir, {
include "README.*"
})
from(configurations.transitiveDependencies, {
into "lib/"
})
into binaryArtifactsDir
}
artifacts {
binaryArtifacts binaryArtifactsDir, {
builtBy assembleBinaryArtifacts
}
}
}
}
// Add launch scripts for Luke.
configure(project(":lucene:luke")) {
tasks.matching { it.name == "assembleBinaryArtifacts" }.all {
from("${projectDir}/bin", {
})
}
}

View File

@ -15,41 +15,52 @@
* limitations under the License.
*/
// Configure Lucene's binary release. This is a bit convoluted so is placed
// in a separate script.
configure(project(":lucene:distribution")) {
def packageBaseName = "${buildDir}/packages/lucene-${version}"
// All the maven-published projects are part of the binary distribution.
def includeInBinaries = rootProject.ext.mavenProjects
// Legacy binary distribution contains Lucene artifacts (JARs) and their dependencies
// under 'lib/'. We have to go through some hoops to split the artifact from all of its
// transitive dependencies and separate their output location.
def moduleArtifactConfigurations = []
for (Project module : includeInBinaries) {
def binaryArtifactsConfiguration = configurations.create("binary-artifacts" + module.path.replace(':', '-'))
dependencies { DependencyHandler handler ->
handler.add(binaryArtifactsConfiguration.name, project(path: module.path, configuration: "binaryArtifacts"))
}
moduleArtifactConfigurations += [
"module": module,
"binaryArtifactsConfiguration": binaryArtifactsConfiguration
]
}
// Prepare site documentation dependency for inclusion.
// Prepare site documentation dependency for inclusion.
configurations {
docs
jars
jarsTestFramework
jarsThirdParty
}
dependencies {
dependencies { DependencyHandler handler ->
docs project(path: ':lucene:documentation', configuration: 'site')
// Maven-published submodule JARs are part of the binary distribution
// (with a few exceptions explicitly filtered below).
// We don't copy their transitive dependencies.
def binaryModules = rootProject.ext.mavenProjects.findAll { p -> !(p in [
// Placed in a separate folder (module layer conflicts).
project(":lucene:test-framework"),
]) }
for (Project module : binaryModules) {
jars(module, {
transitive = false
})
}
// The test framework has split packages and breaks the module system layer so
// it's in a separate folder.
jarsTestFramework(project(":lucene:test-framework"), {
transitive = false
})
// The third-party JARs consist of all the transitive dependencies from these modules.
// Not sure whether we have to include all the thirdparty JARs from across all the modules.
for (Project module : [
project(":lucene:luke")
]) {
jarsThirdParty(module, {
transitive = true
})
}
}
@ -67,8 +78,9 @@ configure(project(":lucene:distribution")) {
copy.setMode(0755)
}
// Attach binary release exclusive files.
// Attach files that are exclusive to the binary release.
from(file("src/binary-release"), {
filteringCharset = 'UTF-8'
})
// Cherry-pick certain files from the root.
@ -83,7 +95,6 @@ configure(project(":lucene:distribution")) {
include "JRE_VERSION_MIGRATION.md"
include "MIGRATE.md"
include "SYSTEM_REQUIREMENTS.md"
include "licenses/*"
})
@ -92,14 +103,18 @@ configure(project(":lucene:distribution")) {
into 'docs'
})
// Each module's binary artifacts and their dependencies.
moduleArtifactConfigurations.each {
def toPath = it.module.path.replaceFirst("^:lucene:", "").replace(':', '/')
// Binary modules (Lucene).
from(configurations.jars, {
into 'modules'
})
from(configurations.jarsTestFramework, {
into 'modules-test-framework'
})
from(it.binaryArtifactsConfiguration, {
into toPath
})
}
// Binary modules (with dependencies). Don't duplicate project artifacts.
from((configurations.jarsThirdParty - configurations.jars), {
into 'modules-thirdparty'
})
// Internal archive folder for all files.
into "lucene-${rootProject.version}/"

View File

@ -37,7 +37,6 @@ ext {
apply from: buildscript.sourceFile.toPath().resolveSibling("source-release.gradle")
// Prepare the "binary" distribution artifact.
apply from: buildscript.sourceFile.toPath().resolveSibling("binary-artifacts.gradle")
apply from: buildscript.sourceFile.toPath().resolveSibling("binary-release.gradle")
// Configure maven artifact collection to a local build folder (required to collect artifacts for the release).

View File

@ -29,10 +29,16 @@ and an API that can easily be used to add search capabilities to applications.
## Files in a binary distribution
Files are organized by module, for example in core/:
The following sub-folders are included in the binary Lucene distribution:
* `core/lucene-core-XX.jar`:
The compiled core Lucene library.
* `bin/`:
Convenience scripts to launch Lucene Luke and other index-maintenance tools.
* `modules/`:
All binary Lucene Java modules (JARs).
* `modules-thirdparty/`:
Third-party binary modules required to run Lucene Luke.
* `licenses/`:
Third-party licenses and notice files.
To review the documentation, read the main documentation page, located at:
`docs/index.html`

View File

@ -14,15 +14,8 @@
@rem limitations under the License.
@echo off
@setlocal enabledelayedexpansion
cd /d %~dp0
set JAVA_OPTIONS=%JAVA_OPTIONS% -Xmx1024m -Xms512m -XX:MaxMetaspaceSize=256m
set CLASSPATHS=.\*;.\lib\*;..\core\*;..\codecs\*;..\backward-codecs\*;..\queries\*;..\queryparser\*;..\suggest\*;..\misc\*
for /d %%A in (..\analysis\*) do (
set "CLASSPATHS=!CLASSPATHS!;%%A\*;%%A\lib\*"
)
start javaw -cp %CLASSPATHS% %JAVA_OPTIONS% org.apache.lucene.luke.app.desktop.LukeMain
SETLOCAL
SET MODULES=%~dp0..
start javaw --module-path %MODULES%\modules;%MODULES%\modules-thirdparty --add-modules org.apache.logging.log4j --module lucene.luke
ENDLOCAL

View File

@ -15,19 +15,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
LUKE_HOME=$(cd $(dirname $0) && pwd)
cd ${LUKE_HOME}
JAVA_OPTIONS="${JAVA_OPTIONS} -Xmx1024m -Xms512m -XX:MaxMetaspaceSize=256m"
CLASSPATHS="./*:./lib/*:../core/*:../codecs/*:../backward-codecs/*:../queries/*:../queryparser/*:../suggest/*:../misc/*"
for dir in `ls ../analysis`; do
CLASSPATHS="${CLASSPATHS}:../analysis/${dir}/*:../analysis/${dir}/lib/*"
done
LOG_DIR=${HOME}/.luke.d/
if [[ ! -d ${LOG_DIR} ]]; then
mkdir ${LOG_DIR}
fi
nohup java -cp ${CLASSPATHS} ${JAVA_OPTIONS} org.apache.lucene.luke.app.desktop.LukeMain > ${LOG_DIR}/luke_out.log 2>&1 &
# Resolve the distribution root. The module folders (modules/ and
# modules-thirdparty/) are siblings of the folder holding this script,
# mirroring the Windows launcher, which uses %~dp0.. for the same purpose.
MODULES=$(cd "$(dirname "$0")/.." && pwd)

# Module path entries are joined with ':' here; an unquoted ';' would be
# parsed by the shell as a command terminator and cut the java command short.
# The whole path is quoted so that install locations containing spaces work.
java --module-path "$MODULES/modules:$MODULES/modules-thirdparty" --add-modules org.apache.logging.log4j --module lucene.luke

View File

@ -1,3 +1,6 @@
This license applies to knn-token-vectors resource in Lucene demo.
https://nlp.stanford.edu/projects/glove
Public Domain Dedication and License (PDDL)

View File

@ -57,12 +57,12 @@ tasks.withType(Jar) {
}
}
// Configure the default JAR without any class path information
// (this may actually be wrong - perhaps we should add the
// "distribution" paths here.
jar {
manifest {
}
// Process UTF8 property files to unicode escapes.
tasks.withType(ProcessResources).configureEach { task ->
task.filesMatching("**/messages*.properties", {
filteringCharset = 'UTF-8'
filter(EscapeUnicode)
})
}
// Configure "stand-alone" JAR with proper dependency classpath links.

View File

@ -20,6 +20,14 @@
This is Luke, Apache Lucene low-level index inspection and repair utility.
Luke requires Java ${required.java.version}. You can start it with:
```
java -jar ${luke.cmd}
```
or, using Java modules:
```
java --module-path . --add-modules org.apache.logging.log4j --module lucene.luke
```
Happy index hacking!

View File

@ -17,13 +17,8 @@
package org.apache.lucene.luke.app.desktop.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.Locale;
import java.util.PropertyResourceBundle;
import java.util.ResourceBundle;
/** Utilities for accessing message resources. */
@ -41,24 +36,8 @@ public class MessageUtils {
return new MessageFormat(pattern, Locale.ENGLISH).format(args);
}
// https://stackoverflow.com/questions/4659929/how-to-use-utf-8-in-resource-properties-with-resourcebundle
private static ResourceBundle.Control UTF8_RESOURCEBUNDLE_CONTROL =
new ResourceBundle.Control() {
@Override
public ResourceBundle newBundle(
String baseName, Locale locale, String format, ClassLoader loader, boolean reload)
throws IllegalAccessException, InstantiationException, IOException {
String bundleName = toBundleName(baseName, locale);
String resourceName = toResourceName(bundleName, "properties");
try (InputStream is = loader.getResourceAsStream(resourceName)) {
return new PropertyResourceBundle(new InputStreamReader(is, StandardCharsets.UTF_8));
}
}
};
private static ResourceBundle bundle =
ResourceBundle.getBundle(
MESSAGE_BUNDLE_BASENAME, Locale.ENGLISH, UTF8_RESOURCEBUNDLE_CONTROL);
ResourceBundle.getBundle(MESSAGE_BUNDLE_BASENAME, Locale.ENGLISH);
private MessageUtils() {}
}

View File

@ -15,6 +15,9 @@
# limitations under the License.
#
# This file must use UTF-8 encoding. Non-ASCII characters are replaced
# with Unicode escapes at build time.
# Common
label.status=Status:
label.help=Help