mirror of https://github.com/apache/lucene.git
LUCENE-3312: Merged revision(s) 1366639-1371131 from lucene/dev/trunk:

- SOLR-3259: default /get to json
- minor tweaks to update script example
- LUCENE-4268: Rename ResourceAsStreamReasourceLoader to ClasspathResourceLoader, provide FilesystemResourceLoader, bug fixing
- LUCENE-4268: Fix test bug
- SOLR-3648: Fix Velocity template loading in SolrCloud mode
- fix confusing IW infoStream message
- Fix rawtypes warning in Java 7 and 8, make the SuppressWarnings more local
- Nicer solution to generic array creation (still problematic in Java 6, but correct in Java 7 if done this way)
- Disable test failing with Java 8
- Allow detecting of Java 8
- LUCENE-4109: BooleanQueries are not parsed correctly with the flexible queryparser
- LUCENE-4269: remove BalancedSegmentMergePolicy (use TieredMergePolicy instead)
- LUCENE-4269: deprecate BalancedSegmentMergePolicy (use TieredMergePolicy instead)
- LUCENE-4190: restrict allowed filenames to reduce risk of deleting non-lucene file from the index directory
- fix the monkey: connection loss and expiration cause NPE
- upgrade checkJavaDocs.py to python3
- LUCENE-3884: Move ElisionFilter out of .fr package
- fix encoding in javadocs checker
- LUCENE-2501: fix thread hazard when threads add same field with different IndexOptions at the same time

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3312@1371142 13f79535-47bb-0310-9956-ffa450edef68
commit 148d99cbbc

build.xml
@@ -176,22 +176,57 @@
     </subant>
   </target>

-  <target name="jar-checksums" depends="resolve" description="Recompute SHA1 checksums for all JAR files.">
-    <delete>
-      <fileset dir="${basedir}">
-        <include name="**/*.jar.sha1"/>
-      </fileset>
-    </delete>
+  <target name="jar-checksums" description="Recompute SHA1 checksums for all JAR files.">
+    <sequential>
+      <subant target="jar-checksums" inheritall="false" failonerror="true">
+        <fileset dir="lucene" includes="build.xml" />
+        <fileset dir="solr" includes="build.xml" />
+      </subant>
+    </sequential>
+  </target>

-    <checksum algorithm="SHA1" fileext=".sha1">
-      <fileset dir="${basedir}">
-        <include name="**/*.jar"/>
-      </fileset>
-    </checksum>
+  <property name="python32.exe" value="python3.2" />
+  <property name="JAVA6_HOME" value="/usr/local/jdk1.6.0_27"/>
+  <property name="JAVA7_HOME" value="/usr/local/jdk1.7.0_01"/>
+  <property name="fakeRelease" value="lucene/build/fakeRelease"/>
+  <property name="fakeReleaseTmp" value="lucene/build/fakeReleaseTmp"/>
+  <property name="fakeReleaseVersion" value="5.0"/> <!-- *not* -SNAPSHOT, the real version -->

-    <fixcrlf
-      srcdir="${basedir}"
-      includes="**/*.jar.sha1"
-      eol="lf" fixlast="true" encoding="US-ASCII" />
-  </target>
+  <target name="nightly-smoke" description="Builds an unsigned release and smoke tests it." depends="clean">
+    <sequential>
+      <subant target="prepare-release-no-sign" inheritall="false" failonerror="true">
+        <fileset dir="lucene" includes="build.xml" />
+        <fileset dir="solr" includes="build.xml" />
+        <property name="version" value="${fakeReleaseVersion}" />
+      </subant>
+      <delete dir="${fakeRelease}"/>
+      <delete dir="${fakeReleaseTmp}"/>
+      <mkdir dir="${fakeRelease}"/>
+      <copy todir="${fakeRelease}/lucene">
+        <fileset dir="lucene/dist"/>
+      </copy>
+      <copy todir="${fakeRelease}/lucene/changes">
+        <fileset dir="lucene/build/docs/changes"/>
+      </copy>
+      <get src="http://people.apache.org/keys/group/lucene.asc"
+           dest="${fakeRelease}/lucene/KEYS"/>
+      <copy todir="${fakeRelease}/solr">
+        <fileset dir="solr/package"/>
+      </copy>
+      <copy file="${fakeRelease}/lucene/KEYS" todir="${fakeRelease}/solr"/>
+      <makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
+      <exec executable="${python32.exe}" failonerror="true">
+        <arg value="-u"/>
+        <arg value="dev-tools/scripts/smokeTestRelease.py"/>
+        <arg value="${fakeRelease.uri}"/>
+        <arg value="${fakeReleaseVersion}"/>
+        <arg value="${fakeReleaseTmp}"/>
+        <arg value="false"/>
+        <env key="JAVA6_HOME" value="${JAVA6_HOME}"/>
+        <env key="JAVA7_HOME" value="${JAVA7_HOME}"/>
+      </exec>
+      <delete dir="${fakeRelease}"/>
+      <delete dir="${fakeReleaseTmp}"/>
+    </sequential>
+  </target>
 </project>
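The nightly-smoke target above builds an unsigned fake release and hands it to the smoke tester. A minimal Python sketch of what its final <exec> step amounts to, assuming a python3.2 binary on the PATH; the file: URI and JDK paths are illustrative (in the real build Ant's <makeurl> and the properties above supply them):

    import os
    import subprocess

    # Mirrors the Ant <exec> in nightly-smoke: run the smoke tester with
    # the fake-release URI, version, scratch dir, and 'false' for the
    # isSigned flag (the fake release is unsigned).
    env = dict(os.environ,
               JAVA6_HOME='/usr/local/jdk1.6.0_27',   # illustrative
               JAVA7_HOME='/usr/local/jdk1.7.0_01')   # illustrative
    subprocess.check_call(
        ['python3.2', '-u', 'dev-tools/scripts/smokeTestRelease.py',
         'file:///path/to/lucene/build/fakeRelease',  # ${fakeRelease.uri}
         '5.0',                                       # ${fakeReleaseVersion}
         'lucene/build/fakeReleaseTmp',               # ${fakeReleaseTmp}
         'false'],
        env=env)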

@@ -15,30 +15,30 @@
   <classpathentry kind="src" path="lucene/sandbox/src/java"/>
   <classpathentry kind="src" path="lucene/sandbox/src/test"/>
   <classpathentry kind="src" path="lucene/test-framework/src/java"/>
-  <classpathentry kind="src" output="bin.tests-framework" path="lucene/test-framework/src/resources"/>
+  <classpathentry kind="src" output="bin/tests-framework" path="lucene/test-framework/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/common/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-common" path="lucene/analysis/common/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-common" path="lucene/analysis/common/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/common/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/icu/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-icu" path="lucene/analysis/icu/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-icu" path="lucene/analysis/icu/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/icu/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/kuromoji/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/kuromoji/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/phonetic/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/phonetic/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/smartcn/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/smartcn/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/stempel/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/stempel/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/morfologik/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/morfologik/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/uima/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-uima" path="lucene/analysis/uima/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-uima" path="lucene/analysis/uima/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/uima/src/test"/>
   <classpathentry kind="src" path="lucene/benchmark/src/java"/>
   <classpathentry kind="src" path="lucene/benchmark/src/test"/>
@@ -120,7 +120,7 @@
   <classpathentry kind="lib" path="solr/lib/slf4j-api-1.6.4.jar"/>
   <classpathentry kind="lib" path="solr/lib/slf4j-jdk14-1.6.4.jar"/>
   <classpathentry kind="lib" path="solr/lib/wstx-asl-3.2.7.jar"/>
-  <classpathentry kind="lib" path="solr/lib/zookeeper-3.3.5.jar"/>
+  <classpathentry kind="lib" path="solr/lib/zookeeper-3.3.6.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jetty-continuation-8.1.2.v20120308.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jetty-deploy-8.1.2.v20120308.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jetty-http-8.1.2.v20120308.jar"/>
@@ -175,5 +175,5 @@
   <classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
   <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
   <classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
-  <classpathentry kind="output" path="bin"/>
+  <classpathentry kind="output" path="bin/other"/>
 </classpath>
@@ -36,27 +36,25 @@ A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts

 B. How to generate Lucene/Solr Maven artifacts

-  Prerequisites: JDK 1.6+ and Ant 1.7.X
+  Prerequisites: JDK 1.6+ and Ant 1.8.2+

   Run 'ant generate-maven-artifacts' to create an internal Maven
   repository, including POMs, binary .jars, source .jars, and javadoc
   .jars.

-  You can run the above command in four possible places: the top-level
-  directory; under lucene/; under solr/; or under modules/. From the
-  top-level directory, from lucene/, or from modules/, the internal
-  repository will be located at dist/maven/. From solr/, the internal
-  repository will be located at package/maven/.
+  You can run the above command in three possible places: the top-level
+  directory; under lucene/; or under solr/. From the top-level directory
+  or from lucene/, the internal repository will be located at dist/maven/.
+  From solr/, the internal repository will be located at package/maven/.


 C. How to deploy Maven artifacts to a repository

-  Prerequisites: JDK 1.6+ and Ant 1.7.X
+  Prerequisites: JDK 1.6+ and Ant 1.8.2+

-  You can deploy targets for all of Lucene/Solr, only Lucene, only Solr,
-  or only modules/, as in B. above. To deploy to a Maven repository, the
-  command is the same as in B. above, with the addition of two system
-  properties:
+  You can deploy targets for all of Lucene/Solr, only Lucene, or only Solr,
+  as in B. above. To deploy to a Maven repository, the command is the same
+  as in B. above, with the addition of two system properties:

     ant -Dm2.repository.id=my-repo-id \
         -Dm2.repository.url=http://example.org/my/repo \
@@ -101,7 +99,7 @@ D. How to use Maven to build Lucene/Solr
   the default, you can supply an alternate version on the command line
   with the above command, e.g.:

-    ant -Dversion=5.0-my-special-version get-maven-poms
+    ant -Dversion=my-special-version get-maven-poms

   Note: if you change the version in the POMs, there is one test method
   that will fail under maven-surefire-plugin:

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -77,33 +71,5 @@
         </excludes>
       </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.lucene.analysis.charfilter.HtmlStripCharFilter</mainClass>
-              <name>HtmlStripCharFilter</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.analysis.en.PorterStemmer</mainClass>
-              <name>EnglishPorterStemmer</name>
-            </program>
-            <program>
-              <mainClass>org.tartarus.snowball.TestApp</mainClass>
-              <name>SnowballTestApp</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

@@ -40,15 +40,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -39,15 +39,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -39,15 +39,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -75,6 +69,11 @@
   <build>
     <sourceDirectory>${module-path}/src/java</sourceDirectory>
     <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${module-path}/src/resources</directory>
+      </resource>
+    </resources>
     <testResources>
       <testResource>
         <directory>${project.build.testSourceDirectory}</directory>

@@ -39,15 +39,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -75,6 +69,11 @@
   <build>
     <sourceDirectory>${module-path}/src/java</sourceDirectory>
    <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${module-path}/src/resources</directory>
+      </resource>
+    </resources>
     <testResources>
       <testResource>
         <directory>${project.build.testSourceDirectory}</directory>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -41,15 +41,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -120,41 +114,5 @@
         </includes>
       </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.lucene.benchmark.byTask.Benchmark</mainClass>
-              <name>Benchmark</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.benchmark.quality.trec.QueryDriver</mainClass>
-              <name>QueryDriver</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder</mainClass>
-              <name>QualityQueriesFinder</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.benchmark.utils.ExtractReuters</mainClass>
-              <name>ExtractReuters</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.benchmark.utils.ExtractWikipedia</mainClass>
-              <name>ExtractWikipedia</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -104,40 +98,6 @@
           </systemPropertyVariables>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.lucene.index.CheckIndex</mainClass>
-              <name>CheckIndex</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.index.IndexReader</mainClass>
-              <name>IndexReader</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.store.LockStressTest</mainClass>
-              <name>LockStressTest</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.store.LockVerifyServer</mainClass>
-              <name>LockVerifyServer</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.util.English</mainClass>
-              <name>English</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -87,30 +81,5 @@
         </excludes>
       </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <assembleDirectory>${build-directory}</assembleDirectory>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.lucene.demo.IndexFiles</mainClass>
-              <name>IndexFiles</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.demo.SearchFiles</mainClass>
-              <name>SearchFiles</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

@@ -39,15 +39,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -39,15 +39,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -39,15 +39,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -72,49 +66,5 @@
         </excludes>
       </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.lucene.index.FieldNormModifier</mainClass>
-              <name>FieldNormModifier</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.index.IndexSplitter</mainClass>
-              <name>IndexSplitter</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.index.MultiPassIndexSplitter</mainClass>
-              <name>MultiPassIndexSplitter</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.misc.GetTermInfo</mainClass>
-              <name>GetTermInfo</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.misc.HighFreqTerms</mainClass>
-              <name>HighFreqTerms</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.misc.IndexMergeTool</mainClass>
-              <name>IndexMergeTool</name>
-            </program>
-            <program>
-              <mainClass>org.apache.lucene.misc.LengthNormModifier</mainClass>
-              <name>LengthNormModifier</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

@@ -35,15 +35,9 @@
     <module-directory>lucene</module-directory>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <modules>
     <module>core</module>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -31,15 +31,18 @@
   <version>@version@</version>
   <packaging>pom</packaging>
   <name>Grandparent POM for Apache Lucene Core and Apache Solr</name>
-  <description>Parent POM for Apache Lucene Core and Apache Solr</description>
-  <url>http://lucene.apache.org/java</url>
+  <description>Grandparent POM for Apache Lucene Core and Apache Solr</description>
+  <url>http://lucene.apache.org</url>
   <modules>
     <module>lucene</module>
     <module>solr</module>
   </modules>
   <properties>
     <top-level>..</top-level>
-    <base.specification.version>4.0.0</base.specification.version>
+    <vc-anonymous-base-url>http://svn.apache.org/repos/asf/lucene/dev/trunk</vc-anonymous-base-url>
+    <vc-dev-base-url>https://svn.apache.org/repos/asf/lucene/dev/trunk</vc-dev-base-url>
+    <vc-browse-base-url>http://svn.apache.org/viewvc/lucene/dev/trunk</vc-browse-base-url>
+    <base.specification.version>5.0.0</base.specification.version>
     <maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
     <java.compat.version>1.6</java.compat.version>
     <jetty.version>8.1.2.v20120308</jetty.version>
@@ -69,11 +72,11 @@
   </properties>
   <issueManagement>
     <system>JIRA</system>
-    <url>http://issues.apache.org/jira/browse/LUCENE</url>
+    <url>https://issues.apache.org/jira/browse/LUCENE</url>
   </issueManagement>
   <ciManagement>
-    <system>Hudson</system>
-    <url>http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/</url>
+    <system>Jenkins</system>
+    <url>https://builds.apache.org/computer/lucene/</url>
   </ciManagement>
   <mailingLists>
     <mailingList>
@@ -109,15 +112,9 @@
   </mailingLists>
   <inceptionYear>2000</inceptionYear>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}</developerConnection>
+    <url>${vc-browse-base-url}</url>
   </scm>
   <licenses>
     <license>
@@ -298,7 +295,7 @@
       <dependency>
         <groupId>org.apache.zookeeper</groupId>
        <artifactId>zookeeper</artifactId>
-        <version>3.3.5</version>
+        <version>3.3.6</version>
       </dependency>
       <dependency>
         <groupId>org.carrot2</groupId>
@@ -549,11 +546,6 @@
           </archive>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <version>1.2.1</version>
-      </plugin>
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>
@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -41,15 +41,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -42,15 +42,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -38,15 +38,9 @@
     <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -254,37 +248,6 @@
           </systemPropertyVariables>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
-              <name>JettySolrRunner</name>
-            </program>
-            <program>
-              <mainClass>org.apache.solr.util.BitSetPerf</mainClass>
-              <name>BitSetPerf</name>
-              <extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
-            </program>
-            <program>
-              <mainClass>org.apache.solr.util.SimplePostTool</mainClass>
-              <name>SimplePostTool</name>
-            </program>
-            <program>
-              <mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
-              <name>SuggestMissingFactories</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>
@@ -43,26 +43,14 @@
     <module-directory>solr</module-directory>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <issueManagement>
     <system>JIRA</system>
-    <url>http://issues.apache.org/jira/browse/SOLR</url>
+    <url>https://issues.apache.org/jira/browse/SOLR</url>
   </issueManagement>
-  <ciManagement>
-    <system>Hudson</system>
-    <url>
-      http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
-    </url>
-  </ciManagement>
   <mailingLists>
     <mailingList>
       <name>Solr User List</name>
@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <!-- These dependencies are compile scope because this is a test framework. -->
@@ -60,20 +54,27 @@
       <artifactId>solr-core</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+    </dependency>
+    <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
+         you can exclude the three Jetty dependencies below. -->
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-      <scope>runtime</scope>
+      <artifactId>jetty-servlet</artifactId>
     </dependency>
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-util</artifactId>
     </dependency>
-    <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
-         you can exclude the two Jetty dependencies below. -->
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-server</artifactId>
       <scope>runtime</scope>
     </dependency>
     <dependency>
@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -23,7 +23,7 @@ reMarkup = re.compile('<.*?>')

 def checkSummary(fullPath):
   printed = False
-  f = open(fullPath)
+  f = open(fullPath, encoding='UTF-8')
   anyMissing = False
   sawPackage = False
   desc = []
@@ -41,10 +41,10 @@ def checkSummary(fullPath):
       desc = desc.strip()
       if desc == '':
         if not printed:
-          print
-          print fullPath
+          print()
+          print(fullPath)
           printed = True
-        print ' no package description (missing package.html in src?)'
+        print(' no package description (missing package.html in src?)')
         anyMissing = True
       desc = None
     else:
@@ -52,17 +52,17 @@ def checkSummary(fullPath):

       if lineLower in ('<td>&nbsp;</td>', '<td></td>', '<td class="collast">&nbsp;</td>'):
         if not printed:
-          print
-          print fullPath
+          print()
+          print(fullPath)
           printed = True
-        print ' missing: %s' % unescapeHTML(lastHREF)
+        print(' missing: %s' % unescapeHTML(lastHREF))
         anyMissing = True
       elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
         if not printed:
-          print
-          print fullPath
+          print()
+          print(fullPath)
           printed = True
-        print ' license-is-javadoc: %s' % unescapeHTML(lastHREF)
+        print(' license-is-javadoc: %s' % unescapeHTML(lastHREF))
         anyMissing = True
       m = reHREF.search(line)
       if m is not None:
@@ -85,17 +85,17 @@ def checkPackageSummaries(root, level='class'):
   """

   if level != 'class' and level != 'package':
-    print 'unsupported level: %s, must be "class" or "package"' % level
+    print('unsupported level: %s, must be "class" or "package"' % level)
     sys.exit(1)

   #for dirPath, dirNames, fileNames in os.walk('%s/lucene/build/docs/api' % root):

   if False:
     os.chdir(root)
-    print
-    print 'Run "ant javadocs" > javadocs.log...'
+    print()
+    print('Run "ant javadocs" > javadocs.log...')
     if os.system('ant javadocs > javadocs.log 2>&1'):
-      print ' FAILED'
+      print(' FAILED')
       sys.exit(1)

   anyMissing = False
@@ -116,14 +116,14 @@ def checkPackageSummaries(root, level='class'):

 if __name__ == '__main__':
   if len(sys.argv) < 2 or len(sys.argv) > 3:
-    print 'usage: %s <dir> [class|package]' % sys.argv[0]
+    print('usage: %s <dir> [class|package]' % sys.argv[0])
     sys.exit(1)
   if len(sys.argv) == 2:
     level = 'class'
   else:
     level = sys.argv[2]
   if checkPackageSummaries(sys.argv[1], level):
-    print
-    print 'Missing javadocs were found!'
+    print()
+    print('Missing javadocs were found!')
     sys.exit(1)
   sys.exit(0)
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@@ -20,12 +20,12 @@ import subprocess
 import signal
 import shutil
 import hashlib
-import httplib
+import http.client
 import re
-import urllib2
-import urlparse
+import urllib.request, urllib.error, urllib.parse
+import urllib.parse
 import sys
-import HTMLParser
+import html.parser
 from collections import defaultdict
 import xml.etree.ElementTree as ET
 import filecmp
@@ -38,9 +38,9 @@ import checkJavadocLinks
 # tested on Linux and on Cygwin under Windows 7.
 
 def unshortenURL(url):
-  parsed = urlparse.urlparse(url)
+  parsed = urllib.parse.urlparse(url)
   if parsed[0] in ('http', 'https'):
-    h = httplib.HTTPConnection(parsed.netloc)
+    h = http.client.HTTPConnection(parsed.netloc)
     h.request('HEAD', parsed.path)
     response = h.getresponse()
     if response.status/100 == 3 and response.getheader('Location'):
@@ -101,8 +101,8 @@ def getHREFs(urlString):
   # Deref any redirects
   while True:
-    url = urlparse.urlparse(urlString)
-    h = httplib.HTTPConnection(url.netloc)
+    url = urllib.parse.urlparse(urlString)
+    h = http.client.HTTPConnection(url.netloc)
     h.request('GET', url.path)
     r = h.getresponse()
     newLoc = r.getheader('location')
@@ -112,8 +112,8 @@ def getHREFs(urlString):
       break
 
   links = []
-  for subUrl, text in reHREF.findall(urllib2.urlopen(urlString).read()):
-    fullURL = urlparse.urljoin(urlString, subUrl)
+  for subUrl, text in reHREF.findall(urllib.request.urlopen(urlString).read().decode('UTF-8')):
+    fullURL = urllib.parse.urljoin(urlString, subUrl)
     links.append((text, fullURL))
   return links
 
@@ -121,15 +121,15 @@ def download(name, urlString, tmpDir, quiet=False):
   fileName = '%s/%s' % (tmpDir, name)
   if DEBUG and os.path.exists(fileName):
     if not quiet and fileName.find('.asc') == -1:
-      print '    already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
+      print('    already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
     return
-  fIn = urllib2.urlopen(urlString)
+  fIn = urllib.request.urlopen(urlString)
   fOut = open(fileName, 'wb')
   success = False
   try:
     while True:
       s = fIn.read(65536)
-      if s == '':
+      if s == b'':
         break
       fOut.write(s)
     fOut.close()
@@ -141,14 +141,14 @@ def download(name, urlString, tmpDir, quiet=False):
     if not success:
       os.remove(fileName)
   if not quiet and fileName.find('.asc') == -1:
-    print '    %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
+    print('    %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
 
 def load(urlString):
-  return urllib2.urlopen(urlString).read()
+  return urllib.request.urlopen(urlString).read().decode('utf-8')
 
 def checkSigs(project, urlString, version, tmpDir, isSigned):
 
-  print '  test basics...'
+  print('  test basics...')
   ents = getDirEntries(urlString)
   artifact = None
   keysURL = None
@@ -210,7 +210,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
   if keysURL is None:
     raise RuntimeError('%s is missing KEYS' % project)
 
-  print '  get KEYS'
+  print('  get KEYS')
   download('%s.KEYS' % project, keysURL, tmpDir)
 
   keysFile = '%s/%s.KEYS' % (tmpDir, project)
@@ -219,7 +219,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
   gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
   if os.path.exists(gpgHomeDir):
     shutil.rmtree(gpgHomeDir)
-  os.makedirs(gpgHomeDir, 0700)
+  os.makedirs(gpgHomeDir, 0o700)
   run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
       '%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
@@ -232,12 +232,12 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
   testChanges(project, version, changesURL)
 
   for artifact, urlString in artifacts:
-    print '  download %s...' % artifact
+    print('  download %s...' % artifact)
     download(artifact, urlString, tmpDir)
     verifyDigests(artifact, urlString, tmpDir)
 
     if isSigned:
-      print '    verify sig'
+      print('    verify sig')
       # Test sig (this is done with a clean brand-new GPG world)
       download(artifact + '.asc', urlString + '.asc', tmpDir)
       sigFile = '%s/%s.asc' % (tmpDir, artifact)
@@ -246,28 +246,28 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
       run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
           logFile)
       # Forward any GPG warnings, except the expected one (since its a clean world)
-      f = open(logFile, 'rb')
+      f = open(logFile, encoding='UTF-8')
       for line in f.readlines():
         if line.lower().find('warning') != -1 \
            and line.find('WARNING: This key is not certified with a trusted signature') == -1:
-          print '      GPG: %s' % line.strip()
+          print('      GPG: %s' % line.strip())
       f.close()
 
       # Test trust (this is done with the real users config)
       run('gpg --import %s' % (keysFile),
          '%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
-      print '    verify trust'
+      print('    verify trust')
       logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
       run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
       # Forward any GPG warnings:
-      f = open(logFile, 'rb')
+      f = open(logFile, encoding='UTF-8')
       for line in f.readlines():
         if line.lower().find('warning') != -1:
-          print '      GPG: %s' % line.strip()
+          print('      GPG: %s' % line.strip())
       f.close()
 
 def testChanges(project, version, changesURLString):
-  print '  check changes HTML...'
+  print('  check changes HTML...')
   changesURL = None
   for text, subURL in getDirEntries(changesURLString):
     if text == 'Changes.html':
@@ -287,7 +287,7 @@ def testChangesText(dir, version, project):
     if 'CHANGES.txt' in files:
       fullPath = '%s/CHANGES.txt' % root
       #print 'CHECK %s' % fullPath
-      checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
+      checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
 
 def checkChangesContent(s, version, name, project, isHTML):
@@ -336,7 +336,7 @@ def run(command, logFile):
     raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath))
 
 def verifyDigests(artifact, urlString, tmpDir):
-  print '    verify md5/sha1 digests'
+  print('    verify md5/sha1 digests')
   md5Expected, t = load(urlString + '.md5').strip().split()
   if t != '*'+artifact:
     raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))
@@ -347,10 +347,10 @@ def verifyDigests(artifact, urlString, tmpDir):
 
   m = hashlib.md5()
   s = hashlib.sha1()
-  f = open('%s/%s' % (tmpDir, artifact))
+  f = open('%s/%s' % (tmpDir, artifact), 'rb')
   while True:
     x = f.read(65536)
-    if x == '':
+    if len(x) == 0:
       break
     m.update(x)
     s.update(x)
@@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, tmpDir):
     raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
 
 def getDirEntries(urlString):
+  if urlString.startswith('file:/') and not urlString.startswith('file://'):
+    # stupid bogus ant URI
+    urlString = "file:///" + urlString[6:]
+
   if urlString.startswith('file://'):
     path = urlString[7:]
     if path.endswith('/'):
@@ -388,7 +392,7 @@ def unpack(project, tmpDir, artifact, version):
     shutil.rmtree(destDir)
   os.makedirs(destDir)
   os.chdir(destDir)
-  print '  unpack %s...' % artifact
+  print('  unpack %s...' % artifact)
   unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
   if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'):
     run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
@@ -437,12 +441,14 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
 
   if project == 'lucene':
     # TODO: clean this up to not be a list of modules that we must maintain
-    extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework')
+    extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
     if isSrc:
       extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
   else:
     extras = ()
 
+  # TODO: if solr, verify lucene/licenses, solr/licenses are present
+
   for e in extras:
     if e not in l:
       raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
@@ -453,81 +459,81 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
     raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
 
   if isSrc:
-    print '    make sure no JARs/WARs in src dist...'
+    print('    make sure no JARs/WARs in src dist...')
     lines = os.popen('find . -name \\*.jar').readlines()
     if len(lines) != 0:
-      print '    FAILED:'
+      print('    FAILED:')
       for line in lines:
-        print '      %s' % line.strip()
+        print('      %s' % line.strip())
       raise RuntimeError('source release has JARs...')
     lines = os.popen('find . -name \\*.war').readlines()
     if len(lines) != 0:
-      print '    FAILED:'
+      print('    FAILED:')
      for line in lines:
-        print '      %s' % line.strip()
+        print('      %s' % line.strip())
      raise RuntimeError('source release has WARs...')
 
-    print '    run "ant validate"'
+    print('    run "ant validate"')
    run('%s; ant validate' % javaExe('1.7'), '%s/validate.log' % unpackPath)
 
    if project == 'lucene':
-      print '    run tests w/ Java 6...'
+      print('    run tests w/ Java 6...')
      run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
      run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath)
      testDemo(isSrc, version)
      # test javadocs
-      print '    generate javadocs w/ Java 6...'
+      print('    generate javadocs w/ Java 6...')
      run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
      checkJavadocpath('%s/build/docs' % unpackPath)
    else:
-      print '    run tests w/ Java 6...'
+      print('    run tests w/ Java 6...')
      run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
 
      # test javadocs
-      print '    generate javadocs w/ Java 6...'
+      print('    generate javadocs w/ Java 6...')
      run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
      checkJavadocpath('%s/build/docs' % unpackPath)
 
-      print '    run tests w/ Java 7...'
+      print('    run tests w/ Java 7...')
      run('%s; ant test' % javaExe('1.7'), '%s/test.log' % unpackPath)
 
      # test javadocs
-      print '    generate javadocs w/ Java 7...'
+      print('    generate javadocs w/ Java 7...')
      run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath)
      checkJavadocpath('%s/build/docs' % unpackPath)
 
      os.chdir('solr')
-      print '    test solr example w/ Java 6...'
+      print('    test solr example w/ Java 6...')
      run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath)
      testSolrExample(unpackPath, JAVA6_HOME, True)
 
-      print '    test solr example w/ Java 7...'
+      print('    test solr example w/ Java 7...')
      run('%s; ant clean example' % javaExe('1.7'), '%s/antexample.log' % unpackPath)
      testSolrExample(unpackPath, JAVA7_HOME, True)
      os.chdir('..')
 
-      print '    check NOTICE'
+      print('    check NOTICE')
      testNotice(unpackPath)
 
  else:
    if project == 'lucene':
      testDemo(isSrc, version)
    else:
-      print '    test solr example w/ Java 6...'
+      print('    test solr example w/ Java 6...')
      testSolrExample(unpackPath, JAVA6_HOME, False)
 
-      print '    test solr example w/ Java 7...'
+      print('    test solr example w/ Java 7...')
      testSolrExample(unpackPath, JAVA7_HOME, False)
 
  testChangesText('.', version, project)
 
  if project == 'lucene' and not isSrc:
-    print '    check Lucene\'s javadoc JAR'
+    print('    check Lucene\'s javadoc JAR')
    checkJavadocpath('%s/docs' % unpackPath)
 
 def testNotice(unpackPath):
-  solrNotice = open('%s/NOTICE.txt' % unpackPath).read()
-  luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath).read()
+  solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
+  luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
 
   expected = """
 =========================================================================
@@ -545,12 +551,12 @@ def readSolrOutput(p, startupEvent, logFile):
   try:
     while True:
       line = p.readline()
-      if line == '':
+      if len(line) == 0:
         break
       f.write(line)
       f.flush()
       # print 'SOLR: %s' % line.strip()
-      if line.find('Started SocketConnector@0.0.0.0:8983') != -1:
+      if line.decode('UTF-8').find('Started SocketConnector@0.0.0.0:8983') != -1:
         startupEvent.set()
   finally:
     f.close()
@@ -558,7 +564,7 @@ def readSolrOutput(p, startupEvent, logFile):
 def testSolrExample(unpackPath, javaPath, isSrc):
   logFile = '%s/solr-example.log' % unpackPath
   os.chdir('example')
-  print '      start Solr instance (log=%s)...' % logFile
+  print('      start Solr instance (log=%s)...' % logFile)
   env = {}
   env.update(os.environ)
   env['JAVA_HOME'] = javaPath
@@ -572,21 +578,21 @@ def testSolrExample(unpackPath, javaPath, isSrc):
   # Make sure Solr finishes startup:
   startupEvent.wait()
-  print '      startup done'
+  print('      startup done')
 
   try:
-    print '      test utf8...'
+    print('      test utf8...')
     run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
-    print '      index example docs...'
+    print('      index example docs...')
     run('sh ./exampledocs/post.sh ./exampledocs/*.xml', 'post-example-docs.log')
-    print '      run query...'
-    s = urllib2.urlopen('http://localhost:8983/solr/select/?q=video').read()
+    print('      run query...')
+    s = urllib.request.urlopen('http://localhost:8983/solr/select/?q=video').read().decode('UTF-8')
     if s.find('<result name="response" numFound="3" start="0">') == -1:
-      print 'FAILED: response is:\n%s' % s
+      print('FAILED: response is:\n%s' % s)
       raise RuntimeError('query on solr example instance failed')
   finally:
     # Stop server:
-    print '      stop server (SIGINT)...'
+    print('      stop server (SIGINT)...')
     os.kill(server.pid, signal.SIGINT)
 
     # Give it 10 seconds to gracefully shut down
@@ -594,14 +600,14 @@ def testSolrExample(unpackPath, javaPath, isSrc):
 
     if serverThread.isAlive():
       # Kill server:
-      print '***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...'
+      print('***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...')
       os.kill(server.pid, signal.SIGKILL)
 
       serverThread.join(10.0)
 
       if serverThread.isAlive():
         # Shouldn't happen unless something is seriously wrong...
-        print '***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...'
+        print('***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...')
 
   os.chdir('..')
 
@@ -615,13 +621,13 @@ def checkJavadocpath(path):
   if checkJavaDocs.checkPackageSummaries(path):
     # disabled: RM cannot fix all this, see LUCENE-3887
     # raise RuntimeError('javadoc problems')
-    print '\n***WARNING***: javadocs want to fail!\n'
+    print('\n***WARNING***: javadocs want to fail!\n')
 
   if checkJavadocLinks.checkAll(path):
     raise RuntimeError('broken javadocs links found!')
 
 def testDemo(isSrc, version):
-  print '    test demo...'
+  print('    test demo...')
   sep = ';' if cygwin else ':'
   if isSrc:
     cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep)
@@ -632,14 +638,14 @@ def testDemo(isSrc, version):
   run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log')
   run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log')
   reMatchingDocs = re.compile('(\d+) total matching documents')
-  m = reMatchingDocs.search(open('search.log', 'rb').read())
+  m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
   if m is None:
     raise RuntimeError('lucene demo\'s SearchFiles found no results')
   else:
     numHits = int(m.group(1))
     if numHits < 100:
       raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
-    print '    got %d hits for query "lucene"' % numHits
+    print('    got %d hits for query "lucene"' % numHits)
 
 def checkMaven(baseURL, tmpDir, version, isSigned):
   # Locate the release branch in subversion
@@ -652,11 +658,11 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
     if text == releaseBranchText:
       releaseBranchSvnURL = subURL
 
-  print '    get POM templates',
+  print('    get POM templates', end=' ')
   POMtemplates = defaultdict()
   getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
-  print
-  print '    download artifacts',
+  print()
+  print('    download artifacts', end=' ')
   artifacts = {'lucene': [], 'solr': []}
   for project in ('lucene', 'solr'):
     artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
@@ -664,30 +670,30 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
     if not os.path.exists(targetDir):
       os.makedirs(targetDir)
     crawl(artifacts[project], artifactsURL, targetDir)
-  print
-  print '    verify that each binary artifact has a deployed POM...'
+  print()
+  print('    verify that each binary artifact has a deployed POM...')
   verifyPOMperBinaryArtifact(artifacts, version)
-  print '    verify that there is an artifact for each POM template...'
+  print('    verify that there is an artifact for each POM template...')
   verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
-  print "    verify Maven artifacts' md5/sha1 digests..."
+  print("    verify Maven artifacts' md5/sha1 digests...")
   verifyMavenDigests(artifacts)
-  print '    verify that all non-Mavenized deps are deployed...'
+  print('    verify that all non-Mavenized deps are deployed...')
   nonMavenizedDeps = dict()
   checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
                         version, releaseBranchSvnURL)
-  print '    check for javadoc and sources artifacts...'
+  print('    check for javadoc and sources artifacts...')
   checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
-  print "    verify deployed POMs' coordinates..."
+  print("    verify deployed POMs' coordinates...")
   verifyDeployedPOMsCoordinates(artifacts, version)
   if isSigned:
-    print '    verify maven artifact sigs',
+    print('    verify maven artifact sigs', end=' ')
     verifyMavenSigs(baseURL, tmpDir, artifacts)
 
   distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
 
-  print '    verify that non-Mavenized deps are same as in the binary distribution...'
+  print('    verify that non-Mavenized deps are same as in the binary distribution...')
   checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
-  print '    verify that Maven artifacts are same as in the binary distribution...'
+  print('    verify that Maven artifacts are same as in the binary distribution...')
   checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
 
 def getDistributionsForMavenChecks(tmpDir, version, baseURL):
@@ -697,19 +703,19 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
     if project == 'solr': distribution = 'apache-' + distribution
     if not os.path.exists('%s/%s' % (tmpDir, distribution)):
       distURL = '%s/%s/%s' % (baseURL, project, distribution)
-      print '      download %s...' % distribution,
+      print('      download %s...' % distribution, end=' ')
       download(distribution, distURL, tmpDir)
     destDir = '%s/unpack-%s-maven' % (tmpDir, project)
     if os.path.exists(destDir):
       shutil.rmtree(destDir)
     os.makedirs(destDir)
     os.chdir(destDir)
-    print '    unpack %s...' % distribution
+    print('    unpack %s...' % distribution)
     unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
     run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
     if project == 'solr': # unpack the Solr war
       unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
-      print '      unpack Solr war...'
+      print('      unpack Solr war...')
       run('jar xvf */dist/*.war', unpackLogFile)
     distributionFiles[project] = []
     for root, dirs, files in os.walk(destDir):
@@ -719,7 +725,7 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
 def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
   for project in ('lucene', 'solr'):
     for artifact in artifacts[project]:
-      if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
+      if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
         javadocJar = artifact[:-4] + '-javadoc.jar'
         if javadocJar not in artifacts[project]:
           raise RuntimeError('missing: %s' % javadocJar)
@@ -732,7 +738,7 @@ def checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps):
     distFilenames = dict()
     for file in distributionFiles[project]:
       distFilenames[os.path.basename(file)] = file
-    for dep in nonMavenizedDeps.keys():
+    for dep in list(nonMavenizedDeps.keys()):
       if ('/%s/' % project) in dep:
         depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
         if not depOrigFilename in distFilenames:
@@ -753,9 +759,9 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
       distFilenames[baseName] = file
     for artifact in artifacts[project]:
       if reJarWar.search(artifact):
-        if artifact not in nonMavenizedDeps.keys():
+        if artifact not in list(nonMavenizedDeps.keys()):
           artifactFilename = os.path.basename(artifact)
-          if artifactFilename not in distFilenames.keys():
+          if artifactFilename not in list(distFilenames.keys()):
            raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
                               % (artifact, project))
          # TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
@@ -772,16 +778,17 @@ def verifyMavenDigests(artifacts):
      raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
    if artifactFile + '.sha1' not in artifacts[project]:
      raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
-    with open(artifactFile + '.md5', 'r') as md5File:
+    with open(artifactFile + '.md5', encoding='UTF-8') as md5File:
      md5Expected = md5File.read().strip()
-    with open(artifactFile + '.sha1', 'r') as sha1File:
+    with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File:
      sha1Expected = sha1File.read().strip()
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
-    inputFile = open(artifactFile)
+    inputFile = open(artifactFile, 'rb')
    while True:
      bytes = inputFile.read(65536)
-      if bytes == '': break
+      if len(bytes) == 0:
+        break
      md5.update(bytes)
      sha1.update(bytes)
    inputFile.close()
@@ -846,7 +853,7 @@ def checkNonMavenizedDeps(nonMavenizedDependencies, POMtemplates, artifacts,
    if releaseBranchSvnURL is None:
      pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
      if os.path.exists(pomPath):
-        doc2 = ET.XML(open(pomPath).read())
+        doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
        break
    else:
      entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
@@ -891,7 +898,7 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
    gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
    if os.path.exists(gpgHomeDir):
      shutil.rmtree(gpgHomeDir)
-    os.makedirs(gpgHomeDir, 0700)
+    os.makedirs(gpgHomeDir, 0o700)
    run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
        '%s/%s.gpg.import.log' % (tmpDir, project))
@@ -904,12 +911,12 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
      run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
          logFile)
      # Forward any GPG warnings, except the expected one (since its a clean world)
-      f = open(logFile, 'rb')
+      f = open(logFile, encoding='UTF-8')
      for line in f.readlines():
        if line.lower().find('warning') != -1 \
           and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
           and line.find('WARNING: using insecure memory') == -1:
-          print '        GPG: %s' % line.strip()
+          print('        GPG: %s' % line.strip())
      f.close()
 
      # Test trust (this is done with the real users config)
@@ -918,16 +925,16 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
      logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
      run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
      # Forward any GPG warnings:
-      f = open(logFile, 'rb')
+      f = open(logFile, encoding='UTF-8')
      for line in f.readlines():
        if line.lower().find('warning') != -1 \
           and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
           and line.find('WARNING: using insecure memory') == -1:
-          print '        GPG: %s' % line.strip()
+          print('        GPG: %s' % line.strip())
      f.close()
 
      sys.stdout.write('.')
-  print
+  print()
 
 def verifyPOMperBinaryArtifact(artifacts, version):
   """verify that each binary jar and war has a corresponding POM file"""
@@ -1023,17 +1030,20 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
 
 def main():
 
-  if len(sys.argv) != 4:
-    print
-    print 'Usage python -u %s BaseURL version tmpDir' % sys.argv[0]
-    print
+  if len(sys.argv) < 4:
+    print()
+    print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
+    print()
     sys.exit(1)
 
   baseURL = sys.argv[1]
   version = sys.argv[2]
   tmpDir = os.path.abspath(sys.argv[3])
+  isSigned = True
+  if len(sys.argv) == 5:
+    isSigned = (sys.argv[4] == "True")
 
-  smokeTest(baseURL, version, tmpDir, True)
+  smokeTest(baseURL, version, tmpDir, isSigned)
 
 def smokeTest(baseURL, version, tmpDir, isSigned):
@@ -1046,11 +1056,11 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
 
   lucenePath = None
   solrPath = None
-  print
-  print 'Load release URL "%s"...' % baseURL
+  print()
+  print('Load release URL "%s"...' % baseURL)
   newBaseURL = unshortenURL(baseURL)
   if newBaseURL != baseURL:
-    print '  unshortened: %s' % newBaseURL
+    print('  unshortened: %s' % newBaseURL)
     baseURL = newBaseURL
 
   for text, subURL in getDirEntries(baseURL):
@@ -1064,23 +1074,28 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
   if solrPath is None:
     raise RuntimeError('could not find solr subdir')
 
-  print
-  print 'Test Lucene...'
+  print()
+  print('Test Lucene...')
   checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
   for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
     unpack('lucene', tmpDir, artifact, version)
   unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
 
-  print
-  print 'Test Solr...'
+  print()
+  print('Test Solr...')
   checkSigs('solr', solrPath, version, tmpDir, isSigned)
   for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
     unpack('solr', tmpDir, artifact, version)
   unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
 
-  print 'Test Maven artifacts for Lucene and Solr...'
+  print('Test Maven artifacts for Lucene and Solr...')
   checkMaven(baseURL, tmpDir, version, isSigned)
 
 if __name__ == '__main__':
-  main()
+  try:
+    main()
+  except:
+    import traceback
+    traceback.print_exc()
+    sys.exit(1)
+  sys.exit(0)
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -6,6 +6,15 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 5.0.0 =======================
 
+======================= Lucene 4.0.0 =======================
+
+Bug Fixes
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+  twice for conjunctions: for most users this is no problem, but
+  if you had a customized Similarity that returned something other
+  than 1 when overlap == maxOverlap (always the case for conjunctions),
+  then the score would be incorrect. (Pascal Chollet, Robert Muir)
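To make the LUCENE-4297 entry above concrete, here is a minimal sketch
(not part of this commit; the class name is hypothetical) of a custom
Similarity whose coord() is not 1 even when overlap == maxOverlap,
which is exactly the situation where the doubled factor changed scores:

    import org.apache.lucene.search.similarities.DefaultSimilarity;

    // Hypothetical Similarity: coord() != 1 even for pure conjunctions.
    public final class PenalizedCoordSimilarity extends DefaultSimilarity {
      @Override
      public float coord(int overlap, int maxOverlap) {
        // Before the LUCENE-4297 fix, BooleanScorer2 applied this factor
        // twice for conjunctions, effectively squaring it in the score.
        return 0.8f * overlap / maxOverlap;
      }
    }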
 
 ======================= Lucene 4.0.0-BETA =======================
 
@@ -15,6 +24,9 @@ New features
   underlying PayloadFunction's explanation as the explanation
   for the payload score. (Scott Smerchek via Robert Muir)
 
+* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
+  such as primary keys (Mark Harwood, Mike McCandless)
+
 * LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
   iteration marks. (Robert Muir, Christian Moen)
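As a sketch of how the LUCENE-4069 format might be wired in (assuming
the 4.0 codec API; the class and the "id" field name are hypothetical,
not from this commit), a per-field codec can delegate just the
primary-key field to the bloom-filtered postings format:

    import org.apache.lucene.codecs.PostingsFormat;
    import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
    import org.apache.lucene.codecs.lucene40.Lucene40Codec;

    // Hypothetical codec: bloom-filter only the low-frequency "id" field.
    public final class BloomIdCodec extends Lucene40Codec {
      private final PostingsFormat idFormat =
          new BloomFilteringPostingsFormat(PostingsFormat.forName("Lucene40"));

      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return "id".equals(field) ? idFormat : super.getPostingsFormatForField(field);
      }
    }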
@@ -34,6 +46,22 @@ New features
   CharFilterFactories to the lucene-analysis module. The API is still
   experimental. (Chris Male, Robert Muir, Uwe Schindler)
 
+* LUCENE-4230: When pulling a DocsAndPositionsEnum you can now
+  specify whether or not you require payloads (in addition to
+  offsets); turning one or both off may allow some codec
+  implementations to optimize the enum implementation. (Robert Muir,
+  Mike McCandless)
+
+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
+  int docID), to attempt deletion by docID as long as the provided
+  reader is an NRT reader, and the segment has not yet been merged
+  away (Mike McCandless).
+
+* LUCENE-4286: Added option to CJKBigramFilter to always also output
+  unigrams. This can be used for a unigram+bigram approach, or at
+  index-time only for better support of short queries.
+  (Tom Burton-West, Robert Muir)
+
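A minimal sketch of the LUCENE-4286 option just described (assuming the
4.0 analysis API; the helper class is hypothetical, not from this
commit):

    import java.io.Reader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cjk.CJKBigramFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    // Hypothetical chain: bigram all CJK scripts and also emit unigrams.
    public final class CJKUnigramBigramChain {
      static TokenStream create(Reader reader) {
        TokenStream stream = new StandardTokenizer(Version.LUCENE_40, reader);
        return new CJKBigramFilter(stream,
            CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA
              | CJKBigramFilter.KATAKANA | CJKBigramFilter.HANGUL,
            true); // true = also output unigrams (the new LUCENE-4286 option)
      }
    }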
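And a hedged sketch of the LUCENE-4203 call (signature as described in
the entry above; the helper is hypothetical, not from this commit). The
point of the API is that deleting by docID is only valid while the
segment the NRT reader saw still exists:

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;

    // Hypothetical helper: cheap docID delete first, Term delete as fallback.
    public final class DeleteByDocID {
      static void delete(IndexWriter writer, AtomicReader nrtSubReader,
                         int docID, Term fallbackId) throws IOException {
        if (!writer.tryDeleteDocument(nrtSubReader, docID)) {
          // Segment was merged away (or reader is not NRT): fall back.
          writer.deleteDocuments(fallbackId);
        }
      }
    }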
 API Changes
 
 * LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@@ -69,6 +97,23 @@ API Changes
 
 * LUCENE-3747: Support Unicode 6.1.0. (Steve Rowe)
 
+* LUCENE-3884: Moved ElisionFilter out of org.apache.lucene.analysis.fr
+  package into org.apache.lucene.analysis.util. (Robert Muir)
+
+* LUCENE-4230: When pulling a DocsAndPositionsEnum you now pass an int
+  flags instead of the previous boolean needOffsets. Currently
+  recognized flags are DocsAndPositionsEnum.FLAG_PAYLOADS and
+  DocsAndPositionsEnum.FLAG_OFFSETS (Robert Muir, Mike McCandless)
+
+* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
+  instead of the previous boolean needsFlags; consistent with the changes
+  for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
+  is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
+
+* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
+  as the Store parameter didn't make sense: if you supplied Store.YES, you would only
+  receive an exception anyway. (Robert Muir)
+
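A short usage sketch of the int-flags variants described in the
LUCENE-4230/LUCENE-4273 entries above (assuming the 4.0 TermsEnum API;
the class is hypothetical, not part of this commit):

    import java.io.IOException;
    import org.apache.lucene.index.DocsAndPositionsEnum;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.Bits;

    public final class FlagsSketch {
      static void pull(TermsEnum termsEnum, Bits liveDocs) throws IOException {
        // Freqs required: pass DocsEnum.FLAG_FREQS (pass 0 to let the
        // codec skip them entirely).
        DocsEnum docs = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_FREQS);

        // Positions with payloads but without offsets:
        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(
            liveDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
      }
    }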
 Optimizations
 
 * LUCENE-4171: Performance improvements to Packed64.
@@ -80,8 +125,19 @@ Optimizations
 * LUCENE-4235: Remove enforcing of Filter rewrite for NRQ queries.
   (Uwe Schindler)
 
+* LUCENE-4279: Regenerated snowball Stemmers from snowball r554,
+  making them substantially more lightweight. Behavior is unchanged.
+  (Robert Muir)
+
+* LUCENE-4291: Reduced internal buffer size for JFlex-based tokenizers
+  such as StandardTokenizer from 32kb to 8kb.
+  (Raintung Li, Steven Rowe, Robert Muir)
+
 Bug Fixes
 
+* LUCENE-4109: BooleanQueries are not parsed correctly with the
+  flexible query parser. (Karsten Rauch via Robert Muir)
+
 * LUCENE-4176: Fix AnalyzingQueryParser to analyze range endpoints as bytes,
   so that it works correctly with Analyzers that produce binary non-UTF-8 terms
   such as CollationAnalyzer. (Nattapong Sirilappanich via Robert Muir)
@@ -113,6 +169,30 @@ Bug Fixes
 * LUCENE-4245: Make IndexWriter#close() and MergeScheduler#close()
   non-interruptible. (Mark Miller, Uwe Schindler)
 
+* LUCENE-4190: restrict allowed filenames that a codec may create to
+  the patterns recognized by IndexFileNames. This also fixes
+  IndexWriter to only delete files matching this pattern from an index
+  directory, to reduce risk when the wrong index path is accidentally
+  passed to IndexWriter (Robert Muir, Mike McCandless)
+
+* LUCENE-4277: Fix IndexWriter deadlock during rollback if flushable DWPT
+  instances are already checked out and queued up but not yet flushed.
+  (Simon Willnauer)
+
+* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
+  (Johannes Christen, Uwe Schindler, Robert Muir)
+
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+  (Robert Muir)
+
 Changes in Runtime Behavior
 
+* LUCENE-4109: Enable position increments in the flexible queryparser by default.
+  (Karsten Rauch via Robert Muir)
+
+* LUCENE-3616: Field throws exception if you try to set a boost on an
+  unindexed field or one that omits norms. (Robert Muir)
+
 Build
 
 * LUCENE-4094: Support overriding file.encoding on forked test JVMs
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -57,30 +57,6 @@ enumeration APIs. Here are the major changes:
       ...
     }
 
-  The bulk read API has also changed. Instead of this:
-
-    int[] docs = new int[256];
-    int[] freqs = new int[256];
-
-    while(true) {
-      int count = td.read(docs, freqs)
-      if (count == 0) {
-        break;
-      }
-      // use docs[i], freqs[i]
-    }
-
-  do this:
-
-    DocsEnum.BulkReadResult bulk = td.getBulkResult();
-    while(true) {
-      int count = td.read();
-      if (count == 0) {
-        break;
-      }
-      // use bulk.docs.ints[i] and bulk.freqs.ints[i]
-    }
-
 * TermPositions is renamed to DocsAndPositionsEnum, and no longer
   extends the docs only enumerator (DocsEnum).
@@ -170,7 +146,7 @@ enumeration APIs. Here are the major changes:
     Bits liveDocs = reader.getLiveDocs();
     DocsEnum docsEnum = null;
 
-    docsEnum = termsEnum.docs(liveDocs, docsEnum);
+    docsEnum = termsEnum.docs(liveDocs, docsEnum, needsFreqs);
 
   You can pass in a prior DocsEnum and it will be reused if possible.
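For contrast with the one-line change above, a complete (hypothetical,
not from this commit) loop using the boolean-flavored 4.0 API shown in
this hunk:

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.Bits;

    public final class PostingsSum {
      static long totalFreq(AtomicReader reader, TermsEnum termsEnum,
                            DocsEnum reuse) throws IOException {
        Bits liveDocs = reader.getLiveDocs();
        // needsFreqs = true, so freq() below is safe to call.
        DocsEnum docsEnum = termsEnum.docs(liveDocs, reuse, true);
        long sum = 0;
        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          sum += docsEnum.freq();
        }
        return sum;
      }
    }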
@@ -187,7 +163,7 @@ enumeration APIs. Here are the major changes:
 
     String field;
     BytesRef text;
-    DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text);
+    DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text, needsFreqs);
 
   Likewise for DocsAndPositionsEnum.
@@ -340,11 +316,12 @@ an AtomicReader. Note: using "atomicity emulators" can cause serious
 slowdowns due to the need to merge terms, postings, DocValues, and
 FieldCache, use them with care!
 
-## LUCENE-2413: Analyzer package changes
+## LUCENE-2413,LUCENE-3396: Analyzer package changes
 
 Lucene's core and contrib analyzers, along with Solr's analyzers,
 were consolidated into lucene/analysis. During the refactoring some
-package names have changed:
+package names have changed, and ReusableAnalyzerBase was renamed to
+Analyzer:
 
   - o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
   - o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@@ -369,7 +346,7 @@ package names have changed:
   - o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
   - o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
   - o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
-  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
   - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
   - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
   - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
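Since ReusableAnalyzerBase is now simply Analyzer, a minimal subclass
looks like this (a sketch against the 4.0 API; the class is
hypothetical, not part of this commit):

    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    // What used to extend o.a.l.analysis.ReusableAnalyzerBase now extends
    // Analyzer directly and implements createComponents.
    public final class SimpleLowercaseAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new StandardTokenizer(Version.LUCENE_40, reader);
        return new TokenStreamComponents(source,
            new LowerCaseFilter(Version.LUCENE_40, source));
      }
    }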
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -24,7 +24,6 @@ import java.util.Arrays;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.fr.ElisionFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -32,6 +31,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.CatalanStemmer;
@@ -127,7 +127,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
-    result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+    result = new ElisionFilter(result, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
 
 package org.apache.lucene.analysis.charfilter;
 
@@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 /**
  * This class is a scanner generated by
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/26/12 6:22 PM from the specification file
- * <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
  */
 public final class HTMLStripCharFilter extends BaseCharFilter {
 
@@ -31255,6 +31255,93 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
 { yybegin(STYLE);
 }
 case 55: break;
+case 27:
+{ // add (previously matched input length) + (this match length) - (substitution length)
+cumulativeDiff += inputSegment.length() + yylength() - 1;
+// position the correction at (already output length) + (substitution length)
+addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+inputSegment.clear();
+yybegin(YYINITIAL);
+return BLOCK_LEVEL_START_TAG_REPLACEMENT;
+}
+case 56: break;
+case 30:
+{ int length = yylength();
+inputSegment.write(zzBuffer, zzStartRead, length);
+entitySegment.clear();
+char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
+entitySegment.append(ch);
+outputSegment = entitySegment;
+yybegin(CHARACTER_REFERENCE_TAIL);
+}
+case 57: break;
+case 48:
+{ inputSegment.clear();
+yybegin(YYINITIAL);
+// add (previously matched input length) -- current match and substitution handled below
+cumulativeDiff += yychar - inputStart;
+// position the offset correction at (already output length) -- substitution handled below
+int offsetCorrectionPos = outputCharCount;
+int returnValue;
+if (escapeSTYLE) {
+inputSegment.write(zzBuffer, zzStartRead, yylength());
+outputSegment = inputSegment;
+returnValue = outputSegment.nextChar();
+} else {
+// add (this match length) - (substitution length)
+cumulativeDiff += yylength() - 1;
+// add (substitution length)
+++offsetCorrectionPos;
+returnValue = STYLE_REPLACEMENT;
+}
+addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
+return returnValue;
+}
+case 58: break;
+case 8:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+if (null != escapedTags
+&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+yybegin(START_TAG_TAIL_INCLUDE);
+} else {
+yybegin(START_TAG_TAIL_SUBSTITUTE);
+}
+}
+case 59: break;
+case 2:
+{ inputStart = yychar;
+inputSegment.clear();
+inputSegment.append('<');
+yybegin(LEFT_ANGLE_BRACKET);
+}
+case 60: break;
+case 44:
+{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+}
+case 61: break;
+case 21:
+{ previousRestoreState = restoreState;
+restoreState = SERVER_SIDE_INCLUDE;
+yybegin(SINGLE_QUOTED_STRING);
+}
+case 62: break;
+case 11:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+yybegin(LEFT_ANGLE_BRACKET_SPACE);
+}
+case 63: break;
+case 35:
+{ yybegin(SCRIPT);
+}
+case 64: break;
+case 42:
+{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+}
+case 65: break;
+case 10:
+{ inputSegment.append('!'); yybegin(BANG);
+}
+case 66: break;
 case 51:
 { // Handle paired UTF-16 surrogates.
 String surrogatePair = yytext();
@@ -31288,13 +31375,331 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
 inputSegment.append('#');
 yybegin(NUMERIC_CHARACTER);
 }
-case 56: break;
-case 21:
+case 67: break;
+case 4:
+{ yypushback(1);
+outputSegment = inputSegment;
+outputSegment.restart();
+yybegin(YYINITIAL);
+return outputSegment.nextChar();
+}
+case 68: break;
+case 43:
+{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+}
+case 69: break;
+case 52:
+{ // Handle paired UTF-16 surrogates.
+String surrogatePair = yytext();
+char highSurrogate = '\u0000';
+try { // High surrogates are in decimal range [55296, 56319]
+highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
+} catch(Exception e) { // should never happen
+assert false: "Exception parsing high surrogate '"
++ surrogatePair.substring(1, 6) + "'";
+}
+if (Character.isHighSurrogate(highSurrogate)) {
+outputSegment = entitySegment;
+outputSegment.clear();
+try {
+outputSegment.unsafeWrite
+((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
+} catch(Exception e) { // should never happen
+assert false: "Exception parsing low surrogate '"
++ surrogatePair.substring(10, 14) + "'";
+}
+// add (previously matched input length) + (this match length) - (substitution length)
+cumulativeDiff += inputSegment.length() + yylength() - 2;
+// position the correction at (already output length) + (substitution length)
+addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
+inputSegment.clear();
+yybegin(YYINITIAL);
+return highSurrogate;
+}
+yypushback(surrogatePair.length() - 1); // Consume only '#'
+inputSegment.append('#');
+yybegin(NUMERIC_CHARACTER);
+}
+case 70: break;
+case 28:
+{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+}
+case 71: break;
+case 50:
+{ // Handle paired UTF-16 surrogates.
+outputSegment = entitySegment;
+outputSegment.clear();
+String surrogatePair = yytext();
+char highSurrogate = '\u0000';
+try {
+highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
+} catch(Exception e) { // should never happen
+assert false: "Exception parsing high surrogate '"
++ surrogatePair.substring(2, 6) + "'";
+}
+try {
+outputSegment.unsafeWrite
+((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
+} catch(Exception e) { // should never happen
+assert false: "Exception parsing low surrogate '"
++ surrogatePair.substring(10, 14) + "'";
+}
+// add (previously matched input length) + (this match length) - (substitution length)
+cumulativeDiff += inputSegment.length() + yylength() - 2;
+// position the correction at (already output length) + (substitution length)
+addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
+inputSegment.clear();
+yybegin(YYINITIAL);
+return highSurrogate;
+}
+case 72: break;
+case 16:
+{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+}
+case 73: break;
+case 22:
 { previousRestoreState = restoreState;
 restoreState = SERVER_SIDE_INCLUDE;
-yybegin(SINGLE_QUOTED_STRING);
+yybegin(DOUBLE_QUOTED_STRING);
 }
-case 57: break;
+case 74: break;
+case 26:
+{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+cumulativeDiff += inputSegment.length() + yylength();
+// position the correction at (already output length) [ + (substitution length) = 0 ]
+addOffCorrectMap(outputCharCount, cumulativeDiff);
+inputSegment.clear();
+outputSegment = inputSegment;
+yybegin(YYINITIAL);
+}
+case 75: break;
+case 20:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+}
+case 76: break;
+case 47:
+{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+cumulativeDiff += inputSegment.length() + yylength();
+// position the correction at (already output length) [ + (substitution length) = 0 ]
+addOffCorrectMap(outputCharCount, cumulativeDiff);
+inputSegment.clear();
+yybegin(CDATA);
+}
+case 77: break;
+case 33:
+{ yybegin(YYINITIAL);
+if (escapeBR) {
+inputSegment.write(zzBuffer, zzStartRead, yylength());
+outputSegment = inputSegment;
+return outputSegment.nextChar();
+} else {
+// add (previously matched input length) + (this match length) - (substitution length)
+cumulativeDiff += inputSegment.length() + yylength() - 1;
+// position the correction at (already output length) + (substitution length)
+addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+inputSegment.reset();
+return BR_START_TAG_REPLACEMENT;
+}
+}
+case 78: break;
+case 23:
+{ yybegin(restoreState); restoreState = previousRestoreState;
+}
+case 79: break;
+case 32:
+{ yybegin(COMMENT);
+}
+case 80: break;
+case 24:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+outputSegment = inputSegment;
+yybegin(YYINITIAL);
+return outputSegment.nextChar();
+}
+case 81: break;
+case 3:
+{ inputStart = yychar;
+inputSegment.clear();
+inputSegment.append('&');
+yybegin(AMPERSAND);
+}
+case 82: break;
+case 46:
+{ yybegin(SCRIPT);
+if (escapeSCRIPT) {
+inputSegment.write(zzBuffer, zzStartRead, yylength());
+outputSegment = inputSegment;
+inputStart += 1 + yylength();
+return outputSegment.nextChar();
+}
+}
+case 83: break;
+case 14:
+{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+cumulativeDiff += inputSegment.length() + yylength();
+// position the correction at (already output length) [ + (substitution length) = 0 ]
+addOffCorrectMap(outputCharCount, cumulativeDiff);
+inputSegment.clear();
+yybegin(YYINITIAL);
+}
+case 84: break;
+case 6:
+{ int matchLength = yylength();
+inputSegment.write(zzBuffer, zzStartRead, matchLength);
+if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
+String decimalCharRef = yytext();
+int codePoint = 0;
+try {
+codePoint = Integer.parseInt(decimalCharRef);
+} catch(Exception e) {
+assert false: "Exception parsing code point '" + decimalCharRef + "'";
+}
+if (codePoint <= 0x10FFFF) {
+outputSegment = entitySegment;
+outputSegment.clear();
+if (codePoint >= Character.MIN_SURROGATE
+&& codePoint <= Character.MAX_SURROGATE) {
+outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
+} else {
+outputSegment.setLength
+(Character.toChars(codePoint, outputSegment.getArray(), 0));
+}
+yybegin(CHARACTER_REFERENCE_TAIL);
+} else {
+outputSegment = inputSegment;
+yybegin(YYINITIAL);
+return outputSegment.nextChar();
+}
+} else {
+outputSegment = inputSegment;
+yybegin(YYINITIAL);
+return outputSegment.nextChar();
+}
+}
+case 85: break;
+case 34:
+{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
+cumulativeDiff += yychar - inputStart + yylength();
+// position the correction at (already output length) [ + (substitution length) = 0]
+addOffCorrectMap(outputCharCount, cumulativeDiff);
+inputSegment.clear();
+yybegin(YYINITIAL);
+}
+case 86: break;
+case 5:
+{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
+}
+case 87: break;
+case 13:
+{ inputSegment.append(zzBuffer[zzStartRead]);
+}
+case 88: break;
+case 18:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+if (null != escapedTags
+&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+yybegin(END_TAG_TAIL_INCLUDE);
+} else {
+yybegin(END_TAG_TAIL_SUBSTITUTE);
+}
+}
+case 89: break;
+case 40:
+{ yybegin(SCRIPT_COMMENT);
+}
+case 90: break;
+case 37:
+{ // add (this match length) [ - (substitution length) = 0 ]
+cumulativeDiff += yylength();
+// position the correction at (already output length) [ + (substitution length) = 0 ]
+addOffCorrectMap(outputCharCount, cumulativeDiff);
+yybegin(YYINITIAL);
+}
+case 91: break;
+case 12:
+{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+}
+case 92: break;
+case 9:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+if (null != escapedTags
+&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+yybegin(START_TAG_TAIL_INCLUDE);
+} else {
+yybegin(START_TAG_TAIL_EXCLUDE);
+}
+}
+case 93: break;
+case 49:
+{ inputSegment.clear();
+yybegin(YYINITIAL);
+// add (previously matched input length) -- current match and substitution handled below
+cumulativeDiff += yychar - inputStart;
+// position at (already output length) -- substitution handled below
+int offsetCorrectionPos = outputCharCount;
+int returnValue;
+if (escapeSCRIPT) {
+inputSegment.write(zzBuffer, zzStartRead, yylength());
+outputSegment = inputSegment;
+returnValue = outputSegment.nextChar();
+} else {
+// add (this match length) - (substitution length)
+cumulativeDiff += yylength() - 1;
+// add (substitution length)
+++offsetCorrectionPos;
+returnValue = SCRIPT_REPLACEMENT;
+}
+addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
+return returnValue;
+}
+case 94: break;
+case 29:
+{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+}
+case 95: break;
+case 17:
+{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+}
+case 96: break;
+case 45:
+{ yybegin(STYLE);
+if (escapeSTYLE) {
+inputSegment.write(zzBuffer, zzStartRead, yylength());
+outputSegment = inputSegment;
+inputStart += 1 + yylength();
+return outputSegment.nextChar();
+}
+}
+case 97: break;
+case 7:
+{ // add (previously matched input length) + (this match length) - (substitution length)
+cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
+// position the correction at (already output length) + (substitution length)
+addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
+yybegin(YYINITIAL);
+return outputSegment.nextChar();
+}
+case 98: break;
+case 19:
+{ inputSegment.write(zzBuffer, zzStartRead, yylength());
+if (null != escapedTags
+&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+yybegin(END_TAG_TAIL_INCLUDE);
+} else {
+yybegin(END_TAG_TAIL_EXCLUDE);
+}
+}
+case 99: break;
+case 25:
+{ // add (previously matched input length) + (this match length) - (substitution length)
+cumulativeDiff += inputSegment.length() + yylength() - 1;
+// position the correction at (already output length) + (substitution length)
+addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+inputSegment.clear();
+yybegin(YYINITIAL);
+return BLOCK_LEVEL_END_TAG_REPLACEMENT;
+}
+case 100: break;
 case 31:
 { int matchLength = yylength();
 inputSegment.write(zzBuffer, zzStartRead, matchLength);
@@ -31329,66 +31734,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
 return outputSegment.nextChar();
 }
 }
-case 58: break;
-case 19:
-{ inputSegment.write(zzBuffer, zzStartRead, yylength());
-if (null != escapedTags
-&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
-yybegin(END_TAG_TAIL_INCLUDE);
-} else {
-yybegin(END_TAG_TAIL_EXCLUDE);
-}
-}
-case 59: break;
-case 2:
-{ inputStart = yychar;
-inputSegment.clear();
-inputSegment.append('<');
-yybegin(LEFT_ANGLE_BRACKET);
-}
-case 60: break;
-case 27:
-{ // add (previously matched input length) + (this match length) - (substitution length)
-cumulativeDiff += inputSegment.length() + yylength() - 1;
-// position the correction at (already output length) + (substitution length)
-addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
-inputSegment.clear();
-yybegin(YYINITIAL);
-return BLOCK_LEVEL_START_TAG_REPLACEMENT;
-}
-case 61: break;
-case 44:
-{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-}
-case 62: break;
-case 35:
-{ yybegin(SCRIPT);
-}
-case 63: break;
-case 42:
-{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-}
-case 64: break;
-case 10:
-{ inputSegment.append('!'); yybegin(BANG);
-}
-case 65: break;
-case 33:
-{ yybegin(YYINITIAL);
-if (escapeBR) {
-inputSegment.write(zzBuffer, zzStartRead, yylength());
-outputSegment = inputSegment;
-return outputSegment.nextChar();
-} else {
-// add (previously matched input length) + (this match length) - (substitution length)
-cumulativeDiff += inputSegment.length() + yylength() - 1;
-// position the correction at (already output length) + (substitution length)
-addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
-inputSegment.reset();
-return BR_START_TAG_REPLACEMENT;
-}
-}
-case 66: break;
+case 101: break;
 case 53:
 { // Handle paired UTF-16 surrogates.
 String surrogatePair = yytext();
@ -31424,288 +31770,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|||
inputSegment.append('#');
|
||||
yybegin(NUMERIC_CHARACTER);
|
||||
}
|
||||
case 67: break;
|
||||
case 43:
|
||||
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
||||
}
|
||||
case 68: break;
|
||||
case 30:
|
||||
{ int length = yylength();
|
||||
inputSegment.write(zzBuffer, zzStartRead, length);
|
||||
entitySegment.clear();
|
||||
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
|
||||
entitySegment.append(ch);
|
||||
outputSegment = entitySegment;
|
||||
yybegin(CHARACTER_REFERENCE_TAIL);
|
||||
}
|
||||
case 69: break;
|
||||
case 28:
|
||||
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
||||
}
|
||||
case 70: break;
|
||||
case 3:
|
||||
{ inputStart = yychar;
|
||||
inputSegment.clear();
|
||||
inputSegment.append('&');
|
||||
yybegin(AMPERSAND);
|
||||
}
|
||||
case 71: break;
|
||||
case 16:
|
||||
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
||||
}
|
||||
case 72: break;
|
||||
case 52:
|
||||
{ // Handle paired UTF-16 surrogates.
|
||||
String surrogatePair = yytext();
|
||||
char highSurrogate = '\u0000';
|
||||
try { // High surrogates are in decimal range [55296, 56319]
|
||||
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
|
||||
} catch(Exception e) { // should never happen
|
||||
assert false: "Exception parsing high surrogate '"
|
||||
+ surrogatePair.substring(1, 6) + "'";
|
||||
}
|
||||
if (Character.isHighSurrogate(highSurrogate)) {
|
||||
outputSegment = entitySegment;
|
||||
outputSegment.clear();
|
||||
try {
|
||||
outputSegment.unsafeWrite
|
||||
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
||||
} catch(Exception e) { // should never happen
|
||||
assert false: "Exception parsing low surrogate '"
|
||||
+ surrogatePair.substring(10, 14) + "'";
|
||||
}
|
||||
// add (previously matched input length) + (this match length) - (substitution length)
|
||||
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
||||
// position the correction at (already output length) + (substitution length)
|
||||
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
||||
inputSegment.clear();
|
||||
yybegin(YYINITIAL);
|
||||
return highSurrogate;
|
||||
}
|
||||
yypushback(surrogatePair.length() - 1); // Consume only '#'
|
||||
inputSegment.append('#');
|
||||
yybegin(NUMERIC_CHARACTER);
|
||||
}
|
||||
case 73: break;
|
||||
case 6:
|
||||
{ int matchLength = yylength();
|
||||
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
||||
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
|
||||
String decimalCharRef = yytext();
|
||||
int codePoint = 0;
|
||||
try {
|
||||
codePoint = Integer.parseInt(decimalCharRef);
|
||||
} catch(Exception e) {
|
||||
assert false: "Exception parsing code point '" + decimalCharRef + "'";
|
||||
}
|
||||
if (codePoint <= 0x10FFFF) {
|
||||
outputSegment = entitySegment;
|
||||
outputSegment.clear();
|
||||
if (codePoint >= Character.MIN_SURROGATE
|
||||
&& codePoint <= Character.MAX_SURROGATE) {
|
||||
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
|
||||
} else {
|
||||
outputSegment.setLength
|
||||
(Character.toChars(codePoint, outputSegment.getArray(), 0));
|
||||
}
|
||||
yybegin(CHARACTER_REFERENCE_TAIL);
|
||||
} else {
|
||||
outputSegment = inputSegment;
|
||||
yybegin(YYINITIAL);
|
||||
return outputSegment.nextChar();
|
||||
}
|
||||
} else {
|
||||
outputSegment = inputSegment;
|
||||
yybegin(YYINITIAL);
|
||||
return outputSegment.nextChar();
|
||||
}
|
||||
}
|
||||
case 74: break;
|
||||
case 37:
|
||||
{ // add (this match length) [ - (substitution length) = 0 ]
|
||||
cumulativeDiff += yylength();
|
||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||
yybegin(YYINITIAL);
|
||||
}
|
||||
case 75: break;
|
||||
case 8:
|
||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
if (null != escapedTags
|
||||
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
||||
yybegin(START_TAG_TAIL_INCLUDE);
|
||||
} else {
|
||||
yybegin(START_TAG_TAIL_SUBSTITUTE);
|
||||
}
|
||||
}
|
||||
case 76: break;
|
||||
case 46:
|
||||
{ yybegin(SCRIPT);
|
||||
if (escapeSCRIPT) {
|
||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
outputSegment = inputSegment;
|
||||
inputStart += 1 + yylength();
|
||||
return outputSegment.nextChar();
|
||||
}
|
||||
}
|
||||
case 77: break;
|
||||
case 11:
|
||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
yybegin(LEFT_ANGLE_BRACKET_SPACE);
|
||||
}
|
||||
case 78: break;
|
||||
case 20:
|
||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
}
|
||||
case 79: break;
|
||||
case 34:
|
||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
|
||||
cumulativeDiff += yychar - inputStart + yylength();
|
||||
// position the correction at (already output length) [ + (substitution length) = 0]
|
||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||
inputSegment.clear();
|
||||
yybegin(YYINITIAL);
|
||||
}
|
||||
case 80: break;
|
||||
case 23:
|
||||
{ yybegin(restoreState); restoreState = previousRestoreState;
|
||||
}
|
||||
case 81: break;
|
||||
case 32:
|
||||
{ yybegin(COMMENT);
|
||||
}
|
||||
case 82: break;
|
||||
case 14:
|
||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
||||
cumulativeDiff += inputSegment.length() + yylength();
|
||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||
inputSegment.clear();
|
||||
yybegin(YYINITIAL);
|
||||
}
|
||||
case 83: break;
|
||||
case 18:
|
||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
if (null != escapedTags
|
||||
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
||||
yybegin(END_TAG_TAIL_INCLUDE);
|
||||
} else {
|
||||
yybegin(END_TAG_TAIL_SUBSTITUTE);
|
||||
}
|
||||
}
|
||||
case 84: break;
|
||||
case 25:
|
||||
{ // add (previously matched input length) + (this match length) - (substitution length)
|
||||
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
||||
// position the correction at (already output length) + (substitution length)
|
||||
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
||||
inputSegment.clear();
|
||||
yybegin(YYINITIAL);
|
||||
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
|
||||
}
|
||||
case 85: break;
|
||||
case 7:
|
||||
{ // add (previously matched input length) + (this match length) - (substitution length)
|
||||
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
|
||||
// position the correction at (already output length) + (substitution length)
|
||||
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
|
||||
yybegin(YYINITIAL);
|
||||
return outputSegment.nextChar();
|
||||
}
|
||||
case 86: break;
|
||||
case 48:
|
||||
{ inputSegment.clear();
|
||||
yybegin(YYINITIAL);
|
||||
// add (previously matched input length) -- current match and substitution handled below
|
||||
cumulativeDiff += yychar - inputStart;
|
||||
// position the offset correction at (already output length) -- substitution handled below
|
||||
int offsetCorrectionPos = outputCharCount;
|
||||
int returnValue;
|
||||
if (escapeSTYLE) {
|
||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
outputSegment = inputSegment;
|
||||
returnValue = outputSegment.nextChar();
|
||||
} else {
|
||||
// add (this match length) - (substitution length)
|
||||
cumulativeDiff += yylength() - 1;
|
||||
// add (substitution length)
|
||||
++offsetCorrectionPos;
|
||||
returnValue = STYLE_REPLACEMENT;
|
||||
}
|
||||
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
||||
return returnValue;
|
||||
}
|
||||
case 87: break;
|
||||
case 5:
|
||||
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
|
||||
}
|
||||
case 88: break;
|
||||
case 26:
|
||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
||||
cumulativeDiff += inputSegment.length() + yylength();
|
||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||
inputSegment.clear();
|
||||
outputSegment = inputSegment;
|
||||
yybegin(YYINITIAL);
|
||||
}
|
||||
case 89: break;
|
||||
case 13:
|
||||
{ inputSegment.append(zzBuffer[zzStartRead]);
|
||||
}
|
||||
case 90: break;
|
||||
case 50:
|
||||
{ // Handle paired UTF-16 surrogates.
|
||||
outputSegment = entitySegment;
|
||||
outputSegment.clear();
|
||||
String surrogatePair = yytext();
|
||||
char highSurrogate = '\u0000';
|
||||
try {
|
||||
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
|
||||
} catch(Exception e) { // should never happen
|
||||
assert false: "Exception parsing high surrogate '"
|
||||
+ surrogatePair.substring(2, 6) + "'";
|
||||
}
|
||||
try {
|
||||
outputSegment.unsafeWrite
|
||||
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
||||
} catch(Exception e) { // should never happen
|
||||
assert false: "Exception parsing low surrogate '"
|
||||
+ surrogatePair.substring(10, 14) + "'";
|
||||
}
|
||||
// add (previously matched input length) + (this match length) - (substitution length)
|
||||
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
||||
// position the correction at (already output length) + (substitution length)
|
||||
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
||||
inputSegment.clear();
|
||||
yybegin(YYINITIAL);
|
||||
return highSurrogate;
|
||||
}
|
||||
case 91: break;
|
||||
case 40:
|
||||
{ yybegin(SCRIPT_COMMENT);
|
||||
}
|
||||
case 92: break;
|
||||
case 45:
|
||||
{ yybegin(STYLE);
|
||||
if (escapeSTYLE) {
|
||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||
outputSegment = inputSegment;
|
||||
inputStart += 1 + yylength();
|
||||
return outputSegment.nextChar();
|
||||
}
|
||||
}
|
||||
case 93: break;
|
||||
case 22:
|
||||
{ previousRestoreState = restoreState;
|
||||
restoreState = SERVER_SIDE_INCLUDE;
|
||||
yybegin(DOUBLE_QUOTED_STRING);
|
||||
}
|
||||
case 94: break;
|
||||
case 12:
|
||||
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
|
||||
}
|
||||
case 95: break;
|
||||
case 102: break;
|
||||
case 36:
|
||||
{ yybegin(YYINITIAL);
|
||||
if (escapeBR) {
|
||||
|
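Note: cases 50, 52 and 53 above rebuild a supplementary code point from a pair of escaped UTF-16 surrogate character references. A self-contained illustration of the arithmetic (the example values are hypothetical, not taken from this diff):

    // "&#55357;" yields the high surrogate 0xD83D; a hex low surrogate such as
    // 0xDE00 pairs with it to form the supplementary code point U+1F600.
    char high = (char) Integer.parseInt("55357");     // 0xD83D
    char low  = (char) Integer.parseInt("DE00", 16);  // 0xDE00
    assert Character.isHighSurrogate(high) && Character.isLowSurrogate(low);
    int codePoint = Character.toCodePoint(high, low); // 0x1F600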
@@ -31721,83 +31786,18 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
return BR_END_TAG_REPLACEMENT;
}
}
case 96: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 97: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 98: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 99: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 100: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 101: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 102: break;
case 103: break;
case 38:
{ yybegin(restoreState);
}
case 103: break;
case 104: break;
case 41:
{ yybegin(STYLE_COMMENT);
}
case 104: break;
case 105: break;
case 1:
{ return zzBuffer[zzStartRead];
}
case 105: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 106: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
@@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU] |
[vV][aA][rR] )

%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
%include HTMLCharacterEntities.jflex

%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro

%{
private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;

@@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
* {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
* of the CJK scripts are turned into bigrams.
* <p>
* By default, when a CJK character has no adjacent characters to form
* a bigram, it is output in unigram form. If you want to always output
* both unigrams and bigrams, set the <code>outputUnigrams</code>
* flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
* This can be used for a combined unigram+bigram approach.
* <p>
* In all cases, all non-CJK input is passed thru unmodified.
*/
public final class CJKBigramFilter extends TokenFilter {

@@ -67,10 +75,16 @@ public final class CJKBigramFilter extends TokenFilter {
private final Object doHiragana;
private final Object doKatakana;
private final Object doHangul;

// true if we should output unigram tokens always
private final boolean outputUnigrams;
private boolean ngramState; // false = output unigram, true = output bigram

private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);

// buffers containing codepoint and offsets in parallel
int buffer[] = new int[8];
@@ -88,23 +102,36 @@ public final class CJKBigramFilter extends TokenFilter {

/**
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
* CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
* CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
*/
public CJKBigramFilter(TokenStream in) {
this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
}

/**
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
* CJKBigramFilter(in, flags, false)}
*/
public CJKBigramFilter(TokenStream in, int flags) {
this(in, flags, false);
}

/**
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
* and whether or not unigrams should also be output.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
* @param outputUnigrams true if unigrams for the selected writing systems should always be output;
* when this is false, unigrams are only output when there are no adjacent characters to form
* a bigram.
*/
public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
super(in);
doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
this.outputUnigrams = outputUnigrams;
}

/*
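Note: a rough usage sketch of the new three-argument constructor; the surrounding tokenizer setup is illustrative and assumes a 4.x-era Version constant, not something shown in this commit:

    // Bigram HAN and HIRAGANA only, and also emit unigrams.
    Tokenizer tok = new StandardTokenizer(Version.LUCENE_40, reader);
    TokenStream ts = new CJKBigramFilter(tok,
        CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA, true);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();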
@@ -120,7 +147,24 @@ public final class CJKBigramFilter extends TokenFilter {
// case 1: we have multiple remaining codepoints buffered,
// so we can emit a bigram here.

flushBigram();
if (outputUnigrams) {

// when also outputting unigrams, we output the unigram first,
// then rewind back to revisit the bigram.
// so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
// the logic in hasBufferedUnigram ensures we output the C,
// even though it did actually have adjacent CJK characters.

if (ngramState) {
flushBigram();
} else {
flushUnigram();
index--;
}
ngramState = !ngramState;
} else {
flushBigram();
}
return true;
} else if (doNext()) {
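Note: a concrete trace of the rewind logic above, for a hypothetical three-character CJK input written here as ABC:

    // outputUnigrams=true:  A, AB (posInc=0, posLen=2), B, BC (posInc=0, posLen=2), C
    // outputUnigrams=false: AB, BC (a unigram only appears for a lone CJK character)
    String[] expectedWithUnigrams = { "A", "AB", "B", "BC", "C" };
    String[] expectedBigramsOnly  = { "AB", "BC" };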
@@ -260,6 +304,11 @@ public final class CJKBigramFilter extends TokenFilter {
termAtt.setLength(len2);
offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
typeAtt.setType(DOUBLE_TYPE);
// when outputting unigrams, all bigrams are synonyms that span two unigrams
if (outputUnigrams) {
posIncAtt.setPositionIncrement(0);
posLengthAtt.setPositionLength(2);
}
index++;
}
@@ -292,7 +341,13 @@ public final class CJKBigramFilter extends TokenFilter {
* inputs.
*/
private boolean hasBufferedUnigram() {
return bufferLen == 1 && index == 0;
if (outputUnigrams) {
// when outputting unigrams always
return bufferLen - index == 1;
} else {
// otherwise it's only when we have a lone CJK character
return bufferLen == 1 && index == 0;
}
}

@Override

@@ -303,5 +358,6 @@ public final class CJKBigramFilter extends TokenFilter {
lastEndOffset = 0;
loneState = null;
exhausted = false;
ngramState = false;
}
}
@@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <filter class="solr.LowerCaseFilterFactory"/>
* <filter class="solr.CJKBigramFilterFactory"
* han="true" hiragana="true"
* katakana="true" hangul="true" />
* katakana="true" hangul="true" outputUnigrams="false" />
* </analyzer>
* </fieldType></pre>
*/
public class CJKBigramFilterFactory extends TokenFilterFactory {
int flags;
boolean outputUnigrams;

@Override
public void init(Map<String,String> args) {

@@ -56,10 +57,11 @@ public class CJKBigramFilterFactory extends TokenFilterFactory {
if (getBoolean("hangul", true)) {
flags |= CJKBigramFilter.HANGUL;
}
outputUnigrams = getBoolean("outputUnigrams", false);
}

@Override
public TokenStream create(TokenStream input) {
return new CJKBigramFilter(input, flags);
return new CJKBigramFilter(input, flags, outputUnigrams);
}
}
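Note: a minimal sketch of driving the factory programmatically, assuming the 4.x-era init(Map)/create(TokenStream) lifecycle shown above; the argument map keys are the ones the factory reads:

    Map<String,String> args = new HashMap<String,String>();
    args.put("outputUnigrams", "true");
    CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
    factory.init(args);
    TokenStream ts = factory.create(tokenizer); // tokenizer: any source TokenStream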
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;

@@ -35,6 +36,7 @@ import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;

/**
* {@link Analyzer} for French language.

@@ -54,6 +56,11 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
/** File containing default French stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "french_stop.txt";

/** Default set of articles for ElisionFilter */
public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j"), true));

/**
* Contains words that should be indexed but not stemmed.
*/

@@ -134,7 +141,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(matchVersion, result);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!excltable.isEmpty())
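Note: with the chain above, FrenchAnalyzer now strips elided articles through the shared ElisionFilter. A hedged sketch (the Version constant and field name are illustrative):

    Analyzer analyzer = new FrenchAnalyzer(Version.LUCENE_40);
    // "l'avion" analyzes down to the stem of "avion": the article "l'" is
    // removed by ElisionFilter before lowercasing, stopwords and stemming.
    TokenStream ts = analyzer.tokenStream("body", new StringReader("l'avion"));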
@@ -23,7 +23,6 @@ import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

@@ -31,6 +30,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.IrishStemmer;

@@ -140,7 +140,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
s.setEnablePositionIncrements(false);
result = s;
result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new IrishLowerCaseFilter(result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
@@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

@@ -32,6 +31,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;

@@ -129,7 +129,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
@@ -27,13 +27,47 @@ import org.apache.lucene.analysis.util.TokenizerFactory;

/**
* Factory for {@link PathHierarchyTokenizer}.
* <pre class="prettyprint" >
* <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/>
* </analyzer>
* </fieldType></pre>
* <p>
* This factory is typically configured for use only in the <code>index</code>
* Analyzer (or only in the <code>query</code> Analyzer, but never both).
* </p>
* <p>
* For example, in the configuration below a query for
* <code>Books/NonFic</code> will match documents indexed with values like
* <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>,
* <code>Books/NonFic/Science/Physics</code>, etc. But it will not match
* documents indexed with values like <code>Books</code>, or
* <code>Books/Fic</code>...
* </p>
*
* <pre class="prettyprint" >
* <fieldType name="descendent_path" class="solr.TextField">
* <analyzer type="index">
* <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
* </analyzer>
* <analyzer type="query">
* <tokenizer class="solr.KeywordTokenizerFactory" />
* </analyzer>
* </fieldType>
* </pre>
* <p>
* In this example however we see the opposite configuration, so that a query
* for <code>Books/NonFic/Science/Physics</code> would match documents
* containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>,
* or <code>Books/NonFic/Science/Physics</code>, but not
* <code>Books/NonFic/Science/Physics/Theory</code> or
* <code>Books/NonFic/Law</code>.
* </p>
* <pre class="prettyprint" >
* <fieldType name="descendent_path" class="solr.TextField">
* <analyzer type="index">
* <tokenizer class="solr.KeywordTokenizerFactory" />
* </analyzer>
* <analyzer type="query">
* <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
* </analyzer>
* </fieldType>
* </pre>
*/
public class PathHierarchyTokenizerFactory extends TokenizerFactory {
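Note: a sketch of the index-time behavior described above, assuming a PathHierarchyTokenizer(Reader, delimiter, replacement, skip) constructor; the field value is illustrative:

    Tokenizer t = new PathHierarchyTokenizer(
        new StringReader("Books/NonFic/Law"), '/', '/', 0);
    // expected tokens: "Books", "Books/NonFic", "Books/NonFic/Law"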
@@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.standard;

@@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/15/12 1:57 AM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class ClassicTokenizerImpl implements StandardTokenizerInterface {

@@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
public static final int YYEOF = -1;

/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;

/** lexical states */
public static final int YYINITIAL = 0;

@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
%buffer 4096

%{
@@ -14,7 +14,7 @@
* limitations under the License.
*/

// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros

@@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.standard;

@@ -43,7 +43,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
public static final int YYEOF = -1;

/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;

/** lexical states */
public static final int YYINITIAL = 0;

@@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
%buffer 4096

%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})

@@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.standard;

@@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
public static final int YYEOF = -1;

/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;

/** lexical states */
public static final int YYINITIAL = 0;

@@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
%buffer 4096

%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})

@@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format

%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
%include ASCIITLD.jflex-macro

DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
@@ -0,0 +1,80 @@
package org.apache.lucene.analysis.util;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.io.InputStream;

/**
* Simple {@link ResourceLoader} that uses {@link ClassLoader#getResourceAsStream(String)}
* and {@link Class#forName(String,boolean,ClassLoader)} to open resources and
* classes, respectively.
*/
public final class ClasspathResourceLoader implements ResourceLoader {
private final Class<?> clazz;
private final ClassLoader loader;

/**
* Creates an instance using the context classloader to load Resources and classes.
* Resource paths must be absolute.
*/
public ClasspathResourceLoader() {
this(Thread.currentThread().getContextClassLoader());
}

/**
* Creates an instance using the given classloader to load Resources and classes.
* Resource paths must be absolute.
*/
public ClasspathResourceLoader(ClassLoader loader) {
this(null, loader);
}

/**
* Creates an instance using the classloader of the given class to load Resources and classes.
* Resources are resolved relative to the given class, if path is not absolute.
*/
public ClasspathResourceLoader(Class<?> clazz) {
this(clazz, clazz.getClassLoader());
}

private ClasspathResourceLoader(Class<?> clazz, ClassLoader loader) {
this.clazz = clazz;
this.loader = loader;
}

@Override
public InputStream openResource(String resource) throws IOException {
final InputStream stream = (clazz != null) ?
clazz.getResourceAsStream(resource) :
loader.getResourceAsStream(resource);
if (stream == null)
throw new IOException("Resource not found: " + resource);
return stream;
}

@Override
public <T> T newInstance(String cname, Class<T> expectedType) {
try {
final Class<? extends T> clazz = Class.forName(cname, true, loader).asSubclass(expectedType);
return clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException("Cannot instantiate class: " + cname, e);
}
}
}
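Note: a short usage sketch of the class added above; the component class and resource name are hypothetical:

    ResourceLoader loader = new ClasspathResourceLoader(MyComponent.class);
    InputStream in = loader.openResource("stopwords.txt"); // resolved relative to MyComponent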
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.fr;
package org.apache.lucene.analysis.util;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more

@@ -18,13 +18,11 @@ package org.apache.lucene.analysis.fr;
*/

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

/**
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be

@@ -33,31 +31,17 @@ import org.apache.lucene.util.Version;
* @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
*/
public final class ElisionFilter extends TokenFilter {
private CharArraySet articles = CharArraySet.EMPTY_SET;
private final CharArraySet articles;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j"), true));

private static char[] apostrophes = {'\'', '\u2019'};

/**
* Constructs an elision filter with standard stop words
*/
public ElisionFilter(Version matchVersion, TokenStream input) {
this(matchVersion, input, DEFAULT_ARTICLES);
}

/**
* Constructs an elision filter with a Set of stop words
* @param matchVersion the lucene backwards compatibility version
* @param input the source {@link TokenStream}
* @param articles a set of stopword articles
*/
public ElisionFilter(Version matchVersion, TokenStream input, CharArraySet articles) {
public ElisionFilter(TokenStream input, CharArraySet articles) {
super(input);
this.articles = CharArraySet.unmodifiableSet(
new CharArraySet(matchVersion, articles, true));
this.articles = articles;
}

/**

@@ -69,22 +53,18 @@ public final class ElisionFilter extends TokenFilter {
char[] termBuffer = termAtt.buffer();
int termLength = termAtt.length();

int minPoz = Integer.MAX_VALUE;
for (int i = 0; i < apostrophes.length; i++) {
char apos = apostrophes[i];
// The equivalent of String.indexOf(ch)
for (int poz = 0; poz < termLength ; poz++) {
if (termBuffer[poz] == apos) {
minPoz = Math.min(poz, minPoz);
break;
}
int index = -1;
for (int i = 0; i < termLength; i++) {
char ch = termBuffer[i];
if (ch == '\'' || ch == '\u2019') {
index = i;
break;
}
}

// An apostrophe has been found. If the prefix is an article, strip it off.
if (minPoz != Integer.MAX_VALUE
&& articles.contains(termAtt.buffer(), 0, minPoz)) {
termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1));
if (index >= 0 && articles.contains(termBuffer, 0, index)) {
termAtt.copyBuffer(termBuffer, index + 1, termLength - (index + 1));
}

return true;
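Note: after this change the filter is constructed without a Version argument, e.g. with the default article set introduced by this commit:

    TokenStream ts = new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES);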
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.fr;
package org.apache.lucene.analysis.util;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more

@@ -17,10 +17,9 @@ package org.apache.lucene.analysis.fr;
* limitations under the License.
*/

import org.apache.lucene.analysis.util.*;

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;

/**
* Factory for {@link ElisionFilter}.

@@ -46,12 +45,13 @@ public class ElisionFilterFactory extends TokenFilterFactory implements Resource
if (articlesFile != null) {
articles = getWordSet(loader, articlesFile, ignoreCase);
}
if (articles == null) {
articles = FrenchAnalyzer.DEFAULT_ARTICLES;
}
}

public ElisionFilter create(TokenStream input) {
assureMatchVersion();
return articles == null ? new ElisionFilter(luceneMatchVersion,input) :
new ElisionFilter(luceneMatchVersion,input,articles);
return new ElisionFilter(input, articles);
}
}
@@ -0,0 +1,94 @@
package org.apache.lucene.analysis.util;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

/**
* Simple {@link ResourceLoader} that opens resource files
* from the local file system, optionally resolving against
* a base directory.
*
* <p>This loader wraps a delegate {@link ResourceLoader}
* that is used to resolve all files the current base directory
* does not contain. {@link #newInstance} is always resolved
* against the delegate, as a {@link ClassLoader} is needed.
*
* <p>You can chain several {@code FilesystemResourceLoader}s
* to allow lookup of files in more than one base directory.
*/
public final class FilesystemResourceLoader implements ResourceLoader {
private final File baseDirectory;
private final ResourceLoader delegate;

/**
* Creates a resource loader that requires absolute filenames or relative to CWD
* to resolve resources. Files not found in file system and class lookups
* are delegated to context classloader.
*/
public FilesystemResourceLoader() {
this((File) null);
}

/**
* Creates a resource loader that resolves resources against the given
* base directory (may be {@code null} to refer to CWD).
* Files not found in file system and class lookups are delegated to context
* classloader.
*/
public FilesystemResourceLoader(File baseDirectory) {
this(baseDirectory, new ClasspathResourceLoader());
}

/**
* Creates a resource loader that resolves resources against the given
* base directory (may be {@code null} to refer to CWD).
* Files not found in file system and class lookups are delegated
* to the given delegate {@link ResourceLoader}.
*/
public FilesystemResourceLoader(File baseDirectory, ResourceLoader delegate) {
if (baseDirectory != null && !baseDirectory.isDirectory())
throw new IllegalArgumentException("baseDirectory is not a directory or null");
if (delegate == null)
throw new IllegalArgumentException("delegate ResourceLoader may not be null");
this.baseDirectory = baseDirectory;
this.delegate = delegate;
}

@Override
public InputStream openResource(String resource) throws IOException {
try {
File file = new File (resource);
if (baseDirectory != null && !file.isAbsolute()) {
file = new File(baseDirectory, resource);
}
return new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
return delegate.openResource(resource);
}
}

@Override
public <T> T newInstance(String cname, Class<T> expectedType) {
return delegate.newInstance(cname, expectedType);
}
}
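Note: a usage sketch of the delegation described above; the directories and file name are placeholders:

    ResourceLoader loader = new FilesystemResourceLoader(
        new File("/path/to/primary/conf"),
        new FilesystemResourceLoader(new File("/path/to/fallback/conf")));
    // looks in primary/conf, then fallback/conf, then the classpath
    InputStream in = loader.openResource("synonyms.txt");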
@@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.wikipedia;

@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/15/12 1:57 AM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {

@@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
public static final int YYEOF = -1;

/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;

/** lexical states */
public static final int THREE_SINGLE_QUOTES_STATE = 10;

@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
%buffer 4096

%{
@@ -1,423 +1,439 @@
// This file was generated automatically by the Snowball to Java compiler

package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;

/**
* Generated class implementing code defined by a snowball script.
*/
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;

/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/

public class DanishStemmer extends SnowballProgram {

private Among a_0[] = {
new Among ( "hed", -1, 1, "", this),
new Among ( "ethed", 0, 1, "", this),
new Among ( "ered", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "erede", 3, 1, "", this),
new Among ( "ende", 3, 1, "", this),
new Among ( "erende", 5, 1, "", this),
new Among ( "ene", 3, 1, "", this),
new Among ( "erne", 3, 1, "", this),
new Among ( "ere", 3, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heden", 10, 1, "", this),
new Among ( "eren", 10, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heder", 13, 1, "", this),
new Among ( "erer", 13, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "heds", 16, 1, "", this),
new Among ( "es", 16, 1, "", this),
new Among ( "endes", 18, 1, "", this),
new Among ( "erendes", 19, 1, "", this),
new Among ( "enes", 18, 1, "", this),
new Among ( "ernes", 18, 1, "", this),
new Among ( "eres", 18, 1, "", this),
new Among ( "ens", 16, 1, "", this),
new Among ( "hedens", 24, 1, "", this),
new Among ( "erens", 24, 1, "", this),
new Among ( "ers", 16, 1, "", this),
new Among ( "ets", 16, 1, "", this),
new Among ( "erets", 28, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "eret", 30, 1, "", this)
};
private static final long serialVersionUID = 1L;

private Among a_1[] = {
new Among ( "gd", -1, -1, "", this),
new Among ( "dt", -1, -1, "", this),
new Among ( "gt", -1, -1, "", this),
new Among ( "kt", -1, -1, "", this)
};
private final static DanishStemmer methodObject = new DanishStemmer ();

private Among a_2[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lig", 0, 1, "", this),
new Among ( "elig", 1, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "l\u00F8st", -1, 2, "", this)
};
private final static Among a_0[] = {
new Among ( "hed", -1, 1, "", methodObject ),
new Among ( "ethed", 0, 1, "", methodObject ),
new Among ( "ered", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "erede", 3, 1, "", methodObject ),
new Among ( "ende", 3, 1, "", methodObject ),
new Among ( "erende", 5, 1, "", methodObject ),
new Among ( "ene", 3, 1, "", methodObject ),
new Among ( "erne", 3, 1, "", methodObject ),
new Among ( "ere", 3, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heden", 10, 1, "", methodObject ),
new Among ( "eren", 10, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heder", 13, 1, "", methodObject ),
new Among ( "erer", 13, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "heds", 16, 1, "", methodObject ),
new Among ( "es", 16, 1, "", methodObject ),
new Among ( "endes", 18, 1, "", methodObject ),
new Among ( "erendes", 19, 1, "", methodObject ),
new Among ( "enes", 18, 1, "", methodObject ),
new Among ( "ernes", 18, 1, "", methodObject ),
new Among ( "eres", 18, 1, "", methodObject ),
new Among ( "ens", 16, 1, "", methodObject ),
new Among ( "hedens", 24, 1, "", methodObject ),
new Among ( "erens", 24, 1, "", methodObject ),
new Among ( "ers", 16, 1, "", methodObject ),
new Among ( "ets", 16, 1, "", methodObject ),
new Among ( "erets", 28, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "eret", 30, 1, "", methodObject )
};

private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private final static Among a_1[] = {
new Among ( "gd", -1, -1, "", methodObject ),
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "gt", -1, -1, "", methodObject ),
new Among ( "kt", -1, -1, "", methodObject )
};

private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private final static Among a_2[] = {
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "lig", 0, 1, "", methodObject ),
new Among ( "elig", 1, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "l\u00F8st", -1, 2, "", methodObject )
};

private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };

private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };

private int I_x;
private int I_p1;
private StringBuilder S_ch = new StringBuilder();
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();

private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}
private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}

private boolean r_mark_regions() {
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
break lab1;
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
break lab3;
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}

private boolean r_main_suffix() {
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}

private boolean r_consonant_pair() {
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 56
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 62
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 62
bra = cursor;
// delete, line 62
slice_del();
return true;
}
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 62
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 62
|
||||
bra = cursor;
|
||||
// delete, line 62
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 65
|
||||
// do, line 66
|
||||
v_1 = limit - cursor;
|
||||
lab0: do {
|
||||
// (, line 66
|
||||
// [, line 66
|
||||
ket = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "st")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// ], line 66
|
||||
bra = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "ig")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// delete, line 66
|
||||
slice_del();
|
||||
} while (false);
|
||||
cursor = limit - v_1;
|
||||
// setlimit, line 67
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 67
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 67
|
||||
// [, line 67
|
||||
ket = cursor;
|
||||
// substring, line 67
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 67
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 70
|
||||
// delete, line 70
|
||||
slice_del();
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab1: do {
|
||||
// call consonant_pair, line 70
|
||||
if (!r_consonant_pair())
|
||||
// (, line 65
|
||||
// do, line 66
|
||||
v_1 = limit - cursor;
|
||||
lab0: do {
|
||||
// (, line 66
|
||||
// [, line 66
|
||||
ket = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "st")))
|
||||
{
|
||||
break lab1;
|
||||
break lab0;
|
||||
}
|
||||
// ], line 66
|
||||
bra = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "ig")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// delete, line 66
|
||||
slice_del();
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
break;
|
||||
case 2:
|
||||
// (, line 72
|
||||
// <-, line 72
|
||||
slice_from("l\u00F8s");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
// setlimit, line 67
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 67
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 67
|
||||
// [, line 67
|
||||
ket = cursor;
|
||||
// substring, line 67
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 67
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 70
|
||||
// delete, line 70
|
||||
slice_del();
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab1: do {
|
||||
// call consonant_pair, line 70
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
break;
|
||||
case 2:
|
||||
// (, line 72
|
||||
// <-, line 72
|
||||
slice_from("l\u00F8s");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_undouble() {
|
||||
private boolean r_undouble() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 75
|
||||
// setlimit, line 76
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 76
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 76
|
||||
// [, line 76
|
||||
ket = cursor;
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 76
|
||||
bra = cursor;
|
||||
// -> ch, line 76
|
||||
S_ch = slice_to(S_ch);
|
||||
limit_backward = v_2;
|
||||
// name ch, line 77
|
||||
if (!(eq_v_b(S_ch)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 78
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
// (, line 75
|
||||
// setlimit, line 76
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 76
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 76
|
||||
// [, line 76
|
||||
ket = cursor;
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 76
|
||||
bra = cursor;
|
||||
// -> ch, line 76
|
||||
S_ch = slice_to(S_ch);
|
||||
limit_backward = v_2;
|
||||
// name ch, line 77
|
||||
if (!(eq_v_b(S_ch)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 78
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
int v_5;
|
||||
// (, line 82
|
||||
// do, line 84
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 84
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
// (, line 82
|
||||
// do, line 84
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 84
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 85
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 85
|
||||
// do, line 86
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 86
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 87
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 87
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 88
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 88
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 89
|
||||
v_5 = limit - cursor;
|
||||
lab4: do {
|
||||
// call undouble, line 89
|
||||
if (!r_undouble())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 85
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 85
|
||||
// do, line 86
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 86
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 87
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 87
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 88
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 88
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 89
|
||||
v_5 = limit - cursor;
|
||||
lab4: do {
|
||||
// call undouble, line 89
|
||||
if (!r_undouble())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward; return true;
|
||||
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof DanishStemmer;
|
||||
}
|
||||
|
||||
}
|
||||
public int hashCode() {
|
||||
return DanishStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
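A quick orientation, separate from the diff itself: every generated stemmer in this commit follows the same SnowballProgram contract (setCurrent, stem, getCurrent). The sketch below is a minimal, hypothetical driver for the Danish stemmer above; it assumes only the org.tartarus.snowball classes from this source tree, and inside Lucene the class is normally wrapped by a SnowballFilter in an analysis chain rather than called directly like this.

import org.tartarus.snowball.ext.DanishStemmer;

public class StemDemo {
  public static void main(String[] args) {
    DanishStemmer stemmer = new DanishStemmer();
    stemmer.setCurrent("bogen");   // load the word to be stemmed
    stemmer.stem();                // runs mark_regions, main_suffix, consonant_pair, other_suffix, undouble
    System.out.println(stemmer.getCurrent());  // read the stemmed form back
  }
}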
File diff suppressed because it is too large (10 files)
@@ -1,358 +1,375 @@
// This file was generated automatically by the Snowball to Java compiler

package org.tartarus.snowball.ext;

import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;

/**
 * This class was automatically generated by a Snowball to Java compiler
 * It implements the stemming algorithm defined by a snowball script.
 */

public class NorwegianStemmer extends SnowballProgram {

private static final long serialVersionUID = 1L;

private final static NorwegianStemmer methodObject = new NorwegianStemmer ();

private final static Among a_0[] = {
    new Among ( "a", -1, 1, "", methodObject ),
    new Among ( "e", -1, 1, "", methodObject ),
    new Among ( "ede", 1, 1, "", methodObject ),
    new Among ( "ande", 1, 1, "", methodObject ),
    new Among ( "ende", 1, 1, "", methodObject ),
    new Among ( "ane", 1, 1, "", methodObject ),
    new Among ( "ene", 1, 1, "", methodObject ),
    new Among ( "hetene", 6, 1, "", methodObject ),
    new Among ( "erte", 1, 3, "", methodObject ),
    new Among ( "en", -1, 1, "", methodObject ),
    new Among ( "heten", 9, 1, "", methodObject ),
    new Among ( "ar", -1, 1, "", methodObject ),
    new Among ( "er", -1, 1, "", methodObject ),
    new Among ( "heter", 12, 1, "", methodObject ),
    new Among ( "s", -1, 2, "", methodObject ),
    new Among ( "as", 14, 1, "", methodObject ),
    new Among ( "es", 14, 1, "", methodObject ),
    new Among ( "edes", 16, 1, "", methodObject ),
    new Among ( "endes", 16, 1, "", methodObject ),
    new Among ( "enes", 16, 1, "", methodObject ),
    new Among ( "hetenes", 19, 1, "", methodObject ),
    new Among ( "ens", 14, 1, "", methodObject ),
    new Among ( "hetens", 21, 1, "", methodObject ),
    new Among ( "ers", 14, 1, "", methodObject ),
    new Among ( "ets", 14, 1, "", methodObject ),
    new Among ( "et", -1, 1, "", methodObject ),
    new Among ( "het", 25, 1, "", methodObject ),
    new Among ( "ert", -1, 3, "", methodObject ),
    new Among ( "ast", -1, 1, "", methodObject )
};

private final static Among a_1[] = {
    new Among ( "dt", -1, -1, "", methodObject ),
    new Among ( "vt", -1, -1, "", methodObject )
};

private final static Among a_2[] = {
    new Among ( "leg", -1, 1, "", methodObject ),
    new Among ( "eleg", 0, 1, "", methodObject ),
    new Among ( "ig", -1, 1, "", methodObject ),
    new Among ( "eig", 2, 1, "", methodObject ),
    new Among ( "lig", 2, 1, "", methodObject ),
    new Among ( "elig", 4, 1, "", methodObject ),
    new Among ( "els", -1, 1, "", methodObject ),
    new Among ( "lov", -1, 1, "", methodObject ),
    new Among ( "elov", 7, 1, "", methodObject ),
    new Among ( "slov", 7, 1, "", methodObject ),
    new Among ( "hetslov", 9, 1, "", methodObject )
};

private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };

private static final char g_s_ending[] = {119, 125, 149, 1 };

private int I_x;
private int I_p1;

private void copy_from(NorwegianStemmer other) {
    I_x = other.I_x;
    I_p1 = other.I_p1;
    super.copy_from(other);
}

private boolean r_mark_regions() {
    int v_1;
    int v_2;
    // (, line 26
    I_p1 = limit;
    // test, line 30
    v_1 = cursor;
    // (, line 30
    // hop, line 30
    {
        int c = cursor + 3;
        if (0 > c || c > limit)
        {
            return false;
        }
        cursor = c;
    }
    // setmark x, line 30
    I_x = cursor;
    cursor = v_1;
    // goto, line 31
    golab0: while(true)
    {
        v_2 = cursor;
        lab1: do {
            if (!(in_grouping(g_v, 97, 248)))
            {
                break lab1;
            }
            cursor = v_2;
            break golab0;
        } while (false);
        cursor = v_2;
        if (cursor >= limit)
        {
            return false;
        }
        cursor++;
    }
    // gopast, line 31
    golab2: while(true)
    {
        lab3: do {
            if (!(out_grouping(g_v, 97, 248)))
            {
                break lab3;
            }
            break golab2;
        } while (false);
        if (cursor >= limit)
        {
            return false;
        }
        cursor++;
    }
    // setmark p1, line 31
    I_p1 = cursor;
    // try, line 32
    lab4: do {
        // (, line 32
        if (!(I_p1 < I_x))
        {
            break lab4;
        }
        I_p1 = I_x;
    } while (false);
    return true;
}

private boolean r_main_suffix() {
    int among_var;
    int v_1;
    int v_2;
    int v_3;
    // (, line 37
    // setlimit, line 38
    v_1 = limit - cursor;
    // tomark, line 38
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 38
    // [, line 38
    ket = cursor;
    // substring, line 38
    among_var = find_among_b(a_0, 29);
    if (among_var == 0)
    {
        limit_backward = v_2;
        return false;
    }
    // ], line 38
    bra = cursor;
    limit_backward = v_2;
    switch(among_var) {
        case 0:
            return false;
        case 1:
            // (, line 44
            // delete, line 44
            slice_del();
            break;
        case 2:
            // (, line 46
            // or, line 46
            lab0: do {
                v_3 = limit - cursor;
                lab1: do {
                    if (!(in_grouping_b(g_s_ending, 98, 122)))
                    {
                        break lab1;
                    }
                    break lab0;
                } while (false);
                cursor = limit - v_3;
                // (, line 46
                // literal, line 46
                if (!(eq_s_b(1, "k")))
                {
                    return false;
                }
                if (!(out_grouping_b(g_v, 97, 248)))
                {
                    return false;
                }
            } while (false);
            // delete, line 46
            slice_del();
            break;
        case 3:
            // (, line 48
            // <-, line 48
            slice_from("er");
            break;
    }
    return true;
}

private boolean r_consonant_pair() {
    int v_1;
    int v_2;
    int v_3;
    // (, line 52
    // test, line 53
    v_1 = limit - cursor;
    // (, line 53
    // setlimit, line 54
    v_2 = limit - cursor;
    // tomark, line 54
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_3 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_2;
    // (, line 54
    // [, line 54
    ket = cursor;
    // substring, line 54
    if (find_among_b(a_1, 2) == 0)
    {
        limit_backward = v_3;
        return false;
    }
    // ], line 54
    bra = cursor;
    limit_backward = v_3;
    cursor = limit - v_1;
    // next, line 59
    if (cursor <= limit_backward)
    {
        return false;
    }
    cursor--;
    // ], line 59
    bra = cursor;
    // delete, line 59
    slice_del();
    return true;
}

private boolean r_other_suffix() {
    int among_var;
    int v_1;
    int v_2;
    // (, line 62
    // setlimit, line 63
    v_1 = limit - cursor;
    // tomark, line 63
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 63
    // [, line 63
    ket = cursor;
    // substring, line 63
    among_var = find_among_b(a_2, 11);
    if (among_var == 0)
    {
        limit_backward = v_2;
        return false;
    }
    // ], line 63
    bra = cursor;
    limit_backward = v_2;
    switch(among_var) {
        case 0:
            return false;
        case 1:
            // (, line 67
            // delete, line 67
            slice_del();
            break;
    }
    return true;
}

public boolean stem() {
    int v_1;
    int v_2;
    int v_3;
    int v_4;
    // (, line 72
    // do, line 74
    v_1 = cursor;
    lab0: do {
        // call mark_regions, line 74
        if (!r_mark_regions())
        {
            break lab0;
        }
    } while (false);
    cursor = v_1;
    // backwards, line 75
    limit_backward = cursor; cursor = limit;
    // (, line 75
    // do, line 76
    v_2 = limit - cursor;
    lab1: do {
        // call main_suffix, line 76
        if (!r_main_suffix())
        {
            break lab1;
        }
    } while (false);
    cursor = limit - v_2;
    // do, line 77
    v_3 = limit - cursor;
    lab2: do {
        // call consonant_pair, line 77
        if (!r_consonant_pair())
        {
            break lab2;
        }
    } while (false);
    cursor = limit - v_3;
    // do, line 78
    v_4 = limit - cursor;
    lab3: do {
        // call other_suffix, line 78
        if (!r_other_suffix())
        {
            break lab3;
        }
    } while (false);
    cursor = limit - v_4;
    cursor = limit_backward; return true;
}

public boolean equals( Object o ) {
    return o instanceof NorwegianStemmer;
}

public int hashCode() {
    return NorwegianStemmer.class.getName().hashCode();
}

}
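The recurring change across these regenerated stemmers is that the Among tables became "private final static" and point at a shared methodObject instance instead of "this": a static initializer runs before any instance exists, so it cannot capture "this", and a singleton stand-in lets each table be built once per class rather than once per stemmer object. A self-contained illustration of that constraint, using hypothetical stand-in types rather than the real Among/SnowballProgram classes:

class Entry {
  final String suffix;
  final Object owner;   // the object a table entry may call back into
  Entry(String suffix, Object owner) { this.suffix = suffix; this.owner = owner; }
}

class ExampleStemmer {
  // "this" is unavailable in a static context, so a shared instance stands in;
  // the suffix table is then built once at class load, not per object.
  private final static ExampleStemmer methodObject = new ExampleStemmer();
  private final static Entry[] a_0 = {
    new Entry("en", methodObject),
    new Entry("et", methodObject),
  };
}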
File diff suppressed because it is too large (5 files)
@@ -1,349 +1,366 @@
// This file was generated automatically by the Snowball to Java compiler

package org.tartarus.snowball.ext;

import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;

/**
 * This class was automatically generated by a Snowball to Java compiler
 * It implements the stemming algorithm defined by a snowball script.
 */

public class SwedishStemmer extends SnowballProgram {

private static final long serialVersionUID = 1L;

private final static SwedishStemmer methodObject = new SwedishStemmer ();

private final static Among a_0[] = {
    new Among ( "a", -1, 1, "", methodObject ),
    new Among ( "arna", 0, 1, "", methodObject ),
    new Among ( "erna", 0, 1, "", methodObject ),
    new Among ( "heterna", 2, 1, "", methodObject ),
    new Among ( "orna", 0, 1, "", methodObject ),
    new Among ( "ad", -1, 1, "", methodObject ),
    new Among ( "e", -1, 1, "", methodObject ),
    new Among ( "ade", 6, 1, "", methodObject ),
    new Among ( "ande", 6, 1, "", methodObject ),
    new Among ( "arne", 6, 1, "", methodObject ),
    new Among ( "are", 6, 1, "", methodObject ),
    new Among ( "aste", 6, 1, "", methodObject ),
    new Among ( "en", -1, 1, "", methodObject ),
    new Among ( "anden", 12, 1, "", methodObject ),
    new Among ( "aren", 12, 1, "", methodObject ),
    new Among ( "heten", 12, 1, "", methodObject ),
    new Among ( "ern", -1, 1, "", methodObject ),
    new Among ( "ar", -1, 1, "", methodObject ),
    new Among ( "er", -1, 1, "", methodObject ),
    new Among ( "heter", 18, 1, "", methodObject ),
    new Among ( "or", -1, 1, "", methodObject ),
    new Among ( "s", -1, 2, "", methodObject ),
    new Among ( "as", 21, 1, "", methodObject ),
    new Among ( "arnas", 22, 1, "", methodObject ),
    new Among ( "ernas", 22, 1, "", methodObject ),
    new Among ( "ornas", 22, 1, "", methodObject ),
    new Among ( "es", 21, 1, "", methodObject ),
    new Among ( "ades", 26, 1, "", methodObject ),
    new Among ( "andes", 26, 1, "", methodObject ),
    new Among ( "ens", 21, 1, "", methodObject ),
    new Among ( "arens", 29, 1, "", methodObject ),
    new Among ( "hetens", 29, 1, "", methodObject ),
    new Among ( "erns", 21, 1, "", methodObject ),
    new Among ( "at", -1, 1, "", methodObject ),
    new Among ( "andet", -1, 1, "", methodObject ),
    new Among ( "het", -1, 1, "", methodObject ),
    new Among ( "ast", -1, 1, "", methodObject )
};

private final static Among a_1[] = {
    new Among ( "dd", -1, -1, "", methodObject ),
    new Among ( "gd", -1, -1, "", methodObject ),
    new Among ( "nn", -1, -1, "", methodObject ),
    new Among ( "dt", -1, -1, "", methodObject ),
    new Among ( "gt", -1, -1, "", methodObject ),
    new Among ( "kt", -1, -1, "", methodObject ),
    new Among ( "tt", -1, -1, "", methodObject )
};

private final static Among a_2[] = {
    new Among ( "ig", -1, 1, "", methodObject ),
    new Among ( "lig", 0, 1, "", methodObject ),
    new Among ( "els", -1, 1, "", methodObject ),
    new Among ( "fullt", -1, 3, "", methodObject ),
    new Among ( "l\u00F6st", -1, 2, "", methodObject )
};

private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };

private static final char g_s_ending[] = {119, 127, 149 };

private int I_x;
private int I_p1;

private void copy_from(SwedishStemmer other) {
    I_x = other.I_x;
    I_p1 = other.I_p1;
    super.copy_from(other);
}

private boolean r_mark_regions() {
    int v_1;
    int v_2;
    // (, line 26
    I_p1 = limit;
    // test, line 29
    v_1 = cursor;
    // (, line 29
    // hop, line 29
    {
        int c = cursor + 3;
        if (0 > c || c > limit)
        {
            return false;
        }
        cursor = c;
    }
    // setmark x, line 29
    I_x = cursor;
    cursor = v_1;
    // goto, line 30
    golab0: while(true)
    {
        v_2 = cursor;
        lab1: do {
            if (!(in_grouping(g_v, 97, 246)))
            {
                break lab1;
            }
            cursor = v_2;
            break golab0;
        } while (false);
        cursor = v_2;
        if (cursor >= limit)
        {
            return false;
        }
        cursor++;
    }
    // gopast, line 30
    golab2: while(true)
    {
        lab3: do {
            if (!(out_grouping(g_v, 97, 246)))
            {
                break lab3;
            }
            break golab2;
        } while (false);
        if (cursor >= limit)
        {
            return false;
        }
        cursor++;
    }
    // setmark p1, line 30
    I_p1 = cursor;
    // try, line 31
    lab4: do {
        // (, line 31
        if (!(I_p1 < I_x))
        {
            break lab4;
        }
        I_p1 = I_x;
    } while (false);
    return true;
}

private boolean r_main_suffix() {
    int among_var;
    int v_1;
    int v_2;
    // (, line 36
    // setlimit, line 37
    v_1 = limit - cursor;
    // tomark, line 37
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 37
    // [, line 37
    ket = cursor;
    // substring, line 37
    among_var = find_among_b(a_0, 37);
    if (among_var == 0)
    {
        limit_backward = v_2;
        return false;
    }
    // ], line 37
    bra = cursor;
    limit_backward = v_2;
    switch(among_var) {
        case 0:
            return false;
        case 1:
            // (, line 44
            // delete, line 44
            slice_del();
            break;
        case 2:
            // (, line 46
            if (!(in_grouping_b(g_s_ending, 98, 121)))
            {
                return false;
            }
            // delete, line 46
            slice_del();
            break;
    }
    return true;
}

private boolean r_consonant_pair() {
    int v_1;
    int v_2;
    int v_3;
    // setlimit, line 50
    v_1 = limit - cursor;
    // tomark, line 50
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 50
    // and, line 52
    v_3 = limit - cursor;
    // among, line 51
    if (find_among_b(a_1, 7) == 0)
    {
        limit_backward = v_2;
        return false;
    }
    cursor = limit - v_3;
    // (, line 52
    // [, line 52
    ket = cursor;
    // next, line 52
    if (cursor <= limit_backward)
    {
        limit_backward = v_2;
        return false;
    }
    cursor--;
    // ], line 52
    bra = cursor;
    // delete, line 52
    slice_del();
    limit_backward = v_2;
    return true;
}

private boolean r_other_suffix() {
    int among_var;
    int v_1;
    int v_2;
    // setlimit, line 55
    v_1 = limit - cursor;
    // tomark, line 55
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 55
    // [, line 56
    ket = cursor;
    // substring, line 56
    among_var = find_among_b(a_2, 5);
    if (among_var == 0)
    {
        limit_backward = v_2;
        return false;
    }
    // ], line 56
    bra = cursor;
    switch(among_var) {
        case 0:
            limit_backward = v_2;
            return false;
        case 1:
            // (, line 57
            // delete, line 57
            slice_del();
            break;
        case 2:
            // (, line 58
            // <-, line 58
            slice_from("l\u00F6s");
            break;
        case 3:
            // (, line 59
            // <-, line 59
            slice_from("full");
            break;
    }
    limit_backward = v_2;
    return true;
}

public boolean stem() {
    int v_1;
    int v_2;
    int v_3;
    int v_4;
    // (, line 64
    // do, line 66
    v_1 = cursor;
    lab0: do {
        // call mark_regions, line 66
        if (!r_mark_regions())
        {
            break lab0;
        }
    } while (false);
    cursor = v_1;
    // backwards, line 67
    limit_backward = cursor; cursor = limit;
    // (, line 67
    // do, line 68
    v_2 = limit - cursor;
    lab1: do {
        // call main_suffix, line 68
        if (!r_main_suffix())
        {
            break lab1;
        }
    } while (false);
    cursor = limit - v_2;
    // do, line 69
    v_3 = limit - cursor;
    lab2: do {
        // call consonant_pair, line 69
        if (!r_consonant_pair())
        {
            break lab2;
        }
    } while (false);
    cursor = limit - v_3;
    // do, line 70
    v_4 = limit - cursor;
    lab3: do {
        // call other_suffix, line 70
        if (!r_other_suffix())
        {
            break lab3;
        }
    } while (false);
    cursor = limit - v_4;
    cursor = limit_backward; return true;
}

public boolean equals( Object o ) {
    return o instanceof SwedishStemmer;
}

public int hashCode() {
    return SwedishStemmer.class.getName().hashCode();
}

}
File diff suppressed because it is too large
@@ -40,7 +40,6 @@ org.apache.lucene.analysis.en.PorterStemFilterFactory
 org.apache.lucene.analysis.es.SpanishLightStemFilterFactory
 org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory
 org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory
-org.apache.lucene.analysis.fr.ElisionFilterFactory
 org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory
 org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory
 org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory
@@ -88,3 +87,4 @@ org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
 org.apache.lucene.analysis.synonym.SynonymFilterFactory
 org.apache.lucene.analysis.th.ThaiWordFilterFactory
 org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
+org.apache.lucene.analysis.util.ElisionFilterFactory
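This listing has the shape of a JDK service-provider file (one implementation class per line, keyed by the service interface name); the two hunks simply re-home ElisionFilterFactory from the .fr package to .util. Assuming the file lives under META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory, the standard ServiceLoader could enumerate whatever is registered, as in the hedged sketch below; Lucene ships its own SPI loader for these factories, so this only illustrates what the file format makes possible.

import java.util.ServiceLoader;
import org.apache.lucene.analysis.util.TokenFilterFactory;

public class ListRegisteredFactories {
  public static void main(String[] args) {
    // Iterates every class named in the services file that is on the classpath.
    for (TokenFilterFactory factory : ServiceLoader.load(TokenFilterFactory.class)) {
      System.out.println(factory.getClass().getName());
    }
  }
}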
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
  */

 import java.io.Reader;
+import java.util.Random;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -33,6 +34,15 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
     }
   };

+  Analyzer unibiAnalyzer = new Analyzer() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(t,
+          new CJKBigramFilter(t, 0xff, true));
+    }
+  };
+
   public void testHuge() throws Exception {
     assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
       + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
@@ -62,6 +72,96 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
     }
   };
   assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
-      new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" });
+      new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" },
+      new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
+      new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
+      new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>",
+                     "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
+      new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+      new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
   }
+
+  public void testAllScripts() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(t,
+            new CJKBigramFilter(t, 0xff, false));
+      }
+    };
+    assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
+        new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
+  }
+
+  public void testUnigramsAndBigramsAllScripts() throws Exception {
+    assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。",
+      new String[] {
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生",
+        "生が", "が", "が試", "試", "試験", "験", "験に", "に",
+        "に落", "落", "落ち", "ち", "ちた", "た"
+      },
+      new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+                  6, 7, 7, 8, 8, 9, 9, 10, 10, 11 },
+      new int[] { 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
+                  8, 8, 9, 9, 10, 10, 11, 11, 12, 12 },
+      new String[] { "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
+                     "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
+                     "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>" },
+      new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+                  0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+      new int[] { 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
+                  2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }
+    );
+  }
+
+  public void testUnigramsAndBigramsHanOnly() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
+      }
+    };
+    assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
+      new String[] { "多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た" },
+      new int[] { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11 },
+      new int[] { 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12 },
+      new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>",
+                     "<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>",
+                     "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
+      new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
+      new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
+  }
+
+  public void testUnigramsAndBigramsHuge() throws Exception {
+    assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+      + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+      + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
+      new String[] {
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+        "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た"
+      }
+    );
+  }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomUnibiStrings() throws Exception {
+    checkRandomData(random(), unibiAnalyzer, 1000*RANDOM_MULTIPLIER);
+  }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomUnibiHugeStrings() throws Exception {
+    Random random = random();
+    checkRandomData(random, unibiAnalyzer, 100*RANDOM_MULTIPLIER, 8192);
+  }
 }
@@ -52,4 +52,16 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamTestCase {
     assertTokenStreamContents(stream,
         new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" });
   }
+
+  public void testHanOnlyUnigrams() throws Exception {
+    Reader reader = new StringReader("多くの学生が試験に落ちた。");
+    CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("hiragana", "false");
+    args.put("outputUnigrams", "true");
+    factory.init(args);
+    TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
+    assertTokenStreamContents(stream,
+        new String[] { "多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た" });
+  }
 }
|
|
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.TestStopFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
 import java.io.StringReader;
@@ -39,7 +39,7 @@ import java.util.HashMap;
 public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
 
   public void testInform() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
     Map<String, String> args = new HashMap<String, String>();
@@ -89,7 +89,7 @@ public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
    * If no words are provided, then a set of english default stopwords is used.
    */
   public void testDefaults() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
     factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
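Every rename hunk in this run is the same LUCENE-4268 change: ResourceAsStreamResourceLoader becomes ClasspathResourceLoader, with unchanged semantics (resources resolved relative to the given class). Factories receive the loader via inform() after init(). A sketch of that lifecycle outside the test framework; the word-list filename is hypothetical and the Version constant assumed:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.Version;

public class LoaderDemo {
  public static void main(String[] argv) throws Exception {
    // Post-rename loader: resolves resource names via LoaderDemo's classloader.
    ResourceLoader loader = new ClasspathResourceLoader(LoaderDemo.class);

    CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
    Map<String,String> args = new HashMap<String,String>();
    args.put("words", "common-words.txt"); // hypothetical file on the classpath
    factory.setLuceneMatchVersion(Version.LUCENE_40);
    factory.init(args);
    factory.inform(loader); // the factory reads its word list through the loader
  }
}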
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.TestStopFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
 import java.io.StringReader;
@@ -39,7 +39,7 @@ import java.util.HashMap;
 public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
 
   public void testInform() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
     Map<String, String> args = new HashMap<String, String>();
@@ -89,7 +89,7 @@ public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
    * If no words are provided, then a set of english default stopwords is used.
    */
   public void testDefaults() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
+    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
     factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
 /**
@@ -40,7 +40,7 @@ public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenStreamTestCase {
     Reader reader = new StringReader("I like to play softball");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    ResourceLoader loader = new ClasspathResourceLoader(getClass());
     Map<String,String> args = new HashMap<String,String>();
     args.put("dictionary", "compoundDictionary.txt");
     factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
 /**
@@ -40,7 +40,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStreamTestCase {
     Reader reader = new StringReader("min veninde som er lidt af en læsehest");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    ResourceLoader loader = new ClasspathResourceLoader(getClass());
     Map<String,String> args = new HashMap<String,String>();
     args.put("hyphenator", "da_UTF8.xml");
     args.put("dictionary", "da_compoundDictionary.txt");
@@ -64,7 +64,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStreamTestCase {
     Reader reader = new StringReader("basketballkurv");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    ResourceLoader loader = new ClasspathResourceLoader(getClass());
     Map<String,String> args = new HashMap<String,String>();
     args.put("hyphenator", "da_UTF8.xml");
     args.put("minSubwordSize", "2");
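Both compound-word factories fetch their resources (XML hyphenation grammar, plain-text dictionary) through the loader handed to inform(). For a self-contained sketch it is easier to show the dictionary-based filter directly and skip the hyphenation grammar; the 4.x constructor shapes and the Version constant are assumptions:

import java.io.StringReader;
import java.util.Arrays;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

public class DecompoundDemo {
  public static void main(String[] argv) throws Exception {
    CharArraySet dict = new CharArraySet(Version.LUCENE_40,
        Arrays.asList("basket", "ball", "kurv"), true);
    TokenStream ts = new DictionaryCompoundWordTokenFilter(Version.LUCENE_40,
        new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("basketballkurv")),
        dict);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // prints the original token plus each dictionary subword found inside it
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }
}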
@@ -285,8 +285,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
     DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                                 MultiFields.getLiveDocs(reader),
                                                                 "content",
-                                                                new BytesRef("another"),
-                                                                false);
+                                                                new BytesRef("another"));
     assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     assertEquals(1, tps.freq());
     assertEquals(3, tps.nextPosition());
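This hunk tracks an index-API cleanup: MultiFields.getTermPositionsEnum loses its trailing needsOffsets boolean and becomes just (reader, liveDocs, field, term). A sketch of walking positions with the four-argument form, mirroring the calls in the diff:

import java.io.IOException;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

final class PositionsDemo {
  // Prints every (doc, position) pair for one term across the whole reader.
  static void printPositions(IndexReader reader, String field, String text) throws IOException {
    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(
        reader, MultiFields.getLiveDocs(reader), field, new BytesRef(text));
    if (tps == null) {
      return; // field/term absent, or positions not indexed
    }
    while (tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      int freq = tps.freq();
      for (int i = 0; i < freq; i++) {
        System.out.println("doc=" + tps.docID() + " pos=" + tps.nextPosition());
      }
    }
  }
}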
@@ -103,7 +103,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
                                 new BytesRef("Q36"),
                                 MultiFields.getLiveDocs(reader),
                                 null,
-                                false);
+                                0);
     assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     td = _TestUtil.docs(random(),
                         reader,
@@ -111,7 +111,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
                         new BytesRef("Q37"),
                         MultiFields.getLiveDocs(reader),
                         null,
-                        false);
+                        0);
     assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
   }
 
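Here the boolean needsFreqs argument of the docs() path becomes an int flags word: the literal 0 in the updated calls asks for document IDs only (released 4.0 names this DocsEnum.FLAG_NONE, with FLAG_FREQS for frequencies). A sketch against the public TermsEnum API rather than the _TestUtil helper; treat the flag naming as an assumption at this revision:

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

final class DocCountDemo {
  // Counts live documents containing the term, requesting no freq/position data.
  static int countDocs(IndexReader reader, String field, String text) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return 0;
    }
    TermsEnum te = terms.iterator(null);
    if (!te.seekExact(new BytesRef(text), false)) {
      return 0;
    }
    // 0 == no optional postings data needed; pass a freqs flag to enable freq()
    DocsEnum docs = te.docs(MultiFields.getLiveDocs(reader), null, 0);
    int count = 0;
    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    return count;
  }
}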
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
 import java.util.Map;
@@ -32,7 +32,7 @@ import java.util.HashMap;
 public class TestStopFilterFactory extends BaseTokenStreamTestCase {
 
   public void testInform() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    ResourceLoader loader = new ClasspathResourceLoader(getClass());
     assertTrue("loader is null and it shouldn't be", loader != null);
     StopFilterFactory factory = new StopFilterFactory();
     Map<String, String> args = new HashMap<String, String>();
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.NumericTokenStream;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.junit.Test;
 
@@ -34,7 +34,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
 
   @Test
   public void testInform() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    ResourceLoader loader = new ClasspathResourceLoader(getClass());
     TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
     Map<String, String> args = new HashMap<String, String>();
     args.put("types", "stoptypes-1.txt");
@@ -94,7 +94,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
       args.put("enablePositionIncrements", "false");
       typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
       typeTokenFilterFactory.init(args);
-      typeTokenFilterFactory.inform(new ResourceAsStreamResourceLoader(getClass()));
+      typeTokenFilterFactory.inform(new ClasspathResourceLoader(getClass()));
       fail("not supplying 'types' parameter should cause an IllegalArgumentException");
     } catch (IllegalArgumentException e) {
       // everything ok
@@ -25,7 +25,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 
 /**
  * Simple tests to ensure the Hunspell stemmer loads from factory
@@ -38,7 +38,7 @@ public class TestHunspellStemFilterFactory extends BaseTokenStreamTestCase {
     args.put("affix", "test.aff");
     factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
     factory.init(args);
-    factory.inform(new ResourceAsStreamResourceLoader(getClass()));
+    factory.inform(new ClasspathResourceLoader(getClass()));
 
     Reader reader = new StringReader("abc");
     TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
 import java.util.Map;
@@ -32,7 +32,7 @@ import java.util.HashMap;
 public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
 
   public void testInform() throws Exception {
-    ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
+    ResourceLoader loader = new ClasspathResourceLoader(getClass());
     assertTrue("loader is null and it shouldn't be", loader != null);
     KeepWordFilterFactory factory = new KeepWordFilterFactory();
     Map<String, String> args = new HashMap<String, String>();
@@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     TermsEnum termsEnum = vector.iterator(null);
     termsEnum.next();
     assertEquals(2, termsEnum.totalTermFreq());
-    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null, true);
+    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
     assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     assertEquals(2, positions.freq());
     positions.nextPosition();
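The same cleanup reaches TermsEnum.docsAndPositions: the needsOffsets boolean disappears, leaving only liveDocs and an optional reusable enum. A short sketch of the new call shape:

import java.io.IOException;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;

final class FirstPositionDemo {
  // Returns the first position of the enum's current term, or -1 if none.
  static int firstPosition(TermsEnum termsEnum) throws IOException {
    // null liveDocs = all docs; null reuse = allocate a fresh enum
    DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
    if (postings == null || postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
      return -1;
    }
    return postings.nextPosition();
  }
}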
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -116,4 +117,21 @@ public class TestSnowball extends BaseTokenStreamTestCase {
       checkOneTermReuse(a, "", "");
     }
   }
+
+  public void testRandomStrings() throws IOException {
+    for (String lang : SNOWBALL_LANGS) {
+      checkRandomStrings(lang);
+    }
+  }
+
+  public void checkRandomStrings(final String snowballLanguage) throws IOException {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer t = new MockTokenizer(reader);
+        return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
+      }
+    };
+    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
+  }
 }
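These added tests stress every generated Snowball stemmer with random data. Outside the test harness, SnowballFilter takes the stemmer name as a plain string; a sketch with an assumed Version constant:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class SnowballDemo {
  public static void main(String[] argv) throws IOException {
    // "English" selects the generated English Snowball stemmer class
    TokenStream ts = new SnowballFilter(
        new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("running runs ran")),
        "English");
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // run, run, ran
    }
    ts.end();
    ts.close();
  }
}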