LUCENE-3312: Merged revision(s) 1366639-1371131 from lucene/dev/trunk:

SOLR-3259: default /get to json
........
minor tweaks to update script example
........
LUCENE-4268: Rename ResourceAsStreamResourceLoader to ClasspathResourceLoader, provide FilesystemResourceLoader, bug fixing
........
LUCENE-4268: Fix test bug
........
SOLR-3648: Fix Velocity template loading in SolrCloud mode
........
fix confusing IW infoStream message
........
Fix rawtypes warning in java 7 and 8, make the SuppressWarnings more local
........
Nicer solution to generic array creation (still problematic in Java 6, but correct in Java 7 if done this way)
........
Disable test failing with Java 8
........
Allow detecting of Java 8
........
LUCENE-4109: BooleanQueries are not parsed correctly with the flexible queryparser
........
LUCENE-4269: remove BalancedSegmentMergePolicy (use TieredMergePolicy instead)
........
LUCENE-4269: deprecate BalancedSegmentMergePolicy (use TieredMergePolicy instead)
........
LUCENE-4190: restrict allowed filenames to reduce risk of deleting non-lucene file from the index directory
........
fix the monkey: connection loss and expiration cause NPE
........
upgrade checkJavaDocs.py to python3
........
LUCENE-3884: Move ElisionFilter out of .fr package
........
fix encoding in javadocs checker
........
LUCENE-2501: fix thread hazard when threads add same field with different IndexOptions at the same time
........


git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3312@1371142 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2012-08-09 10:20:53 +00:00
commit 148d99cbbc
526 changed files with 30002 additions and 25473 deletions

View File

@ -176,22 +176,57 @@
</subant>
</target>
<target name="jar-checksums" depends="resolve" description="Recompute SHA1 checksums for all JAR files.">
<delete>
<fileset dir="${basedir}">
<include name="**/*.jar.sha1"/>
</fileset>
</delete>
<target name="jar-checksums" description="Recompute SHA1 checksums for all JAR files.">
<sequential>
<subant target="jar-checksums" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant>
</sequential>
</target>
<checksum algorithm="SHA1" fileext=".sha1">
<fileset dir="${basedir}">
<include name="**/*.jar"/>
</fileset>
</checksum>
<property name="python32.exe" value="python3.2" />
<property name="JAVA6_HOME" value="/usr/local/jdk1.6.0_27"/>
<property name="JAVA7_HOME" value="/usr/local/jdk1.7.0_01"/>
<property name="fakeRelease" value="lucene/build/fakeRelease"/>
<property name="fakeReleaseTmp" value="lucene/build/fakeReleaseTmp"/>
<property name="fakeReleaseVersion" value="5.0"/> <!-- *not* -SNAPSHOT, the real version -->
<fixcrlf
srcdir="${basedir}"
includes="**/*.jar.sha1"
eol="lf" fixlast="true" encoding="US-ASCII" />
<target name="nightly-smoke" description="Builds an unsigned release and smoke tests it." depends="clean">
<sequential>
<subant target="prepare-release-no-sign" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
<property name="version" value="${fakeReleaseVersion}" />
</subant>
<delete dir="${fakeRelease}"/>
<delete dir="${fakeReleaseTmp}"/>
<mkdir dir="${fakeRelease}"/>
<copy todir="${fakeRelease}/lucene">
<fileset dir="lucene/dist"/>
</copy>
<copy todir="${fakeRelease}/lucene/changes">
<fileset dir="lucene/build/docs/changes"/>
</copy>
<get src="http://people.apache.org/keys/group/lucene.asc"
dest="${fakeRelease}/lucene/KEYS"/>
<copy todir="${fakeRelease}/solr">
<fileset dir="solr/package"/>
</copy>
<copy file="${fakeRelease}/lucene/KEYS" todir="${fakeRelease}/solr"/>
<makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
<exec executable="${python32.exe}" failonerror="true">
<arg value="-u"/>
<arg value="dev-tools/scripts/smokeTestRelease.py"/>
<arg value="${fakeRelease.uri}"/>
<arg value="${fakeReleaseVersion}"/>
<arg value="${fakeReleaseTmp}"/>
<arg value="false"/>
<env key="JAVA6_HOME" value="${JAVA6_HOME}"/>
<env key="JAVA7_HOME" value="${JAVA7_HOME}"/>
</exec>
<delete dir="${fakeRelease}"/>
<delete dir="${fakeReleaseTmp}"/>
</sequential>
</target>
</project>
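
The removed inline logic above deleted every stale *.jar.sha1 sidecar, recomputed a SHA1 digest per JAR via Ant's <checksum> task, and normalized the result with <fixcrlf>; the new target delegates the same work to the lucene/ and solr/ sub-builds. A rough Python sketch of that per-JAR behavior (the walk root and the bare-digest-plus-LF sidecar format are assumptions, not taken from the build):

import hashlib
from pathlib import Path

def regenerate_sha1_sidecars(root='.'):
    # <delete><fileset includes="**/*.jar.sha1"/></delete>
    for stale in Path(root).rglob('*.jar.sha1'):
        stale.unlink()
    # <checksum algorithm="SHA1" fileext=".sha1"> over **/*.jar
    for jar in Path(root).rglob('*.jar'):
        digest = hashlib.sha1(jar.read_bytes()).hexdigest()
        # <fixcrlf eol="lf" fixlast="true" encoding="US-ASCII"/>:
        # ASCII hex digest with a single trailing LF
        jar.with_name(jar.name + '.sha1').write_text(digest + '\n',
                                                     encoding='ascii')

The new nightly-smoke target below it then builds an unsigned fake release and drives dev-tools/scripts/smokeTestRelease.py (ported to Python 3 later in this commit) against the fake release URI.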

View File

@ -15,30 +15,30 @@
<classpathentry kind="src" path="lucene/sandbox/src/java"/>
<classpathentry kind="src" path="lucene/sandbox/src/test"/>
<classpathentry kind="src" path="lucene/test-framework/src/java"/>
<classpathentry kind="src" output="bin.tests-framework" path="lucene/test-framework/src/resources"/>
<classpathentry kind="src" output="bin/tests-framework" path="lucene/test-framework/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/common/src/java"/>
<classpathentry kind="src" output="bin.analysis-common" path="lucene/analysis/common/src/resources"/>
<classpathentry kind="src" output="bin/analysis-common" path="lucene/analysis/common/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/common/src/test"/>
<classpathentry kind="src" path="lucene/analysis/icu/src/java"/>
<classpathentry kind="src" output="bin.analysis-icu" path="lucene/analysis/icu/src/resources"/>
<classpathentry kind="src" output="bin/analysis-icu" path="lucene/analysis/icu/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/icu/src/test"/>
<classpathentry kind="src" path="lucene/analysis/kuromoji/src/java"/>
<classpathentry kind="src" output="bin.analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
<classpathentry kind="src" output="bin/analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/kuromoji/src/test"/>
<classpathentry kind="src" path="lucene/analysis/phonetic/src/java"/>
<classpathentry kind="src" output="bin.analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
<classpathentry kind="src" output="bin/analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/phonetic/src/test"/>
<classpathentry kind="src" path="lucene/analysis/smartcn/src/java"/>
<classpathentry kind="src" output="bin.analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
<classpathentry kind="src" output="bin/analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/smartcn/src/test"/>
<classpathentry kind="src" path="lucene/analysis/stempel/src/java"/>
<classpathentry kind="src" output="bin.analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
<classpathentry kind="src" output="bin/analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/stempel/src/test"/>
<classpathentry kind="src" path="lucene/analysis/morfologik/src/java"/>
<classpathentry kind="src" output="bin.analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
<classpathentry kind="src" output="bin/analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/morfologik/src/test"/>
<classpathentry kind="src" path="lucene/analysis/uima/src/java"/>
<classpathentry kind="src" output="bin.analysis-uima" path="lucene/analysis/uima/src/resources"/>
<classpathentry kind="src" output="bin/analysis-uima" path="lucene/analysis/uima/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/uima/src/test"/>
<classpathentry kind="src" path="lucene/benchmark/src/java"/>
<classpathentry kind="src" path="lucene/benchmark/src/test"/>
@ -120,7 +120,7 @@
<classpathentry kind="lib" path="solr/lib/slf4j-api-1.6.4.jar"/>
<classpathentry kind="lib" path="solr/lib/slf4j-jdk14-1.6.4.jar"/>
<classpathentry kind="lib" path="solr/lib/wstx-asl-3.2.7.jar"/>
<classpathentry kind="lib" path="solr/lib/zookeeper-3.3.5.jar"/>
<classpathentry kind="lib" path="solr/lib/zookeeper-3.3.6.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-continuation-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-deploy-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-http-8.1.2.v20120308.jar"/>
@ -175,5 +175,5 @@
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
<classpathentry kind="output" path="bin"/>
<classpathentry kind="output" path="bin/other"/>
</classpath>

View File

@ -36,27 +36,25 @@ A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts
B. How to generate Lucene/Solr Maven artifacts
Prerequisites: JDK 1.6+ and Ant 1.7.X
Prerequisites: JDK 1.6+ and Ant 1.8.2+
Run 'ant generate-maven-artifacts' to create an internal Maven
repository, including POMs, binary .jars, source .jars, and javadoc
.jars.
You can run the above command in four possible places: the top-level
directory; under lucene/; under solr/; or under modules/. From the
top-level directory, from lucene/, or from modules/, the internal
repository will be located at dist/maven/. From solr/, the internal
repository will be located at package/maven/.
You can run the above command in three possible places: the top-level
directory; under lucene/; or under solr/. From the top-level directory
or from lucene/, the internal repository will be located at dist/maven/.
From solr/, the internal repository will be located at package/maven/.
C. How to deploy Maven artifacts to a repository
Prerequisites: JDK 1.6+ and Ant 1.7.X
Prerequisites: JDK 1.6+ and Ant 1.8.2+
You can deploy targets for all of Lucene/Solr, only Lucene, only Solr,
or only modules/, as in B. above. To deploy to a Maven repository, the
command is the same as in B. above, with the addition of two system
properties:
You can deploy targets for all of Lucene/Solr, only Lucene, or only Solr,
as in B. above. To deploy to a Maven repository, the command is the same
as in B. above, with the addition of two system properties:
ant -Dm2.repository.id=my-repo-id \
-Dm2.repository.url=http://example.org/my/repo \
@ -101,7 +99,7 @@ D. How to use Maven to build Lucene/Solr
the default, you can supply an alternate version on the command line
with the above command, e.g.:
ant -Dversion=5.0-my-special-version get-maven-poms
ant -Dversion=my-special-version get-maven-poms
Note: if you change the version in the POMs, there is one test method
that will fail under maven-surefire-plugin:

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -77,33 +71,5 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.analysis.charfilter.HtmlStripCharFilter</mainClass>
<name>HtmlStripCharFilter</name>
</program>
<program>
<mainClass>org.apache.lucene.analysis.en.PorterStemmer</mainClass>
<name>EnglishPorterStemmer</name>
</program>
<program>
<mainClass>org.tartarus.snowball.TestApp</mainClass>
<name>SnowballTestApp</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -40,15 +40,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -75,6 +69,11 @@
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>

View File

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -75,6 +69,11 @@
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -41,15 +41,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -120,41 +114,5 @@
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.benchmark.byTask.Benchmark</mainClass>
<name>Benchmark</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.quality.trec.QueryDriver</mainClass>
<name>QueryDriver</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder</mainClass>
<name>QualityQueriesFinder</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.utils.ExtractReuters</mainClass>
<name>ExtractReuters</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.utils.ExtractWikipedia</mainClass>
<name>ExtractWikipedia</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -104,40 +98,6 @@
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.index.CheckIndex</mainClass>
<name>CheckIndex</name>
</program>
<program>
<mainClass>org.apache.lucene.index.IndexReader</mainClass>
<name>IndexReader</name>
</program>
<program>
<mainClass>org.apache.lucene.store.LockStressTest</mainClass>
<name>LockStressTest</name>
</program>
<program>
<mainClass>org.apache.lucene.store.LockVerifyServer</mainClass>
<name>LockVerifyServer</name>
</program>
<program>
<mainClass>org.apache.lucene.util.English</mainClass>
<name>English</name>
</program>
</programs>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -87,30 +81,5 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<assembleDirectory>${build-directory}</assembleDirectory>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.demo.IndexFiles</mainClass>
<name>IndexFiles</name>
</program>
<program>
<mainClass>org.apache.lucene.demo.SearchFiles</mainClass>
<name>SearchFiles</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -72,49 +66,5 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.index.FieldNormModifier</mainClass>
<name>FieldNormModifier</name>
</program>
<program>
<mainClass>org.apache.lucene.index.IndexSplitter</mainClass>
<name>IndexSplitter</name>
</program>
<program>
<mainClass>org.apache.lucene.index.MultiPassIndexSplitter</mainClass>
<name>MultiPassIndexSplitter</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.GetTermInfo</mainClass>
<name>GetTermInfo</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.HighFreqTerms</mainClass>
<name>HighFreqTerms</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.IndexMergeTool</mainClass>
<name>IndexMergeTool</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.LengthNormModifier</mainClass>
<name>LengthNormModifier</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -35,15 +35,9 @@
<module-directory>lucene</module-directory>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<modules>
<module>core</module>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -31,15 +31,18 @@
<version>@version@</version>
<packaging>pom</packaging>
<name>Grandparent POM for Apache Lucene Core and Apache Solr</name>
<description>Parent POM for Apache Lucene Core and Apache Solr</description>
<url>http://lucene.apache.org/java</url>
<description>Grandparent POM for Apache Lucene Core and Apache Solr</description>
<url>http://lucene.apache.org</url>
<modules>
<module>lucene</module>
<module>solr</module>
</modules>
<properties>
<top-level>..</top-level>
<base.specification.version>4.0.0</base.specification.version>
<vc-anonymous-base-url>http://svn.apache.org/repos/asf/lucene/dev/trunk</vc-anonymous-base-url>
<vc-dev-base-url>https://svn.apache.org/repos/asf/lucene/dev/trunk</vc-dev-base-url>
<vc-browse-base-url>http://svn.apache.org/viewvc/lucene/dev/trunk</vc-browse-base-url>
<base.specification.version>5.0.0</base.specification.version>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
<java.compat.version>1.6</java.compat.version>
<jetty.version>8.1.2.v20120308</jetty.version>
@ -69,11 +72,11 @@
</properties>
<issueManagement>
<system>JIRA</system>
<url>http://issues.apache.org/jira/browse/LUCENE</url>
<url>https://issues.apache.org/jira/browse/LUCENE</url>
</issueManagement>
<ciManagement>
<system>Hudson</system>
<url>http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/</url>
<system>Jenkins</system>
<url>https://builds.apache.org/computer/lucene/</url>
</ciManagement>
<mailingLists>
<mailingList>
@ -109,15 +112,9 @@
</mailingLists>
<inceptionYear>2000</inceptionYear>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk
</url>
<connection>scm:svn:${vc-anonymous-base-url}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}</developerConnection>
<url>${vc-browse-base-url}</url>
</scm>
<licenses>
<license>
@ -298,7 +295,7 @@
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.3.5</version>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.carrot2</groupId>
@ -549,11 +546,6 @@
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<version>1.2.1</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -41,15 +41,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -42,15 +42,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -38,15 +38,9 @@
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -254,37 +248,6 @@
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
<name>JettySolrRunner</name>
</program>
<program>
<mainClass>org.apache.solr.util.BitSetPerf</mainClass>
<name>BitSetPerf</name>
<extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
</program>
<program>
<mainClass>org.apache.solr.util.SimplePostTool</mainClass>
<name>SimplePostTool</name>
</program>
<program>
<mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
<name>SuggestMissingFactories</name>
</program>
</programs>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>

View File

@ -43,26 +43,14 @@
<module-directory>solr</module-directory>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<issueManagement>
<system>JIRA</system>
<url>http://issues.apache.org/jira/browse/SOLR</url>
<url>https://issues.apache.org/jira/browse/SOLR</url>
</issueManagement>
<ciManagement>
<system>Hudson</system>
<url>
http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
</url>
</ciManagement>
<mailingLists>
<mailingList>
<name>Solr User List</name>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<!-- These dependencies are compile scope because this is a test framework. -->
@ -60,20 +54,27 @@
<artifactId>solr-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
you can exclude the three Jetty dependencies below. -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>runtime</scope>
<artifactId>jetty-servlet</artifactId>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
</dependency>
<!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
you can exclude the two Jetty dependencies below. -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>

View File

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

View File

@ -23,7 +23,7 @@ reMarkup = re.compile('<.*?>')
def checkSummary(fullPath):
printed = False
f = open(fullPath)
f = open(fullPath, encoding='UTF-8')
anyMissing = False
sawPackage = False
desc = []
@ -41,10 +41,10 @@ def checkSummary(fullPath):
desc = desc.strip()
if desc == '':
if not printed:
print
print fullPath
print()
print(fullPath)
printed = True
print ' no package description (missing package.html in src?)'
print(' no package description (missing package.html in src?)')
anyMissing = True
desc = None
else:
@ -52,17 +52,17 @@ def checkSummary(fullPath):
if lineLower in ('<td>&nbsp;</td>', '<td></td>', '<td class="collast">&nbsp;</td>'):
if not printed:
print
print fullPath
print()
print(fullPath)
printed = True
print ' missing: %s' % unescapeHTML(lastHREF)
print(' missing: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:
print
print fullPath
print()
print(fullPath)
printed = True
print ' license-is-javadoc: %s' % unescapeHTML(lastHREF)
print(' license-is-javadoc: %s' % unescapeHTML(lastHREF))
anyMissing = True
m = reHREF.search(line)
if m is not None:
@ -85,17 +85,17 @@ def checkPackageSummaries(root, level='class'):
"""
if level != 'class' and level != 'package':
print 'unsupported level: %s, must be "class" or "package"' % level
print('unsupported level: %s, must be "class" or "package"' % level)
sys.exit(1)
#for dirPath, dirNames, fileNames in os.walk('%s/lucene/build/docs/api' % root):
if False:
os.chdir(root)
print
print 'Run "ant javadocs" > javadocs.log...'
print()
print('Run "ant javadocs" > javadocs.log...')
if os.system('ant javadocs > javadocs.log 2>&1'):
print ' FAILED'
print(' FAILED')
sys.exit(1)
anyMissing = False
@ -116,14 +116,14 @@ def checkPackageSummaries(root, level='class'):
if __name__ == '__main__':
if len(sys.argv) < 2 or len(sys.argv) > 3:
print 'usage: %s <dir> [class|package]' % sys.argv[0]
print('usage: %s <dir> [class|package]' % sys.argv[0])
sys.exit(1)
if len(sys.argv) == 2:
level = 'class'
else:
level = sys.argv[2]
if checkPackageSummaries(sys.argv[1], level):
print
print 'Missing javadocs were found!'
print()
print('Missing javadocs were found!')
sys.exit(1)
sys.exit(0)
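
The port above is mostly mechanical: print statements become print() calls and files are opened with an explicit encoding rather than the platform default. A condensed, hypothetical sketch of the scan pattern the script applies to generated javadoc pages (the helper name and the simplified cell list are illustrative):

import os

def scan_summaries(root):
    any_missing = False
    for dir_path, _, file_names in os.walk(root):
        if 'package-summary.html' not in file_names:
            continue
        path = os.path.join(dir_path, 'package-summary.html')
        # Python 3: decode explicitly instead of relying on the locale
        with open(path, encoding='UTF-8') as f:
            for line in f:
                if line.strip().lower() in ('<td>&nbsp;</td>', '<td></td>',
                                            '<td class="collast">&nbsp;</td>'):
                    print('  missing: %s' % path)  # print() is a function now
                    any_missing = True
    return any_missing
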

View File

@ -20,12 +20,12 @@ import subprocess
import signal
import shutil
import hashlib
import httplib
import http.client
import re
import urllib2
import urlparse
import urllib.request, urllib.error, urllib.parse
import urllib.parse
import sys
import HTMLParser
import html.parser
from collections import defaultdict
import xml.etree.ElementTree as ET
import filecmp
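
The import hunk is the standard Python 2 to 3 stdlib renaming; call sites change to match, and network reads now return bytes. A minimal sketch of the new spellings (the URL is a placeholder):

# Python 2 name        ->  Python 3 name (as used above)
# httplib              ->  http.client
# urllib2 / urlparse   ->  urllib.request / urllib.parse
# HTMLParser           ->  html.parser
import http.client
import urllib.parse
import urllib.request

url = 'http://localhost:8983/solr/select/?q=video'  # placeholder URL
parsed = urllib.parse.urlparse(url)                 # was: urlparse.urlparse(url)
conn = http.client.HTTPConnection(parsed.netloc)    # was: httplib.HTTPConnection
conn.request('HEAD', parsed.path)
print(conn.getresponse().status)
# urlopen() now returns bytes, hence the .decode('UTF-8') calls in the diff:
body = urllib.request.urlopen(url).read().decode('UTF-8')
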
@ -38,9 +38,9 @@ import checkJavadocLinks
# tested on Linux and on Cygwin under Windows 7.
def unshortenURL(url):
parsed = urlparse.urlparse(url)
parsed = urllib.parse.urlparse(url)
if parsed[0] in ('http', 'https'):
h = httplib.HTTPConnection(parsed.netloc)
h = http.client.HTTPConnection(parsed.netloc)
h.request('HEAD', parsed.path)
response = h.getresponse()
if response.status/100 == 3 and response.getheader('Location'):
@ -101,8 +101,8 @@ def getHREFs(urlString):
# Deref any redirects
while True:
url = urlparse.urlparse(urlString)
h = httplib.HTTPConnection(url.netloc)
url = urllib.parse.urlparse(urlString)
h = http.client.HTTPConnection(url.netloc)
h.request('GET', url.path)
r = h.getresponse()
newLoc = r.getheader('location')
@ -112,8 +112,8 @@ def getHREFs(urlString):
break
links = []
for subUrl, text in reHREF.findall(urllib2.urlopen(urlString).read()):
fullURL = urlparse.urljoin(urlString, subUrl)
for subUrl, text in reHREF.findall(urllib.request.urlopen(urlString).read().decode('UTF-8')):
fullURL = urllib.parse.urljoin(urlString, subUrl)
links.append((text, fullURL))
return links
@ -121,15 +121,15 @@ def download(name, urlString, tmpDir, quiet=False):
fileName = '%s/%s' % (tmpDir, name)
if DEBUG and os.path.exists(fileName):
if not quiet and fileName.find('.asc') == -1:
print ' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
return
fIn = urllib2.urlopen(urlString)
fIn = urllib.request.urlopen(urlString)
fOut = open(fileName, 'wb')
success = False
try:
while True:
s = fIn.read(65536)
if s == '':
if s == b'':
break
fOut.write(s)
fOut.close()
@ -141,14 +141,14 @@ def download(name, urlString, tmpDir, quiet=False):
if not success:
os.remove(fileName)
if not quiet and fileName.find('.asc') == -1:
print ' %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
def load(urlString):
return urllib2.urlopen(urlString).read()
return urllib.request.urlopen(urlString).read().decode('utf-8')
def checkSigs(project, urlString, version, tmpDir, isSigned):
print ' test basics...'
print(' test basics...')
ents = getDirEntries(urlString)
artifact = None
keysURL = None
@ -210,7 +210,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
if keysURL is None:
raise RuntimeError('%s is missing KEYS' % project)
print ' get KEYS'
print(' get KEYS')
download('%s.KEYS' % project, keysURL, tmpDir)
keysFile = '%s/%s.KEYS' % (tmpDir, project)
@ -219,7 +219,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
os.makedirs(gpgHomeDir, 0700)
os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
@ -232,12 +232,12 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
testChanges(project, version, changesURL)
for artifact, urlString in artifacts:
print ' download %s...' % artifact
print(' download %s...' % artifact)
download(artifact, urlString, tmpDir)
verifyDigests(artifact, urlString, tmpDir)
if isSigned:
print ' verify sig'
print(' verify sig')
# Test sig (this is done with a clean brand-new GPG world)
download(artifact + '.asc', urlString + '.asc', tmpDir)
sigFile = '%s/%s.asc' % (tmpDir, artifact)
@ -246,28 +246,28 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since its a clean world)
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
run('gpg --import %s' % (keysFile),
'%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
print ' verify trust'
print(' verify trust')
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
def testChanges(project, version, changesURLString):
print ' check changes HTML...'
print(' check changes HTML...')
changesURL = None
for text, subURL in getDirEntries(changesURLString):
if text == 'Changes.html':
@ -287,7 +287,7 @@ def testChangesText(dir, version, project):
if 'CHANGES.txt' in files:
fullPath = '%s/CHANGES.txt' % root
#print 'CHECK %s' % fullPath
checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
def checkChangesContent(s, version, name, project, isHTML):
@ -336,7 +336,7 @@ def run(command, logFile):
raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath))
def verifyDigests(artifact, urlString, tmpDir):
print ' verify md5/sha1 digests'
print(' verify md5/sha1 digests')
md5Expected, t = load(urlString + '.md5').strip().split()
if t != '*'+artifact:
raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))
@ -347,10 +347,10 @@ def verifyDigests(artifact, urlString, tmpDir):
m = hashlib.md5()
s = hashlib.sha1()
f = open('%s/%s' % (tmpDir, artifact))
f = open('%s/%s' % (tmpDir, artifact), 'rb')
while True:
x = f.read(65536)
if x == '':
if len(x) == 0:
break
m.update(x)
s.update(x)
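
Two related Python 3 fixes meet here: the artifact is opened in binary mode, and EOF is an empty bytes object, so the old test against the empty string would loop forever. A self-contained version of the chunked digest loop (the function name and chunk parameter are illustrative):

import hashlib

def digests(path, chunk=65536):
    m, s = hashlib.md5(), hashlib.sha1()
    with open(path, 'rb') as f:   # binary mode: hash functions want bytes
        while True:
            x = f.read(chunk)
            if len(x) == 0:       # b'' at EOF; comparing to '' never matches
                break
            m.update(x)
            s.update(x)
    return m.hexdigest(), s.hexdigest()
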
@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, tmpDir):
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
def getDirEntries(urlString):
if urlString.startswith('file:/') and not urlString.startswith('file://'):
# stupid bogus ant URI
urlString = "file:///" + urlString[6:]
if urlString.startswith('file://'):
path = urlString[7:]
if path.endswith('/'):
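
Ant's <makeurl> (used by the nightly-smoke target earlier in this commit) can emit single-slash file:/ URIs; the new guard above canonicalizes them before the file:// branch runs. In isolation (the asserted path is made up):

def fix_ant_file_uri(url):
    # 'file:/x/y' (bogus Ant form) -> 'file:///x/y'
    if url.startswith('file:/') and not url.startswith('file://'):
        url = 'file:///' + url[len('file:/'):]
    return url

assert fix_ant_file_uri('file:/tmp/fakeRelease') == 'file:///tmp/fakeRelease'
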
@ -388,7 +392,7 @@ def unpack(project, tmpDir, artifact, version):
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
print ' unpack %s...' % artifact
print(' unpack %s...' % artifact)
unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'):
run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
@ -437,12 +441,14 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
if project == 'lucene':
# TODO: clean this up to not be a list of modules that we must maintain
extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework')
extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
if isSrc:
extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
else:
extras = ()
# TODO: if solr, verify lucene/licenses, solr/licenses are present
for e in extras:
if e not in l:
raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
@ -453,81 +459,81 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
if isSrc:
print ' make sure no JARs/WARs in src dist...'
print(' make sure no JARs/WARs in src dist...')
lines = os.popen('find . -name \\*.jar').readlines()
if len(lines) != 0:
print ' FAILED:'
print(' FAILED:')
for line in lines:
print ' %s' % line.strip()
print(' %s' % line.strip())
raise RuntimeError('source release has JARs...')
lines = os.popen('find . -name \\*.war').readlines()
if len(lines) != 0:
print ' FAILED:'
print(' FAILED:')
for line in lines:
print ' %s' % line.strip()
print(' %s' % line.strip())
raise RuntimeError('source release has WARs...')
print ' run "ant validate"'
print(' run "ant validate"')
run('%s; ant validate' % javaExe('1.7'), '%s/validate.log' % unpackPath)
if project == 'lucene':
print ' run tests w/ Java 6...'
print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath)
testDemo(isSrc, version)
# test javadocs
print ' generate javadocs w/ Java 6...'
print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
else:
print ' run tests w/ Java 6...'
print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
# test javadocs
print ' generate javadocs w/ Java 6...'
print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
print ' run tests w/ Java 7...'
print(' run tests w/ Java 7...')
run('%s; ant test' % javaExe('1.7'), '%s/test.log' % unpackPath)
# test javadocs
print ' generate javadocs w/ Java 7...'
print(' generate javadocs w/ Java 7...')
run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
os.chdir('solr')
print ' test solr example w/ Java 6...'
print(' test solr example w/ Java 6...')
run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA6_HOME, True)
print ' test solr example w/ Java 7...'
print(' test solr example w/ Java 7...')
run('%s; ant clean example' % javaExe('1.7'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA7_HOME, True)
os.chdir('..')
print ' check NOTICE'
print(' check NOTICE')
testNotice(unpackPath)
else:
if project == 'lucene':
testDemo(isSrc, version)
else:
print ' test solr example w/ Java 6...'
print(' test solr example w/ Java 6...')
testSolrExample(unpackPath, JAVA6_HOME, False)
print ' test solr example w/ Java 7...'
print(' test solr example w/ Java 7...')
testSolrExample(unpackPath, JAVA7_HOME, False)
testChangesText('.', version, project)
if project == 'lucene' and not isSrc:
print ' check Lucene\'s javadoc JAR'
print(' check Lucene\'s javadoc JAR')
checkJavadocpath('%s/docs' % unpackPath)
def testNotice(unpackPath):
solrNotice = open('%s/NOTICE.txt' % unpackPath).read()
luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath).read()
solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
expected = """
=========================================================================
@ -545,12 +551,12 @@ def readSolrOutput(p, startupEvent, logFile):
try:
while True:
line = p.readline()
if line == '':
if len(line) == 0:
break
f.write(line)
f.flush()
# print 'SOLR: %s' % line.strip()
if line.find('Started SocketConnector@0.0.0.0:8983') != -1:
if line.decode('UTF-8').find('Started SocketConnector@0.0.0.0:8983') != -1:
startupEvent.set()
finally:
f.close()
@ -558,7 +564,7 @@ def readSolrOutput(p, startupEvent, logFile):
def testSolrExample(unpackPath, javaPath, isSrc):
logFile = '%s/solr-example.log' % unpackPath
os.chdir('example')
print ' start Solr instance (log=%s)...' % logFile
print(' start Solr instance (log=%s)...' % logFile)
env = {}
env.update(os.environ)
env['JAVA_HOME'] = javaPath
@ -572,21 +578,21 @@ def testSolrExample(unpackPath, javaPath, isSrc):
# Make sure Solr finishes startup:
startupEvent.wait()
print ' startup done'
print(' startup done')
try:
print ' test utf8...'
print(' test utf8...')
run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
print ' index example docs...'
print(' index example docs...')
run('sh ./exampledocs/post.sh ./exampledocs/*.xml', 'post-example-docs.log')
print ' run query...'
s = urllib2.urlopen('http://localhost:8983/solr/select/?q=video').read()
print(' run query...')
s = urllib.request.urlopen('http://localhost:8983/solr/select/?q=video').read().decode('UTF-8')
if s.find('<result name="response" numFound="3" start="0">') == -1:
print 'FAILED: response is:\n%s' % s
print('FAILED: response is:\n%s' % s)
raise RuntimeError('query on solr example instance failed')
finally:
# Stop server:
print ' stop server (SIGINT)...'
print(' stop server (SIGINT)...')
os.kill(server.pid, signal.SIGINT)
# Give it 10 seconds to gracefully shut down
@ -594,14 +600,14 @@ def testSolrExample(unpackPath, javaPath, isSrc):
if serverThread.isAlive():
# Kill server:
print '***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...'
print('***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...')
os.kill(server.pid, signal.SIGKILL)
serverThread.join(10.0)
if serverThread.isAlive():
# Shouldn't happen unless something is seriously wrong...
print '***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...'
print('***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...')
os.chdir('..')
@ -615,13 +621,13 @@ def checkJavadocpath(path):
if checkJavaDocs.checkPackageSummaries(path):
# disabled: RM cannot fix all this, see LUCENE-3887
# raise RuntimeError('javadoc problems')
print '\n***WARNING***: javadocs want to fail!\n'
print('\n***WARNING***: javadocs want to fail!\n')
if checkJavadocLinks.checkAll(path):
raise RuntimeError('broken javadocs links found!')
def testDemo(isSrc, version):
print ' test demo...'
print(' test demo...')
sep = ';' if cygwin else ':'
if isSrc:
cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep)
@ -632,14 +638,14 @@ def testDemo(isSrc, version):
run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log')
run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log')
reMatchingDocs = re.compile('(\d+) total matching documents')
m = reMatchingDocs.search(open('search.log', 'rb').read())
m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
if m is None:
raise RuntimeError('lucene demo\'s SearchFiles found no results')
else:
numHits = int(m.group(1))
if numHits < 100:
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
print ' got %d hits for query "lucene"' % numHits
print(' got %d hits for query "lucene"' % numHits)
def checkMaven(baseURL, tmpDir, version, isSigned):
# Locate the release branch in subversion
@ -652,11 +658,11 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
if text == releaseBranchText:
releaseBranchSvnURL = subURL
print ' get POM templates',
print(' get POM templates', end=' ')
POMtemplates = defaultdict()
getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
print
print ' download artifacts',
print()
print(' download artifacts', end=' ')
artifacts = {'lucene': [], 'solr': []}
for project in ('lucene', 'solr'):
artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
@ -664,30 +670,30 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
if not os.path.exists(targetDir):
os.makedirs(targetDir)
crawl(artifacts[project], artifactsURL, targetDir)
print
print ' verify that each binary artifact has a deployed POM...'
print()
print(' verify that each binary artifact has a deployed POM...')
verifyPOMperBinaryArtifact(artifacts, version)
print ' verify that there is an artifact for each POM template...'
print(' verify that there is an artifact for each POM template...')
verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
print " verify Maven artifacts' md5/sha1 digests..."
print(" verify Maven artifacts' md5/sha1 digests...")
verifyMavenDigests(artifacts)
print ' verify that all non-Mavenized deps are deployed...'
print(' verify that all non-Mavenized deps are deployed...')
nonMavenizedDeps = dict()
checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
version, releaseBranchSvnURL)
print ' check for javadoc and sources artifacts...'
print(' check for javadoc and sources artifacts...')
checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
print " verify deployed POMs' coordinates..."
print(" verify deployed POMs' coordinates...")
verifyDeployedPOMsCoordinates(artifacts, version)
if isSigned:
print ' verify maven artifact sigs',
print(' verify maven artifact sigs', end=' ')
verifyMavenSigs(baseURL, tmpDir, artifacts)
distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
print ' verify that non-Mavenized deps are same as in the binary distribution...'
print(' verify that non-Mavenized deps are same as in the binary distribution...')
checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
print ' verify that Maven artifacts are same as in the binary distribution...'
print(' verify that Maven artifacts are same as in the binary distribution...')
checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
def getDistributionsForMavenChecks(tmpDir, version, baseURL):
@ -697,19 +703,19 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
print ' download %s...' % distribution,
print(' download %s...' % distribution, end=' ')
download(distribution, distURL, tmpDir)
destDir = '%s/unpack-%s-maven' % (tmpDir, project)
if os.path.exists(destDir):
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
print ' unpack %s...' % distribution
print(' unpack %s...' % distribution)
unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
if project == 'solr': # unpack the Solr war
unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
print ' unpack Solr war...'
print(' unpack Solr war...')
run('jar xvf */dist/*.war', unpackLogFile)
distributionFiles[project] = []
for root, dirs, files in os.walk(destDir):
@ -719,7 +725,7 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
for project in ('lucene', 'solr'):
for artifact in artifacts[project]:
if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
javadocJar = artifact[:-4] + '-javadoc.jar'
if javadocJar not in artifacts[project]:
raise RuntimeError('missing: %s' % javadocJar)
@ -732,7 +738,7 @@ def checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps):
distFilenames = dict()
for file in distributionFiles[project]:
distFilenames[os.path.basename(file)] = file
for dep in nonMavenizedDeps.keys():
for dep in list(nonMavenizedDeps.keys()):
if ('/%s/' % project) in dep:
depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
if not depOrigFilename in distFilenames:
@ -753,9 +759,9 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
if artifact not in nonMavenizedDeps.keys():
if artifact not in list(nonMavenizedDeps.keys()):
artifactFilename = os.path.basename(artifact)
if artifactFilename not in distFilenames.keys():
if artifactFilename not in list(distFilenames.keys()):
raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
% (artifact, project))
# TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
@ -772,16 +778,17 @@ def verifyMavenDigests(artifacts):
raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
if artifactFile + '.sha1' not in artifacts[project]:
raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
with open(artifactFile + '.md5', 'r') as md5File:
with open(artifactFile + '.md5', encoding='UTF-8') as md5File:
md5Expected = md5File.read().strip()
with open(artifactFile + '.sha1', 'r') as sha1File:
with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File:
sha1Expected = sha1File.read().strip()
md5 = hashlib.md5()
sha1 = hashlib.sha1()
inputFile = open(artifactFile)
inputFile = open(artifactFile, 'rb')
while True:
bytes = inputFile.read(65536)
if bytes == '': break
if len(bytes) == 0:
break
md5.update(bytes)
sha1.update(bytes)
inputFile.close()
@ -846,7 +853,7 @@ def checkNonMavenizedDeps(nonMavenizedDependencies, POMtemplates, artifacts,
if releaseBranchSvnURL is None:
pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
if os.path.exists(pomPath):
doc2 = ET.XML(open(pomPath).read())
doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
break
else:
entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
@ -891,7 +898,7 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
os.makedirs(gpgHomeDir, 0700)
os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log' % (tmpDir, project))
@ -904,12 +911,12 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since it's a clean world)
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
@ -918,16 +925,16 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
sys.stdout.write('.')
print
print()
def verifyPOMperBinaryArtifact(artifacts, version):
"""verify that each binary jar and war has a corresponding POM file"""
@ -1023,17 +1030,20 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
def main():
if len(sys.argv) != 4:
print
print 'Usage python -u %s BaseURL version tmpDir' % sys.argv[0]
print
if len(sys.argv) < 4:
print()
print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
print()
sys.exit(1)
baseURL = sys.argv[1]
version = sys.argv[2]
tmpDir = os.path.abspath(sys.argv[3])
isSigned = True
if len(sys.argv) == 5:
isSigned = (sys.argv[4] == "True")
smokeTest(baseURL, version, tmpDir, True)
smokeTest(baseURL, version, tmpDir, isSigned)
def smokeTest(baseURL, version, tmpDir, isSigned):
@ -1046,11 +1056,11 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
lucenePath = None
solrPath = None
print
print 'Load release URL "%s"...' % baseURL
print()
print('Load release URL "%s"...' % baseURL)
newBaseURL = unshortenURL(baseURL)
if newBaseURL != baseURL:
print ' unshortened: %s' % newBaseURL
print(' unshortened: %s' % newBaseURL)
baseURL = newBaseURL
for text, subURL in getDirEntries(baseURL):
@ -1064,23 +1074,28 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
if solrPath is None:
raise RuntimeError('could not find solr subdir')
print
print 'Test Lucene...'
print()
print('Test Lucene...')
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
unpack('lucene', tmpDir, artifact, version)
unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
print
print 'Test Solr...'
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
unpack('solr', tmpDir, artifact, version)
unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
print 'Test Maven artifacts for Lucene and Solr...'
print('Test Maven artifacts for Lucene and Solr...')
checkMaven(baseURL, tmpDir, version, isSigned)
if __name__ == '__main__':
main()
try:
main()
except:
import traceback
traceback.print_exc()
sys.exit(1)
sys.exit(0)


@ -6,6 +6,15 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
======================= Lucene 4.0.0 =======================
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
======================= Lucene 4.0.0-BETA =======================
@ -15,6 +24,9 @@ New features
underlying PayloadFunction's explanation as the explanation
for the payload score. (Scott Smerchek via Robert Muir)
* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
such as primary keys (Mark Harwood, Mike McCandless)
* LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
iteration marks. (Robert Muir, Christian Moen)
@ -34,6 +46,22 @@ New features
CharFilterFactories to the lucene-analysis module. The API is still
experimental. (Chris Male, Robert Muir, Uwe Schindler)
* LUCENE-4230: When pulling a DocsAndPositionsEnum you can now
specify whether or not you require payloads (in addition to
offsets); turning one or both off may allow some codec
implementations to optimize the enum implementation. (Robert Muir,
Mike McCandless)
* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
int docID), to attempt deletion by docID as long as the provided
reader is an NRT reader, and the segment has not yet been merged
away; see the sketch after this list. (Mike McCandless)
* LUCENE-4286: Added option to CJKBigramFilter to always also output
unigrams. This can be used for a unigram+bigram approach, or at
index-time only for better support of short queries.
(Tom Burton-West, Robert Muir)
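A minimal sketch of the LUCENE-4203 call (hedged: writer, atomicReader, docID
and the "id" field/value are illustrative assumptions, not part of this entry):

  boolean deleted = writer.tryDeleteDocument(atomicReader, docID);
  if (!deleted) {
    // the segment was already merged away; fall back to a regular delete
    writer.deleteDocuments(new Term("id", idValue));
  }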
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@ -69,6 +97,23 @@ API Changes
* LUCENE-3747: Support Unicode 6.1.0. (Steve Rowe)
* LUCENE-3884: Moved ElisionFilter out of org.apache.lucene.analysis.fr
package into org.apache.lucene.analysis.util. (Robert Muir)
* LUCENE-4230: When pulling a DocsAndPositionsEnum you now pass an int
flags instead of the previous boolean needOffsets. Currently
recognized flags are DocsAndPositionsEnum.FLAG_PAYLOADS and
DocsAndPositionsEnum.FLAG_OFFSETS (Robert Muir, Mike McCandless)
* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
instead of the previous boolean needsFreqs; consistent with the changes
for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
is DocsEnum.FLAG_FREQS; see the sketch after this list. (Robert Muir, Mike McCandless)
* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
as the Store parameter didn't make sense: if you supplied Store.YES, you would only
receive an exception anyway. (Robert Muir)
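A hedged sketch of the flag-based calls from LUCENE-4230/LUCENE-4273
(termsEnum and liveDocs are assumed to be in scope):

  // freqs only:
  DocsEnum docs = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_FREQS);
  // positions plus offsets and payloads:
  DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null,
      DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);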
Optimizations
* LUCENE-4171: Performance improvements to Packed64.
@ -80,8 +125,19 @@ Optimizations
* LUCENE-4235: Remove enforcing of Filter rewrite for NRQ queries.
(Uwe Schindler)
* LUCENE-4279: Regenerated snowball Stemmers from snowball r554,
making them substantially more lightweight. Behavior is unchanged.
(Robert Muir)
* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers
such as StandardTokenizer from 32kb to 8kb.
(Raintung Li, Steven Rowe, Robert Muir)
Bug Fixes
* LUCENE-4109: BooleanQueries are not parsed correctly with the
flexible query parser. (Karsten Rauch via Robert Muir)
* LUCENE-4176: Fix AnalyzingQueryParser to analyze range endpoints as bytes,
so that it works correctly with Analyzers that produce binary non-UTF-8 terms
such as CollationAnalyzer. (Nattapong Sirilappanich via Robert Muir)
@ -113,6 +169,30 @@ Bug Fixes
* LUCENE-4245: Make IndexWriter#close() and MergeScheduler#close()
non-interruptible. (Mark Miller, Uwe Schindler)
* LUCENE-4190: restrict allowed filenames that a codec may create to
the patterns recognized by IndexFileNames. This also fixes
IndexWriter to only delete files matching this pattern from an index
directory, to reduce risk when the wrong index path is accidentally
passed to IndexWriter (Robert Muir, Mike McCandless)
* LUCENE-4277: Fix IndexWriter deadlock during rollback if flushable DWPT
instances are already checked out and queued up but not yet flushed.
(Simon Willnauer)
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
(Robert Muir)
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
(Karsten Rauch via Robert Muir)
* LUCENE-3616: Field throws exception if you try to set a boost on an
unindexed field or one that omits norms. (Robert Muir)
Build
* LUCENE-4094: Support overriding file.encoding on forked test JVMs


@ -57,30 +57,6 @@ enumeration APIs. Here are the major changes:
...
}
The bulk read API has also changed. Instead of this:
int[] docs = new int[256];
int[] freqs = new int[256];
while(true) {
int count = td.read(docs, freqs)
if (count == 0) {
break;
}
// use docs[i], freqs[i]
}
do this:
DocsEnum.BulkReadResult bulk = td.getBulkResult();
while(true) {
int count = td.read();
if (count == 0) {
break;
}
// use bulk.docs.ints[i] and bulk.freqs.ints[i]
}
* TermPositions is renamed to DocsAndPositionsEnum, and no longer
extends the docs only enumerator (DocsEnum).
@ -170,7 +146,7 @@ enumeration APIs. Here are the major changes:
Bits liveDocs = reader.getLiveDocs();
DocsEnum docsEnum = null;
docsEnum = termsEnum.docs(liveDocs, docsEnum);
docsEnum = termsEnum.docs(liveDocs, docsEnum, needsFreqs);
You can pass in a prior DocsEnum and it will be reused if possible.
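For example, a sketch against the post-change API (termsEnum and liveDocs are
assumed to be in scope; FLAG_FREQS is the flag added in LUCENE-4273):

  DocsEnum docsEnum = null;
  while (termsEnum.next() != null) {
    // pass the previous enum back in; it is reused when possible
    docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_FREQS);
    int doc;
    while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      int freq = docsEnum.freq(); // valid because freqs were requested
    }
  }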
@ -187,7 +163,7 @@ enumeration APIs. Here are the major changes:
String field;
BytesRef text;
DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text);
DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text, needsFreqs);
Likewise for DocsAndPositionsEnum.
@ -340,11 +316,12 @@ an AtomicReader. Note: using "atomicity emulators" can cause serious
slowdowns due to the need to merge terms, postings, DocValues, and
FieldCache, use them with care!
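For instance, a sketch with the SlowCompositeReaderWrapper emulator (reader is
an assumed open composite IndexReader):

  AtomicReader slow = SlowCompositeReaderWrapper.wrap(reader);
  // every terms/postings/DocValues access on "slow" now merges segments on the fly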
## LUCENE-2413: Analyzer package changes
## LUCENE-2413,LUCENE-3396: Analyzer package changes
Lucene's core and contrib analyzers, along with Solr's analyzers,
were consolidated into lucene/analysis. During the refactoring some
package names have changed:
package names have changed, and ReusableAnalyzerBase was renamed to
Analyzer:
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@ -369,7 +346,7 @@ package names have changed:
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer


@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -32,6 +31,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
@ -127,7 +127,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())


@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.charfilter;
@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/26/12 6:22 PM from the specification file
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
*/
public final class HTMLStripCharFilter extends BaseCharFilter {
@ -31255,6 +31255,93 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
{ yybegin(STYLE);
}
case 55: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 56: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 57: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 58: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 61: break;
case 21:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(SINGLE_QUOTED_STRING);
}
case 62: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 63: break;
case 35:
{ yybegin(SCRIPT);
}
case 64: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 65: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 66: break;
case 51:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
@ -31288,13 +31375,331 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 56: break;
case 21:
case 67: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 68: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 69: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 70: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 71: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 72: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 73: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(SINGLE_QUOTED_STRING);
yybegin(DOUBLE_QUOTED_STRING);
}
case 57: break;
case 74: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 75: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 76: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 77: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 78: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 79: break;
case 32:
{ yybegin(COMMENT);
}
case 80: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 81: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 82: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 83: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 84: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 85: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 86: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 87: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 88: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 89: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 90: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 91: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 92: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 93: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 94: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 95: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 96: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 97: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 98: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 99: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 100: break;
case 31:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
@ -31329,66 +31734,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
return outputSegment.nextChar();
}
}
case 58: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 61: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 62: break;
case 35:
{ yybegin(SCRIPT);
}
case 63: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 64: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 65: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 66: break;
case 101: break;
case 53:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
@ -31424,288 +31770,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 67: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 68: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 69: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 70: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 71: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 72: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 73: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 74: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 75: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 76: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 77: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 78: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 79: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 80: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 81: break;
case 32:
{ yybegin(COMMENT);
}
case 82: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 83: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 84: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 85: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 86: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 87: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 88: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 89: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 90: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 91: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 92: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 93: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(DOUBLE_QUOTED_STRING);
}
case 94: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 95: break;
case 102: break;
case 36:
{ yybegin(YYINITIAL);
if (escapeBR) {
@ -31721,83 +31786,18 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
return BR_END_TAG_REPLACEMENT;
}
}
case 96: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 97: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 98: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 99: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 100: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 101: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 102: break;
case 103: break;
case 38:
{ yybegin(restoreState);
}
case 103: break;
case 104: break;
case 41:
{ yybegin(STYLE_COMMENT);
}
case 104: break;
case 105: break;
case 1:
{ return zzBuffer[zzStartRead];
}
case 105: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 106: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {


@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU] |
[vV][aA][rR] )
%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
%include HTMLCharacterEntities.jflex
%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
%{
private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;


@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
* {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
* of the CJK scripts are turned into bigrams.
* <p>
* By default, when a CJK character has no adjacent characters to form
* a bigram, it is output in unigram form. If you want to always output
* both unigrams and bigrams, set the <code>outputUnigrams</code>
* flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
* This can be used for a combined unigram+bigram approach.
* <p>
* In all cases, all non-CJK input is passed thru unmodified.
*/
public final class CJKBigramFilter extends TokenFilter {
@ -67,10 +75,16 @@ public final class CJKBigramFilter extends TokenFilter {
private final Object doHiragana;
private final Object doKatakana;
private final Object doHangul;
// true if we should output unigram tokens always
private final boolean outputUnigrams;
private boolean ngramState; // false = output unigram, true = output bigram
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
// buffers containing codepoint and offsets in parallel
int buffer[] = new int[8];
@ -88,23 +102,36 @@ public final class CJKBigramFilter extends TokenFilter {
/**
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
* CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
* CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
*/
public CJKBigramFilter(TokenStream in) {
this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
}
/**
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
* CJKBigramFilter(in, flags, false)}
*/
public CJKBigramFilter(TokenStream in, int flags) {
this(in, flags, false);
}
/**
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
* and whether or not unigrams should also be output.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
* @param outputUnigrams true if unigrams for the selected writing systems should also be output.
* When this is false, a unigram is only output when there are no adjacent characters to form
* a bigram.
*/
public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
super(in);
doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
this.outputUnigrams = outputUnigrams;
}
/*
@ -120,7 +147,24 @@ public final class CJKBigramFilter extends TokenFilter {
// case 1: we have multiple remaining codepoints buffered,
// so we can emit a bigram here.
flushBigram();
if (outputUnigrams) {
// when also outputting unigrams, we output the unigram first,
// then rewind back to revisit the bigram.
// so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
// the logic in hasBufferedUnigram ensures we output the C,
// even though it did actually have adjacent CJK characters.
if (ngramState) {
flushBigram();
} else {
flushUnigram();
index--;
}
ngramState = !ngramState;
} else {
flushBigram();
}
return true;
} else if (doNext()) {
@ -260,6 +304,11 @@ public final class CJKBigramFilter extends TokenFilter {
termAtt.setLength(len2);
offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
typeAtt.setType(DOUBLE_TYPE);
// when outputting unigrams, all bigrams are synonyms that span two unigrams
if (outputUnigrams) {
posIncAtt.setPositionIncrement(0);
posLengthAtt.setPositionLength(2);
}
index++;
}
@ -292,7 +341,13 @@ public final class CJKBigramFilter extends TokenFilter {
* inputs.
*/
private boolean hasBufferedUnigram() {
return bufferLen == 1 && index == 0;
if (outputUnigrams) {
// when outputting unigrams always
return bufferLen - index == 1;
} else {
// otherwise it's only when we have a lone CJK character
return bufferLen == 1 && index == 0;
}
}
@Override
@ -303,5 +358,6 @@ public final class CJKBigramFilter extends TokenFilter {
lastEndOffset = 0;
loneState = null;
exhausted = false;
ngramState = false;
}
}
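A hedged sketch of the unigram+bigram configuration described in the javadoc
above (matchVersion and the single-tokenizer chain are illustrative assumptions):

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new StandardTokenizer(matchVersion, reader);
      // bigram all four CJK scripts and also emit the unigrams
      TokenStream result = new CJKBigramFilter(source,
          CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA
              | CJKBigramFilter.KATAKANA | CJKBigramFilter.HANGUL,
          true);
      return new TokenStreamComponents(source, result);
    }
  };

With outputUnigrams=true, each bigram is then emitted as a synonym at position
increment 0 with position length 2, as the flushBigram() change above shows.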


@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.CJKBigramFilterFactory"
* han="true" hiragana="true"
* katakana="true" hangul="true" /&gt;
* katakana="true" hangul="true" outputUnigrams="false" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class CJKBigramFilterFactory extends TokenFilterFactory {
int flags;
boolean outputUnigrams;
@Override
public void init(Map<String,String> args) {
@ -56,10 +57,11 @@ public class CJKBigramFilterFactory extends TokenFilterFactory {
if (getBoolean("hangul", true)) {
flags |= CJKBigramFilter.HANGUL;
}
outputUnigrams = getBoolean("outputUnigrams", false);
}
@Override
public TokenStream create(TokenStream input) {
return new CJKBigramFilter(input, flags);
return new CJKBigramFilter(input, flags, outputUnigrams);
}
}
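Driving the factory programmatically, as a sketch (the argument keys match the getBoolean() calls above; the surrounding tokenizer is an assumed input):

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKBigramFilterFactory;

public final class CJKFactorySketch {
  // Builds the filter the same way a schema declaration would.
  public static TokenStream wrap(TokenStream tokenizer) {
    Map<String,String> args = new HashMap<String,String>();
    args.put("han", "true");
    args.put("hiragana", "true");
    args.put("katakana", "true");
    args.put("hangul", "true");
    args.put("outputUnigrams", "true"); // the new knob added in this change
    CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
    factory.init(args);
    return factory.create(tokenizer);
  }
}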

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
@ -35,6 +36,7 @@ import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
/**
* {@link Analyzer} for French language.
@ -54,6 +56,11 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
/** File containing default French stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "french_stop.txt";
/** Default set of articles for ElisionFilter */
public static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j"), true));
/**
* Contains words that should be indexed but not stemmed.
*/
@ -134,7 +141,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(matchVersion, result);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!excltable.isEmpty())
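The net API change in the French, Irish, and Italian analyzers is the same one-liner; a hedged before/after sketch (matchVersion is the analyzers' existing field):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.util.ElisionFilter;

public final class ElisionMigrationSketch {
  static TokenStream addElision(TokenStream in) {
    // Old (removed): new ElisionFilter(matchVersion, in) fell back to a
    // private default article set inside the filter.
    // New: the article set is always explicit, and FrenchAnalyzer now
    // publishes the former default as DEFAULT_ARTICLES.
    return new ElisionFilter(in, FrenchAnalyzer.DEFAULT_ARTICLES);
  }
}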

View File

@ -23,7 +23,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -31,6 +30,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.IrishStemmer;
@ -140,7 +140,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
s.setEnablePositionIncrements(false);
result = s;
result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new IrishLowerCaseFilter(result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -32,6 +31,7 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
@ -129,7 +129,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -27,13 +27,47 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
/**
* Factory for {@link PathHierarchyTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_path" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* <p>
* This factory is typically configured for use only in the <code>index</code>
* Analyzer (or only in the <code>query</code> Analyzer, but never both).
* </p>
* <p>
* For example, in the configuration below a query for
* <code>Books/NonFic</code> will match documents indexed with values like
* <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>,
* <code>Books/NonFic/Science/Physics</code>, etc. But it will not match
* documents indexed with values like <code>Books</code>, or
* <code>Books/Fic</code>...
* </p>
*
* <pre class="prettyprint" >
* &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
* &lt;analyzer type="index"&gt;
* &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
* &lt;/analyzer&gt;
* &lt;analyzer type="query"&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
* <p>
 * In this example, however, we see the opposite configuration, so that a query
* for <code>Books/NonFic/Science/Physics</code> would match documents
* containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>,
* or <code>Books/NonFic/Science/Physics</code>, but not
* <code>Books/NonFic/Science/Physics/Theory</code> or
* <code>Books/NonFic/Law</code>.
* </p>
* <pre class="prettyprint" >
* &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
* &lt;analyzer type="index"&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
* &lt;/analyzer&gt;
* &lt;analyzer type="query"&gt;
* &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
*/
public class PathHierarchyTokenizerFactory extends TokenizerFactory {
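To make the descendant-path example concrete, a sketch of the tokens the index-time analyzer emits (default '/' delimiter; the input string is illustrative):

import java.io.StringReader;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PathTokensSketch {
  public static void main(String[] args) throws Exception {
    PathHierarchyTokenizer tok =
        new PathHierarchyTokenizer(new StringReader("Books/NonFic/Law"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    tok.reset();
    while (tok.incrementToken()) {
      System.out.println(term.toString());
    }
    tok.end();
    tok.close();
    // Prints: Books, Books/NonFic, Books/NonFic/Law -- so a query-side
    // KeywordTokenizer term "Books/NonFic" matches this indexed document.
  }
}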

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/15/12 1:57 AM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class ClassicTokenizerImpl implements StandardTokenizerInterface {
@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;

View File

@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
%buffer 4096
%{

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@ -43,7 +43,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;

View File

@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
%buffer 4096
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;

View File

@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
%buffer 4096
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format
%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
%include ASCIITLD.jflex-macro
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}

View File

@ -0,0 +1,80 @@
package org.apache.lucene.analysis.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.InputStream;
/**
* Simple {@link ResourceLoader} that uses {@link ClassLoader#getResourceAsStream(String)}
* and {@link Class#forName(String,boolean,ClassLoader)} to open resources and
* classes, respectively.
*/
public final class ClasspathResourceLoader implements ResourceLoader {
private final Class<?> clazz;
private final ClassLoader loader;
/**
* Creates an instance using the context classloader to load Resources and classes.
* Resource paths must be absolute.
*/
public ClasspathResourceLoader() {
this(Thread.currentThread().getContextClassLoader());
}
/**
* Creates an instance using the given classloader to load Resources and classes.
* Resource paths must be absolute.
*/
public ClasspathResourceLoader(ClassLoader loader) {
this(null, loader);
}
/**
   * Creates an instance using the given class's classloader to load Resources and classes.
   * Resources are resolved relative to the given class, if the path is not absolute.
*/
public ClasspathResourceLoader(Class<?> clazz) {
this(clazz, clazz.getClassLoader());
}
private ClasspathResourceLoader(Class<?> clazz, ClassLoader loader) {
this.clazz = clazz;
this.loader = loader;
}
@Override
public InputStream openResource(String resource) throws IOException {
final InputStream stream = (clazz != null) ?
clazz.getResourceAsStream(resource) :
loader.getResourceAsStream(resource);
if (stream == null)
throw new IOException("Resource not found: " + resource);
return stream;
}
@Override
public <T> T newInstance(String cname, Class<T> expectedType) {
try {
final Class<? extends T> clazz = Class.forName(cname, true, loader).asSubclass(expectedType);
return clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException("Cannot instantiate class: " + cname, e);
}
}
}
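Usage, as a sketch (the resource names are hypothetical):

import java.io.IOException;
import java.io.InputStream;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;

public class LoaderSketch {
  static InputStream open() throws IOException {
    // Absolute path, resolved by the context classloader:
    ResourceLoader absolute = new ClasspathResourceLoader();
    InputStream a = absolute.openResource("org/example/stopwords.txt"); // hypothetical resource
    a.close();
    // Relative path, resolved next to a class:
    ResourceLoader relative = new ClasspathResourceLoader(LoaderSketch.class);
    return relative.openResource("stopwords.txt"); // hypothetical resource
  }
}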

View File

@ -1,4 +1,4 @@
package org.apache.lucene.analysis.fr;
package org.apache.lucene.analysis.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -18,13 +18,11 @@ package org.apache.lucene.analysis.fr;
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
@ -33,31 +31,17 @@ import org.apache.lucene.util.Version;
* @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
*/
public final class ElisionFilter extends TokenFilter {
private CharArraySet articles = CharArraySet.EMPTY_SET;
private final CharArraySet articles;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j"), true));
private static char[] apostrophes = {'\'', '\u2019'};
/**
* Constructs an elision filter with standard stop words
*/
public ElisionFilter(Version matchVersion, TokenStream input) {
this(matchVersion, input, DEFAULT_ARTICLES);
}
/**
* Constructs an elision filter with a Set of stop words
* @param matchVersion the lucene backwards compatibility version
* @param input the source {@link TokenStream}
* @param articles a set of stopword articles
*/
public ElisionFilter(Version matchVersion, TokenStream input, CharArraySet articles) {
public ElisionFilter(TokenStream input, CharArraySet articles) {
super(input);
this.articles = CharArraySet.unmodifiableSet(
new CharArraySet(matchVersion, articles, true));
this.articles = articles;
}
/**
@ -69,22 +53,18 @@ public final class ElisionFilter extends TokenFilter {
char[] termBuffer = termAtt.buffer();
int termLength = termAtt.length();
int minPoz = Integer.MAX_VALUE;
for (int i = 0; i < apostrophes.length; i++) {
char apos = apostrophes[i];
// The equivalent of String.indexOf(ch)
for (int poz = 0; poz < termLength ; poz++) {
if (termBuffer[poz] == apos) {
minPoz = Math.min(poz, minPoz);
break;
}
int index = -1;
for (int i = 0; i < termLength; i++) {
char ch = termBuffer[i];
if (ch == '\'' || ch == '\u2019') {
index = i;
break;
}
}
// An apostrophe has been found. If the prefix is an article, strip it off.
if (minPoz != Integer.MAX_VALUE
&& articles.contains(termAtt.buffer(), 0, minPoz)) {
termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1));
if (index >= 0 && articles.contains(termBuffer, 0, index)) {
termAtt.copyBuffer(termBuffer, index + 1, termLength - (index + 1));
}
return true;
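The rewritten loop scans once for either apostrophe variant (ASCII ' or U+2019) instead of one indexOf pass per apostrophe character; a behavior sketch (tokenizer, Version constant, and input are assumptions):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.util.Version;

public class ElisionSketch {
  public static void main(String[] args) throws Exception {
    // "l'avion" uses the ASCII apostrophe, "l\u2019eau" the typographic one.
    WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_40,
        new StringReader("l'avion l\u2019eau"));
    TokenStream ts = new ElisionFilter(tok, FrenchAnalyzer.DEFAULT_ARTICLES);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // avion, eau
    }
    ts.end();
    ts.close();
  }
}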

View File

@ -1,4 +1,4 @@
package org.apache.lucene.analysis.fr;
package org.apache.lucene.analysis.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -17,10 +17,9 @@ package org.apache.lucene.analysis.fr;
* limitations under the License.
*/
import org.apache.lucene.analysis.util.*;
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
/**
* Factory for {@link ElisionFilter}.
@ -46,12 +45,13 @@ public class ElisionFilterFactory extends TokenFilterFactory implements Resource
if (articlesFile != null) {
articles = getWordSet(loader, articlesFile, ignoreCase);
}
if (articles == null) {
articles = FrenchAnalyzer.DEFAULT_ARTICLES;
}
}
public ElisionFilter create(TokenStream input) {
assureMatchVersion();
return articles == null ? new ElisionFilter(luceneMatchVersion,input) :
new ElisionFilter(luceneMatchVersion,input,articles);
return new ElisionFilter(input, articles);
}
}

View File

@ -0,0 +1,94 @@
package org.apache.lucene.analysis.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
/**
* Simple {@link ResourceLoader} that opens resource files
* from the local file system, optionally resolving against
* a base directory.
*
* <p>This loader wraps a delegate {@link ResourceLoader}
 * that is used to resolve all files the current base directory
* does not contain. {@link #newInstance} is always resolved
* against the delegate, as a {@link ClassLoader} is needed.
*
* <p>You can chain several {@code FilesystemResourceLoader}s
* to allow lookup of files in more than one base directory.
*/
public final class FilesystemResourceLoader implements ResourceLoader {
private final File baseDirectory;
private final ResourceLoader delegate;
/**
   * Creates a resource loader that resolves resources against absolute filenames,
   * or filenames relative to the CWD. Files not found in the file system, as well
   * as all class lookups, are delegated to the context classloader.
*/
public FilesystemResourceLoader() {
this((File) null);
}
/**
* Creates a resource loader that resolves resources against the given
* base directory (may be {@code null} to refer to CWD).
   * Files not found in the file system, as well as all class lookups, are
   * delegated to the context classloader.
*/
public FilesystemResourceLoader(File baseDirectory) {
this(baseDirectory, new ClasspathResourceLoader());
}
/**
* Creates a resource loader that resolves resources against the given
* base directory (may be {@code null} to refer to CWD).
   * Files not found in the file system, as well as all class lookups, are
   * delegated to the given delegate {@link ResourceLoader}.
*/
public FilesystemResourceLoader(File baseDirectory, ResourceLoader delegate) {
if (baseDirectory != null && !baseDirectory.isDirectory())
throw new IllegalArgumentException("baseDirectory is not a directory or null");
if (delegate == null)
throw new IllegalArgumentException("delegate ResourceLoader may not be null");
this.baseDirectory = baseDirectory;
this.delegate = delegate;
}
@Override
public InputStream openResource(String resource) throws IOException {
try {
File file = new File(resource);
if (baseDirectory != null && !file.isAbsolute()) {
file = new File(baseDirectory, resource);
}
return new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
return delegate.openResource(resource);
}
}
@Override
public <T> T newInstance(String cname, Class<T> expectedType) {
return delegate.newInstance(cname, expectedType);
}
}
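Chaining loaders as the class javadoc describes, as a sketch (the directory paths and resource name are hypothetical):

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;

public class ChainedLoaderSketch {
  static InputStream open() throws IOException {
    // Try /etc/app/conf first, then /var/app/data, then the classpath.
    ResourceLoader loader = new FilesystemResourceLoader(
        new File("/etc/app/conf"),                                  // hypothetical
        new FilesystemResourceLoader(new File("/var/app/data")));   // hypothetical
    return loader.openResource("synonyms.txt"); // hypothetical resource
  }
}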

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.wikipedia;
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/15/12 1:57 AM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int THREE_SINGLE_QUOTES_STATE = 10;

View File

@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
%buffer 4096
%{

View File

@ -1,423 +1,439 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class DanishStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "hed", -1, 1, "", this),
new Among ( "ethed", 0, 1, "", this),
new Among ( "ered", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "erede", 3, 1, "", this),
new Among ( "ende", 3, 1, "", this),
new Among ( "erende", 5, 1, "", this),
new Among ( "ene", 3, 1, "", this),
new Among ( "erne", 3, 1, "", this),
new Among ( "ere", 3, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heden", 10, 1, "", this),
new Among ( "eren", 10, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heder", 13, 1, "", this),
new Among ( "erer", 13, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "heds", 16, 1, "", this),
new Among ( "es", 16, 1, "", this),
new Among ( "endes", 18, 1, "", this),
new Among ( "erendes", 19, 1, "", this),
new Among ( "enes", 18, 1, "", this),
new Among ( "ernes", 18, 1, "", this),
new Among ( "eres", 18, 1, "", this),
new Among ( "ens", 16, 1, "", this),
new Among ( "hedens", 24, 1, "", this),
new Among ( "erens", 24, 1, "", this),
new Among ( "ers", 16, 1, "", this),
new Among ( "ets", 16, 1, "", this),
new Among ( "erets", 28, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "eret", 30, 1, "", this)
};
private static final long serialVersionUID = 1L;
private Among a_1[] = {
new Among ( "gd", -1, -1, "", this),
new Among ( "dt", -1, -1, "", this),
new Among ( "gt", -1, -1, "", this),
new Among ( "kt", -1, -1, "", this)
};
private final static DanishStemmer methodObject = new DanishStemmer ();
private Among a_2[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lig", 0, 1, "", this),
new Among ( "elig", 1, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "l\u00F8st", -1, 2, "", this)
};
private final static Among a_0[] = {
new Among ( "hed", -1, 1, "", methodObject ),
new Among ( "ethed", 0, 1, "", methodObject ),
new Among ( "ered", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "erede", 3, 1, "", methodObject ),
new Among ( "ende", 3, 1, "", methodObject ),
new Among ( "erende", 5, 1, "", methodObject ),
new Among ( "ene", 3, 1, "", methodObject ),
new Among ( "erne", 3, 1, "", methodObject ),
new Among ( "ere", 3, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heden", 10, 1, "", methodObject ),
new Among ( "eren", 10, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heder", 13, 1, "", methodObject ),
new Among ( "erer", 13, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "heds", 16, 1, "", methodObject ),
new Among ( "es", 16, 1, "", methodObject ),
new Among ( "endes", 18, 1, "", methodObject ),
new Among ( "erendes", 19, 1, "", methodObject ),
new Among ( "enes", 18, 1, "", methodObject ),
new Among ( "ernes", 18, 1, "", methodObject ),
new Among ( "eres", 18, 1, "", methodObject ),
new Among ( "ens", 16, 1, "", methodObject ),
new Among ( "hedens", 24, 1, "", methodObject ),
new Among ( "erens", 24, 1, "", methodObject ),
new Among ( "ers", 16, 1, "", methodObject ),
new Among ( "ets", 16, 1, "", methodObject ),
new Among ( "erets", 28, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "eret", 30, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private final static Among a_1[] = {
new Among ( "gd", -1, -1, "", methodObject ),
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "gt", -1, -1, "", methodObject ),
new Among ( "kt", -1, -1, "", methodObject )
};
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private final static Among a_2[] = {
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "lig", 0, 1, "", methodObject ),
new Among ( "elig", 1, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "l\u00F8st", -1, 2, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private int I_x;
private int I_p1;
private StringBuilder S_ch = new StringBuilder();
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}
private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}
private boolean r_mark_regions() {
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
break lab1;
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
break lab3;
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}
private boolean r_consonant_pair() {
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 56
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 62
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 62
bra = cursor;
// delete, line 62
slice_del();
return true;
}
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 56
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 62
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 62
bra = cursor;
// delete, line 62
slice_del();
return true;
}
private boolean r_other_suffix() {
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 65
// do, line 66
v_1 = limit - cursor;
lab0: do {
// (, line 66
// [, line 66
ket = cursor;
// literal, line 66
if (!(eq_s_b(2, "st")))
{
break lab0;
}
// ], line 66
bra = cursor;
// literal, line 66
if (!(eq_s_b(2, "ig")))
{
break lab0;
}
// delete, line 66
slice_del();
} while (false);
cursor = limit - v_1;
// setlimit, line 67
v_2 = limit - cursor;
// tomark, line 67
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 67
// [, line 67
ket = cursor;
// substring, line 67
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_3;
return false;
}
// ], line 67
bra = cursor;
limit_backward = v_3;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 70
// delete, line 70
slice_del();
// do, line 70
v_4 = limit - cursor;
lab1: do {
// call consonant_pair, line 70
if (!r_consonant_pair())
// (, line 65
// do, line 66
v_1 = limit - cursor;
lab0: do {
// (, line 66
// [, line 66
ket = cursor;
// literal, line 66
if (!(eq_s_b(2, "st")))
{
break lab1;
break lab0;
}
// ], line 66
bra = cursor;
// literal, line 66
if (!(eq_s_b(2, "ig")))
{
break lab0;
}
// delete, line 66
slice_del();
} while (false);
cursor = limit - v_4;
break;
case 2:
// (, line 72
// <-, line 72
slice_from("l\u00F8s");
break;
}
return true;
}
cursor = limit - v_1;
// setlimit, line 67
v_2 = limit - cursor;
// tomark, line 67
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 67
// [, line 67
ket = cursor;
// substring, line 67
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_3;
return false;
}
// ], line 67
bra = cursor;
limit_backward = v_3;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 70
// delete, line 70
slice_del();
// do, line 70
v_4 = limit - cursor;
lab1: do {
// call consonant_pair, line 70
if (!r_consonant_pair())
{
break lab1;
}
} while (false);
cursor = limit - v_4;
break;
case 2:
// (, line 72
// <-, line 72
slice_from("l\u00F8s");
break;
}
return true;
}
private boolean r_undouble() {
private boolean r_undouble() {
int v_1;
int v_2;
// (, line 75
// setlimit, line 76
v_1 = limit - cursor;
// tomark, line 76
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 76
// [, line 76
ket = cursor;
if (!(out_grouping_b(g_v, 97, 248)))
{
limit_backward = v_2;
return false;
}
// ], line 76
bra = cursor;
// -> ch, line 76
S_ch = slice_to(S_ch);
limit_backward = v_2;
// name ch, line 77
if (!(eq_v_b(S_ch)))
{
return false;
}
// delete, line 78
slice_del();
return true;
}
// (, line 75
// setlimit, line 76
v_1 = limit - cursor;
// tomark, line 76
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 76
// [, line 76
ket = cursor;
if (!(out_grouping_b(g_v, 97, 248)))
{
limit_backward = v_2;
return false;
}
// ], line 76
bra = cursor;
// -> ch, line 76
S_ch = slice_to(S_ch);
limit_backward = v_2;
// name ch, line 77
if (!(eq_v_b(S_ch)))
{
return false;
}
// delete, line 78
slice_del();
return true;
}
public boolean stem() {
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
// (, line 82
// do, line 84
v_1 = cursor;
lab0: do {
// call mark_regions, line 84
if (!r_mark_regions())
{
break lab0;
// (, line 82
// do, line 84
v_1 = cursor;
lab0: do {
// call mark_regions, line 84
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 85
limit_backward = cursor; cursor = limit;
// (, line 85
// do, line 86
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 86
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 87
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 87
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 88
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 88
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
// do, line 89
v_5 = limit - cursor;
lab4: do {
// call undouble, line 89
if (!r_undouble())
{
break lab4;
}
} while (false);
cursor = limit - v_5;
cursor = limit_backward; return true;
}
} while (false);
cursor = v_1;
// backwards, line 85
limit_backward = cursor; cursor = limit;
// (, line 85
// do, line 86
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 86
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 87
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 87
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 88
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 88
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
// do, line 89
v_5 = limit - cursor;
lab4: do {
// call undouble, line 89
if (!r_undouble())
{
break lab4;
}
} while (false);
cursor = limit - v_5;
cursor = limit_backward; return true;
public boolean equals( Object o ) {
return o instanceof DanishStemmer;
}
}
public int hashCode() {
return DanishStemmer.class.getName().hashCode();
}
}
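The regenerated stemmers replace the per-instance Among tables (each holding a this reference) with static final tables bound to a single shared methodObject, presumably so the tables are built once rather than per instance, and they add serialVersionUID plus equals/hashCode; the public stemming API is unchanged. A usage sketch (the input word is illustrative):

import org.tartarus.snowball.ext.DanishStemmer;

public class StemmerSketch {
  public static void main(String[] args) {
    DanishStemmer stemmer = new DanishStemmer();
    stemmer.setCurrent("bilerne"); // "the cars" -- illustrative input
    if (stemmer.stem()) {
      // The "erne" suffix in a_0 maps to action 1 (delete), so this
      // should print the stem, e.g. "bil".
      System.out.println(stemmer.getCurrent());
    }
  }
}

The NorwegianStemmer and SwedishStemmer diffs below apply the same mechanical refactoring.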

View File

@ -1,358 +1,375 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class NorwegianStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "a", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "ede", 1, 1, "", this),
new Among ( "ande", 1, 1, "", this),
new Among ( "ende", 1, 1, "", this),
new Among ( "ane", 1, 1, "", this),
new Among ( "ene", 1, 1, "", this),
new Among ( "hetene", 6, 1, "", this),
new Among ( "erte", 1, 3, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heten", 9, 1, "", this),
new Among ( "ar", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heter", 12, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "as", 14, 1, "", this),
new Among ( "es", 14, 1, "", this),
new Among ( "edes", 16, 1, "", this),
new Among ( "endes", 16, 1, "", this),
new Among ( "enes", 16, 1, "", this),
new Among ( "hetenes", 19, 1, "", this),
new Among ( "ens", 14, 1, "", this),
new Among ( "hetens", 21, 1, "", this),
new Among ( "ers", 14, 1, "", this),
new Among ( "ets", 14, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "het", 25, 1, "", this),
new Among ( "ert", -1, 3, "", this),
new Among ( "ast", -1, 1, "", this)
};
private static final long serialVersionUID = 1L;
private Among a_1[] = {
new Among ( "dt", -1, -1, "", this),
new Among ( "vt", -1, -1, "", this)
};
private final static NorwegianStemmer methodObject = new NorwegianStemmer ();
private Among a_2[] = {
new Among ( "leg", -1, 1, "", this),
new Among ( "eleg", 0, 1, "", this),
new Among ( "ig", -1, 1, "", this),
new Among ( "eig", 2, 1, "", this),
new Among ( "lig", 2, 1, "", this),
new Among ( "elig", 4, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "lov", -1, 1, "", this),
new Among ( "elov", 7, 1, "", this),
new Among ( "slov", 7, 1, "", this),
new Among ( "hetslov", 9, 1, "", this)
};
private final static Among a_0[] = {
new Among ( "a", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "ede", 1, 1, "", methodObject ),
new Among ( "ande", 1, 1, "", methodObject ),
new Among ( "ende", 1, 1, "", methodObject ),
new Among ( "ane", 1, 1, "", methodObject ),
new Among ( "ene", 1, 1, "", methodObject ),
new Among ( "hetene", 6, 1, "", methodObject ),
new Among ( "erte", 1, 3, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heten", 9, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heter", 12, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "as", 14, 1, "", methodObject ),
new Among ( "es", 14, 1, "", methodObject ),
new Among ( "edes", 16, 1, "", methodObject ),
new Among ( "endes", 16, 1, "", methodObject ),
new Among ( "enes", 16, 1, "", methodObject ),
new Among ( "hetenes", 19, 1, "", methodObject ),
new Among ( "ens", 14, 1, "", methodObject ),
new Among ( "hetens", 21, 1, "", methodObject ),
new Among ( "ers", 14, 1, "", methodObject ),
new Among ( "ets", 14, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "het", 25, 1, "", methodObject ),
new Among ( "ert", -1, 3, "", methodObject ),
new Among ( "ast", -1, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private final static Among a_1[] = {
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "vt", -1, -1, "", methodObject )
};
private static final char g_s_ending[] = {119, 125, 149, 1 };
private final static Among a_2[] = {
new Among ( "leg", -1, 1, "", methodObject ),
new Among ( "eleg", 0, 1, "", methodObject ),
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "eig", 2, 1, "", methodObject ),
new Among ( "lig", 2, 1, "", methodObject ),
new Among ( "elig", 4, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "lov", -1, 1, "", methodObject ),
new Among ( "elov", 7, 1, "", methodObject ),
new Among ( "slov", 7, 1, "", methodObject ),
new Among ( "hetslov", 9, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {119, 125, 149, 1 };
private int I_x;
private int I_p1;
private void copy_from(NorwegianStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private void copy_from(NorwegianStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_mark_regions() {
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 30
v_1 = cursor;
// (, line 30
// hop, line 30
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 30
I_x = cursor;
cursor = v_1;
// goto, line 31
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
// (, line 26
I_p1 = limit;
// test, line 30
v_1 = cursor;
// (, line 30
// hop, line 30
{
break lab1;
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 31
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
// setmark x, line 30
I_x = cursor;
cursor = v_1;
// goto, line 31
golab0: while(true)
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 31
I_p1 = cursor;
// try, line 32
lab4: do {
// (, line 32
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
// (, line 37
// setlimit, line 38
v_1 = limit - cursor;
// tomark, line 38
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 38
// [, line 38
ket = cursor;
// substring, line 38
among_var = find_among_b(a_0, 29);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 38
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
// or, line 46
lab0: do {
v_3 = limit - cursor;
v_2 = cursor;
lab1: do {
if (!(in_grouping_b(g_s_ending, 98, 122)))
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
break lab0;
cursor = v_2;
break golab0;
} while (false);
cursor = limit - v_3;
// (, line 46
// literal, line 46
if (!(eq_s_b(1, "k")))
cursor = v_2;
if (cursor >= limit)
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
cursor++;
}
// gopast, line 31
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 31
I_p1 = cursor;
// try, line 32
lab4: do {
// (, line 32
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
// delete, line 46
slice_del();
break;
case 3:
// (, line 48
// <-, line 48
slice_from("er");
break;
}
return true;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 52
// test, line 53
v_1 = limit - cursor;
// (, line 53
// setlimit, line 54
v_2 = limit - cursor;
// tomark, line 54
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 54
// [, line 54
ket = cursor;
// substring, line 54
if (find_among_b(a_1, 2) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 54
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 59
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 59
bra = cursor;
// delete, line 59
slice_del();
return true;
}
private boolean r_other_suffix() {
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 62
// setlimit, line 63
v_1 = limit - cursor;
// tomark, line 63
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 63
// [, line 63
ket = cursor;
// substring, line 63
among_var = find_among_b(a_2, 11);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 63
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 67
// delete, line 67
slice_del();
break;
}
return true;
}
int v_3;
// (, line 37
// setlimit, line 38
v_1 = limit - cursor;
// tomark, line 38
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 38
// [, line 38
ket = cursor;
// substring, line 38
among_var = find_among_b(a_0, 29);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 38
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
// or, line 46
lab0: do {
v_3 = limit - cursor;
lab1: do {
if (!(in_grouping_b(g_s_ending, 98, 122)))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_3;
// (, line 46
// literal, line 46
if (!(eq_s_b(1, "k")))
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
{
return false;
}
} while (false);
// delete, line 46
slice_del();
break;
case 3:
// (, line 48
// <-, line 48
slice_from("er");
break;
}
return true;
}
public boolean stem() {
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 52
// test, line 53
v_1 = limit - cursor;
// (, line 53
// setlimit, line 54
v_2 = limit - cursor;
// tomark, line 54
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 54
// [, line 54
ket = cursor;
// substring, line 54
if (find_among_b(a_1, 2) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 54
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 59
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 59
bra = cursor;
// delete, line 59
slice_del();
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 62
// setlimit, line 63
v_1 = limit - cursor;
// tomark, line 63
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 63
// [, line 63
ket = cursor;
// substring, line 63
among_var = find_among_b(a_2, 11);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 63
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 67
// delete, line 67
slice_del();
break;
}
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 72
// do, line 74
v_1 = cursor;
lab0: do {
// call mark_regions, line 74
if (!r_mark_regions())
{
break lab0;
// (, line 72
// do, line 74
v_1 = cursor;
lab0: do {
// call mark_regions, line 74
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 75
limit_backward = cursor; cursor = limit;
// (, line 75
// do, line 76
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 76
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 77
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 77
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 78
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 78
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
} while (false);
cursor = v_1;
// backwards, line 75
limit_backward = cursor; cursor = limit;
// (, line 75
// do, line 76
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 76
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 77
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 77
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 78
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 78
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
public boolean equals( Object o ) {
return o instanceof NorwegianStemmer;
}
public int hashCode() {
return NorwegianStemmer.class.getName().hashCode();
}
}

View File

@ -1,349 +1,366 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class SwedishStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "a", -1, 1, "", this),
new Among ( "arna", 0, 1, "", this),
new Among ( "erna", 0, 1, "", this),
new Among ( "heterna", 2, 1, "", this),
new Among ( "orna", 0, 1, "", this),
new Among ( "ad", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "ade", 6, 1, "", this),
new Among ( "ande", 6, 1, "", this),
new Among ( "arne", 6, 1, "", this),
new Among ( "are", 6, 1, "", this),
new Among ( "aste", 6, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "anden", 12, 1, "", this),
new Among ( "aren", 12, 1, "", this),
new Among ( "heten", 12, 1, "", this),
new Among ( "ern", -1, 1, "", this),
new Among ( "ar", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heter", 18, 1, "", this),
new Among ( "or", -1, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "as", 21, 1, "", this),
new Among ( "arnas", 22, 1, "", this),
new Among ( "ernas", 22, 1, "", this),
new Among ( "ornas", 22, 1, "", this),
new Among ( "es", 21, 1, "", this),
new Among ( "ades", 26, 1, "", this),
new Among ( "andes", 26, 1, "", this),
new Among ( "ens", 21, 1, "", this),
new Among ( "arens", 29, 1, "", this),
new Among ( "hetens", 29, 1, "", this),
new Among ( "erns", 21, 1, "", this),
new Among ( "at", -1, 1, "", this),
new Among ( "andet", -1, 1, "", this),
new Among ( "het", -1, 1, "", this),
new Among ( "ast", -1, 1, "", this)
};
private static final long serialVersionUID = 1L;
private Among a_1[] = {
new Among ( "dd", -1, -1, "", this),
new Among ( "gd", -1, -1, "", this),
new Among ( "nn", -1, -1, "", this),
new Among ( "dt", -1, -1, "", this),
new Among ( "gt", -1, -1, "", this),
new Among ( "kt", -1, -1, "", this),
new Among ( "tt", -1, -1, "", this)
};
private final static SwedishStemmer methodObject = new SwedishStemmer ();
private Among a_2[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lig", 0, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "fullt", -1, 3, "", this),
new Among ( "l\u00F6st", -1, 2, "", this)
};
private final static Among a_0[] = {
new Among ( "a", -1, 1, "", methodObject ),
new Among ( "arna", 0, 1, "", methodObject ),
new Among ( "erna", 0, 1, "", methodObject ),
new Among ( "heterna", 2, 1, "", methodObject ),
new Among ( "orna", 0, 1, "", methodObject ),
new Among ( "ad", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "ade", 6, 1, "", methodObject ),
new Among ( "ande", 6, 1, "", methodObject ),
new Among ( "arne", 6, 1, "", methodObject ),
new Among ( "are", 6, 1, "", methodObject ),
new Among ( "aste", 6, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "anden", 12, 1, "", methodObject ),
new Among ( "aren", 12, 1, "", methodObject ),
new Among ( "heten", 12, 1, "", methodObject ),
new Among ( "ern", -1, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heter", 18, 1, "", methodObject ),
new Among ( "or", -1, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "as", 21, 1, "", methodObject ),
new Among ( "arnas", 22, 1, "", methodObject ),
new Among ( "ernas", 22, 1, "", methodObject ),
new Among ( "ornas", 22, 1, "", methodObject ),
new Among ( "es", 21, 1, "", methodObject ),
new Among ( "ades", 26, 1, "", methodObject ),
new Among ( "andes", 26, 1, "", methodObject ),
new Among ( "ens", 21, 1, "", methodObject ),
new Among ( "arens", 29, 1, "", methodObject ),
new Among ( "hetens", 29, 1, "", methodObject ),
new Among ( "erns", 21, 1, "", methodObject ),
new Among ( "at", -1, 1, "", methodObject ),
new Among ( "andet", -1, 1, "", methodObject ),
new Among ( "het", -1, 1, "", methodObject ),
new Among ( "ast", -1, 1, "", methodObject )
};
private final static Among a_1[] = {
    new Among ( "dd", -1, -1, "", methodObject ),
    new Among ( "gd", -1, -1, "", methodObject ),
    new Among ( "nn", -1, -1, "", methodObject ),
    new Among ( "dt", -1, -1, "", methodObject ),
    new Among ( "gt", -1, -1, "", methodObject ),
    new Among ( "kt", -1, -1, "", methodObject ),
    new Among ( "tt", -1, -1, "", methodObject )
};

private final static Among a_2[] = {
    new Among ( "ig", -1, 1, "", methodObject ),
    new Among ( "lig", 0, 1, "", methodObject ),
    new Among ( "els", -1, 1, "", methodObject ),
    new Among ( "fullt", -1, 3, "", methodObject ),
    new Among ( "l\u00F6st", -1, 2, "", methodObject )
};

private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };

private static final char g_s_ending[] = {119, 127, 149 };

private int I_x;
private int I_p1;

private void copy_from(SwedishStemmer other) {
    I_x = other.I_x;
    I_p1 = other.I_p1;
    super.copy_from(other);
}

private boolean r_mark_regions() {
    int v_1;
    int v_2;
    // (, line 26
    I_p1 = limit;
    // test, line 29
    v_1 = cursor;
    // (, line 29
    // hop, line 29
    {
        int c = cursor + 3;
        if (0 > c || c > limit)
        {
            return false;
        }
        cursor = c;
    }
    // setmark x, line 29
    I_x = cursor;
    cursor = v_1;
    // goto, line 30
    golab0: while(true)
    {
        v_2 = cursor;
        lab1: do {
            if (!(in_grouping(g_v, 97, 246)))
            {
                break lab1;
            }
            cursor = v_2;
            break golab0;
        } while (false);
        cursor = v_2;
        if (cursor >= limit)
        {
            return false;
        }
        cursor++;
    }
    // gopast, line 30
    golab2: while(true)
    {
        lab3: do {
            if (!(out_grouping(g_v, 97, 246)))
            {
                break lab3;
            }
            break golab2;
        } while (false);
        if (cursor >= limit)
        {
            return false;
        }
        cursor++;
    }
    // setmark p1, line 30
    I_p1 = cursor;
    // try, line 31
    lab4: do {
        // (, line 31
        if (!(I_p1 < I_x))
        {
            break lab4;
        }
        I_p1 = I_x;
    } while (false);
    return true;
}

private boolean r_main_suffix() {
    int among_var;
    int v_1;
    int v_2;
    // (, line 36
    // setlimit, line 37
    v_1 = limit - cursor;
    // tomark, line 37
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 37
    // [, line 37
    ket = cursor;
    // substring, line 37
    among_var = find_among_b(a_0, 37);
    if (among_var == 0)
    {
        limit_backward = v_2;
        return false;
    }
    // ], line 37
    bra = cursor;
    limit_backward = v_2;
    switch(among_var) {
        case 0:
            return false;
        case 1:
            // (, line 44
            // delete, line 44
            slice_del();
            break;
        case 2:
            // (, line 46
            if (!(in_grouping_b(g_s_ending, 98, 121)))
            {
                return false;
            }
            // delete, line 46
            slice_del();
            break;
    }
    return true;
}

private boolean r_consonant_pair() {
    int v_1;
    int v_2;
    int v_3;
    // setlimit, line 50
    v_1 = limit - cursor;
    // tomark, line 50
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 50
    // and, line 52
    v_3 = limit - cursor;
    // among, line 51
    if (find_among_b(a_1, 7) == 0)
    {
        limit_backward = v_2;
        return false;
    }
    cursor = limit - v_3;
    // (, line 52
    // [, line 52
    ket = cursor;
    // next, line 52
    if (cursor <= limit_backward)
    {
        limit_backward = v_2;
        return false;
    }
    cursor--;
    // ], line 52
    bra = cursor;
    // delete, line 52
    slice_del();
    limit_backward = v_2;
    return true;
}

private boolean r_other_suffix() {
    int among_var;
    int v_1;
    int v_2;
    // setlimit, line 55
    v_1 = limit - cursor;
    // tomark, line 55
    if (cursor < I_p1)
    {
        return false;
    }
    cursor = I_p1;
    v_2 = limit_backward;
    limit_backward = cursor;
    cursor = limit - v_1;
    // (, line 55
    // [, line 56
    ket = cursor;
    // substring, line 56
    among_var = find_among_b(a_2, 5);
    if (among_var == 0)
    {
        limit_backward = v_2;
        return false;
    }
    // ], line 56
    bra = cursor;
    switch(among_var) {
        case 0:
            limit_backward = v_2;
            return false;
        case 1:
            // (, line 57
            // delete, line 57
            slice_del();
            break;
        case 2:
            // (, line 58
            // <-, line 58
            slice_from("l\u00F6s");
            break;
        case 3:
            // (, line 59
            // <-, line 59
            slice_from("full");
            break;
    }
    limit_backward = v_2;
    return true;
}

public boolean stem() {
    int v_1;
    int v_2;
    int v_3;
    int v_4;
    // (, line 64
    // do, line 66
    v_1 = cursor;
    lab0: do {
        // call mark_regions, line 66
        if (!r_mark_regions())
        {
            break lab0;
        }
    } while (false);
    cursor = v_1;
    // backwards, line 67
    limit_backward = cursor; cursor = limit;
    // (, line 67
    // do, line 68
    v_2 = limit - cursor;
    lab1: do {
        // call main_suffix, line 68
        if (!r_main_suffix())
        {
            break lab1;
        }
    } while (false);
    cursor = limit - v_2;
    // do, line 69
    v_3 = limit - cursor;
    lab2: do {
        // call consonant_pair, line 69
        if (!r_consonant_pair())
        {
            break lab2;
        }
    } while (false);
    cursor = limit - v_3;
    // do, line 70
    v_4 = limit - cursor;
    lab3: do {
        // call other_suffix, line 70
        if (!r_other_suffix())
        {
            break lab3;
        }
    } while (false);
    cursor = limit - v_4;
    cursor = limit_backward; return true;
}

public boolean equals( Object o ) {
    return o instanceof SwedishStemmer;
}

public int hashCode() {
    return SwedishStemmer.class.getName().hashCode();
}

}

View File

@ -40,7 +40,6 @@ org.apache.lucene.analysis.en.PorterStemFilterFactory
org.apache.lucene.analysis.es.SpanishLightStemFilterFactory
org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory
org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory
org.apache.lucene.analysis.fr.ElisionFilterFactory
org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory
org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory
org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory
@ -88,3 +87,4 @@ org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
org.apache.lucene.analysis.synonym.SynonymFilterFactory
org.apache.lucene.analysis.th.ThaiWordFilterFactory
org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory
org.apache.lucene.analysis.util.ElisionFilterFactory

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
*/
import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -33,6 +34,15 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
}
};
Analyzer unibiAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, true));
}
};
public void testHuge() throws Exception {
assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
@ -62,6 +72,96 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" });
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" },
new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>",
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
}
public void testAllScripts() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, false));
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
}
public void testUnigramsAndBigramsAllScripts() throws Exception {
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。",
new String[] {
"", "多く", "", "くの", "", "の学", "", "学生", "",
"生が", "", "が試", "", "試験", "", "験に", "",
"に落", "", "落ち", "", "ちた", ""
},
new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
6, 7, 7, 8, 8, 9, 9, 10, 10, 11 },
new int[] { 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12 },
new String[] { "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>" },
new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
new int[] { 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }
);
}
public void testUnigramsAndBigramsHanOnly() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" },
new int[] { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11 },
new int[] { 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12 },
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>",
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
}
public void testUnigramsAndBigramsHuge() throws Exception {
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
new String[] {
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", ""
}
);
}
/** blast some random strings through the analyzer */
public void testRandomUnibiStrings() throws Exception {
checkRandomData(random(), unibiAnalyzer, 1000*RANDOM_MULTIPLIER);
}
/** blast some random strings through the analyzer */
public void testRandomUnibiHugeStrings() throws Exception {
Random random = random();
checkRandomData(random, unibiAnalyzer, 100*RANDOM_MULTIPLIER, 8192);
}
}

View File

@ -52,4 +52,16 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamTestCase {
assertTokenStreamContents(stream,
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" });
}
public void testHanOnlyUnigrams() throws Exception {
Reader reader = new StringReader("多くの学生が試験に落ちた。");
CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("hiragana", "false");
args.put("outputUnigrams", "true");
factory.init(args);
TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
assertTokenStreamContents(stream,
new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" });
}
}

View File

@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.TestStopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.io.StringReader;
@ -39,7 +39,7 @@ import java.util.HashMap;
public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
Map<String, String> args = new HashMap<String, String>();
@ -89,7 +89,7 @@ public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
* If no words are provided, then a set of english default stopwords is used.
*/
public void testDefaults() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);

View File

@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.TestStopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.io.StringReader;
@ -39,7 +39,7 @@ import java.util.HashMap;
public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
Map<String, String> args = new HashMap<String, String>();
@ -89,7 +89,7 @@ public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
* If no words are provided, then a set of english default stopwords is used.
*/
public void testDefaults() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);

View File

@ -26,7 +26,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
/**
@ -40,7 +40,7 @@ public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenStrea
Reader reader = new StringReader("I like to play softball");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
ResourceLoader loader = new ClasspathResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("dictionary", "compoundDictionary.txt");
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);

View File

@ -26,7 +26,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
/**
@ -40,7 +40,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStre
Reader reader = new StringReader("min veninde som er lidt af en læsehest");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
ResourceLoader loader = new ClasspathResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("hyphenator", "da_UTF8.xml");
args.put("dictionary", "da_compoundDictionary.txt");
@ -64,7 +64,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStre
Reader reader = new StringReader("basketballkurv");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
ResourceLoader loader = new ClasspathResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("hyphenator", "da_UTF8.xml");
args.put("minSubwordSize", "2");

View File

@ -285,8 +285,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"content",
new BytesRef("another"),
false);
new BytesRef("another"));
assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());

View File

@ -103,7 +103,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
new BytesRef("Q36"),
MultiFields.getLiveDocs(reader),
null,
false);
0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
td = _TestUtil.docs(random(),
reader,
@ -111,7 +111,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
new BytesRef("Q37"),
MultiFields.getLiveDocs(reader),
null,
false);
0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
}

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.util.Map;
@ -32,7 +32,7 @@ import java.util.HashMap;
public class TestStopFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
ResourceLoader loader = new ClasspathResourceLoader(getClass());
assertTrue("loader is null and it shouldn't be", loader != null);
StopFilterFactory factory = new StopFilterFactory();
Map<String, String> args = new HashMap<String, String>();

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.junit.Test;
@ -34,7 +34,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
@Test
public void testInform() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
ResourceLoader loader = new ClasspathResourceLoader(getClass());
TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>();
args.put("types", "stoptypes-1.txt");
@ -94,7 +94,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
args.put("enablePositionIncrements", "false");
typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
typeTokenFilterFactory.init(args);
typeTokenFilterFactory.inform(new ResourceAsStreamResourceLoader(getClass()));
typeTokenFilterFactory.inform(new ClasspathResourceLoader(getClass()));
fail("not supplying 'types' parameter should cause an IllegalArgumentException");
} catch (IllegalArgumentException e) {
// everything ok

View File

@ -25,7 +25,7 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/**
* Simple tests to ensure the Hunspell stemmer loads from factory
@ -38,7 +38,7 @@ public class TestHunspellStemFilterFactory extends BaseTokenStreamTestCase {
args.put("affix", "test.aff");
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
factory.init(args);
factory.inform(new ResourceAsStreamResourceLoader(getClass()));
factory.inform(new ClasspathResourceLoader(getClass()));
Reader reader = new StringReader("abc");
TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import java.util.Map;
@ -32,7 +32,7 @@ import java.util.HashMap;
public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
ResourceLoader loader = new ClasspathResourceLoader(getClass());
assertTrue("loader is null and it shouldn't be", loader != null);
KeepWordFilterFactory factory = new KeepWordFilterFactory();
Map<String, String> args = new HashMap<String, String>();

View File

@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
TermsEnum termsEnum = vector.iterator(null);
termsEnum.next();
assertEquals(2, termsEnum.totalTermFreq());
DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null, true);
DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(2, positions.freq());
positions.nextPosition();

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -116,4 +117,21 @@ public class TestSnowball extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "", "");
}
}
public void testRandomStrings() throws IOException {
for (String lang : SNOWBALL_LANGS) {
checkRandomStrings(lang);
}
}
public void checkRandomStrings(final String snowballLanguage) throws IOException {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new MockTokenizer(reader);
return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
}

Some files were not shown because too many files have changed in this diff.