LUCENE-3892: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1372366 13f79535-47bb-0310-9956-ffa450edef68
commit 789981c9fd
Michael McCandless, 2012-08-13 11:16:57 +00:00
369 changed files with 5511 additions and 3981 deletions

build-clover.xml (new file, 49 lines)

@@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<project name="clover" basedir=".">
  <import file="lucene/common-build.xml"/>

  <!--
    Run after JUnit tests.
    This target is in a separate file, as it needs to include common-build.xml,
    but must run from top-level!
  -->
  <target name="generate-clover-reports" depends="clover">
    <fail unless="run.clover">Clover not enabled!</fail>
    <mkdir dir="${clover.report.dir}"/>
    <fileset dir="." id="clover.test.result.files">
      <include name="*/build/**/test/TEST-*.xml"/>
      <exclude name="lucene/build/backwards/**"/>
    </fileset>
    <clover-report>
      <current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
        <format type="html" filter="assert"/>
        <testresults refid="clover.test.result.files"/>
      </current>
      <current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
        <format type="xml" filter="assert"/>
        <testresults refid="clover.test.result.files"/>
      </current>
    </clover-report>
    <echo>You can find the merged Lucene/Solr Clover report in '${clover.report.dir}'.</echo>
  </target>
</project>
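As a usage sketch (assuming Clover is licensed and configured as lucene/common-build.xml expects), the new report target can be driven manually from the top level; the property overrides mirror what the jenkins-clover target in build.xml (below) passes via <antcall>:

    ant -Drun.clover=true -Dtests.jvms=1 clean test generate-clover-reports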

build.xml (130 lines changed)

@@ -51,11 +51,28 @@
     </sequential>
   </target>

-  <target name="validate" description="Validate dependencies, licenses, etc.">
-    <sequential><subant target="validate" inheritall="false" failonerror="true">
+  <target name="validate" description="Validate dependencies, licenses, etc." depends="-validate-source-patterns">
+    <subant target="validate" inheritall="false" failonerror="true">
       <fileset dir="lucene" includes="build.xml" />
       <fileset dir="solr" includes="build.xml" />
-    </subant></sequential>
+    </subant>
   </target>
+
+  <target name="-validate-source-patterns" unless="disable.source-patterns">
+    <!-- check that there are no nocommits or @author javadoc tags: -->
+    <property name="validate.currDir" location="."/>
+    <pathconvert pathsep="${line.separator}" dirsep="/" property="validate.patternsFound" setonempty="false">
+      <fileset dir="${validate.currDir}">
+        <include name="**/*.java"/>
+        <exclude name="**/backwards/**"/>
+        <or>
+          <containsregexp expression="@author\b" casesensitive="yes"/>
+          <containsregexp expression="\bno(n|)commit\b" casesensitive="no"/>
+        </or>
+      </fileset>
+      <map from="${validate.currDir}${file.separator}" to="* "/>
+    </pathconvert>
+    <fail if="validate.patternsFound">The following files contain @author tags or nocommits:${line.separator}${validate.patternsFound}</fail>
+  </target>

   <target name="rat-sources" description="Runs rat across all sources and tests">
@@ -184,4 +201,111 @@
       </subant>
     </sequential>
   </target>
+
+  <!-- define here, as common-build is not included! -->
+  <property name="python32.exe" value="python3.2" />
+  <property name="fakeRelease" value="lucene/build/fakeRelease"/>
+  <property name="fakeReleaseTmp" value="lucene/build/fakeReleaseTmp"/>
+  <property name="fakeReleaseVersion" value="5.0"/> <!-- *not* -SNAPSHOT, the real version -->
+
+  <target name="nightly-smoke" description="Builds an unsigned release and smoke tests it." depends="clean">
+    <sequential>
+      <fail unless="JAVA6_HOME">JAVA6_HOME property is not defined.</fail>
+      <fail unless="JAVA7_HOME">JAVA7_HOME property is not defined.</fail>
+      <subant target="prepare-release-no-sign" inheritall="false" failonerror="true">
+        <fileset dir="lucene" includes="build.xml" />
+        <fileset dir="solr" includes="build.xml" />
+        <property name="version" value="${fakeReleaseVersion}" />
+      </subant>
+      <delete dir="${fakeRelease}"/>
+      <delete dir="${fakeReleaseTmp}"/>
+      <mkdir dir="${fakeRelease}"/>
+      <copy todir="${fakeRelease}/lucene">
+        <fileset dir="lucene/dist"/>
+      </copy>
+      <copy todir="${fakeRelease}/lucene/changes">
+        <fileset dir="lucene/build/docs/changes"/>
+      </copy>
+      <get src="http://people.apache.org/keys/group/lucene.asc"
+           dest="${fakeRelease}/lucene/KEYS"/>
+      <copy todir="${fakeRelease}/solr">
+        <fileset dir="solr/package"/>
+      </copy>
+      <copy file="${fakeRelease}/lucene/KEYS" todir="${fakeRelease}/solr"/>
+      <makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
+      <exec executable="${python32.exe}" failonerror="true">
+        <arg value="-u"/>
+        <arg value="dev-tools/scripts/smokeTestRelease.py"/>
+        <arg value="${fakeRelease.uri}"/>
+        <arg value="${fakeReleaseVersion}"/>
+        <arg value="${fakeReleaseTmp}"/>
+        <arg value="false"/>
+        <env key="JAVA6_HOME" value="${JAVA6_HOME}"/>
+        <env key="JAVA7_HOME" value="${JAVA7_HOME}"/>
+      </exec>
+      <delete dir="${fakeRelease}"/>
+      <delete dir="${fakeReleaseTmp}"/>
+    </sequential>
+  </target>
+
+  <!-- Calls only generate-clover-reports on Lucene, as Solr's is just a clone with another target; the database itself is fixed -->
+  <target name="generate-clover-reports">
+    <subant target="generate-clover-reports" inheritall="false" failonerror="true">
+      <fileset dir="." includes="build-clover.xml" />
+    </subant>
+  </target>
+
+  <!-- Jenkins tasks -->
+  <target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,-svn-status"/>
+
+  <target name="jenkins-clover">
+    <antcall target="-jenkins-clover">
+      <param name="run.clover" value="true"/>
+      <!-- must be 1, as clover does not like parallel test runs: -->
+      <param name="tests.jvms" value="1"/>
+      <!-- Also override some other props to be fast, ignoring what's set on command line: -->
+      <param name="tests.multiplier" value="1"/>
+      <param name="tests.slow" value="false"/>
+      <param name="tests.nightly" value="false"/>
+      <param name="tests.weekly" value="false"/>
+      <param name="tests.multiplier" value="1"/>
+    </antcall>
+  </target>
+
+  <target name="-jenkins-clover" depends="clean,test,generate-clover-reports"/>
+
+  <!-- we need this extra condition, as we want to match only on "true", not solely if property is set: -->
+  <property name="disable.javadocs-lint" value="false" />
+  <condition property="-disable.javadocs-lint">
+    <equals arg1="${disable.javadocs-lint}" arg2="true"/>
+  </condition>
+
+  <target name="-jenkins-javadocs-lint" unless="-disable.javadocs-lint">
+    <antcall target="javadocs-lint"/>
+  </target>
+
+  <!-- define here, as common-build is not included! -->
+  <property name="svn.exe" value="svn" />
+
+  <target name="-svn-status">
+    <exec executable="${svn.exe}" dir="." failonerror="true">
+      <arg value="status"/>
+      <redirector outputproperty="svn.status.output">
+        <outputfilterchain>
+          <linecontainsregexp>
+            <regexp pattern="^\?" />
+          </linecontainsregexp>
+          <tokenfilter>
+            <replaceregex pattern="^........" replace="* " />
+            <replacestring from="${file.separator}" to="/" />
+          </tokenfilter>
+        </outputfilterchain>
+      </redirector>
+    </exec>
+    <fail message="Source checkout is dirty after running tests!!! Offending files:${line.separator}${svn.status.output}">
+      <condition>
+        <not>
+          <equals arg1="${svn.status.output}" arg2=""/>
+        </not>
+      </condition>
+    </fail>
+  </target>

 </project>
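Some plausible command lines for the new targets; the property names come straight from the targets above, while the JDK paths are placeholders:

    # Smoke-test an unsigned fake release (needs local JDK 6 and JDK 7 installs):
    ant nightly-smoke -DJAVA6_HOME=/path/to/jdk6 -DJAVA7_HOME=/path/to/jdk7

    # Hourly Jenkins run, optionally skipping the javadocs lint step:
    ant jenkins-hourly -Ddisable.javadocs-lint=true

    # Run validate without the @author/nocommit source scan
    # (-validate-source-patterns is skipped whenever the property is set):
    ant validate -Ddisable.source-patterns=true

The -svn-status target is roughly equivalent to failing the build whenever "svn status | grep '^?'" prints anything, i.e. whenever the test run leaves unversioned files behind.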

@@ -174,6 +174,6 @@
 <classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-beanutils-1.7.0.jar"/>
 <classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
 <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
-<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
+<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
 <classpathentry kind="output" path="bin/other"/>
 </classpath>

@@ -2,7 +2,7 @@
 <library name="JUnit">
   <CLASSES>
     <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
-    <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar!/" />
+    <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar!/" />
   </CLASSES>
   <JAVADOC />
   <SOURCES />

@@ -36,27 +36,25 @@ A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts

 B. How to generate Lucene/Solr Maven artifacts

-   Prerequisites: JDK 1.6+ and Ant 1.7.X
+   Prerequisites: JDK 1.6+ and Ant 1.8.2+

    Run 'ant generate-maven-artifacts' to create an internal Maven
    repository, including POMs, binary .jars, source .jars, and javadoc
    .jars.

-   You can run the above command in four possible places: the top-level
-   directory; under lucene/; under solr/; or under modules/. From the
-   top-level directory, from lucene/, or from modules/, the internal
-   repository will be located at dist/maven/. From solr/, the internal
-   repository will be located at package/maven/.
+   You can run the above command in three possible places: the top-level
+   directory; under lucene/; or under solr/. From the top-level directory
+   or from lucene/, the internal repository will be located at dist/maven/.
+   From solr/, the internal repository will be located at package/maven/.

 C. How to deploy Maven artifacts to a repository

-   Prerequisites: JDK 1.6+ and Ant 1.7.X
+   Prerequisites: JDK 1.6+ and Ant 1.8.2+

-   You can deploy targets for all of Lucene/Solr, only Lucene, only Solr,
-   or only modules/, as in B. above. To deploy to a Maven repository, the
-   command is the same as in B. above, with the addition of two system
-   properties:
+   You can deploy targets for all of Lucene/Solr, only Lucene, or only Solr,
+   as in B. above. To deploy to a Maven repository, the command is the same
+   as in B. above, with the addition of two system properties:

     ant -Dm2.repository.id=my-repo-id \
         -Dm2.repository.url=http://example.org/my/repo \

@@ -101,7 +99,7 @@ D. How to use Maven to build Lucene/Solr
    the default, you can supply an alternate version on the command line
    with the above command, e.g.:

-   ant -Dversion=5.0-my-special-version get-maven-poms
+   ant -Dversion=my-special-version get-maven-poms

    Note: if you change the version in the POMs, there is one test method
    that will fail under maven-surefire-plugin:
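Reading sections B and C together, the complete deploy command line is presumably the section-B target plus the two properties (the hunk shown here cuts the command off after the second property):

    ant -Dm2.repository.id=my-repo-id \
        -Dm2.repository.url=http://example.org/my/repo \
        generate-maven-artifacts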

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -77,33 +71,5 @@
     </excludes>
   </testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.codehaus.mojo</groupId>
-    <artifactId>appassembler-maven-plugin</artifactId>
-    <configuration>
-      <extraJvmArguments>-Xmx128M</extraJvmArguments>
-      <repositoryLayout>flat</repositoryLayout>
-      <platforms>
-        <platform>windows</platform>
-        <platform>unix</platform>
-      </platforms>
-      <programs>
-        <program>
-          <mainClass>org.apache.lucene.analysis.charfilter.HtmlStripCharFilter</mainClass>
-          <name>HtmlStripCharFilter</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.analysis.en.PorterStemmer</mainClass>
-          <name>EnglishPorterStemmer</name>
-        </program>
-        <program>
-          <mainClass>org.tartarus.snowball.TestApp</mainClass>
-          <name>SnowballTestApp</name>
-        </program>
-      </programs>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -40,15 +40,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -39,15 +39,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -39,15 +39,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -75,6 +69,11 @@
 <build>
   <sourceDirectory>${module-path}/src/java</sourceDirectory>
   <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+  <resources>
+    <resource>
+      <directory>${module-path}/src/resources</directory>
+    </resource>
+  </resources>
   <testResources>
     <testResource>
       <directory>${project.build.testSourceDirectory}</directory>

@@ -39,15 +39,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -75,6 +69,11 @@
 <build>
   <sourceDirectory>${module-path}/src/java</sourceDirectory>
   <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+  <resources>
+    <resource>
+      <directory>${module-path}/src/resources</directory>
+    </resource>
+  </resources>
   <testResources>
     <testResource>
       <directory>${project.build.testSourceDirectory}</directory>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -41,15 +41,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -120,41 +114,5 @@
   </includes>
 </testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.codehaus.mojo</groupId>
-    <artifactId>appassembler-maven-plugin</artifactId>
-    <configuration>
-      <extraJvmArguments>-Xmx128M</extraJvmArguments>
-      <repositoryLayout>flat</repositoryLayout>
-      <platforms>
-        <platform>windows</platform>
-        <platform>unix</platform>
-      </platforms>
-      <programs>
-        <program>
-          <mainClass>org.apache.lucene.benchmark.byTask.Benchmark</mainClass>
-          <name>Benchmark</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.benchmark.quality.trec.QueryDriver</mainClass>
-          <name>QueryDriver</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder</mainClass>
-          <name>QualityQueriesFinder</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.benchmark.utils.ExtractReuters</mainClass>
-          <name>ExtractReuters</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.benchmark.utils.ExtractWikipedia</mainClass>
-          <name>ExtractWikipedia</name>
-        </program>
-      </programs>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -104,40 +98,6 @@
       </systemPropertyVariables>
     </configuration>
   </plugin>
-  <plugin>
-    <groupId>org.codehaus.mojo</groupId>
-    <artifactId>appassembler-maven-plugin</artifactId>
-    <configuration>
-      <extraJvmArguments>-Xmx128M</extraJvmArguments>
-      <repositoryLayout>flat</repositoryLayout>
-      <platforms>
-        <platform>windows</platform>
-        <platform>unix</platform>
-      </platforms>
-      <programs>
-        <program>
-          <mainClass>org.apache.lucene.index.CheckIndex</mainClass>
-          <name>CheckIndex</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.index.IndexReader</mainClass>
-          <name>IndexReader</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.store.LockStressTest</mainClass>
-          <name>LockStressTest</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.store.LockVerifyServer</mainClass>
-          <name>LockVerifyServer</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.util.English</mainClass>
-          <name>English</name>
-        </program>
-      </programs>
-    </configuration>
-  </plugin>
   <plugin>
     <groupId>org.codehaus.mojo</groupId>
     <artifactId>build-helper-maven-plugin</artifactId>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -87,30 +81,5 @@
     </excludes>
   </testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.codehaus.mojo</groupId>
-    <artifactId>appassembler-maven-plugin</artifactId>
-    <configuration>
-      <extraJvmArguments>-Xmx128M</extraJvmArguments>
-      <repositoryLayout>flat</repositoryLayout>
-      <assembleDirectory>${build-directory}</assembleDirectory>
-      <platforms>
-        <platform>windows</platform>
-        <platform>unix</platform>
-      </platforms>
-      <programs>
-        <program>
-          <mainClass>org.apache.lucene.demo.IndexFiles</mainClass>
-          <name>IndexFiles</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.demo.SearchFiles</mainClass>
-          <name>SearchFiles</name>
-        </program>
-      </programs>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -39,15 +39,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -39,15 +39,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -39,15 +39,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -72,49 +66,5 @@
     </excludes>
   </testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.codehaus.mojo</groupId>
-    <artifactId>appassembler-maven-plugin</artifactId>
-    <configuration>
-      <extraJvmArguments>-Xmx128M</extraJvmArguments>
-      <repositoryLayout>flat</repositoryLayout>
-      <platforms>
-        <platform>windows</platform>
-        <platform>unix</platform>
-      </platforms>
-      <programs>
-        <program>
-          <mainClass>org.apache.lucene.index.FieldNormModifier</mainClass>
-          <name>FieldNormModifier</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.index.IndexSplitter</mainClass>
-          <name>IndexSplitter</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.index.MultiPassIndexSplitter</mainClass>
-          <name>MultiPassIndexSplitter</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.misc.GetTermInfo</mainClass>
-          <name>GetTermInfo</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.misc.HighFreqTerms</mainClass>
-          <name>HighFreqTerms</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.misc.IndexMergeTool</mainClass>
-          <name>IndexMergeTool</name>
-        </program>
-        <program>
-          <mainClass>org.apache.lucene.misc.LengthNormModifier</mainClass>
-          <name>LengthNormModifier</name>
-        </program>
-      </programs>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,15 +35,9 @@
   <module-directory>lucene</module-directory>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <modules>
   <module>core</module>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -37,15 +37,9 @@
   <module-path>${top-level}/${module-directory}</module-path>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -31,15 +31,18 @@
 <version>@version@</version>
 <packaging>pom</packaging>
 <name>Grandparent POM for Apache Lucene Core and Apache Solr</name>
-<description>Parent POM for Apache Lucene Core and Apache Solr</description>
-<url>http://lucene.apache.org/java</url>
+<description>Grandparent POM for Apache Lucene Core and Apache Solr</description>
+<url>http://lucene.apache.org</url>
 <modules>
   <module>lucene</module>
   <module>solr</module>
 </modules>
 <properties>
   <top-level>..</top-level>
-  <base.specification.version>4.0.0</base.specification.version>
+  <vc-anonymous-base-url>http://svn.apache.org/repos/asf/lucene/dev/trunk</vc-anonymous-base-url>
+  <vc-dev-base-url>https://svn.apache.org/repos/asf/lucene/dev/trunk</vc-dev-base-url>
+  <vc-browse-base-url>http://svn.apache.org/viewvc/lucene/dev/trunk</vc-browse-base-url>
+  <base.specification.version>5.0.0</base.specification.version>
   <maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
   <java.compat.version>1.6</java.compat.version>
   <jetty.version>8.1.2.v20120308</jetty.version>

@@ -69,11 +72,11 @@
 </properties>
 <issueManagement>
   <system>JIRA</system>
-  <url>http://issues.apache.org/jira/browse/LUCENE</url>
+  <url>https://issues.apache.org/jira/browse/LUCENE</url>
 </issueManagement>
 <ciManagement>
-  <system>Hudson</system>
-  <url>http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/</url>
+  <system>Jenkins</system>
+  <url>https://builds.apache.org/computer/lucene/</url>
 </ciManagement>
 <mailingLists>
   <mailingList>

@@ -109,15 +112,9 @@
 </mailingLists>
 <inceptionYear>2000</inceptionYear>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}</developerConnection>
+  <url>${vc-browse-base-url}</url>
 </scm>
 <licenses>
   <license>

@@ -388,7 +385,7 @@
 <dependency>
   <groupId>com.carrotsearch.randomizedtesting</groupId>
   <artifactId>randomizedtesting-runner</artifactId>
-  <version>1.6.0</version>
+  <version>2.0.0.rc5</version>
 </dependency>
 </dependencies>
 </dependencyManagement>

@@ -549,11 +546,6 @@
     </archive>
   </configuration>
 </plugin>
-<plugin>
-  <groupId>org.codehaus.mojo</groupId>
-  <artifactId>appassembler-maven-plugin</artifactId>
-  <version>1.2.1</version>
-</plugin>
 <plugin>
   <groupId>org.codehaus.mojo</groupId>
   <artifactId>build-helper-maven-plugin</artifactId>
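To see what the new vc-*-base-url properties buy, interpolate one of the child scm blocks above; assuming module-directory is lucene/core for the core module, the one-line form

    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>

resolves to exactly the URL the old multi-line form hardcoded:

    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/core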

@@ -35,18 +35,11 @@
 <module-directory>solr/contrib/analysis-extras</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -101,17 +94,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,18 +35,11 @@
 <module-directory>solr/contrib/clustering</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -106,17 +99,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,18 +35,11 @@
 <module-directory>solr/contrib/dataimporthandler-extras</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -104,17 +97,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,18 +35,11 @@
 <module-directory>solr/contrib/dataimporthandler</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -90,6 +83,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
 <plugins>
   <plugin>

@@ -103,15 +102,6 @@
       </execution>
     </executions>
   </plugin>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
 </plugins>
 </build>
 </project>

@@ -38,18 +38,11 @@
 <module-directory>solr/contrib/extraction</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -102,17 +95,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -39,18 +39,11 @@
 <module-directory>solr/contrib/langid</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -107,17 +100,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,18 +35,11 @@
 <module-directory>solr/contrib/uima</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -121,17 +114,12 @@
 <testResource>
   <directory>${module-path}/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,18 +35,11 @@
 <module-directory>solr/contrib/velocity</module-directory>
 <top-level>../../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -142,17 +135,12 @@
 <testResource>
   <directory>${top-level}/solr/core/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
-<plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-</plugins>
 </build>
 </project>

@@ -35,18 +35,11 @@
 <module-directory>solr/core</module-directory>
 <top-level>../../..</top-level>
 <module-path>${top-level}/${module-directory}</module-path>
-<surefire-top-level>${top-level}/../..</surefire-top-level>
 </properties>
 <scm>
-  <connection>
-    scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </connection>
-  <developerConnection>
-    scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-  </developerConnection>
-  <url>
-    http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-  </url>
+  <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+  <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+  <url>${vc-browse-base-url}/${module-directory}</url>
 </scm>
 <dependencies>
   <dependency>

@@ -243,48 +236,14 @@
 <testResource>
   <directory>${top-level}/solr/solrj/src/test-files</directory>
 </testResource>
+<testResource>
+  <directory>${top-level}/dev-tools/maven/solr</directory>
+  <includes>
+    <include>maven.testlogging.properties</include>
+  </includes>
+</testResource>
 </testResources>
 <plugins>
-  <plugin>
-    <groupId>org.apache.maven.plugins</groupId>
-    <artifactId>maven-surefire-plugin</artifactId>
-    <configuration>
-      <systemPropertyVariables>
-        <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-      </systemPropertyVariables>
-    </configuration>
-  </plugin>
-  <plugin>
-    <groupId>org.codehaus.mojo</groupId>
-    <artifactId>appassembler-maven-plugin</artifactId>
-    <configuration>
-      <extraJvmArguments>-Xmx128M</extraJvmArguments>
-      <repositoryLayout>flat</repositoryLayout>
-      <platforms>
-        <platform>windows</platform>
-        <platform>unix</platform>
-      </platforms>
-      <programs>
-        <program>
-          <mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
-          <name>JettySolrRunner</name>
-        </program>
-        <program>
-          <mainClass>org.apache.solr.util.BitSetPerf</mainClass>
-          <name>BitSetPerf</name>
-          <extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
-        </program>
-        <program>
-          <mainClass>org.apache.solr.util.SimplePostTool</mainClass>
-          <name>SimplePostTool</name>
-        </program>
-        <program>
-          <mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
-          <name>SuggestMissingFactories</name>
-        </program>
-      </programs>
-    </configuration>
-  </plugin>
   <plugin>
     <groupId>org.codehaus.mojo</groupId>
     <artifactId>build-helper-maven-plugin</artifactId>

View File

@ -0,0 +1,2 @@
handlers=java.util.logging.ConsoleHandler
.level=SEVERE

View File

@ -43,26 +43,14 @@
    <module-directory>solr</module-directory>
  </properties>
  <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
  </scm>
  <issueManagement>
    <system>JIRA</system>
-    <url>http://issues.apache.org/jira/browse/SOLR</url>
+    <url>https://issues.apache.org/jira/browse/SOLR</url>
  </issueManagement>
-  <ciManagement>
-    <system>Hudson</system>
-    <url>
-      http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
-    </url>
-  </ciManagement>
  <mailingLists>
    <mailingList>
      <name>Solr User List</name>
@ -111,6 +99,15 @@
          <doctitle>${project.name} ${project.version} API (${now.version})</doctitle>
        </configuration>
      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <systemPropertyVariables>
+            <java.util.logging.config.file>../test-classes/maven.testlogging.properties</java.util.logging.config.file>
+          </systemPropertyVariables>
+        </configuration>
+      </plugin>
    </plugins>
  </pluginManagement>
</build>

View File

@ -37,15 +37,9 @@
    <module-path>${top-level}/${module-directory}</module-path>
  </properties>
  <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
  </scm>
  <dependencies>
    <dependency>

View File

@ -37,15 +37,9 @@
    <module-path>${top-level}/${module-directory}</module-path>
  </properties>
  <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
  </scm>
  <dependencies>
    <!-- These dependencies are compile scope because this is a test framework. -->
@ -60,20 +54,29 @@
      <artifactId>solr-core</artifactId>
      <version>${project.version}</version>
    </dependency>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+      <!-- SOLR-3263: Provided scope is required to avoid jar signing conflicts -->
+      <scope>provided</scope>
+    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
    </dependency>
-    <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
-         you can exclude the three Jetty dependencies below. -->
    <dependency>
      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-      <scope>runtime</scope>
+      <artifactId>jetty-servlet</artifactId>
    </dependency>
    <dependency>
      <groupId>org.eclipse.jetty</groupId>
      <artifactId>jetty-util</artifactId>
+    </dependency>
+    <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
+         you can exclude the two Jetty dependencies below. -->
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
      <scope>runtime</scope>
    </dependency>
    <dependency>

View File

@ -37,15 +37,9 @@
    <module-path>${top-level}/${module-directory}</module-path>
  </properties>
  <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
  </scm>
  <dependencies>
    <dependency>

View File

@ -58,7 +58,7 @@ def javaExe(version):
def verifyJavaVersion(version):
  s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
-  if s.find('java version "%s.' % version) == -1:
+  if s.find(' version "%s.' % version) == -1:
    raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))

# http://s.apache.org/lusolr32rc2
@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, tmpDir):
    raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))

def getDirEntries(urlString):
+  if urlString.startswith('file:/') and not urlString.startswith('file://'):
+    # stupid bogus ant URI
+    urlString = "file:///" + urlString[6:]
  if urlString.startswith('file://'):
    path = urlString[7:]
    if path.endswith('/'):
@ -1026,7 +1030,7 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):

def main():
-  if len(sys.argv) != 4:
+  if len(sys.argv) < 4:
    print()
    print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
    print()
@ -1035,8 +1039,11 @@ def main():
  baseURL = sys.argv[1]
  version = sys.argv[2]
  tmpDir = os.path.abspath(sys.argv[3])
+  isSigned = True
+  if len(sys.argv) == 5:
+    isSigned = (sys.argv[4] == "True")
-  smokeTest(baseURL, version, tmpDir, True)
+  smokeTest(baseURL, version, tmpDir, isSigned)

def smokeTest(baseURL, version, tmpDir, isSigned):
@ -1090,4 +1097,5 @@ if __name__ == '__main__':
  except:
    import traceback
    traceback.print_exc()
+    sys.exit(1)
  sys.exit(0)

View File

@ -6,6 +6,56 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
======================= Lucene 4.0.0 =======================
New Features
* LUCENE-1888: Added the option to store payloads in the term
vectors (IndexableFieldType.storeTermVectorPayloads()). Note
that you must store term vector positions to store payloads.
(Robert Muir)
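
  [Editorial sketch, not part of this commit: enabling the new option with the
  4.0 FieldType API might look like the following; the field name, document, and
  analyzer wiring are assumed. Positions must be enabled, or the payload option
  will be rejected at indexing time.

    // assumed example: a text field with term vectors + positions + payloads
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true); // required before payloads
    ft.setStoreTermVectorPayloads(true);  // the new option from LUCENE-1888
    doc.add(new Field("body", "some analyzed text", ft));
  ]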
API Changes
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
Previously you had no real way to know that a term vector field
had positions or offsets, since this can be configured on a
per-field-per-document basis. (Robert Muir)
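
  [Editorial sketch of how a consumer might use these; reader, docID, and the
  field name "body" are assumed, and getTermVector returns null when no vector
  was stored:

    Terms vector = reader.getTermVector(docID, "body");
    if (vector != null && vector.hasPositions()) {
      // safe to pull a DocsAndPositionsEnum from this term vector
    }
    if (vector != null && vector.hasOffsets()) {
      // offsets were stored for this field in this document
    }
  ]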
* Removed DocsAndPositionsEnum.hasPayload() and simplified the
contract of getPayload(). It returns null if there is no payload,
otherwise returns the current payload. You can now call it multiple
times per position if you want. (Robert Muir)
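
  [Editorial sketch of the simplified contract; postings is an assumed
  DocsAndPositionsEnum already positioned via nextPosition():

    postings.nextPosition();
    BytesRef payload = postings.getPayload();
    if (payload != null) {
      // payload bytes for this position; calling getPayload() again is allowed
    }
  ]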
* Removed FieldsEnum. Fields API instead implements Iterable<String>
and exposes Iterator, so you can iterate over field names with
for (String field : fields) instead. (Robert Muir)
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
did not work at all, it would infinitely recurse.
(Alberto Paro via Robert Muir)
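
  [Editorial sketch exercising the fixed method; the field and term are assumed
  examples:

    Bits liveDocs = MultiFields.getLiveDocs(reader);
    DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, "body", new BytesRef("lucene"));
    if (docs != null) {
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // process docs.docID()
      }
    }
  ]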
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)
* Don't allow negatives in the positions file. If you have an index
from 2.4.0 or earlier with such negative positions, and you already
upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
CheckIndex. If it fails, then you need to upgrade again to 4.0 (Robert Muir)
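
  [Editorial sketch: CheckIndex can also be run programmatically; the index
  path is an assumed example.

    Directory dir = FSDirectory.open(new File("/path/to/index"));
    CheckIndex checker = new CheckIndex(dir);
    CheckIndex.Status status = checker.checkIndex();
    if (!status.clean) {
      // problems found: upgrade the index to 4.0 as described above
    }
    dir.close();
  ]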
Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
thread leak detection. Added support for suite timeouts. (Dawid Weiss)
======================= Lucene 4.0.0-BETA =======================
@ -48,6 +98,11 @@ New features
  reader is an NRT reader, and the segment has not yet been merged
  away (Mike McCandless).
* LUCENE-4286: Added option to CJKBigramFilter to always also output
unigrams. This can be used for a unigram+bigram approach, or at
index-time only for better support of short queries.
(Tom Burton-West, Robert Muir)
API Changes

* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@ -115,6 +170,10 @@ Optimizations
  making them substantially more lightweight. Behavior is unchanged.
  (Robert Muir)
* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers
such as StandardTokenizer from 32kb to 8kb.
(Raintung Li, Steven Rowe, Robert Muir)
Bug Fixes

* LUCENE-4109: BooleanQueries are not parsed correctly with the
@ -164,6 +223,9 @@ Bug Fixes
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
  (Johannes Christen, Uwe Schindler, Robert Muir)
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
(Robert Muir)
Changes in Runtime Behavior

* LUCENE-4109: Enable position increments in the flexible queryparser by default.

View File

@ -9,7 +9,7 @@ enumeration APIs. Here are the major changes:
  by the BytesRef class (which provides an offset + length "slice"
  into an existing byte[]).

-* Fields are separately enumerated (FieldsEnum) from the terms
+* Fields are separately enumerated (Fields.iterator()) from the terms
  within each field (TermEnum). So instead of this:

    TermEnum termsEnum = ...;
@ -20,10 +20,8 @@ enumeration APIs. Here are the major changes:
  Do this:

-    FieldsEnum fieldsEnum = ...;
-    String field;
-    while((field = fieldsEnum.next()) != null) {
-      TermsEnum termsEnum = fieldsEnum.terms();
+    for(String field : fields) {
+      TermsEnum termsEnum = fields.terms(field);
      BytesRef text;
      while((text = termsEnum.next()) != null) {
        System.out.println("field=" + field + "; text=" + text.utf8ToString());
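
  [Editorial note: in the released 4.0 API, Fields.terms(field) returns a Terms
  instance, so a fuller version of the new-style loop looks like the sketch
  below; fields might come from MultiFields.getFields(reader), and null checks
  are included because a reader may have no postings and a field's terms may be
  absent.

    Fields fields = MultiFields.getFields(reader);
    if (fields != null) {
      for(String field : fields) {
        Terms terms = fields.terms(field);
        if (terms != null) {
          TermsEnum termsEnum = terms.iterator(null);
          BytesRef text;
          while((text = termsEnum.next()) != null) {
            System.out.println("field=" + field + "; text=" + text.utf8ToString());
          }
        }
      }
    }
  ]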
@ -316,11 +314,12 @@ an AtomicReader. Note: using "atomicity emulators" can cause serious
  slowdowns due to the need to merge terms, postings, DocValues, and
  FieldCache, use them with care!

-## LUCENE-2413: Analyzer package changes
+## LUCENE-2413,LUCENE-3396: Analyzer package changes

  Lucene's core and contrib analyzers, along with Solr's analyzers,
  were consolidated into lucene/analysis. During the refactoring some
-  package names have changed:
+  package names have changed, and ReusableAnalyzerBase was renamed to
+  Analyzer:

  - o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
  - o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@ -345,7 +344,7 @@ package names have changed:
  - o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
  - o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
  - o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
-  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
  - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
  - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
  - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer

View File

@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.charfilter;

@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
 * This class is a scanner generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/26/12 6:22 PM from the specification file
- * <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
 */
public final class HTMLStripCharFilter extends BaseCharFilter {

@ -31255,6 +31255,93 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
      { yybegin(STYLE);
      }
    case 55: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 56: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 57: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 58: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 61: break;
case 21:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(SINGLE_QUOTED_STRING);
}
case 62: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 63: break;
case 35:
{ yybegin(SCRIPT);
}
case 64: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 65: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 66: break;
    case 51:
      { // Handle paired UTF-16 surrogates.
        String surrogatePair = yytext();
@ -31288,13 +31375,331 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
        inputSegment.append('#');
        yybegin(NUMERIC_CHARACTER);
      }
-    case 56: break;
-    case 21:
+    case 67: break;
+    case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 68: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 69: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 70: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 71: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 72: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 73: break;
case 22:
      { previousRestoreState = restoreState;
        restoreState = SERVER_SIDE_INCLUDE;
-        yybegin(SINGLE_QUOTED_STRING);
+        yybegin(DOUBLE_QUOTED_STRING);
      }
-    case 57: break;
+    case 74: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 75: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 76: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 77: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 78: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 79: break;
case 32:
{ yybegin(COMMENT);
}
case 80: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 81: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 82: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 83: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 84: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 85: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 86: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 87: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 88: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 89: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 90: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 91: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 92: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 93: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 94: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 95: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 96: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 97: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 98: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 99: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 100: break;
    case 31:
      { int matchLength = yylength();
        inputSegment.write(zzBuffer, zzStartRead, matchLength);
@ -31329,66 +31734,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
          return outputSegment.nextChar();
        }
      }
-    case 58: break;
+    case 101: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 61: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 62: break;
case 35:
{ yybegin(SCRIPT);
}
case 63: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 64: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 65: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 66: break;
    case 53:
      { // Handle paired UTF-16 surrogates.
        String surrogatePair = yytext();
@ -31424,288 +31770,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
        inputSegment.append('#');
        yybegin(NUMERIC_CHARACTER);
      }
-    case 67: break;
+    case 102: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 68: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 69: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 70: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 71: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 72: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 73: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 74: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 75: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 76: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 77: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 78: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 79: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 80: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 81: break;
case 32:
{ yybegin(COMMENT);
}
case 82: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 83: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 84: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 85: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 86: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 87: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 88: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 89: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 90: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 91: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 92: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 93: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(DOUBLE_QUOTED_STRING);
}
case 94: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 95: break;
    case 36:
      { yybegin(YYINITIAL);
        if (escapeBR) {
@ -31721,83 +31786,18 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
          return BR_END_TAG_REPLACEMENT;
        }
      }
-    case 96: break;
+    case 103: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 97: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 98: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 99: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 100: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 101: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 102: break;
    case 38:
      { yybegin(restoreState);
      }
-    case 103: break;
+    case 104: break;
    case 41:
      { yybegin(STYLE_COMMENT);
      }
-    case 104: break;
+    case 105: break;
    case 1:
      { return zzBuffer[zzStartRead];
      }
case 105: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
    case 106: break;
    default:
      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {

View File

@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU] |
                 [vV][aA][rR] )

-%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
-%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
+%include HTMLCharacterEntities.jflex
+%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro

%{
  private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;

View File

@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
 * {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
 * of the CJK scripts are turned into bigrams.
 * <p>
+ * By default, when a CJK character has no adjacent characters to form
+ * a bigram, it is output in unigram form. If you want to always output
+ * both unigrams and bigrams, set the <code>outputUnigrams</code>
+ * flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
+ * This can be used for a combined unigram+bigram approach.
+ * <p>
 * In all cases, all non-CJK input is passed thru unmodified.
 */
public final class CJKBigramFilter extends TokenFilter {
@ -68,9 +76,15 @@ public final class CJKBigramFilter extends TokenFilter {
  private final Object doKatakana;
  private final Object doHangul;

+  // true if we should output unigram tokens always
+  private final boolean outputUnigrams;
+  private boolean ngramState; // false = output unigram, true = output bigram
+
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);

  // buffers containing codepoint and offsets in parallel
  int buffer[] = new int[8];
@ -88,23 +102,36 @@ public final class CJKBigramFilter extends TokenFilter {
  /**
   * Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
-   *       CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
+   *       CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
   */
  public CJKBigramFilter(TokenStream in) {
    this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
  }

  /**
-   * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
-   * @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
-   *        {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
+   * Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
+   *       CJKBigramFilter(in, flags, false)}
   */
  public CJKBigramFilter(TokenStream in, int flags) {
+    this(in, flags, false);
+  }
+
+  /**
+   * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+   * and whether or not unigrams should also be output.
+   * @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
+   *        {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
+   * @param outputUnigrams true if unigrams for the selected writing systems should also be output.
+   *        when this is false, this is only done when there are no adjacent characters to form
+   *        a bigram.
+   */
+  public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
    super(in);
    doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
    doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
    doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
    doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
+    this.outputUnigrams = outputUnigrams;
  }

  /*
@ -120,7 +147,24 @@ public final class CJKBigramFilter extends TokenFilter {
      // case 1: we have multiple remaining codepoints buffered,
      // so we can emit a bigram here.

+      if (outputUnigrams) {
+        // when also outputting unigrams, we output the unigram first,
+        // then rewind back to revisit the bigram.
+        // so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
+        // the logic in hasBufferedUnigram ensures we output the C,
+        // even though it did actually have adjacent CJK characters.
+        if (ngramState) {
          flushBigram();
+        } else {
+          flushUnigram();
+          index--;
+        }
+        ngramState = !ngramState;
+      } else {
+        flushBigram();
+      }
      return true;
    } else if (doNext()) {
@ -260,6 +304,11 @@ public final class CJKBigramFilter extends TokenFilter {
    termAtt.setLength(len2);
    offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
    typeAtt.setType(DOUBLE_TYPE);
+    // when outputting unigrams, all bigrams are synonyms that span two unigrams
+    if (outputUnigrams) {
+      posIncAtt.setPositionIncrement(0);
+      posLengthAtt.setPositionLength(2);
+    }
    index++;
  }
@ -292,8 +341,14 @@ public final class CJKBigramFilter extends TokenFilter {
   * inputs.
   */
  private boolean hasBufferedUnigram() {
+    if (outputUnigrams) {
+      // when outputting unigrams always
+      return bufferLen - index == 1;
+    } else {
+      // otherwise its only when we have a lone CJK character
      return bufferLen == 1 && index == 0;
+    }
  }

  @Override
  public void reset() throws IOException {
@ -303,5 +358,6 @@ public final class CJKBigramFilter extends TokenFilter {
    lastEndOffset = 0;
    loneState = null;
    exhausted = false;
+    ngramState = false;
  }
}
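
[Editorial sketch of the new unigram+bigram mode, mirroring the test analyzer
later in this commit; 0xff simply enables all four script flags. Per the rewind
logic above, an input of ABC is emitted as A, AB, B, BC, C, with each bigram at
position increment 0 and position length 2.

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
        // true => also output unigrams; bigrams become zero-increment "synonyms"
        return new TokenStreamComponents(t, new CJKBigramFilter(t, 0xff, true));
      }
    };
]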

View File

@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt; * &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.CJKBigramFilterFactory" * &lt;filter class="solr.CJKBigramFilterFactory"
* han="true" hiragana="true" * han="true" hiragana="true"
* katakana="true" hangul="true" /&gt; * katakana="true" hangul="true" outputUnigrams="false" /&gt;
* &lt;/analyzer&gt; * &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre> * &lt;/fieldType&gt;</pre>
*/ */
public class CJKBigramFilterFactory extends TokenFilterFactory { public class CJKBigramFilterFactory extends TokenFilterFactory {
int flags; int flags;
boolean outputUnigrams;
@Override @Override
public void init(Map<String,String> args) { public void init(Map<String,String> args) {
@ -56,10 +57,11 @@ public class CJKBigramFilterFactory extends TokenFilterFactory {
if (getBoolean("hangul", true)) { if (getBoolean("hangul", true)) {
flags |= CJKBigramFilter.HANGUL; flags |= CJKBigramFilter.HANGUL;
} }
outputUnigrams = getBoolean("outputUnigrams", false);
} }
@Override @Override
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new CJKBigramFilter(input, flags); return new CJKBigramFilter(input, flags, outputUnigrams);
} }
} }
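
[Editorial sketch of driving the new option programmatically; the args map and
tokenizer are assumed, and Solr users would instead set outputUnigrams="true"
in the schema snippet shown in the javadoc above.

    Map<String,String> args = new HashMap<String,String>();
    args.put("outputUnigrams", "true");
    CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
    factory.init(args);
    TokenStream unigramsAndBigrams = factory.create(tokenizer);
]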

View File

@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.standard;

@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
 * This class is a scanner generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/15/12 1:57 AM from the specification file
- * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
 */
class ClassicTokenizerImpl implements StandardTokenizerInterface {

@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;

  /** lexical states */
  public static final int YYINITIAL = 0;

View File

@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
+%buffer 4096

%{

View File

@ -14,7 +14,7 @@
 * limitations under the License.
 */

-// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
+// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros

View File

@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.standard;

@ -43,7 +43,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;

  /** lexical states */
  public static final int YYINITIAL = 0;

View File

@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
+%buffer 4096

-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})

View File

@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.standard;

@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;

  /** lexical states */
  public static final int YYINITIAL = 0;

View File

@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
+%buffer 4096

-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format
-%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
+%include ASCIITLD.jflex-macro
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}

View File

@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */

package org.apache.lucene.analysis.wikipedia;

@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
 * This class is a scanner generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/15/12 1:57 AM from the specification file
- * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
 */
class WikipediaTokenizerImpl {

@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;

  /** lexical states */
  public static final int THREE_SINGLE_QUOTES_STATE = 10;

View File

@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken %function getNextToken
%pack %pack
%char %char
%buffer 4096
%{ %{

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
*/ */
import java.io.Reader; import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -33,6 +34,15 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
} }
}; };
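  // unibiAnalyzer exercises the new three-argument constructor: the 0xff
  // flags value enables bigramming for all CJK scripts (see testAllScripts
  // below), and the trailing boolean switches on outputUnigrams, so unigrams
  // are emitted alongside the bigrams.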
Analyzer unibiAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, true));
}
};
public void testHuge() throws Exception { public void testHuge() throws Exception {
assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
@ -62,6 +72,96 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
} }
}; };
assertAnalyzesTo(a, "多くの学生が試験に落ちた。", assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" }); new String[] { "", "", "", "学生", "", "試験", "", "", "", "" },
new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>",
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
}
public void testAllScripts() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, false));
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
}
public void testUnigramsAndBigramsAllScripts() throws Exception {
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。",
new String[] {
"", "多く", "", "くの", "", "の学", "", "学生", "",
"生が", "", "が試", "", "試験", "", "験に", "",
"に落", "", "落ち", "", "ちた", ""
},
new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
6, 7, 7, 8, 8, 9, 9, 10, 10, 11 },
new int[] { 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12 },
new String[] { "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>" },
new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
new int[] { 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }
);
}
public void testUnigramsAndBigramsHanOnly() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" },
new int[] { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11 },
new int[] { 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12 },
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>",
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
}
public void testUnigramsAndBigramsHuge() throws Exception {
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
new String[] {
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", ""
}
);
}
/** blast some random strings through the analyzer */
public void testRandomUnibiStrings() throws Exception {
checkRandomData(random(), unibiAnalyzer, 1000*RANDOM_MULTIPLIER);
}
/** blast some random strings through the analyzer */
public void testRandomUnibiHugeStrings() throws Exception {
Random random = random();
checkRandomData(random, unibiAnalyzer, 100*RANDOM_MULTIPLIER, 8192);
} }
} }

View File

@ -52,4 +52,16 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamTestCase {
assertTokenStreamContents(stream, assertTokenStreamContents(stream,
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" }); new String[] { "", "", "", "学生", "", "試験", "", "", "", "" });
} }
public void testHanOnlyUnigrams() throws Exception {
Reader reader = new StringReader("多くの学生が試験に落ちた。");
CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("hiragana", "false");
args.put("outputUnigrams", "true");
factory.init(args);
TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
assertTokenStreamContents(stream,
new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" });
}
} }

View File

@ -100,8 +100,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
private static final ResourceLoader loader = new StringMockResourceLoader(""); private static final ResourceLoader loader = new StringMockResourceLoader("");
public void test() throws Exception { public void test() throws Exception {
List<Class<?>> analysisClasses = new ArrayList<Class<?>>(); List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
TestRandomChains.getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
for (final Class<?> c : analysisClasses) { for (final Class<?> c : analysisClasses) {
final int modifiers = c.getModifiers(); final int modifiers = c.getModifiers();

View File

@ -25,6 +25,7 @@ import java.io.StringReader;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.net.URI;
import java.net.URL; import java.net.URL;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.util.ArrayList; import java.util.ArrayList;
@ -165,8 +166,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
List<Class<?>> analysisClasses = new ArrayList<Class<?>>(); List<Class<?>> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
tokenizers = new ArrayList<Constructor<? extends Tokenizer>>(); tokenizers = new ArrayList<Constructor<? extends Tokenizer>>();
tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>(); tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>();
charfilters = new ArrayList<Constructor<? extends CharFilter>>(); charfilters = new ArrayList<Constructor<? extends CharFilter>>();
@ -235,19 +235,30 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) { private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
return (Constructor<T>) ctor; return (Constructor<T>) ctor;
} }
static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
public static List<Class<?>> getClassesForPackage(String pckgname) throws Exception {
final List<Class<?>> classes = new ArrayList<Class<?>>();
collectClassesForPackage(pckgname, classes);
assertFalse("No classes found in package '"+pckgname+"'; maybe your test classes are packaged as JAR file?", classes.isEmpty());
return classes;
}
private static void collectClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
final ClassLoader cld = TestRandomChains.class.getClassLoader(); final ClassLoader cld = TestRandomChains.class.getClassLoader();
final String path = pckgname.replace('.', '/'); final String path = pckgname.replace('.', '/');
final Enumeration<URL> resources = cld.getResources(path); final Enumeration<URL> resources = cld.getResources(path);
while (resources.hasMoreElements()) { while (resources.hasMoreElements()) {
final File directory = new File(resources.nextElement().toURI()); final URI uri = resources.nextElement().toURI();
if (!"file".equalsIgnoreCase(uri.getScheme()))
continue;
final File directory = new File(uri);
if (directory.exists()) { if (directory.exists()) {
String[] files = directory.list(); String[] files = directory.list();
for (String file : files) { for (String file : files) {
if (new File(directory, file).isDirectory()) { if (new File(directory, file).isDirectory()) {
// recurse // recurse
String subPackage = pckgname + "." + file; String subPackage = pckgname + "." + file;
getClassesForPackage(subPackage, classes); collectClassesForPackage(subPackage, classes);
} }
if (file.endsWith(".class")) { if (file.endsWith(".class")) {
String clazzName = file.substring(0, file.length() - 6); String clazzName = file.substring(0, file.length() - 6);

View File

@ -43,7 +43,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -156,7 +155,12 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
CountingSearchTestTask.numSearches = 0; CountingSearchTestTask.numSearches = 0;
execBenchmark(algLines); execBenchmark(algLines);
assertTrue(CountingSearchTestTask.numSearches > 0);
// NOTE: cannot assert this, because on a super-slow
// system, it could be after waiting 0.5 seconds that
// the search threads hadn't yet succeeded in starting
// up and then they start up and do no searching:
//assertTrue(CountingSearchTestTask.numSearches > 0);
} }
public void testHighlighting() throws Exception { public void testHighlighting() throws Exception {
@ -201,6 +205,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
// 1. alg definition (required in every "logic" test) // 1. alg definition (required in every "logic" test)
String algLines[] = { String algLines[] = {
"doc.stored=true",//doc storage is required in order to have text to highlight "doc.stored=true",//doc storage is required in order to have text to highlight
"doc.term.vector=true",
"doc.term.vector.offsets=true", "doc.term.vector.offsets=true",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(), "docs.file=" + getReuters20LinesFile(),
@ -487,13 +492,13 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
int totalTokenCount2 = 0; int totalTokenCount2 = 0;
FieldsEnum fields = MultiFields.getFields(reader).iterator(); Fields fields = MultiFields.getFields(reader);
String fieldName = null;
while((fieldName = fields.next()) != null) { for (String fieldName : fields) {
if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) { if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
continue; continue;
} }
Terms terms = fields.terms(); Terms terms = fields.terms(fieldName);
if (terms == null) { if (terms == null) {
continue; continue;
} }
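This hunk shows the pattern that replaces the removed FieldsEnum throughout the commit: Fields is now iterated as an Iterable&lt;String&gt; of field names, and each field's Terms is fetched by name. A minimal sketch of the new idiom (the reader is assumed to be an open IndexReader):

    Fields fields = MultiFields.getFields(reader);
    if (fields != null) {                  // null when the reader has no postings
      for (String field : fields) {        // Fields is now Iterable<String>
        Terms terms = fields.terms(field); // may be null for a given field
        if (terms == null) {
          continue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        // ... consume the terms ...
      }
    }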

View File

@ -139,29 +139,6 @@
<target name="compile-core" depends="compile-lucene-core"/> <target name="compile-core" depends="compile-lucene-core"/>
<!--
Run after Junit tests.
-->
<target name="generate-clover-reports" depends="clover">
<fail unless="run.clover">Clover not enabled!</fail>
<mkdir dir="${clover.report.dir}"/>
<fileset dir="build" id="clover.test.result.files">
<include name="**/test/TEST-*.xml"/>
<!-- do not include BW tests -->
<exclude name="backwards/**"/>
</fileset>
<clover-report>
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
<format type="html" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
<format type="xml" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
</clover-report>
</target>
<!-- Validation (license/notice/api checks). --> <!-- Validation (license/notice/api checks). -->
<target name="validate" depends="check-licenses,rat-sources,check-forbidden-apis" description="Validate stuff." /> <target name="validate" depends="check-licenses,rat-sources,check-forbidden-apis" description="Validate stuff." />
@ -176,6 +153,7 @@
<apiFileSet dir="${custom-tasks.dir}/forbiddenApis"> <apiFileSet dir="${custom-tasks.dir}/forbiddenApis">
<include name="jdk.txt" /> <include name="jdk.txt" />
<include name="jdk-deprecated.txt" /> <include name="jdk-deprecated.txt" />
<include name="executors.txt" />
</apiFileSet> </apiFileSet>
<fileset dir="${basedir}/build" includes="**/*.class" /> <fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis> </forbidden-apis>

View File

@ -88,7 +88,7 @@
<property name="tests.timezone" value="random" /> <property name="tests.timezone" value="random" />
<property name="tests.directory" value="random" /> <property name="tests.directory" value="random" />
<property name="tests.linedocsfile" value="europarl.lines.txt.gz" /> <property name="tests.linedocsfile" value="europarl.lines.txt.gz" />
<property name="tests.loggingfile" value="/dev/null"/> <property name="tests.loggingfile" value="${common.dir}/tools/junit4/logging.properties"/>
<property name="tests.nightly" value="false" /> <property name="tests.nightly" value="false" />
<property name="tests.weekly" value="false" /> <property name="tests.weekly" value="false" />
<property name="tests.slow" value="true" /> <property name="tests.slow" value="true" />
@ -700,15 +700,22 @@
<condition property="tests.method" value="${testmethod}*"> <condition property="tests.method" value="${testmethod}*">
<isset property="testmethod" /> <isset property="testmethod" />
</condition> </condition>
<condition property="tests.showSuccess" value="true"> <condition property="tests.showSuccess" value="true">
<or> <or>
<isset property="tests.class" /> <isset property="tests.class" />
<isset property="tests.method" /> <isset property="tests.method" />
</or> </or>
</condition> </condition>
<!-- default -->
<property name="tests.showSuccess" value="false"/> <property name="tests.showSuccess" value="false"/>
<condition property="tests.showOutput" value="always">
<or>
<isset property="tests.class" />
<isset property="tests.method" />
</or>
</condition>
<property name="tests.showOutput" value="onerror"/>
<!-- Test macro using junit4. --> <!-- Test macro using junit4. -->
<macrodef name="test-macro" description="Executes junit tests."> <macrodef name="test-macro" description="Executes junit tests.">
@ -854,6 +861,7 @@
<syspropertyset> <syspropertyset>
<propertyref prefix="tests.maxfailures" /> <propertyref prefix="tests.maxfailures" />
<propertyref prefix="tests.failfast" /> <propertyref prefix="tests.failfast" />
<propertyref prefix="tests.badapples" />
</syspropertyset> </syspropertyset>
<!-- Pass randomized settings to the forked JVM. --> <!-- Pass randomized settings to the forked JVM. -->
@ -875,8 +883,7 @@
<junit4:report-text <junit4:report-text
showThrowable="true" showThrowable="true"
showStackTraces="true" showStackTraces="true"
showOutputStream="true" showOutput="${tests.showOutput}"
showErrorStream="true"
showStatusOk="${tests.showSuccess}" showStatusOk="${tests.showSuccess}"
showStatusError="${tests.showError}" showStatusError="${tests.showError}"
@ -896,8 +903,7 @@
file="@{junit.output.dir}/tests-report.txt" file="@{junit.output.dir}/tests-report.txt"
showThrowable="true" showThrowable="true"
showStackTraces="true" showStackTraces="true"
showOutputStream="true" showOutput="always"
showErrorStream="true"
showStatusOk="true" showStatusOk="true"
showStatusError="true" showStatusError="true"
@ -913,8 +919,7 @@
file="@{junit.output.dir}/tests-failures.txt" file="@{junit.output.dir}/tests-failures.txt"
showThrowable="true" showThrowable="true"
showStackTraces="true" showStackTraces="true"
showOutputStream="true" showOutput="onerror"
showErrorStream="true"
showStatusOk="false" showStatusOk="false"
showStatusError="true" showStatusError="true"
@ -929,8 +934,13 @@
the slowest tests or for reuse in balancing). --> the slowest tests or for reuse in balancing). -->
<junit4:report-execution-times file="@{junit.output.dir}/tests-timehints.txt" historyLength="5" /> <junit4:report-execution-times file="@{junit.output.dir}/tests-timehints.txt" historyLength="5" />
<junit4:report-ant-xml dir="@{junit.output.dir}" /> <!-- ANT-compatible XMLs for jenkins records etc. -->
<junit4:report-json file="@{junit.output.dir}/tests-report-${ant.project.name}/index.html" /> <junit4:report-ant-xml dir="@{junit.output.dir}" outputStreams="no" />
<!--
Enable if you wish to have a nice HTML5 report.
<junit4:report-json file="@{junit.output.dir}/tests-report-${ant.project.name}/index.html" outputStreams="no" />
-->
</listeners> </listeners>
<!-- Input test classes. --> <!-- Input test classes. -->

View File

@ -480,7 +480,7 @@ public class MyAnalyzer extends Analyzer {
System.out.println(termAtt.toString()); System.out.println(termAtt.toString());
} }
stream.end() stream.end();
} finally { } finally {
stream.close(); stream.close();
} }
@ -509,7 +509,7 @@ easily by adding a LengthFilter to the chain. Only the
{@literal @Override} {@literal @Override}
protected TokenStreamComponents createComponents(String fieldName, Reader reader) { protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader); final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
TokenStream result = new LengthFilter(source, 3, Integer.MAX_VALUE); TokenStream result = new LengthFilter(true, source, 3, Integer.MAX_VALUE);
return new TokenStreamComponents(source, result); return new TokenStreamComponents(source, result);
} }
</pre> </pre>

View File

@ -27,7 +27,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
@ -40,6 +39,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DoubleBarrelLRUCache; import org.apache.lucene.util.DoubleBarrelLRUCache;
import org.apache.lucene.util.UnmodifiableIterator;
/** Handles a terms dict, but decouples all details of /** Handles a terms dict, but decouples all details of
* doc/freqs/positions reading to an instance of {@link * doc/freqs/positions reading to an instance of {@link
@ -184,8 +184,8 @@ public class BlockTermsReader extends FieldsProducer {
} }
@Override @Override
public FieldsEnum iterator() { public Iterator<String> iterator() {
return new TermFieldsEnum(); return new UnmodifiableIterator<String>(fields.keySet().iterator());
} }
@Override @Override
@ -199,32 +199,6 @@ public class BlockTermsReader extends FieldsProducer {
return fields.size(); return fields.size();
} }
// Iterates through all fields
private class TermFieldsEnum extends FieldsEnum {
final Iterator<FieldReader> it;
FieldReader current;
TermFieldsEnum() {
it = fields.values().iterator();
}
@Override
public String next() {
if (it.hasNext()) {
current = it.next();
return current.fieldInfo.name;
} else {
current = null;
return null;
}
}
@Override
public Terms terms() throws IOException {
return current;
}
}
private class FieldReader extends Terms { private class FieldReader extends Terms {
final long numTerms; final long numTerms;
final FieldInfo fieldInfo; final FieldInfo fieldInfo;
@ -253,6 +227,21 @@ public class BlockTermsReader extends FieldsProducer {
return new SegmentTermsEnum(); return new SegmentTermsEnum();
} }
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
@Override @Override
public long size() { public long size() {
return numTerms; return numTerms;
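With these additions a consumer can ask the Terms instance up front what was indexed, instead of probing the postings themselves. A hedged sketch of consumer-side use (the field name is illustrative):

    Terms terms = fields.terms("body");
    if (terms != null && terms.hasPositions()) {
      // positions are present; hasOffsets()/hasPayloads() work the same way
      DocsAndPositionsEnum postings = terms.iterator(null).docsAndPositions(null, null);
      // ... consume positions ...
    }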

View File

@ -31,7 +31,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
@ -46,6 +45,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton; import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition; import org.apache.lucene.util.automaton.Transition;
@ -199,8 +199,8 @@ public class BlockTreeTermsReader extends FieldsProducer {
} }
@Override @Override
public FieldsEnum iterator() { public Iterator<String> iterator() {
return new TermFieldsEnum(); return new UnmodifiableIterator<String>(fields.keySet().iterator());
} }
@Override @Override
@ -214,32 +214,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
return fields.size(); return fields.size();
} }
// Iterates through all fields
private class TermFieldsEnum extends FieldsEnum {
final Iterator<FieldReader> it;
FieldReader current;
TermFieldsEnum() {
it = fields.values().iterator();
}
@Override
public String next() {
if (it.hasNext()) {
current = it.next();
return current.fieldInfo.name;
} else {
current = null;
return null;
}
}
@Override
public Terms terms() throws IOException {
return current;
}
}
// for debugging // for debugging
String brToString(BytesRef b) { String brToString(BytesRef b) {
if (b == null) { if (b == null) {
@ -456,6 +430,21 @@ public class BlockTreeTermsReader extends FieldsProducer {
return BytesRef.getUTF8SortedAsUnicodeComparator(); return BytesRef.getUTF8SortedAsUnicodeComparator();
} }
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
@Override @Override
public TermsEnum iterator(TermsEnum reuse) throws IOException { public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum(); return new SegmentTermsEnum();

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState; // javadocs import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
@ -53,13 +52,10 @@ public abstract class FieldsConsumer implements Closeable {
public abstract void close() throws IOException; public abstract void close() throws IOException;
public void merge(MergeState mergeState, Fields fields) throws IOException { public void merge(MergeState mergeState, Fields fields) throws IOException {
FieldsEnum fieldsEnum = fields.iterator(); for (String field : fields) {
assert fieldsEnum != null;
String field;
while((field = fieldsEnum.next()) != null) {
mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field; assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field;
Terms terms = fieldsEnum.terms(); Terms terms = fields.terms(field);
if (terms != null) { if (terms != null) {
final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
termsConsumer.merge(mergeState, terms.iterator(null)); termsConsumer.merge(mergeState, terms.iterator(null));

View File

@ -124,15 +124,17 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
@Override @Override
public BytesRef getPayload() throws IOException { public BytesRef getPayload() throws IOException {
BytesRef payload = current.getPayload(); BytesRef payload = current.getPayload();
if (mergeState.currentPayloadProcessor[upto] != null) { if (mergeState.currentPayloadProcessor[upto] != null && payload != null) {
// to not violate the D&P api, we must give the processor a private copy
// TODO: reuse a BytesRef if there is a PPP
payload = BytesRef.deepCopyOf(payload);
mergeState.currentPayloadProcessor[upto].processPayload(payload); mergeState.currentPayloadProcessor[upto].processPayload(payload);
if (payload.length == 0) {
// don't let PayloadProcessors corrupt the index
return null;
}
} }
return payload; return payload;
} }
@Override
public boolean hasPayload() {
return current.hasPayload();
}
} }
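For context, a PayloadProcessor mutates the private copy it is handed during merge. A hypothetical processor, assuming the void processPayload(BytesRef) contract implied by the call site above (class name and transform are illustrative):

    class MarkerPayloadProcessor extends PayloadProcessorProvider.PayloadProcessor {
      @Override
      public void processPayload(BytesRef payload) throws IOException {
        payload.bytes[payload.offset] |= (byte) 0x80; // illustrative transform
        // setting payload.length = 0 here would now drop the payload entirely
      }
    }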

View File

@ -112,12 +112,7 @@ public abstract class PostingsConsumer {
totTF += freq; totTF += freq;
for(int i=0;i<freq;i++) { for(int i=0;i<freq;i++) {
final int position = postingsEnum.nextPosition(); final int position = postingsEnum.nextPosition();
final BytesRef payload; final BytesRef payload = postingsEnum.getPayload();
if (postingsEnum.hasPayload()) {
payload = postingsEnum.getPayload();
} else {
payload = null;
}
this.addPosition(position, payload, -1, -1); this.addPosition(position, payload, -1, -1);
} }
this.finishDoc(); this.finishDoc();
@ -137,12 +132,7 @@ public abstract class PostingsConsumer {
totTF += freq; totTF += freq;
for(int i=0;i<freq;i++) { for(int i=0;i<freq;i++) {
final int position = postingsEnum.nextPosition(); final int position = postingsEnum.nextPosition();
final BytesRef payload; final BytesRef payload = postingsEnum.getPayload();
if (postingsEnum.hasPayload()) {
payload = postingsEnum.getPayload();
} else {
payload = null;
}
this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset()); this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
} }
this.finishDoc(); this.finishDoc();
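Both loops above illustrate the new payload contract used throughout this change: hasPayload() is removed, and getPayload() simply returns null when the current position carries none. Consumer code reduces to:

    final int position = postingsEnum.nextPosition();
    final BytesRef payload = postingsEnum.getPayload(); // null when absent
    if (payload != null) {
      // bytes are payload.bytes[payload.offset .. payload.offset + payload.length)
    }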

View File

@ -26,8 +26,9 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.MergeState; import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
@ -41,14 +42,14 @@ import org.apache.lucene.util.BytesRef;
* <ol> * <ol>
* <li>For every document, {@link #startDocument(int)} is called, * <li>For every document, {@link #startDocument(int)} is called,
* informing the Codec how many fields will be written. * informing the Codec how many fields will be written.
* <li>{@link #startField(FieldInfo, int, boolean, boolean)} is called for * <li>{@link #startField(FieldInfo, int, boolean, boolean, boolean)} is called for
* each field in the document, informing the codec how many terms * each field in the document, informing the codec how many terms
* will be written for that field, and whether or not positions * will be written for that field, and whether or not positions,
* or offsets are enabled. * offsets, or payloads are enabled.
* <li>Within each field, {@link #startTerm(BytesRef, int)} is called * <li>Within each field, {@link #startTerm(BytesRef, int)} is called
* for each term. * for each term.
* <li>If offsets and/or positions are enabled, then * <li>If offsets and/or positions are enabled, then
* {@link #addPosition(int, int, int)} will be called for each term * {@link #addPosition(int, int, int, BytesRef)} will be called for each term
* occurrence. * occurrence.
* <li>After all documents have been written, {@link #finish(FieldInfos, int)} * <li>After all documents have been written, {@link #finish(FieldInfos, int)}
* is called for verification/sanity-checks. * is called for verification/sanity-checks.
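A minimal sketch of that workflow under the new payload-aware signatures, writing vectors for one document with a single two-term field (writer, fieldInfo and fieldInfos are assumed to come from the codec's segment state; all values are illustrative):

    writer.startDocument(1);                            // one vector field follows
    writer.startField(fieldInfo, 2, true, true, false); // 2 terms; positions + offsets, no payloads
    writer.startTerm(new BytesRef("hello"), 1);         // term with freq 1
    writer.addPosition(0, 0, 5, null);                  // position, startOffset, endOffset, payload
    writer.startTerm(new BytesRef("world"), 1);
    writer.addPosition(1, 6, 11, null);
    writer.finish(fieldInfos, 1);                       // sanity checks over 1 document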
@ -60,7 +61,7 @@ import org.apache.lucene.util.BytesRef;
public abstract class TermVectorsWriter implements Closeable { public abstract class TermVectorsWriter implements Closeable {
/** Called before writing the term vectors of the document. /** Called before writing the term vectors of the document.
* {@link #startField(FieldInfo, int, boolean, boolean)} will * {@link #startField(FieldInfo, int, boolean, boolean, boolean)} will
* be called <code>numVectorFields</code> times. Note that if term * be called <code>numVectorFields</code> times. Note that if term
* vectors are enabled, this is called even if the document * vectors are enabled, this is called even if the document
* has no vector fields, in this case <code>numVectorFields</code> * has no vector fields, in this case <code>numVectorFields</code>
@ -69,17 +70,17 @@ public abstract class TermVectorsWriter implements Closeable {
/** Called before writing the terms of the field. /** Called before writing the terms of the field.
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */ * {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException; public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
/** Adds a term and its term frequency <code>freq</code>. /** Adds a term and its term frequency <code>freq</code>.
* If this field has positions and/or offsets enabled, then * If this field has positions and/or offsets enabled, then
* {@link #addPosition(int, int, int)} will be called * {@link #addPosition(int, int, int, BytesRef)} will be called
* <code>freq</code> times respectively. * <code>freq</code> times respectively.
*/ */
public abstract void startTerm(BytesRef term, int freq) throws IOException; public abstract void startTerm(BytesRef term, int freq) throws IOException;
/** Adds a term position and offsets */ /** Adds a term position and offsets */
public abstract void addPosition(int position, int startOffset, int endOffset) throws IOException; public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
/** Aborts writing entirely, implementation should remove /** Aborts writing entirely, implementation should remove
* any partially-written files, etc. */ * any partially-written files, etc. */
@ -99,7 +100,7 @@ public abstract class TermVectorsWriter implements Closeable {
* This is an expert API that allows the codec to consume * This is an expert API that allows the codec to consume
* positions and offsets directly from the indexer. * positions and offsets directly from the indexer.
* <p> * <p>
* The default implementation calls {@link #addPosition(int, int, int)}, * The default implementation calls {@link #addPosition(int, int, int, BytesRef)},
* but subclasses can override this if they want to efficiently write * but subclasses can override this if they want to efficiently write
* all the positions, then all the offsets, for example. * all the positions, then all the offsets, for example.
* <p> * <p>
@ -111,15 +112,36 @@ public abstract class TermVectorsWriter implements Closeable {
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException { public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
int position = 0; int position = 0;
int lastOffset = 0; int lastOffset = 0;
BytesRef payload = null;
for (int i = 0; i < numProx; i++) { for (int i = 0; i < numProx; i++) {
final int startOffset; final int startOffset;
final int endOffset; final int endOffset;
final BytesRef thisPayload;
if (positions == null) { if (positions == null) {
position = -1; position = -1;
thisPayload = null;
} else { } else {
position += positions.readVInt(); int code = positions.readVInt();
position += code >>> 1;
if ((code & 1) != 0) {
// This position has a payload
final int payloadLength = positions.readVInt();
if (payload == null) {
payload = new BytesRef();
payload.bytes = new byte[payloadLength];
} else if (payload.bytes.length < payloadLength) {
payload.grow(payloadLength);
}
positions.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
thisPayload = payload;
} else {
thisPayload = null;
}
} }
if (offsets == null) { if (offsets == null) {
@ -129,24 +151,31 @@ public abstract class TermVectorsWriter implements Closeable {
endOffset = startOffset + offsets.readVInt(); endOffset = startOffset + offsets.readVInt();
lastOffset = endOffset; lastOffset = endOffset;
} }
addPosition(position, startOffset, endOffset); addPosition(position, startOffset, endOffset, thisPayload);
} }
} }
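The decoding loop above reads the indexer's position stream, where each VInt packs the position delta in its high bits and a has-payload flag in bit 0. A worked example with payloads enabled:

    code = 7  ->  delta = 7 >>> 1 = 3; (7 & 1) != 0, so read payloadLength
                  (say 2) and then 2 payload bytes
    code = 6  ->  delta = 3; (6 & 1) == 0, no payload at this position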
/** Merges in the term vectors from the readers in /** Merges in the term vectors from the readers in
* <code>mergeState</code>. The default implementation skips * <code>mergeState</code>. The default implementation skips
* over deleted documents, and uses {@link #startDocument(int)}, * over deleted documents, and uses {@link #startDocument(int)},
* {@link #startField(FieldInfo, int, boolean, boolean)}, * {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
* {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int)}, * {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
* and {@link #finish(FieldInfos, int)}, * and {@link #finish(FieldInfos, int)},
* returning the number of documents that were written. * returning the number of documents that were written.
* Implementations can override this method for more sophisticated * Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */ * merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException { public int merge(MergeState mergeState) throws IOException {
int docCount = 0; int docCount = 0;
for (AtomicReader reader : mergeState.readers) { for (int i = 0; i < mergeState.readers.size(); i++) {
final AtomicReader reader = mergeState.readers.get(i);
final int maxDoc = reader.maxDoc(); final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs(); final Bits liveDocs = reader.getLiveDocs();
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
} else {
mergeState.currentReaderPayloadProcessor = null;
}
for (int docID = 0; docID < maxDoc; docID++) { for (int docID = 0; docID < maxDoc; docID++) {
if (liveDocs != null && !liveDocs.get(docID)) { if (liveDocs != null && !liveDocs.get(docID)) {
// skip deleted docs // skip deleted docs
@ -155,7 +184,7 @@ public abstract class TermVectorsWriter implements Closeable {
// NOTE: it's very important to first assign to vectors then pass it to // NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.getTermVectors(docID); Fields vectors = reader.getTermVectors(docID);
addAllDocVectors(vectors, mergeState.fieldInfos); addAllDocVectors(vectors, mergeState);
docCount++; docCount++;
mergeState.checkAbort.work(300); mergeState.checkAbort.work(300);
} }
@ -169,7 +198,7 @@ public abstract class TermVectorsWriter implements Closeable {
* implementation requires that the vectors implement * implementation requires that the vectors implement
* both Fields.size and * both Fields.size and
* Terms.size. */ * Terms.size. */
protected final void addAllDocVectors(Fields vectors, FieldInfos fieldInfos) throws IOException { protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
if (vectors == null) { if (vectors == null) {
startDocument(0); startDocument(0);
return; return;
@ -181,35 +210,38 @@ public abstract class TermVectorsWriter implements Closeable {
} }
startDocument(numFields); startDocument(numFields);
final FieldsEnum fieldsEnum = vectors.iterator();
String fieldName;
String lastFieldName = null; String lastFieldName = null;
while((fieldName = fieldsEnum.next()) != null) { TermsEnum termsEnum = null;
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName); DocsAndPositionsEnum docsAndPositionsEnum = null;
final ReaderPayloadProcessor readerPayloadProcessor = mergeState.currentReaderPayloadProcessor;
PayloadProcessor payloadProcessor = null;
for(String fieldName : vectors) {
final FieldInfo fieldInfo = mergeState.fieldInfos.fieldInfo(fieldName);
assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName; assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
lastFieldName = fieldName; lastFieldName = fieldName;
final Terms terms = fieldsEnum.terms(); final Terms terms = vectors.terms(fieldName);
if (terms == null) { if (terms == null) {
// FieldsEnum shouldn't lie... // FieldsEnum shouldn't lie...
continue; continue;
} }
final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
final boolean hasPayloads = terms.hasPayloads();
assert !hasPayloads || hasPositions;
final int numTerms = (int) terms.size(); final int numTerms = (int) terms.size();
if (numTerms == -1) { if (numTerms == -1) {
throw new IllegalStateException("terms.size() must be implemented (it returned -1)"); throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
} }
final TermsEnum termsEnum = terms.iterator(null);
DocsAndPositionsEnum docsAndPositionsEnum = null; startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator(termsEnum);
boolean startedField = false;
// NOTE: this is tricky, because TermVectors allow
// indexing offsets but NOT positions. So we must
// lazily init the field by checking whether first
// position we see is -1 or not.
int termCount = 0; int termCount = 0;
while(termsEnum.next() != null) { while(termsEnum.next() != null) {
@ -217,18 +249,16 @@ public abstract class TermVectorsWriter implements Closeable {
final int freq = (int) termsEnum.totalTermFreq(); final int freq = (int) termsEnum.totalTermFreq();
if (startedField) {
startTerm(termsEnum.term(), freq); startTerm(termsEnum.term(), freq);
if (hasPayloads && readerPayloadProcessor != null) {
payloadProcessor = readerPayloadProcessor.getProcessor(fieldName, termsEnum.term());
} }
// TODO: we need a "query" API where we can ask (via if (hasPositions || hasOffsets) {
// flex API) what this term was indexed with... docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
// Both positions & offsets: assert docsAndPositionsEnum != null;
docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
boolean hasOffsets = false;
boolean hasPositions = false;
if (docsAndPositionsEnum != null) {
final int docID = docsAndPositionsEnum.nextDoc(); final int docID = docsAndPositionsEnum.nextDoc();
assert docID != DocIdSetIterator.NO_MORE_DOCS; assert docID != DocIdSetIterator.NO_MORE_DOCS;
assert docsAndPositionsEnum.freq() == freq; assert docsAndPositionsEnum.freq() == freq;
@ -237,27 +267,21 @@ public abstract class TermVectorsWriter implements Closeable {
final int pos = docsAndPositionsEnum.nextPosition(); final int pos = docsAndPositionsEnum.nextPosition();
final int startOffset = docsAndPositionsEnum.startOffset(); final int startOffset = docsAndPositionsEnum.startOffset();
final int endOffset = docsAndPositionsEnum.endOffset(); final int endOffset = docsAndPositionsEnum.endOffset();
if (!startedField) {
assert numTerms > 0; BytesRef payload = docsAndPositionsEnum.getPayload();
hasPositions = pos != -1;
hasOffsets = startOffset != -1; if (payloadProcessor != null && payload != null) {
startField(fieldInfo, numTerms, hasPositions, hasOffsets); // to not violate the D&P api, we must give the processor a private copy
startTerm(termsEnum.term(), freq); payload = BytesRef.deepCopyOf(payload);
startedField = true; payloadProcessor.processPayload(payload);
if (payload.length == 0) {
// don't let PayloadProcessors corrupt the index
payload = null;
} }
if (hasOffsets) {
assert startOffset != -1;
assert endOffset != -1;
} }
assert !hasPositions || pos >= 0; assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset); addPosition(pos, startOffset, endOffset, payload);
}
} else {
if (!startedField) {
assert numTerms > 0;
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
startTerm(termsEnum.term(), freq);
startedField = true;
} }
} }
} }

View File

@ -954,11 +954,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return -1; return -1;
} }
@Override
public boolean hasPayload() {
return false;
}
@Override @Override
public BytesRef getPayload() { public BytesRef getPayload() {
return null; return null;
@ -1226,10 +1221,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) { if (DEBUG) {
System.out.println(" FPR.nextDoc"); System.out.println(" FPR.nextDoc");
} }
if (indexHasPayloads) {
payloadByteUpto += payloadLength;
payloadLength = 0;
}
while (true) { while (true) {
if (DEBUG) { if (DEBUG) {
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto); System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
@ -1255,7 +1246,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount); System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
} }
position = 0; position = 0;
payloadLength = 0;
lastStartOffset = 0; lastStartOffset = 0;
return doc; return doc;
} }
@ -1355,12 +1345,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) { if (DEBUG) {
System.out.println(" return doc=" + accum); System.out.println(" return doc=" + accum);
} }
if (indexHasPayloads) {
payloadByteUpto += payloadLength;
payloadLength = 0;
}
position = 0; position = 0;
payloadLength = 0;
lastStartOffset = 0; lastStartOffset = 0;
return doc = accum; return doc = accum;
} else { } else {
@ -1433,7 +1418,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
position = 0; position = 0;
payloadLength = 0;
lastStartOffset = 0; lastStartOffset = 0;
} }
@ -1461,16 +1445,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
posBufferUpto = BLOCK_SIZE; posBufferUpto = BLOCK_SIZE;
} }
if (indexHasPayloads) {
if (DEBUG) {
if (payloadLength != 0) {
System.out.println(" skip unread payload length=" + payloadLength);
}
}
payloadByteUpto += payloadLength;
payloadLength = 0;
}
if (posPendingCount > freq) { if (posPendingCount > freq) {
skipPositions(); skipPositions();
posPendingCount = freq; posPendingCount = freq;
@ -1484,6 +1458,10 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (indexHasPayloads) { if (indexHasPayloads) {
payloadLength = payloadLengthBuffer[posBufferUpto]; payloadLength = payloadLengthBuffer[posBufferUpto];
payload.bytes = payloadBytes;
payload.offset = payloadByteUpto;
payload.length = payloadLength;
payloadByteUpto += payloadLength;
} }
if (indexHasOffsets) { if (indexHasOffsets) {
@ -1510,22 +1488,16 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return endOffset; return endOffset;
} }
@Override
public boolean hasPayload() {
return payloadLength != 0;
}
@Override @Override
public BytesRef getPayload() { public BytesRef getPayload() {
if (DEBUG) { if (DEBUG) {
System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto); System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto);
} }
payload.bytes = payloadBytes; if (payloadLength == 0) {
payload.offset = payloadByteUpto; return null;
payload.length = payloadLength; } else {
payloadByteUpto += payloadLength;
payloadLength = 0;
return payload; return payload;
} }
} }
}
} }

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -35,7 +36,6 @@ import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
@ -44,7 +44,6 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet; import org.apache.lucene.util.FuzzySet;
@ -187,9 +186,8 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
} }
public FieldsEnum iterator() throws IOException { public Iterator<String> iterator() {
return new BloomFilteredFieldsEnum(delegateFieldsProducer.iterator(), return delegateFieldsProducer.iterator();
bloomsByFieldName);
} }
public void close() throws IOException { public void close() throws IOException {
@ -217,44 +215,6 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
return delegateFieldsProducer.getUniqueTermCount(); return delegateFieldsProducer.getUniqueTermCount();
} }
// Not all fields in a segment may be subject to a bloom filter. This class
// wraps Terms objects appropriately if a filtering request is present
class BloomFilteredFieldsEnum extends FieldsEnum {
private FieldsEnum delegateFieldsEnum;
private HashMap<String,FuzzySet> bloomsByFieldName;
private String currentFieldName;
public BloomFilteredFieldsEnum(FieldsEnum iterator,
HashMap<String,FuzzySet> bloomsByFieldName) {
this.delegateFieldsEnum = iterator;
this.bloomsByFieldName = bloomsByFieldName;
}
public AttributeSource attributes() {
return delegateFieldsEnum.attributes();
}
public String next() throws IOException {
currentFieldName = delegateFieldsEnum.next();
return currentFieldName;
}
public Terms terms() throws IOException {
FuzzySet filter = bloomsByFieldName.get(currentFieldName);
if (filter == null) {
return delegateFieldsEnum.terms();
} else {
Terms result = delegateFieldsEnum.terms();
if (result == null) {
return null;
}
// wrap the terms object with a bloom filter
return new BloomFilteredTerms(result, filter);
}
}
}
class BloomFilteredTerms extends Terms { class BloomFilteredTerms extends Terms {
private Terms delegateTerms; private Terms delegateTerms;
private FuzzySet filter; private FuzzySet filter;
@ -314,6 +274,21 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
public int getDocCount() throws IOException { public int getDocCount() throws IOException {
return delegateTerms.getDocCount(); return delegateTerms.getDocCount();
} }
@Override
public boolean hasOffsets() {
return delegateTerms.hasOffsets();
}
@Override
public boolean hasPositions() {
return delegateTerms.hasPositions();
}
@Override
public boolean hasPayloads() {
return delegateTerms.hasPayloads();
}
} }
class BloomFilteredTermsEnum extends TermsEnum { class BloomFilteredTermsEnum extends TermsEnum {

View File

@ -873,12 +873,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
* payload was indexed. */ * payload was indexed. */
@Override @Override
public BytesRef getPayload() throws IOException { public BytesRef getPayload() throws IOException {
throw new IOException("No payloads exist for this field!"); return null;
}
@Override
public boolean hasPayload() {
return false;
} }
} }
@ -1152,11 +1147,13 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
@Override @Override
public BytesRef getPayload() throws IOException { public BytesRef getPayload() throws IOException {
if (storePayloads) { if (storePayloads) {
if (payloadLength <= 0) {
return null;
}
assert lazyProxPointer == -1; assert lazyProxPointer == -1;
assert posPendingCount < freq; assert posPendingCount < freq;
if (!payloadPending) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); if (payloadPending) {
}
if (payloadLength > payload.bytes.length) { if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength); payload.grow(payloadLength);
} }
@ -1164,16 +1161,12 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
proxIn.readBytes(payload.bytes, 0, payloadLength); proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength; payload.length = payloadLength;
payloadPending = false; payloadPending = false;
}
return payload; return payload;
} else { } else {
throw new IOException("No payloads exist for this field!"); return null;
} }
} }
@Override
public boolean hasPayload() {
return payloadPending && payloadLength > 0;
}
} }
} }

View File

@@ -67,33 +67,46 @@ import org.apache.lucene.store.IOContext;
  * <li><a name="tvf" id="tvf"></a>
  * <p>The Field or .tvf file.</p>
  * <p>This file contains, for each field that has a term vector stored, a list of
- * the terms, their frequencies and, optionally, position and offset
- * information.</p>
- * <p>Field (.tvf) --&gt; Header,&lt;NumTerms, Position/Offset, TermFreqs&gt;
+ * the terms, their frequencies and, optionally, position, offset, and payload
+ * information.</p>
+ * <p>Field (.tvf) --&gt; Header,&lt;NumTerms, Flags, TermFreqs&gt;
  * <sup>NumFields</sup></p>
  * <ul>
  * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>NumTerms --&gt; {@link DataOutput#writeVInt VInt}</li>
- * <li>Position/Offset --&gt; {@link DataOutput#writeByte Byte}</li>
- * <li>TermFreqs --&gt; &lt;TermText, TermFreq, Positions?, Offsets?&gt;
+ * <li>Flags --&gt; {@link DataOutput#writeByte Byte}</li>
+ * <li>TermFreqs --&gt; &lt;TermText, TermFreq, Positions?, PayloadData?, Offsets?&gt;
  * <sup>NumTerms</sup></li>
  * <li>TermText --&gt; &lt;PrefixLength, Suffix&gt;</li>
  * <li>PrefixLength --&gt; {@link DataOutput#writeVInt VInt}</li>
  * <li>Suffix --&gt; {@link DataOutput#writeString String}</li>
  * <li>TermFreq --&gt; {@link DataOutput#writeVInt VInt}</li>
- * <li>Positions --&gt; &lt;{@link DataOutput#writeVInt VInt}&gt;<sup>TermFreq</sup></li>
+ * <li>Positions --&gt; &lt;PositionDelta PayloadLength?&gt;<sup>TermFreq</sup></li>
+ * <li>PositionDelta --&gt; {@link DataOutput#writeVInt VInt}</li>
+ * <li>PayloadLength --&gt; {@link DataOutput#writeVInt VInt}</li>
+ * <li>PayloadData --&gt; {@link DataOutput#writeByte Byte}<sup>NumPayloadBytes</sup></li>
  * <li>Offsets --&gt; &lt;{@link DataOutput#writeVInt VInt}, {@link DataOutput#writeVInt VInt}&gt;<sup>TermFreq</sup></li>
  * </ul>
  * <p>Notes:</p>
  * <ul>
- * <li>Position/Offset byte stores whether this term vector has position or offset
- *     information stored.</li>
+ * <li>Flags byte stores whether this term vector has position, offset, payload
+ *     information stored.</li>
  * <li>Term byte prefixes are shared. The PrefixLength is the number of initial
  *     bytes from the previous term which must be pre-pended to a term's suffix
  *     in order to form the term's bytes. Thus, if the previous term's text was "bone"
  *     and the term is "boy", the PrefixLength is two and the suffix is "y".</li>
- * <li>Positions are stored as delta encoded VInts. This means we only store the
- *     difference of the current position from the last position</li>
+ * <li>PositionDelta is, if payloads are disabled for the term's field, the
+ *     difference between the position of the current occurrence in the document and
+ *     the previous occurrence (or zero, if this is the first occurrence in this
+ *     document). If payloads are enabled for the term's field, then PositionDelta/2
+ *     is the difference between the current and the previous position. If payloads
+ *     are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
+ *     the length of the payload at the current term position.</li>
+ * <li>PayloadData is metadata associated with a term position. If
+ *     PayloadLength is stored at the current position, then it indicates the length
+ *     of this payload. If PayloadLength is not stored, then this payload has the same
+ *     length as the payload at the previous position. PayloadData encodes the
+ *     concatenated bytes for all of a term's occurrences.</li>
  * <li>Offsets are stored as delta encoded VInts. The first VInt is the
  *     startOffset, the second is the endOffset.</li>
  * </ul>
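The PositionDelta/PayloadLength rules above decode in a few lines. This sketch mirrors the reader changes later in this commit, using only the DataInput API named in the format spec (the class and parameter names are illustrative):

import java.io.IOException;
import org.apache.lucene.store.DataInput;

public class TvfPositionDecoder {
  // Decodes one term's Positions block: with payloads enabled, each entry is a
  // VInt code whose low bit says whether a new PayloadLength VInt follows; the
  // remaining bits are the position delta. Lengths repeat until changed.
  public static int[] readPositions(DataInput in, int freq, boolean fieldHasPayloads,
                                    int[] payloadLengths) throws IOException {
    int[] positions = new int[freq];
    int pos = 0;
    int lastPayloadLength = -1;
    for (int i = 0; i < freq; i++) {
      int code = in.readVInt();
      if (fieldHasPayloads) {
        pos += code >>> 1;            // PositionDelta/2
        if ((code & 1) != 0) {        // odd delta => PayloadLength follows
          lastPayloadLength = in.readVInt();
        }
        payloadLengths[i] = lastPayloadLength;
      } else {
        pos += code;                  // plain delta, no payload bit
      }
      positions[i] = pos;
    }
    return positions;
  }
}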
@@ -21,7 +21,9 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
+import java.util.NoSuchElementException;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.TermVectorsReader;
@@ -30,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
@@ -55,6 +56,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
 
+  static final byte STORE_PAYLOAD_WITH_TERMVECTOR = 0x4;
+
   /** Extension of vectors fields file */
   static final String VECTORS_FIELDS_EXTENSION = "tvf";
@@ -68,8 +71,10 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
   static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
 
-  static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_NO_PAYLOADS = 0;
+  static final int VERSION_PAYLOADS = 1;
+  static final int VERSION_START = VERSION_NO_PAYLOADS;
+  static final int VERSION_CURRENT = VERSION_PAYLOADS;
 
   static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
   static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
@@ -245,9 +250,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     }
 
     @Override
-    public FieldsEnum iterator() throws IOException {
-
-      return new FieldsEnum() {
+    public Iterator<String> iterator() {
+      return new Iterator<String>() {
         private int fieldUpto;
 
         @Override
@@ -255,13 +259,18 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
           if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
             return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
           } else {
-            return null;
+            throw new NoSuchElementException();
          }
         }
 
         @Override
-        public Terms terms() throws IOException {
-          return TVFields.this.terms(fieldInfos.fieldInfo(fieldNumbers[fieldUpto-1]).name);
+        public boolean hasNext() {
+          return fieldNumbers != null && fieldUpto < fieldNumbers.length;
+        }
+
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
         }
       };
     }
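With FieldsEnum gone, Fields is traversed as plain field names plus an explicit terms(field) lookup, as this and the later hunks show. A short sketch of the caller-side idiom under the new API (class and method names are illustrative):

import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;

public class FieldWalker {
  // Fields now implements Iterable<String>; terms(field) replaces
  // FieldsEnum.terms().
  public static long totalTermCount(Fields fields) throws IOException {
    long total = 0;
    for (String field : fields) {
      Terms terms = fields.terms(field);
      if (terms != null && terms.size() != -1) { // size() may be unknown (-1)
        total += terms.size();
      }
    }
    return total;
  }
}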
@@ -296,10 +305,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   private class TVTerms extends Terms {
     private final int numTerms;
     private final long tvfFPStart;
+    private final boolean storePositions;
+    private final boolean storeOffsets;
+    private final boolean storePayloads;
 
     public TVTerms(long tvfFP) throws IOException {
       tvf.seek(tvfFP);
       numTerms = tvf.readVInt();
+      final byte bits = tvf.readByte();
+      storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+      storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+      storePayloads = (bits & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
       tvfFPStart = tvf.getFilePointer();
     }
@@ -314,7 +330,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       } else {
         termsEnum = new TVTermsEnum();
       }
-      termsEnum.reset(numTerms, tvfFPStart);
+      termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets, storePayloads);
       return termsEnum;
     }
@@ -345,6 +361,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       // this...?  I guess codec could buffer and re-sort...
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return storeOffsets;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return storePositions;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return storePayloads;
+    }
   }
 
   private class TVTermsEnum extends TermsEnum {
@@ -357,12 +388,18 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     private BytesRef term = new BytesRef();
     private boolean storePositions;
     private boolean storeOffsets;
+    private boolean storePayloads;
     private long tvfFP;
 
     private int[] positions;
     private int[] startOffsets;
     private int[] endOffsets;
+
+    // one shared byte[] for any term's payloads
+    private int[] payloadOffsets;
+    private int lastPayloadLength;
+    private byte[] payloadData;
 
     // NOTE: tvf is pre-positioned by caller
     public TVTermsEnum() {
       this.origTVF = Lucene40TermVectorsReader.this.tvf;
@@ -373,17 +410,20 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       return tvf == origTVF;
     }
 
-    public void reset(int numTerms, long tvfFPStart) throws IOException {
+    public void reset(int numTerms, long tvfFPStart, boolean storePositions, boolean storeOffsets, boolean storePayloads) throws IOException {
       this.numTerms = numTerms;
+      this.storePositions = storePositions;
+      this.storeOffsets = storeOffsets;
+      this.storePayloads = storePayloads;
       nextTerm = 0;
       tvf.seek(tvfFPStart);
-      final byte bits = tvf.readByte();
-      storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
-      storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
       tvfFP = 1+tvfFPStart;
       positions = null;
       startOffsets = null;
       endOffsets = null;
+      payloadOffsets = null;
+      payloadData = null;
+      lastPayloadLength = -1;
     }
 
     // NOTE: slow!  (linear scan)
@@ -430,7 +470,26 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       tvf.readBytes(term.bytes, start, deltaLen);
 
       freq = tvf.readVInt();
-      if (storePositions) {
+      if (storePayloads) {
+        positions = new int[freq];
+        payloadOffsets = new int[freq];
+        int totalPayloadLength = 0;
+        int pos = 0;
+        for(int posUpto=0;posUpto<freq;posUpto++) {
+          int code = tvf.readVInt();
+          pos += code >>> 1;
+          positions[posUpto] = pos;
+          if ((code & 1) != 0) {
+            // length change
+            lastPayloadLength = tvf.readVInt();
+          }
+          payloadOffsets[posUpto] = totalPayloadLength;
+          totalPayloadLength += lastPayloadLength;
+          assert totalPayloadLength >= 0;
+        }
+        payloadData = new byte[totalPayloadLength];
+        tvf.readBytes(payloadData, 0, payloadData.length);
+      } else if (storePositions /* no payloads */) {
        // TODO: we could maybe reuse last array, if we can
        // somehow be careful about consumer never using two
        // D&PEnums at once...
@@ -502,14 +561,12 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       } else {
         docsAndPositionsEnum = new TVDocsAndPositionsEnum();
       }
-      docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
+      docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets, payloadOffsets, payloadData);
       return docsAndPositionsEnum;
     }
 
     @Override
     public Comparator<BytesRef> getComparator() {
-      // TODO: really indexer hardwires
-      // this...?  I guess codec could buffer and re-sort...
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
   }
@@ -567,6 +624,9 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     private int[] positions;
     private int[] startOffsets;
     private int[] endOffsets;
+    private int[] payloadOffsets;
+    private BytesRef payload = new BytesRef();
+    private byte[] payloadBytes;
 
     @Override
     public int freq() throws IOException {
@@ -602,11 +662,13 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       }
     }
 
-    public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
+    public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, int[] payloadLengths, byte[] payloadBytes) {
       this.liveDocs = liveDocs;
       this.positions = positions;
       this.startOffsets = startOffsets;
       this.endOffsets = endOffsets;
+      this.payloadOffsets = payloadLengths;
+      this.payloadBytes = payloadBytes;
       this.doc = -1;
       didNext = false;
       nextPos = 0;
@@ -614,12 +676,19 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
 
     @Override
     public BytesRef getPayload() {
+      if (payloadOffsets == null) {
+        return null;
+      } else {
+        int off = payloadOffsets[nextPos-1];
+        int end = nextPos == payloadOffsets.length ? payloadBytes.length : payloadOffsets[nextPos];
+        if (end - off == 0) {
           return null;
         }
-
-    @Override
-    public boolean hasPayload() {
-      return false;
+        payload.bytes = payloadBytes;
+        payload.offset = off;
+        payload.length = end - off;
+        return payload;
+      }
     }
 
     @Override
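All payloads for one term are kept as a single concatenated byte[] with a parallel offsets array, and getPayload() slices out the occurrence on demand. A standalone sketch of that slicing rule (not part of the patch; names are illustrative):

import org.apache.lucene.util.BytesRef;

public class PayloadSlicer {
  // payloadOffsets[i] is where occurrence i starts in payloadData; its end is
  // the next offset, or the end of the buffer for the last occurrence,
  // exactly as TVDocsAndPositionsEnum.getPayload() computes it above.
  public static BytesRef slice(byte[] payloadData, int[] payloadOffsets, int i) {
    int off = payloadOffsets[i];
    int end = (i == payloadOffsets.length - 1) ? payloadData.length : payloadOffsets[i + 1];
    if (end == off) {
      return null; // zero-length payload at this position
    }
    return new BytesRef(payloadData, off, end - off);
  }
}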
@@ -106,12 +106,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
   private String lastFieldName;
 
   @Override
-  public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
+  public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
     assert lastFieldName == null || info.name.compareTo(lastFieldName) > 0: "fieldName=" + info.name + " lastFieldName=" + lastFieldName;
     lastFieldName = info.name;
     this.positions = positions;
     this.offsets = offsets;
+    this.payloads = payloads;
     lastTerm.length = 0;
+    lastPayloadLength = -1; // force first payload to write its length
     fps[fieldCount++] = tvf.getFilePointer();
     tvd.writeVInt(info.number);
     tvf.writeVInt(numTerms);
@@ -120,6 +122,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
       bits |= Lucene40TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
     if (offsets)
       bits |= Lucene40TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
+    if (payloads)
+      bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
     tvf.writeByte(bits);
 
     assert fieldCount <= numVectorFields;
@@ -138,10 +142,12 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
   // we also don't buffer during bulk merges.
   private int offsetStartBuffer[] = new int[10];
   private int offsetEndBuffer[] = new int[10];
-  private int offsetIndex = 0;
-  private int offsetFreq = 0;
+  private BytesRef payloadData = new BytesRef(10);
+  private int bufferedIndex = 0;
+  private int bufferedFreq = 0;
   private boolean positions = false;
   private boolean offsets = false;
+  private boolean payloads = false;
 
   @Override
   public void startTerm(BytesRef term, int freq) throws IOException {
@@ -158,20 +164,40 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
       // we might need to buffer if its a non-bulk merge
       offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
       offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
-      offsetIndex = 0;
-      offsetFreq = freq;
     }
+    bufferedIndex = 0;
+    bufferedFreq = freq;
+    payloadData.length = 0;
   }
 
   int lastPosition = 0;
   int lastOffset = 0;
+  int lastPayloadLength = -1; // force first payload to write its length
+
+  BytesRef scratch = new BytesRef(); // used only by this optimized flush below
 
   @Override
   public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
-    // TODO: technically we could just copy bytes and not re-encode if we knew the length...
-    if (positions != null) {
+    if (payloads) {
+      // TODO, maybe overkill and just call super.addProx() in this case?
+      // we do avoid buffering the offsets in RAM though.
       for (int i = 0; i < numProx; i++) {
-        tvf.writeVInt(positions.readVInt());
+        int code = positions.readVInt();
+        if ((code & 1) == 1) {
+          int length = positions.readVInt();
+          scratch.grow(length);
+          scratch.length = length;
+          positions.readBytes(scratch.bytes, scratch.offset, scratch.length);
+          writePosition(code >>> 1, scratch);
+        } else {
+          writePosition(code >>> 1, null);
+        }
+      }
+      tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
+    } else if (positions != null) {
+      // pure positions, no payloads
+      for (int i = 0; i < numProx; i++) {
+        tvf.writeVInt(positions.readVInt() >>> 1);
       }
     }
@@ -184,28 +210,36 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
   }
 
   @Override
-  public void addPosition(int position, int startOffset, int endOffset) throws IOException {
-    if (positions && offsets) {
+  public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
+    if (positions && (offsets || payloads)) {
       // write position delta
-      tvf.writeVInt(position - lastPosition);
+      writePosition(position - lastPosition, payload);
       lastPosition = position;
 
       // buffer offsets
-      offsetStartBuffer[offsetIndex] = startOffset;
-      offsetEndBuffer[offsetIndex] = endOffset;
-      offsetIndex++;
+      if (offsets) {
+        offsetStartBuffer[bufferedIndex] = startOffset;
+        offsetEndBuffer[bufferedIndex] = endOffset;
+      }
+
+      bufferedIndex++;
 
       // dump buffer if we are done
-      if (offsetIndex == offsetFreq) {
-        for (int i = 0; i < offsetIndex; i++) {
+      if (bufferedIndex == bufferedFreq) {
+        if (payloads) {
+          tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
+        }
+        for (int i = 0; i < bufferedIndex; i++) {
+          if (offsets) {
             tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
             tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
             lastOffset = offsetEndBuffer[i];
           }
         }
+      }
     } else if (positions) {
       // write position delta
-      tvf.writeVInt(position - lastPosition);
+      writePosition(position - lastPosition, payload);
       lastPosition = position;
     } else if (offsets) {
       // write offset deltas
@@ -215,6 +249,30 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
     }
   }
 
+  private void writePosition(int delta, BytesRef payload) throws IOException {
+    if (payloads) {
+      int payloadLength = payload == null ? 0 : payload.length;
+
+      if (payloadLength != lastPayloadLength) {
+        lastPayloadLength = payloadLength;
+        tvf.writeVInt((delta<<1)|1);
+        tvf.writeVInt(payloadLength);
+      } else {
+        tvf.writeVInt(delta << 1);
+      }
+      if (payloadLength > 0) {
+        if (payloadLength + payloadData.length < 0) {
+          // we overflowed the payload buffer, just throw UOE
+          // having > Integer.MAX_VALUE bytes of payload for a single term in a single doc is nuts.
+          throw new UnsupportedOperationException("A term cannot have more than Integer.MAX_VALUE bytes of payload data in a single document");
+        }
+        payloadData.append(payload);
+      }
+    } else {
+      tvf.writeVInt(delta);
+    }
+  }
+
   @Override
   public void abort() {
     try {
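writePosition() above is the encoder counterpart of the .tvf format notes: the low bit of each VInt flags a payload-length change. A self-contained trace of the wire layout on plain ints (a sketch, not the writer itself; names are illustrative):

import java.util.ArrayList;
import java.util.List;

public class WritePositionTrace {
  // Emulates writePosition() for deltas {5, 3, 3} with payload lengths
  // {2, 2, 0}; the emitted VInt values are:
  //   (5<<1)|1 = 11, 2   // first payload: length changed from -1 to 2
  //   3<<1     = 6       // same length, only the delta
  //   (3<<1)|1 = 7, 0    // length changed to 0
  public static List<Integer> encode(int[] deltas, int[] payloadLengths) {
    List<Integer> vints = new ArrayList<Integer>();
    int lastPayloadLength = -1;
    for (int i = 0; i < deltas.length; i++) {
      if (payloadLengths[i] != lastPayloadLength) {
        lastPayloadLength = payloadLengths[i];
        vints.add((deltas[i] << 1) | 1);
        vints.add(lastPayloadLength);
      } else {
        vints.add(deltas[i] << 1);
      }
    }
    return vints;
  }
}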
@@ -255,7 +313,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
     int idx = 0;
     int numDocs = 0;
-    for (final AtomicReader reader : mergeState.readers) {
+    for (int i = 0; i < mergeState.readers.size(); i++) {
+      final AtomicReader reader = mergeState.readers.get(i);
+      // set PayloadProcessor
+      if (mergeState.payloadProcessorProvider != null) {
+        mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
+      } else {
+        mergeState.currentReaderPayloadProcessor = null;
+      }
       final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
       Lucene40TermVectorsReader matchingVectorsReader = null;
       if (matchingSegmentReader != null) {
@@ -288,8 +353,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
     final int maxDoc = reader.maxDoc();
     final Bits liveDocs = reader.getLiveDocs();
     int totalNumDocs = 0;
-    if (matchingVectorsReader != null) {
-      // We can bulk-copy because the fieldInfos are "congruent"
+    if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
+      // We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
       for (int docNum = 0; docNum < maxDoc;) {
         if (!liveDocs.get(docNum)) {
           // skip deleted docs
@@ -324,7 +389,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
         // NOTE: it's very important to first assign to vectors then pass it to
         // termVectorsWriter.addAllDocVectors; see LUCENE-1282
         Fields vectors = reader.getTermVectors(docNum);
-        addAllDocVectors(vectors, mergeState.fieldInfos);
+        addAllDocVectors(vectors, mergeState);
         totalNumDocs++;
         mergeState.checkAbort.work(300);
       }
@@ -339,8 +404,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
                         int rawDocLengths2[])
           throws IOException {
     final int maxDoc = reader.maxDoc();
-    if (matchingVectorsReader != null) {
-      // We can bulk-copy because the fieldInfos are "congruent"
+    if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
+      // We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
       int docCount = 0;
       while (docCount < maxDoc) {
         int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
@@ -354,7 +419,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
       // NOTE: it's very important to first assign to vectors then pass it to
       // termVectorsWriter.addAllDocVectors; see LUCENE-1282
       Fields vectors = reader.getTermVectors(docNum);
-      addAllDocVectors(vectors, mergeState.fieldInfos);
+      addAllDocVectors(vectors, mergeState);
       mergeState.checkAbort.work(300);
     }
   }
@@ -366,7 +366,7 @@ the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
 factors need no longer be a single byte, they can be any DocValues
 {@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode
 strings, they can be any byte sequence. Term offsets can optionally be indexed
-into the postings lists.</li>
+into the postings lists. Payloads can be stored in the term vectors.</li>
 </ul>
 <a name="Limitations" id="Limitations"></a>
 <h2>Limitations</h2>
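Payloads stored in term vectors, as this documentation change advertises, come back through the ordinary term-vector read path. A hedged end-to-end sketch assuming the 4.0-era API (class and variable names are illustrative):

import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class TermVectorPayloads {
  // Walks one document's term vector for a field and prints any payloads.
  public static void dump(IndexReader reader, int docID, String field) throws IOException {
    Fields vectors = reader.getTermVectors(docID);
    if (vectors == null) {
      return;
    }
    Terms vector = vectors.terms(field);
    if (vector == null || !vector.hasPayloads()) {
      return;
    }
    TermsEnum termsEnum = vector.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
      postings.nextDoc(); // a term vector covers exactly one document
      int freq = postings.freq();
      for (int i = 0; i < freq; i++) {
        postings.nextPosition();
        BytesRef payload = postings.getPayload();
        if (payload != null) {
          System.out.println(term.utf8ToString() + " @" + i + " -> " + payload);
        }
      }
    }
  }
}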
@@ -32,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -44,6 +43,7 @@ import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnmodifiableIterator;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.RunAutomaton;
 import org.apache.lucene.util.automaton.Transition;
@@ -124,36 +124,14 @@ public class DirectPostingsFormat extends PostingsFormat {
     private final Map<String,DirectField> fields = new TreeMap<String,DirectField>();
 
     public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff) throws IOException {
-      FieldsEnum fieldsEnum = fields.iterator();
-      String field;
-      while ((field = fieldsEnum.next()) != null) {
-        this.fields.put(field, new DirectField(state, field, fieldsEnum.terms(), minSkipCount, lowFreqCutoff));
+      for (String field : fields) {
+        this.fields.put(field, new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff));
       }
     }
 
     @Override
-    public FieldsEnum iterator() {
-
-      final Iterator<Map.Entry<String,DirectField>> iter = fields.entrySet().iterator();
-
-      return new FieldsEnum() {
-        Map.Entry<String,DirectField> current;
-
-        @Override
-        public String next() {
-          if (iter.hasNext()) {
-            current = iter.next();
-            return current.getKey();
-          } else {
-            return null;
-          }
-        }
-
-        @Override
-        public Terms terms() {
-          return current.getValue();
-        }
-      };
+    public Iterator<String> iterator() {
+      return new UnmodifiableIterator<String>(fields.keySet().iterator());
     }
 
     @Override
@@ -348,9 +326,8 @@ public class DirectPostingsFormat extends PostingsFormat {
               scratch.add(docsAndPositionsEnum.endOffset());
             }
             if (hasPayloads) {
-              final BytesRef payload;
-              if (docsAndPositionsEnum.hasPayload()) {
-                payload = docsAndPositionsEnum.getPayload();
+              final BytesRef payload = docsAndPositionsEnum.getPayload();
+              if (payload != null) {
                 scratch.add(payload.length);
                 ros.writeBytes(payload.bytes, payload.offset, payload.length);
               } else {
@@ -421,9 +398,8 @@ public class DirectPostingsFormat extends PostingsFormat {
             for(int pos=0;pos<freq;pos++) {
               positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
               if (hasPayloads) {
-                if (docsAndPositionsEnum.hasPayload()) {
                   BytesRef payload = docsAndPositionsEnum.getPayload();
-                  assert payload != null;
+                if (payload != null) {
                   byte[] payloadBytes = new byte[payload.length];
                   System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
                   payloads[upto][pos] = payloadBytes;
@@ -635,6 +611,21 @@ public class DirectPostingsFormat extends PostingsFormat {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 
+    @Override
+    public boolean hasOffsets() {
+      return hasOffsets;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return hasPos;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return hasPayloads;
+    }
+
     private final class DirectTermsEnum extends TermsEnum {
 
       private final BytesRef scratch = new BytesRef();
@@ -1791,18 +1782,12 @@ public class DirectPostingsFormat extends PostingsFormat {
         return docID;
       }
 
-      @Override
-      public boolean hasPayload() {
-        return payloadLength > 0;
-      }
-
       @Override
       public BytesRef getPayload() {
         if (payloadLength > 0) {
           payload.bytes = payloadBytes;
           payload.offset = lastPayloadOffset;
           payload.length = payloadLength;
-          payloadLength = 0;
           return payload;
         } else {
           return null;
@@ -1995,7 +1980,6 @@ public class DirectPostingsFormat extends PostingsFormat {
       private int upto;
       private int docID = -1;
      private int posUpto;
-      private boolean gotPayload;
       private int[] curPositions;
 
       public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
@@ -2065,7 +2049,6 @@ public class DirectPostingsFormat extends PostingsFormat {
       @Override
       public int nextPosition() {
         posUpto += posJump;
-        gotPayload = false;
         return curPositions[posUpto];
       }
@@ -2199,21 +2182,22 @@ public class DirectPostingsFormat extends PostingsFormat {
         }
       }
 
-      @Override
-      public boolean hasPayload() {
-        return !gotPayload && payloads != null && payloads[upto][posUpto/(hasOffsets ? 3 : 1)] != null;
-      }
-
       private final BytesRef payload = new BytesRef();
 
       @Override
       public BytesRef getPayload() {
+        if (payloads == null) {
+          return null;
+        } else {
           final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
+          if (payloadBytes == null) {
+            return null;
+          }
           payload.bytes = payloadBytes;
           payload.length = payloadBytes.length;
           payload.offset = 0;
-          gotPayload = true;
           return payload;
         }
+      }
     }
   }
@@ -34,7 +34,6 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -49,6 +48,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.UnmodifiableIterator;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
@@ -446,7 +446,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
     private int numDocs;
     private int posPending;
     private int payloadLength;
-    private boolean payloadRetrieved;
     final boolean storeOffsets;
     int offsetLength;
     int startOffset;
@@ -484,7 +483,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
       payloadLength = 0;
       this.numDocs = numDocs;
       posPending = 0;
-      payloadRetrieved = false;
       startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
       offsetLength = 0;
       return this;
@@ -577,10 +575,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
         payload.offset = in.getPosition();
         in.skipBytes(payloadLength);
         payload.length = payloadLength;
-        // Necessary, in case caller changed the
-        // payload.bytes from prior call:
-        payload.bytes = buffer;
-        payloadRetrieved = false;
       }
 
       //System.out.println("    pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
@@ -599,13 +593,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
 
     @Override
     public BytesRef getPayload() {
-      payloadRetrieved = true;
-      return payload;
-    }
-
-    @Override
-    public boolean hasPayload() {
-      return !payloadRetrieved && payload.length > 0;
+      return payload.length > 0 ? payload : null;
     }
 
     @Override
@@ -834,6 +822,21 @@ public class MemoryPostingsFormat extends PostingsFormat {
     public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return field.hasPayloads();
+    }
   }
 
   @Override
@@ -859,24 +862,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
     return new FieldsProducer() {
       @Override
-      public FieldsEnum iterator() {
-        final Iterator<TermsReader> iter = fields.values().iterator();
-
-        return new FieldsEnum() {
-          private TermsReader current;
-
-          @Override
-          public String next() {
-            current = iter.next();
-            return current.field.name;
-          }
-
-          @Override
-          public Terms terms() {
-            return current;
-          }
-        };
+      public Iterator<String> iterator() {
+        return new UnmodifiableIterator<String>(fields.keySet().iterator());
       }
 
       @Override
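The hasOffsets()/hasPositions() implementations here lean on IndexOptions being an ordered enum, so compareTo() doubles as a "stores at least X" test. A tiny sketch of that trick in isolation (class name is illustrative):

import org.apache.lucene.index.FieldInfo.IndexOptions;

public class IndexOptionsCheck {
  // IndexOptions is declared in increasing order: DOCS_ONLY < DOCS_AND_FREQS
  // < DOCS_AND_FREQS_AND_POSITIONS < DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
  // so ordinal comparison answers "does this field store positions/offsets?".
  public static boolean storesPositions(IndexOptions options) {
    return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  }

  public static boolean storesOffsets(IndexOptions options) {
    return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  }
}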
@@ -30,11 +30,11 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.UnmodifiableIterator;
 
 /**
  * Enables per field format support.
@@ -197,34 +197,9 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
       }
     }
 
-    private final class FieldsIterator extends FieldsEnum {
-      private final Iterator<String> it;
-      private String current;
-
-      public FieldsIterator() {
-        it = fields.keySet().iterator();
-      }
-
-      @Override
-      public String next() {
-        if (it.hasNext()) {
-          current = it.next();
-        } else {
-          current = null;
-        }
-
-        return current;
-      }
-
-      @Override
-      public Terms terms() throws IOException {
-        return fields.get(current).terms(current);
-      }
-    }
-
     @Override
-    public FieldsEnum iterator() throws IOException {
-      return new FieldsIterator();
+    public Iterator<String> iterator() {
+      return new UnmodifiableIterator<String>(fields.keySet().iterator());
     }
 
     @Override
@@ -532,19 +532,13 @@ public class PulsingPostingsReader extends PostingsReaderBase {
       }
     }
 
-    @Override
-    public boolean hasPayload() {
-      return storePayloads && !payloadRetrieved && payloadLength > 0;
-    }
-
     @Override
     public BytesRef getPayload() throws IOException {
       //System.out.println("PR  getPayload payloadLength=" + payloadLength + " this=" + this);
       if (payloadRetrieved) {
-        throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
-      }
-      payloadRetrieved = true;
-      if (payloadLength > 0) {
+        return payload;
+      } else if (storePayloads && payloadLength > 0) {
+        payloadRetrieved = true;
         if (payload == null) {
           payload = new BytesRef(payloadLength);
         } else {
@@ -714,7 +714,11 @@ public class SepPostingsReader extends PostingsReaderBase {
     @Override
     public BytesRef getPayload() throws IOException {
       if (!payloadPending) {
-        throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+        return null;
+      }
+
+      if (pendingPayloadBytes == 0) {
+        return payload;
       }
 
       assert pendingPayloadBytes >= payloadLength;
@@ -731,15 +735,9 @@ public class SepPostingsReader extends PostingsReaderBase {
       }
 
       payloadIn.readBytes(payload.bytes, 0, payloadLength);
-      payloadPending = false;
       payload.length = payloadLength;
       pendingPayloadBytes = 0;
       return payload;
     }
-
-    @Override
-    public boolean hasPayload() {
-      return payloadPending && payloadLength > 0;
-    }
   }
 }
@@ -20,14 +20,17 @@ package org.apache.lucene.codecs.simpletext;
 import java.io.IOException;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
+import java.util.TreeMap;
+import java.util.TreeSet;
 
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -40,6 +43,7 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.UnmodifiableIterator;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
@@ -48,7 +52,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 
 class SimpleTextFieldsReader extends FieldsProducer {
-
+  private final TreeMap<String,Long> fields;
   private final IndexInput in;
   private final FieldInfos fieldInfos;
@@ -66,34 +70,21 @@ class SimpleTextFieldsReader extends FieldsProducer {
     in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
     fieldInfos = state.fieldInfos;
+    fields = readFields((IndexInput)in.clone());
   }
 
-  private class SimpleTextFieldsEnum extends FieldsEnum {
-    private final IndexInput in;
-    private final BytesRef scratch = new BytesRef(10);
-    private String current;
-
-    public SimpleTextFieldsEnum() {
-      this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
-    }
-
-    @Override
-    public String next() throws IOException {
-      while(true) {
+  private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
+    BytesRef scratch = new BytesRef(10);
+    TreeMap<String,Long> fields = new TreeMap<String,Long>();
+
+    while (true) {
       SimpleTextUtil.readLine(in, scratch);
       if (scratch.equals(END)) {
-          current = null;
-          return null;
+        return fields;
+      } else if (StringHelper.startsWith(scratch, FIELD)) {
+        String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
+        fields.put(fieldName, in.getFilePointer());
       }
-        if (StringHelper.startsWith(scratch, FIELD)) {
-          return current = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
-        }
-      }
-    }
-
-    @Override
-    public Terms terms() throws IOException {
-      return SimpleTextFieldsReader.this.terms(current);
     }
   }
@@ -471,18 +462,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
 
     @Override
     public BytesRef getPayload() {
-      // Some tests rely on only being able to retrieve the
-      // payload once
-      try {
         return payload;
-      } finally {
-        payload = null;
-      }
-    }
-
-    @Override
-    public boolean hasPayload() {
-      return payload != null;
     }
   }
@@ -498,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
 
   private class SimpleTextTerms extends Terms {
     private final long termsStart;
-    private final IndexOptions indexOptions;
+    private final FieldInfo fieldInfo;
     private long sumTotalTermFreq;
     private long sumDocFreq;
     private int docCount;
@@ -509,7 +489,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
 
     public SimpleTextTerms(String field, long termsStart) throws IOException {
       this.termsStart = termsStart;
-      indexOptions = fieldInfos.fieldInfo(field).getIndexOptions();
+      fieldInfo = fieldInfos.fieldInfo(field);
       loadTerms();
     }
@@ -579,7 +559,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     @Override
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       if (fst != null) {
-        return new SimpleTextTermsEnum(fst, indexOptions);
+        return new SimpleTextTermsEnum(fst, fieldInfo.getIndexOptions());
       } else {
         return TermsEnum.EMPTY;
       }
@@ -597,7 +577,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
 
     @Override
     public long getSumTotalTermFreq() {
-      return indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
+      return fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
     }
 
     @Override
@@ -609,11 +589,26 @@ class SimpleTextFieldsReader extends FieldsProducer {
     public int getDocCount() throws IOException {
       return docCount;
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return fieldInfo.hasPayloads();
+    }
   }
 
   @Override
-  public FieldsEnum iterator() throws IOException {
-    return new SimpleTextFieldsEnum();
+  public Iterator<String> iterator() {
+    return new UnmodifiableIterator<String>(fields.keySet().iterator());
   }
 
   private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
@@ -622,16 +617,14 @@ class SimpleTextFieldsReader extends FieldsProducer {
   synchronized public Terms terms(String field) throws IOException {
     Terms terms = termsCache.get(field);
     if (terms == null) {
-      SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
-      String fieldUpto;
-      while((fieldUpto = fe.next()) != null) {
-        if (fieldUpto.equals(field)) {
-          terms = new SimpleTextTerms(field, fe.in.getFilePointer());
-          break;
-        }
-      }
+      Long fp = fields.get(field);
+      if (fp == null) {
+        return null;
+      } else {
+        terms = new SimpleTextTerms(field, fp);
         termsCache.put(field, terms);
       }
+    }
     return terms;
   }
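The readFields() change above trades one upfront scan of the postings text file for a sorted map of field name to start offset, so terms(field) becomes a lookup instead of a fresh linear scan per field. A stripped-down sketch of that design (the class is illustrative, not part of the patch):

import java.util.TreeMap;

public class FieldDirectory {
  // One pass at open time records where each field's section begins; later
  // lookups are O(log n) and also give sorted iteration order for free.
  private final TreeMap<String, Long> fields = new TreeMap<String, Long>();

  public void record(String fieldName, long filePointer) {
    fields.put(fieldName, filePointer);
  }

  public Long startOf(String fieldName) {
    return fields.get(fieldName); // null if the field does not exist
  }
}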
@@ -29,7 +29,6 @@ import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
@@ -45,6 +44,7 @@ import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.UnmodifiableIterator;
 
 import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
@@ -126,11 +126,15 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
       assert StringHelper.startsWith(scratch, FIELDOFFSETS);
       boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
 
+      readLine();
+      assert StringHelper.startsWith(scratch, FIELDPAYLOADS);
+      boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));
+
       readLine();
       assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
       int termCount = parseIntAt(FIELDTERMCOUNT.length);
 
-      SimpleTVTerms terms = new SimpleTVTerms();
+      SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
       fields.put(fieldName, terms);
 
       for (int j = 0; j < termCount; j++) {
@@ -152,6 +156,9 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
         if (positions || offsets) {
           if (positions) {
             postings.positions = new int[postings.freq];
+            if (payloads) {
+              postings.payloads = new BytesRef[postings.freq];
+            }
           }
 
           if (offsets) {
@@ -164,6 +171,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
               readLine();
               assert StringHelper.startsWith(scratch, POSITION);
               postings.positions[k] = parseIntAt(POSITION.length);
+              if (payloads) {
+                readLine();
+                assert StringHelper.startsWith(scratch, PAYLOAD);
+                if (scratch.length - PAYLOAD.length == 0) {
+                  postings.payloads[k] = null;
+                } else {
+                  byte payloadBytes[] = new byte[scratch.length - PAYLOAD.length];
+                  System.arraycopy(scratch.bytes, scratch.offset+PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
+                  postings.payloads[k] = new BytesRef(payloadBytes);
+                }
+              }
             }
 
             if (offsets) {
@@ -222,26 +240,8 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
   }
 
   @Override
-  public FieldsEnum iterator() throws IOException {
-    return new FieldsEnum() {
-      private Iterator<Map.Entry<String,SimpleTVTerms>> iterator = fields.entrySet().iterator();
-      private Map.Entry<String,SimpleTVTerms> current = null;
-
-      @Override
-      public String next() {
-        if (!iterator.hasNext()) {
-          return null;
-        } else {
-          current = iterator.next();
-          return current.getKey();
-        }
-      }
-
-      @Override
-      public Terms terms() {
-        return current.getValue();
-      }
-    };
+  public Iterator<String> iterator() {
+    return new UnmodifiableIterator<String>(fields.keySet().iterator());
   }
 
   @Override
@@ -257,8 +257,14 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
 
   private static class SimpleTVTerms extends Terms {
     final SortedMap<BytesRef,SimpleTVPostings> terms;
+    final boolean hasOffsets;
+    final boolean hasPositions;
+    final boolean hasPayloads;
 
-    SimpleTVTerms() {
+    SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
+      this.hasOffsets = hasOffsets;
+      this.hasPositions = hasPositions;
+      this.hasPayloads = hasPayloads;
       terms = new TreeMap<BytesRef,SimpleTVPostings>();
     }
@@ -292,6 +298,21 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     public int getDocCount() throws IOException {
       return 1;
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return hasOffsets;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return hasPositions;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return hasPayloads;
+    }
   }
 
   private static class SimpleTVPostings {
@@ -299,6 +320,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     private int positions[];
     private int startOffsets[];
    private int endOffsets[];
+    private BytesRef payloads[];
   }
 
   private static class SimpleTVTermsEnum extends TermsEnum {
@@ -372,7 +394,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     }
 
     // TODO: reuse
     SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
-    e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
+    e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
     return e;
   }
@@ -433,6 +455,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     private int nextPos;
     private Bits liveDocs;
     private int[] positions;
+    private BytesRef[] payloads;
     private int[] startOffsets;
     private int[] endOffsets;
@@ -470,11 +493,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
       }
     }
 
-    public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
+    public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef payloads[]) {
      this.liveDocs = liveDocs;
      this.positions = positions;
      this.startOffsets = startOffsets;
      this.endOffsets = endOffsets;
+      this.payloads = payloads;
      this.doc = -1;
      didNext = false;
      nextPos = 0;
@@ -482,12 +506,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
 
     @Override
     public BytesRef getPayload() {
-      return null;
-    }
-
-    @Override
-    public boolean hasPayload() {
-      return false;
+      return payloads == null ? null : payloads[nextPos-1];
     }
 
     @Override
@@ -45,10 +45,12 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
   static final BytesRef FIELDNAME = new BytesRef(" name ");
   static final BytesRef FIELDPOSITIONS = new BytesRef(" positions ");
   static final BytesRef FIELDOFFSETS = new BytesRef(" offsets ");
+  static final BytesRef FIELDPAYLOADS = new BytesRef(" payloads ");
   static final BytesRef FIELDTERMCOUNT = new BytesRef(" numterms ");
   static final BytesRef TERMTEXT = new BytesRef(" term ");
   static final BytesRef TERMFREQ = new BytesRef(" freq ");
   static final BytesRef POSITION = new BytesRef(" position ");
+  static final BytesRef PAYLOAD = new BytesRef(" payload ");
   static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
   static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
@@ -61,6 +63,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
   private final BytesRef scratch = new BytesRef();
   private boolean offsets;
   private boolean positions;
+  private boolean payloads;

   public SimpleTextTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
     this.directory = directory;
@@ -89,7 +92,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
   }

   @Override
-  public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
+  public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
     write(FIELD);
     write(Integer.toString(info.number));
     newLine();
@@ -106,12 +109,17 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
     write(Boolean.toString(offsets));
     newLine();

+    write(FIELDPAYLOADS);
+    write(Boolean.toString(payloads));
+    newLine();
+
     write(FIELDTERMCOUNT);
     write(Integer.toString(numTerms));
     newLine();

     this.positions = positions;
     this.offsets = offsets;
+    this.payloads = payloads;
   }

   @Override
@@ -126,13 +134,22 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
   }

   @Override
-  public void addPosition(int position, int startOffset, int endOffset) throws IOException {
+  public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
     assert positions || offsets;

     if (positions) {
       write(POSITION);
       write(Integer.toString(position));
       newLine();
+      if (payloads) {
+        write(PAYLOAD);
+        if (payload != null) {
+          assert payload.length > 0;
+          write(payload);
+        }
+        newLine();
+      }
     }

     if (offsets) {
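
The net effect on the on-disk format: when a field records payloads, every position line in the plain-text vectors file is followed by one payload line, left empty when that position carries no payload so the reader can tell absence apart. A rough sketch of the per-position output with made-up values (the surrounding doc/field/term headers and the escaping done by write() are omitted):

      position 5
      payload pay5
      startoffset 10
      endoffset 15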

FieldType.java

@@ -39,6 +39,7 @@ public class FieldType implements IndexableFieldType {
   private boolean storeTermVectors;
   private boolean storeTermVectorOffsets;
   private boolean storeTermVectorPositions;
+  private boolean storeTermVectorPayloads;
   private boolean omitNorms;
   private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
   private DocValues.Type docValueType;
@@ -53,6 +54,7 @@ public class FieldType implements IndexableFieldType {
     this.storeTermVectors = ref.storeTermVectors();
     this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
     this.storeTermVectorPositions = ref.storeTermVectorPositions();
+    this.storeTermVectorPayloads = ref.storeTermVectorPayloads();
     this.omitNorms = ref.omitNorms();
     this.indexOptions = ref.indexOptions();
     this.docValueType = ref.docValueType();
@@ -132,6 +134,15 @@ public class FieldType implements IndexableFieldType {
     this.storeTermVectorPositions = value;
   }

+  public boolean storeTermVectorPayloads() {
+    return this.storeTermVectorPayloads;
+  }
+
+  public void setStoreTermVectorPayloads(boolean value) {
+    checkIfFrozen();
+    this.storeTermVectorPayloads = value;
+  }
+
   public boolean omitNorms() {
     return this.omitNorms;
   }
@@ -198,24 +209,19 @@ public class FieldType implements IndexableFieldType {
       result.append(",");
       result.append("indexed");
     if (tokenized()) {
-      if (result.length() > 0)
-        result.append(",");
-      result.append("tokenized");
+      result.append(",tokenized");
     }
     if (storeTermVectors()) {
-      if (result.length() > 0)
-        result.append(",");
-      result.append("termVector");
+      result.append(",termVector");
     }
     if (storeTermVectorOffsets()) {
-      if (result.length() > 0)
-        result.append(",");
-      result.append("termVectorOffsets");
+      result.append(",termVectorOffsets");
     }
     if (storeTermVectorPositions()) {
-      if (result.length() > 0)
-        result.append(",");
-      result.append("termVectorPosition");
+      result.append(",termVectorPosition");
+      if (storeTermVectorPayloads()) {
+        result.append(",termVectorPayloads");
+      }
     }
     if (omitNorms()) {
       result.append(",omitNorms");
@@ -232,7 +238,9 @@ public class FieldType implements IndexableFieldType {
       }
     }
     if (docValueType != null) {
-      result.append(",docValueType=");
+      if (result.length() > 0)
+        result.append(",");
+      result.append("docValueType=");
       result.append(docValueType);
     }
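
For callers, enabling the new flag is symmetric with the existing term-vector options. A minimal sketch, assuming the usual Document/Field API and an analysis chain that actually produces payloads (the names ft and doc are illustrative, not from this patch):

    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorPayloads(true); // new in this change
    ft.freeze();
    doc.add(new Field("body", "some text", ft));

The payload bytes themselves still come from the token stream's PayloadAttribute; the flag only controls whether they are recorded in the term vectors.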

CheckIndex.java

@@ -685,12 +685,7 @@ public class CheckIndex {
     DocsAndPositionsEnum postings = null;
     String lastField = null;
-    final FieldsEnum fieldsEnum = fields.iterator();
-    while(true) {
-      final String field = fieldsEnum.next();
-      if (field == null) {
-        break;
-      }
+    for (String field : fields) {
       // MultiFieldsEnum relies upon this order...
       if (lastField != null && field.compareTo(lastField) <= 0) {
         throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
@@ -713,11 +708,16 @@ public class CheckIndex {
       // assert fields.terms(field) != null;
       computedFieldCount++;

-      final Terms terms = fieldsEnum.terms();
+      final Terms terms = fields.terms(field);
       if (terms == null) {
         continue;
       }

+      final boolean hasPositions = terms.hasPositions();
+      final boolean hasOffsets = terms.hasOffsets();
+      // term vectors cannot omit TF
+      final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+
       final TermsEnum termsEnum = terms.iterator(null);
       boolean hasOrd = true;
@@ -777,17 +777,10 @@ public class CheckIndex {
         status.termCount++;

         final DocsEnum docs2;
-        final boolean hasPositions;
-        // if we are checking vectors, we have freqs implicitly
-        final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-        // if we are checking vectors, offsets are a free-for-all anyway
-        final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
         if (postings != null) {
           docs2 = postings;
-          hasPositions = true;
         } else {
           docs2 = docs;
-          hasPositions = false;
         }

         int lastDoc = -1;
@@ -824,22 +817,17 @@ public class CheckIndex {
           if (hasPositions) {
             for(int j=0;j<freq;j++) {
               final int pos = postings.nextPosition();
-              // NOTE: pos=-1 is allowed because of ancient bug
-              // (LUCENE-1542) whereby IndexWriter could
-              // write pos=-1 when first token's posInc is 0
-              // (separately: analyzers should not give
-              // posInc=0 to first token); also, term
-              // vectors are allowed to return pos=-1 if
-              // they indexed offset but not positions:
-              if (pos < -1) {
+              if (pos < 0) {
                 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
               }
               if (pos < lastPos) {
                 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
               }
               lastPos = pos;
-              if (postings.hasPayload()) {
-                postings.getPayload();
+              BytesRef payload = postings.getPayload();
+              if (payload != null && payload.length < 1) {
+                throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.length);
               }
               if (hasOffsets) {
                 int startOffset = postings.startOffset();
@@ -924,14 +912,8 @@ public class CheckIndex {
         int lastOffset = 0;
         for(int posUpto=0;posUpto<freq;posUpto++) {
           final int pos = postings.nextPosition();
-          // NOTE: pos=-1 is allowed because of ancient bug
-          // (LUCENE-1542) whereby IndexWriter could
-          // write pos=-1 when first token's posInc is 0
-          // (separately: analyzers should not give
-          // posInc=0 to first token); also, term
-          // vectors are allowed to return pos=-1 if
-          // they indexed offset but not positions:
-          if (pos < -1) {
+          if (pos < 0) {
             throw new RuntimeException("position " + pos + " is out of bounds");
           }
           if (pos < lastPosition) {
@@ -1000,11 +982,7 @@ public class CheckIndex {
           // only happen if it's a ghost field (field with
           // no terms, eg there used to be terms but all
           // docs got deleted and then merged away):
-          // make sure TermsEnum is empty:
-          final Terms fieldTerms2 = fieldsEnum.terms();
-          if (fieldTerms2 != null && fieldTerms2.iterator(null).next() != null) {
-            throw new RuntimeException("Fields.terms(field=" + field + ") returned null yet the field appears to have terms");
-          }
         } else {
           if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
             final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
@@ -1415,9 +1393,7 @@ public class CheckIndex {
           status.docCount++;
         }

-        FieldsEnum fieldsEnum = tfv.iterator();
-        String field = null;
-        while((field = fieldsEnum.next()) != null) {
+        for(String field : tfv) {
           if (doStats) {
             status.totVectors++;
           }
@@ -1432,6 +1408,8 @@ public class CheckIndex {
           Terms terms = tfv.terms(field);
           termsEnum = terms.iterator(termsEnum);
           final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+          final boolean postingsHasPayload = fieldInfo.hasPayloads();
+          final boolean vectorsHasPayload = terms.hasPayloads();

           Terms postingsTerms = postingsFields.terms(field);
           if (postingsTerms == null) {
@@ -1439,19 +1417,18 @@ public class CheckIndex {
           }
           postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);

+          final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
           BytesRef term = null;
           while ((term = termsEnum.next()) != null) {
-            final boolean hasProx;
-
-            // Try positions:
-            postings = termsEnum.docsAndPositions(null, postings);
-            if (postings == null) {
-              hasProx = false;
-              // Try docIDs & freqs:
-              docs = termsEnum.docs(null, docs);
+            if (hasProx) {
+              postings = termsEnum.docsAndPositions(null, postings);
+              assert postings != null;
+              docs = null;
             } else {
-              hasProx = true;
+              docs = termsEnum.docs(null, docs);
+              assert docs != null;
+              postings = null;
             }

             final DocsEnum docs2;
@@ -1504,7 +1481,7 @@ public class CheckIndex {
                 int pos = postings.nextPosition();
                 if (postingsPostings != null) {
                   int postingsPos = postingsPostings.nextPosition();
-                  if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+                  if (terms.hasPositions() && pos != postingsPos) {
                     throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
                   }
                 }
@@ -1535,6 +1512,34 @@ public class CheckIndex {
                     throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
                   }
                 }
+
+                BytesRef payload = postings.getPayload();
+
+                if (payload != null) {
+                  assert vectorsHasPayload;
+                }
+
+                if (postingsHasPayload && vectorsHasPayload) {
+                  assert postingsPostings != null;
+
+                  if (payload == null) {
+                    // we have payloads, but not at this position.
+                    // postings has payloads too, it should not have one at this position
+                    if (postingsPostings.getPayload() != null) {
+                      throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.getPayload());
+                    }
+                  } else {
+                    // we have payloads, and one at this position
+                    // postings should also have one at this position, with the same bytes.
+                    if (postingsPostings.getPayload() == null) {
+                      throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
+                    }
+                    BytesRef postingsPayload = postingsPostings.getPayload();
+                    if (!payload.equals(postingsPayload)) {
+                      throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
+                    }
+                  }
+                }
               }
             }
           }
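
With these checks in place, CheckIndex now verifies that per-position payloads stored in term vectors agree byte-for-byte with the payloads in the postings. A minimal sketch of driving the checker programmatically (standard CheckIndex usage, not part of this diff; dir is an assumed Directory):

    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(System.out);
    CheckIndex.Status status = checker.checkIndex();
    if (!status.clean) {
      // at least one segment failed verification; see the info stream for details
    }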

CoalescedDeletes.java

@@ -24,7 +24,7 @@ import java.util.List;
 import java.util.Map;

 import org.apache.lucene.search.Query;
-import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.MergedIterator;
 import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;

 class CoalescedDeletes {
@@ -48,13 +48,14 @@ class CoalescedDeletes {
   public Iterable<Term> termsIterable() {
     return new Iterable<Term>() {
+      @SuppressWarnings("unchecked")
       @Override
       public Iterator<Term> iterator() {
-        ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
-        for (Iterable<Term> iterable : iterables) {
-          subs.add(iterable.iterator());
+        Iterator<Term> subs[] = new Iterator[iterables.size()];
+        for (int i = 0; i < iterables.size(); i++) {
+          subs[i] = iterables.get(i).iterator();
         }
-        return mergedIterator(subs);
+        return new MergedIterator<Term>(subs);
       }
     };
   }
@@ -86,106 +87,4 @@ class CoalescedDeletes {
       }
     };
   }
-
-  /** provides a merged view across multiple iterators */
-  static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
-    return new Iterator<Term>() {
-      Term current;
-      TermMergeQueue queue = new TermMergeQueue(iterators.size());
-      SubIterator[] top = new SubIterator[iterators.size()];
-      int numTop;
-
-      {
-        int index = 0;
-        for (Iterator<Term> iterator : iterators) {
-          if (iterator.hasNext()) {
-            SubIterator sub = new SubIterator();
-            sub.current = iterator.next();
-            sub.iterator = iterator;
-            sub.index = index++;
-            queue.add(sub);
-          }
-        }
-      }
-
-      public boolean hasNext() {
-        if (queue.size() > 0) {
-          return true;
-        }
-
-        for (int i = 0; i < numTop; i++) {
-          if (top[i].iterator.hasNext()) {
-            return true;
-          }
-        }
-        return false;
-      }
-
-      public Term next() {
-        // restore queue
-        pushTop();
-
-        // gather equal top fields
-        if (queue.size() > 0) {
-          pullTop();
-        } else {
-          current = null;
-        }
-        return current;
-      }
-
-      public void remove() {
-        throw new UnsupportedOperationException();
-      }
-
-      private void pullTop() {
-        // extract all subs from the queue that have the same top term
-        assert numTop == 0;
-        while (true) {
-          top[numTop++] = queue.pop();
-          if (queue.size() == 0
-              || !(queue.top()).current.equals(top[0].current)) {
-            break;
-          }
-        }
-        current = top[0].current;
-      }
-
-      private void pushTop() {
-        // call next() on each top, and put back into queue
-        for (int i = 0; i < numTop; i++) {
-          if (top[i].iterator.hasNext()) {
-            top[i].current = top[i].iterator.next();
-            queue.add(top[i]);
-          } else {
-            // no more terms
-            top[i].current = null;
-          }
-        }
-        numTop = 0;
-      }
-    };
-  }
-
-  private static class SubIterator {
-    Iterator<Term> iterator;
-    Term current;
-    int index;
-  }
-
-  private static class TermMergeQueue extends PriorityQueue<SubIterator> {
-    TermMergeQueue(int size) {
-      super(size);
-    }
-
-    @Override
-    protected boolean lessThan(SubIterator a, SubIterator b) {
-      final int cmp = a.current.compareTo(b.current);
-      if (cmp != 0) {
-        return cmp < 0;
-      } else {
-        return a.index < b.index;
-      }
-    }
-  }
 }
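
The inlined helper above is gone because the same logic now lives in the reusable org.apache.lucene.util.MergedIterator, constructed from an array of sub-iterators as termsIterable() shows. As with the removed implementation, each sub-iterator must already be sorted, and equal elements surfacing from several subs come out once. A usage sketch (the generics and dedup behavior are inferred from this diff and the removed code, not verified against the new class):

    Iterator<Term> a = list1.iterator(); // list1, list2: sorted lists of Term (assumed)
    Iterator<Term> b = list2.iterator();
    @SuppressWarnings("unchecked")
    Iterator<Term> merged = new MergedIterator<Term>(new Iterator[] { a, b });
    while (merged.hasNext()) {
      Term t = merged.next(); // globally sorted, duplicates collapsed
    }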

DocValues.java

@@ -105,7 +105,7 @@ public abstract class DocValues implements Closeable {
    * <p>
    * {@link Source} instances obtained from this method are closed / released
    * from the cache once this {@link DocValues} instance is closed by the
-   * {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
+   * {@link IndexReader}, {@link Fields} or the
    * {@link DocValues} was created from.
    */
   public Source getSource() throws IOException {

DocsAndPositionsEnum.java

@@ -48,11 +48,8 @@ public abstract class DocsAndPositionsEnum extends DocsEnum {
   public abstract int endOffset() throws IOException;

   /** Returns the payload at this position, or null if no
-   *  payload was indexed. Only call this once per
-   *  position. You should not modify anything (neither
-   *  members of the returned BytesRef nor bytes in the
-   *  byte[]). */
+   *  payload was indexed. You should not modify anything
+   *  (neither members of the returned BytesRef nor bytes
+   *  in the byte[]). */
   public abstract BytesRef getPayload() throws IOException;
-
-  public abstract boolean hasPayload();
 }
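
Callers that used to guard getPayload() with hasPayload() now test the return value directly: null signals that the current position has no payload, and the "call it only once per position" restriction in the old javadoc is gone. A sketch of the new consumption pattern (dpe and freq are assumed to come from an already-positioned enum):

    for (int i = 0; i < freq; i++) {
      int pos = dpe.nextPosition();
      BytesRef payload = dpe.getPayload();
      if (payload != null) {
        // read payload.bytes[payload.offset .. payload.offset + payload.length - 1]
      }
    }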

Fields.java

@@ -18,15 +18,16 @@ package org.apache.lucene.index;
  */

 import java.io.IOException;
+import java.util.Iterator;

 /** Flex API for access to fields and terms
  *  @lucene.experimental */

-public abstract class Fields {
+public abstract class Fields implements Iterable<String> {

   /** Returns an iterator that will step through all fields
    *  names. This will not return null. */
-  public abstract FieldsEnum iterator() throws IOException;
+  public abstract Iterator<String> iterator();

   /** Get the {@link Terms} for this field. This will return
    *  null if the field does not exist. */
@@ -45,12 +46,7 @@ public abstract class Fields {
   // TODO: deprecate?
   public long getUniqueTermCount() throws IOException {
     long numTerms = 0;
-    FieldsEnum it = iterator();
-    while(true) {
-      String field = it.next();
-      if (field == null) {
-        break;
-      }
+    for (String field : this) {
       Terms terms = terms(field);
       if (terms != null) {
         final long termCount = terms.size();
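
Since Fields is now Iterable<String>, field names are walked with a plain for-each, exactly as getUniqueTermCount() above now does, instead of the old FieldsEnum next()/terms() protocol. A sketch (fields is an assumed Fields instance):

    for (String field : fields) {
      Terms terms = fields.terms(field);
      if (terms != null) {
        // per-field work
      }
    }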

FieldsEnum.java

@@ -1,79 +0,0 @@
-package org.apache.lucene.index;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.util.AttributeSource;
-
-/** Enumerates indexed fields.  You must first call {@link
- *  #next} before calling {@link #terms}.
- *
- * @lucene.experimental */
-
-public abstract class FieldsEnum {
-
-  // TODO: maybe allow retrieving FieldInfo for current
-  // field, as optional method?
-
-  private AttributeSource atts = null;
-
-  /**
-   * Returns the related attributes.
-   */
-  public AttributeSource attributes() {
-    if (atts == null) {
-      atts = new AttributeSource();
-    }
-    return atts;
-  }
-
-  /** Increments the enumeration to the next field. Returns
-   * null when there are no more fields.*/
-  public abstract String next() throws IOException;
-
-  // TODO: would be nice to require/fix all impls so they
-  // never return null here... we have to fix the writers to
-  // never write 0-terms fields... or maybe allow a non-null
-  // Terms instance in just this case
-
-  /** Get {@link Terms} for the current field.  After {@link #next} returns
-   *  null this method should not be called. This method may
-   *  return null in some cases, which means the provided
-   *  field does not have any terms. */
-  public abstract Terms terms() throws IOException;
-
-  // TODO: should we allow pulling Terms as well?  not just
-  // the iterator?
-
-  public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0];
-
-  /** Provides zero fields */
-  public final static FieldsEnum EMPTY = new FieldsEnum() {
-
-    @Override
-    public String next() {
-      return null;
-    }
-
-    @Override
-    public Terms terms() {
-      throw new IllegalStateException("this method should never be called");
-    }
-  };
-}

FilterAtomicReader.java

@@ -24,6 +24,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;

 import java.io.IOException;
 import java.util.Comparator;
+import java.util.Iterator;

 /** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
  * uses as its basic source of data, possibly transforming the data along the
@@ -46,7 +47,7 @@ public class FilterAtomicReader extends AtomicReader {
     }

     @Override
-    public FieldsEnum iterator() throws IOException {
+    public Iterator<String> iterator() {
       return in.iterator();
     }
@@ -109,28 +110,20 @@ public class FilterAtomicReader extends AtomicReader {
     public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
       return in.intersect(automaton, bytes);
     }
-  }

-  /** Base class for filtering {@link TermsEnum} implementations. */
-  public static class FilterFieldsEnum extends FieldsEnum {
-    protected final FieldsEnum in;
-
-    public FilterFieldsEnum(FieldsEnum in) {
-      this.in = in;
+    @Override
+    public boolean hasOffsets() {
+      return in.hasOffsets();
     }

     @Override
-    public String next() throws IOException {
-      return in.next();
+    public boolean hasPositions() {
+      return in.hasPositions();
     }

     @Override
-    public Terms terms() throws IOException {
-      return in.terms();
-    }
-
-    @Override
-    public AttributeSource attributes() {
-      return in.attributes();
+    public boolean hasPayloads() {
+      return in.hasPayloads();
     }
   }
@@ -293,11 +286,6 @@ public class FilterAtomicReader extends AtomicReader {
       return in.getPayload();
     }

-    @Override
-    public boolean hasPayload() {
-      return in.hasPayload();
-    }
-
     @Override
     public AttributeSource attributes() {
       return in.attributes();

FreqProxTermsWriterPerField.java

@@ -173,7 +173,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
       postings.lastDocCodes[termID] = docState.docID;
     } else {
       postings.lastDocCodes[termID] = docState.docID << 1;
-      postings.docFreqs[termID] = 1;
+      postings.termFreqs[termID] = 1;
       if (hasProx) {
         writeProx(termID, fieldState.position);
         if (hasOffsets) {
@@ -194,10 +194,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;

-    assert !hasFreq || postings.docFreqs[termID] > 0;
+    assert !hasFreq || postings.termFreqs[termID] > 0;

     if (!hasFreq) {
-      assert postings.docFreqs == null;
+      assert postings.termFreqs == null;
       if (docState.docID != postings.lastDocIDs[termID]) {
         assert docState.docID > postings.lastDocIDs[termID];
         termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@@ -212,13 +212,13 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem

       // Now that we know doc freq for previous doc,
       // write it & lastDocCode
-      if (1 == postings.docFreqs[termID]) {
+      if (1 == postings.termFreqs[termID]) {
         termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
       } else {
         termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
-        termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
+        termsHashPerField.writeVInt(0, postings.termFreqs[termID]);
       }
-      postings.docFreqs[termID] = 1;
+      postings.termFreqs[termID] = 1;
       fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
       postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
       postings.lastDocIDs[termID] = docState.docID;
@@ -233,7 +233,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
       }
       fieldState.uniqueTermCount++;
     } else {
-      fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
+      fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
       if (hasProx) {
         writeProx(termID, fieldState.position-postings.lastPositions[termID]);
       }
@@ -252,7 +252,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
     public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
       super(size);
       if (writeFreqs) {
-        docFreqs = new int[size];
+        termFreqs = new int[size];
       }
       lastDocIDs = new int[size];
       lastDocCodes = new int[size];
@@ -267,7 +267,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
       //System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
     }

-    int docFreqs[];      // # times this term occurs in the current doc
+    int termFreqs[];     // # times this term occurs in the current doc
     int lastDocIDs[];    // Last docID where this term occurred
     int lastDocCodes[];  // Code for prior doc
     int lastPositions[]; // Last position where this term occurred
@@ -275,7 +275,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
     @Override
     ParallelPostingsArray newInstance(int size) {
-      return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
+      return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null);
     }

     @Override
@@ -295,9 +295,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         assert to.lastOffsets != null;
         System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
       }
-      if (docFreqs != null) {
-        assert to.docFreqs != null;
-        System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
+      if (termFreqs != null) {
+        assert to.termFreqs != null;
+        System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy);
       }
     }
@@ -310,7 +310,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
       if (lastOffsets != null) {
         bytes += RamUsageEstimator.NUM_BYTES_INT;
       }
-      if (docFreqs != null) {
+      if (termFreqs != null) {
         bytes += RamUsageEstimator.NUM_BYTES_INT;
       }
@@ -416,21 +416,21 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
       // Now termStates has numToMerge FieldMergeStates
      // which all share the same term.  Now we must
      // interleave the docID streams.
-      int numDocs = 0;
+      int docFreq = 0;
       long totTF = 0;
       int docID = 0;

       while(true) {
         //System.out.println("  cycle");
-        final int termDocFreq;
+        final int termFreq;
         if (freq.eof()) {
           if (postings.lastDocCodes[termID] != -1) {
             // Return last doc
             docID = postings.lastDocIDs[termID];
             if (readTermFreq) {
-              termDocFreq = postings.docFreqs[termID];
+              termFreq = postings.termFreqs[termID];
             } else {
-              termDocFreq = -1;
+              termFreq = -1;
             }
             postings.lastDocCodes[termID] = -1;
           } else {
@@ -441,20 +441,20 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
           final int code = freq.readVInt();
           if (!readTermFreq) {
             docID += code;
-            termDocFreq = -1;
+            termFreq = -1;
           } else {
             docID += code >>> 1;
             if ((code & 1) != 0) {
-              termDocFreq = 1;
+              termFreq = 1;
             } else {
-              termDocFreq = freq.readVInt();
+              termFreq = freq.readVInt();
             }
           }

           assert docID != postings.lastDocIDs[termID];
         }

-        numDocs++;
+        docFreq++;
         assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();

         // NOTE: we could check here if the docID was
@@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         // 2nd sweep does the real flush, but I suspect
         // that'd add too much time to flush.
         visitedDocs.set(docID);
-        postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
+        postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
         if (docID < delDocLimit) {
           // Mark it deleted.  TODO: we could also skip
           // writing its postings; this would be
@@ -485,7 +485,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
           }
         }

-        totTF += termDocFreq;
+        totTF += termFreq;

         // Carefully copy over the prox + payload info,
         // changing the format to match Lucene's segment
@@ -495,7 +495,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
           // we did record positions (& maybe payload) and/or offsets
           int position = 0;
           int offset = 0;
-          for(int j=0;j<termDocFreq;j++) {
+          for(int j=0;j<termFreq;j++) {
             final BytesRef thisPayload;

             if (readPositions) {
@@ -542,9 +542,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         }
         postingsConsumer.finishDoc();
       }
-      termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
+      termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totTF : -1));
       sumTotalTermFreq += totTF;
-      sumDocFreq += numDocs;
+      sumDocFreq += docFreq;
     }

     termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
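
The docFreqs-to-termFreqs rename matches what the array actually stores: the number of occurrences of the term inside the current document, not the number of documents containing it. With made-up numbers:

    // doc0: "a a b"  -> termFreqs[a] is 2 while doc0 is buffered
    // doc1: "a c"    -> termFreqs[a] is 1 while doc1 is buffered
    // at flush: docFreq(a) = 2 documents, totalTermFreq(a) = 3 occurrences

The flush loop above mirrors this: the local docFreq counter increments once per document, while termFreq is the per-document value read back from the in-memory stream.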

IndexWriter.java

@@ -2312,9 +2312,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
         }

         SegmentInfos sis = new SegmentInfos(); // read infos from dir
         sis.read(dir);
-        final Set<String> dsFilesCopied = new HashSet<String>();
-        final Map<String, String> dsNames = new HashMap<String, String>();
-        final Set<String> copiedFiles = new HashSet<String>();

         for (SegmentInfoPerCommit info : sis) {
           assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
@@ -2327,7 +2325,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
           IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));

-          infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles));
+          infos.add(copySegmentAsIs(info, newSegName, context));
         }
       }
@@ -2463,24 +2461,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   }

   /** Copies the segment files as-is into the IndexWriter's directory. */
-  // TODO: this can be substantially simplified now that 3.x support/shared docstores is removed!
-  private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName,
-                                               Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context,
-                                               Set<String> copiedFiles)
+  private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, IOContext context)
       throws IOException {
-    // Determine if the doc store of this segment needs to be copied. It's
-    // only relevant for segments that share doc store with others,
-    // because the DS might have been copied already, in which case we
-    // just want to update the DS name of this SegmentInfo.
-    final String dsName = info.info.name;
-    assert dsName != null;
-    final String newDsName;
-    if (dsNames.containsKey(dsName)) {
-      newDsName = dsNames.get(dsName);
-    } else {
-      dsNames.put(dsName, segName);
-      newDsName = segName;
-    }

     // note: we don't really need this fis (its copied), but we load it up
     // so we don't pass a null value to the si writer
@@ -2496,7 +2478,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     }

     //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
-    // Same SI as before but we change directory, name and docStoreSegment:
+    // Same SI as before but we change directory and name
     SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
                                           info.info.getUseCompoundFile(),
                                           info.info.getCodec(), info.info.getDiagnostics(), attributes);
@@ -2513,16 +2495,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     }
     newInfo.setFiles(segFiles);

-    // We must rewrite the SI file because it references
-    // segment name (its own name, if its 3.x, and doc
-    // store segment name):
+    // We must rewrite the SI file because it references segment name in its list of files, etc
     TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
-    try {
-      newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
-    } catch (UnsupportedOperationException uoe) {
-      // OK: 3x codec cannot write a new SI file;
-      // SegmentInfos will write this on commit
-    }
+    newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);

     final Collection<String> siFiles = trackingDir.getCreatedFiles();
@@ -2537,8 +2513,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       }
       assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
-      assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once";
-      copiedFiles.add(file);

       info.info.dir.copy(directory, file, newFileName, context);
     }

IndexableFieldType.java

@@ -43,6 +43,9 @@ public interface IndexableFieldType {
   /** True if term vector positions should be indexed */
   public boolean storeTermVectorPositions();

+  /** True if term vector payloads should be indexed */
+  public boolean storeTermVectorPayloads();
+
   /** True if norms should not be indexed */
   public boolean omitNorms();

MergeState.java

@@ -199,6 +199,7 @@ public class MergeState {
   // and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
   public PayloadProcessorProvider payloadProcessorProvider;
   public ReaderPayloadProcessor[] readerPayloadProcessor;
+  public ReaderPayloadProcessor currentReaderPayloadProcessor;
   public PayloadProcessor[] currentPayloadProcessor;

   // TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)

Some files were not shown because too many files have changed in this diff.