mirror of https://github.com/apache/lucene.git
LUCENE-3892: merge trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1372366 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
789981c9fd
|
@ -0,0 +1,49 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<project name="clover" basedir=".">
|
||||||
|
<import file="lucene/common-build.xml"/>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Run after JUnit tests.
|
||||||
|
|
||||||
|
This target is in a separate file, as it needs to include common-build.xml,
|
||||||
|
but must run from top-level!
|
||||||
|
-->
|
||||||
|
<target name="generate-clover-reports" depends="clover">
|
||||||
|
<fail unless="run.clover">Clover not enabled!</fail>
|
||||||
|
<mkdir dir="${clover.report.dir}"/>
|
||||||
|
<fileset dir="." id="clover.test.result.files">
|
||||||
|
<include name="*/build/**/test/TEST-*.xml"/>
|
||||||
|
<exclude name="lucene/build/backwards/**"/>
|
||||||
|
</fileset>
|
||||||
|
<clover-report>
|
||||||
|
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
|
||||||
|
<format type="html" filter="assert"/>
|
||||||
|
<testresults refid="clover.test.result.files"/>
|
||||||
|
</current>
|
||||||
|
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
|
||||||
|
<format type="xml" filter="assert"/>
|
||||||
|
<testresults refid="clover.test.result.files"/>
|
||||||
|
</current>
|
||||||
|
</clover-report>
|
||||||
|
<echo>You can find the merged Lucene/Solr Clover report in '${clover.report.dir}'.</echo>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
</project>
|
130
build.xml
130
build.xml
|
@ -51,11 +51,28 @@
|
||||||
</sequential>
|
</sequential>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="validate" description="Validate dependencies, licenses, etc.">
|
<target name="validate" description="Validate dependencies, licenses, etc." depends="-validate-source-patterns">
|
||||||
<sequential><subant target="validate" inheritall="false" failonerror="true">
|
<subant target="validate" inheritall="false" failonerror="true">
|
||||||
<fileset dir="lucene" includes="build.xml" />
|
<fileset dir="lucene" includes="build.xml" />
|
||||||
<fileset dir="solr" includes="build.xml" />
|
<fileset dir="solr" includes="build.xml" />
|
||||||
</subant></sequential>
|
</subant>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="-validate-source-patterns" unless="disable.source-patterns">
|
||||||
|
<!-- check that there are no nocommits or @author javadoc tags: -->
|
||||||
|
<property name="validate.currDir" location="."/>
|
||||||
|
<pathconvert pathsep="${line.separator}" dirsep="/" property="validate.patternsFound" setonempty="false">
|
||||||
|
<fileset dir="${validate.currDir}">
|
||||||
|
<include name="**/*.java"/>
|
||||||
|
<exclude name="**/backwards/**"/>
|
||||||
|
<or>
|
||||||
|
<containsregexp expression="@author\b" casesensitive="yes"/>
|
||||||
|
<containsregexp expression="\bno(n|)commit\b" casesensitive="no"/>
|
||||||
|
</or>
|
||||||
|
</fileset>
|
||||||
|
<map from="${validate.currDir}${file.separator}" to="* "/>
|
||||||
|
</pathconvert>
|
||||||
|
<fail if="validate.patternsFound">The following files contain @author tags or nocommits:${line.separator}${validate.patternsFound}</fail>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="rat-sources" description="Runs rat across all sources and tests">
|
<target name="rat-sources" description="Runs rat across all sources and tests">
|
||||||
|
@ -184,4 +201,111 @@
|
||||||
</subant>
|
</subant>
|
||||||
</sequential>
|
</sequential>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<!-- define here, as common-build is not included! -->
|
||||||
|
<property name="python32.exe" value="python3.2" />
|
||||||
|
<property name="fakeRelease" value="lucene/build/fakeRelease"/>
|
||||||
|
<property name="fakeReleaseTmp" value="lucene/build/fakeReleaseTmp"/>
|
||||||
|
<property name="fakeReleaseVersion" value="5.0"/> <!-- *not* -SNAPSHOT, the real version -->
|
||||||
|
|
||||||
|
<target name="nightly-smoke" description="Builds an unsigned release and smoke tests it." depends="clean">
|
||||||
|
<sequential>
|
||||||
|
<fail unless="JAVA6_HOME">JAVA6_HOME property is not defined.</fail>
|
||||||
|
<fail unless="JAVA7_HOME">JAVA7_HOME property is not defined.</fail>
|
||||||
|
<subant target="prepare-release-no-sign" inheritall="false" failonerror="true">
|
||||||
|
<fileset dir="lucene" includes="build.xml" />
|
||||||
|
<fileset dir="solr" includes="build.xml" />
|
||||||
|
<property name="version" value="${fakeReleaseVersion}" />
|
||||||
|
</subant>
|
||||||
|
<delete dir="${fakeRelease}"/>
|
||||||
|
<delete dir="${fakeReleaseTmp}"/>
|
||||||
|
<mkdir dir="${fakeRelease}"/>
|
||||||
|
<copy todir="${fakeRelease}/lucene">
|
||||||
|
<fileset dir="lucene/dist"/>
|
||||||
|
</copy>
|
||||||
|
<copy todir="${fakeRelease}/lucene/changes">
|
||||||
|
<fileset dir="lucene/build/docs/changes"/>
|
||||||
|
</copy>
|
||||||
|
<get src="http://people.apache.org/keys/group/lucene.asc"
|
||||||
|
dest="${fakeRelease}/lucene/KEYS"/>
|
||||||
|
<copy todir="${fakeRelease}/solr">
|
||||||
|
<fileset dir="solr/package"/>
|
||||||
|
</copy>
|
||||||
|
<copy file="${fakeRelease}/lucene/KEYS" todir="${fakeRelease}/solr"/>
|
||||||
|
<makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
|
||||||
|
<exec executable="${python32.exe}" failonerror="true">
|
||||||
|
<arg value="-u"/>
|
||||||
|
<arg value="dev-tools/scripts/smokeTestRelease.py"/>
|
||||||
|
<arg value="${fakeRelease.uri}"/>
|
||||||
|
<arg value="${fakeReleaseVersion}"/>
|
||||||
|
<arg value="${fakeReleaseTmp}"/>
|
||||||
|
<arg value="false"/>
|
||||||
|
<env key="JAVA6_HOME" value="${JAVA6_HOME}"/>
|
||||||
|
<env key="JAVA7_HOME" value="${JAVA7_HOME}"/>
|
||||||
|
</exec>
|
||||||
|
<delete dir="${fakeRelease}"/>
|
||||||
|
<delete dir="${fakeReleaseTmp}"/>
|
||||||
|
</sequential>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<!-- Calls only generate-clover-reports on Lucene, as Solr's is just a clone with other target; the database itself is fixed -->
|
||||||
|
<target name="generate-clover-reports">
|
||||||
|
<subant target="generate-clover-reports" inheritall="false" failonerror="true">
|
||||||
|
<fileset dir="." includes="build-clover.xml" />
|
||||||
|
</subant>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<!-- Jenkins tasks -->
|
||||||
|
<target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,-svn-status"/>
|
||||||
|
|
||||||
|
<target name="jenkins-clover">
|
||||||
|
<antcall target="-jenkins-clover">
|
||||||
|
<param name="run.clover" value="true"/>
|
||||||
|
<!-- must be 1, as clover does not like parallel test runs: -->
|
||||||
|
<param name="tests.jvms" value="1"/>
|
||||||
|
<!-- Also override some other props to be fast, ignoring what's set on command line: -->
|
||||||
|
<param name="tests.multiplier" value="1"/>
|
||||||
|
<param name="tests.slow" value="false"/>
|
||||||
|
<param name="tests.nightly" value="false"/>
|
||||||
|
<param name="tests.weekly" value="false"/>
|
||||||
|
<param name="tests.multiplier" value="1"/>
|
||||||
|
</antcall>
|
||||||
|
</target>
|
||||||
|
<target name="-jenkins-clover" depends="clean,test,generate-clover-reports"/>
|
||||||
|
|
||||||
|
<!-- we need this extra condition, as we want to match only on "true", not solely if property is set: -->
|
||||||
|
<property name="disable.javadocs-lint" value="false" />
|
||||||
|
<condition property="-disable.javadocs-lint">
|
||||||
|
<equals arg1="${disable.javadocs-lint}" arg2="true"/>
|
||||||
|
</condition>
|
||||||
|
<target name="-jenkins-javadocs-lint" unless="-disable.javadocs-lint">
|
||||||
|
<antcall target="javadocs-lint"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<!-- define here, as common-build is not included! -->
|
||||||
|
<property name="svn.exe" value="svn" />
|
||||||
|
|
||||||
|
<target name="-svn-status">
|
||||||
|
<exec executable="${svn.exe}" dir="." failonerror="true">
|
||||||
|
<arg value="status"/>
|
||||||
|
<redirector outputproperty="svn.status.output">
|
||||||
|
<outputfilterchain>
|
||||||
|
<linecontainsregexp>
|
||||||
|
<regexp pattern="^\?" />
|
||||||
|
</linecontainsregexp>
|
||||||
|
<tokenfilter>
|
||||||
|
<replaceregex pattern="^........" replace="* " />
|
||||||
|
<replacestring from="${file.separator}" to="/" />
|
||||||
|
</tokenfilter>
|
||||||
|
</outputfilterchain>
|
||||||
|
</redirector>
|
||||||
|
</exec>
|
||||||
|
<fail message="Source checkout is dirty after running tests!!! Offending files:${line.separator}${svn.status.output}">
|
||||||
|
<condition>
|
||||||
|
<not>
|
||||||
|
<equals arg1="${svn.status.output}" arg2=""/>
|
||||||
|
</not>
|
||||||
|
</condition>
|
||||||
|
</fail>
|
||||||
|
</target>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -174,6 +174,6 @@
|
||||||
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-beanutils-1.7.0.jar"/>
|
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-beanutils-1.7.0.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
|
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
|
||||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||||
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
|
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
|
||||||
<classpathentry kind="output" path="bin/other"/>
|
<classpathentry kind="output" path="bin/other"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
<library name="JUnit">
|
<library name="JUnit">
|
||||||
<CLASSES>
|
<CLASSES>
|
||||||
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
|
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
|
||||||
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar!/" />
|
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar!/" />
|
||||||
</CLASSES>
|
</CLASSES>
|
||||||
<JAVADOC />
|
<JAVADOC />
|
||||||
<SOURCES />
|
<SOURCES />
|
||||||
|
|
|
@ -36,27 +36,25 @@ A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts
|
||||||
|
|
||||||
B. How to generate Lucene/Solr Maven artifacts
|
B. How to generate Lucene/Solr Maven artifacts
|
||||||
|
|
||||||
Prerequisites: JDK 1.6+ and Ant 1.7.X
|
Prerequisites: JDK 1.6+ and Ant 1.8.2+
|
||||||
|
|
||||||
Run 'ant generate-maven-artifacts' to create an internal Maven
|
Run 'ant generate-maven-artifacts' to create an internal Maven
|
||||||
repository, including POMs, binary .jars, source .jars, and javadoc
|
repository, including POMs, binary .jars, source .jars, and javadoc
|
||||||
.jars.
|
.jars.
|
||||||
|
|
||||||
You can run the above command in four possible places: the top-level
|
You can run the above command in three possible places: the top-level
|
||||||
directory; under lucene/; under solr/; or under modules/. From the
|
directory; under lucene/; or under solr/. From the top-level directory
|
||||||
top-level directory, from lucene/, or from modules/, the internal
|
or from lucene/, the internal repository will be located at dist/maven/.
|
||||||
repository will be located at dist/maven/. From solr/, the internal
|
From solr/, the internal repository will be located at package/maven/.
|
||||||
repository will be located at package/maven/.
|
|
||||||
|
|
||||||
|
|
||||||
C. How to deploy Maven artifacts to a repository
|
C. How to deploy Maven artifacts to a repository
|
||||||
|
|
||||||
Prerequisites: JDK 1.6+ and Ant 1.7.X
|
Prerequisites: JDK 1.6+ and Ant 1.8.2+
|
||||||
|
|
||||||
You can deploy targets for all of Lucene/Solr, only Lucene, only Solr,
|
You can deploy targets for all of Lucene/Solr, only Lucene, or only Solr,
|
||||||
or only modules/, as in B. above. To deploy to a Maven repository, the
|
as in B. above. To deploy to a Maven repository, the command is the same
|
||||||
command is the same as in B. above, with the addition of two system
|
as in B. above, with the addition of two system properties:
|
||||||
properties:
|
|
||||||
|
|
||||||
ant -Dm2.repository.id=my-repo-id \
|
ant -Dm2.repository.id=my-repo-id \
|
||||||
-Dm2.repository.url=http://example.org/my/repo \
|
-Dm2.repository.url=http://example.org/my/repo \
|
||||||
|
@ -101,7 +99,7 @@ D. How to use Maven to build Lucene/Solr
|
||||||
the default, you can supply an alternate version on the command line
|
the default, you can supply an alternate version on the command line
|
||||||
with the above command, e.g.:
|
with the above command, e.g.:
|
||||||
|
|
||||||
ant -Dversion=5.0-my-special-version get-maven-poms
|
ant -Dversion=my-special-version get-maven-poms
|
||||||
|
|
||||||
Note: if you change the version in the POMs, there is one test method
|
Note: if you change the version in the POMs, there is one test method
|
||||||
that will fail under maven-surefire-plugin:
|
that will fail under maven-surefire-plugin:
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -77,33 +71,5 @@
|
||||||
</excludes>
|
</excludes>
|
||||||
</testResource>
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<extraJvmArguments>-Xmx128M</extraJvmArguments>
|
|
||||||
<repositoryLayout>flat</repositoryLayout>
|
|
||||||
<platforms>
|
|
||||||
<platform>windows</platform>
|
|
||||||
<platform>unix</platform>
|
|
||||||
</platforms>
|
|
||||||
<programs>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.analysis.charfilter.HtmlStripCharFilter</mainClass>
|
|
||||||
<name>HtmlStripCharFilter</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.analysis.en.PorterStemmer</mainClass>
|
|
||||||
<name>EnglishPorterStemmer</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.tartarus.snowball.TestApp</mainClass>
|
|
||||||
<name>SnowballTestApp</name>
|
|
||||||
</program>
|
|
||||||
</programs>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -40,15 +40,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -39,15 +39,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -39,15 +39,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -75,6 +69,11 @@
|
||||||
<build>
|
<build>
|
||||||
<sourceDirectory>${module-path}/src/java</sourceDirectory>
|
<sourceDirectory>${module-path}/src/java</sourceDirectory>
|
||||||
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
|
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>${module-path}/src/resources</directory>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
<testResources>
|
<testResources>
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${project.build.testSourceDirectory}</directory>
|
<directory>${project.build.testSourceDirectory}</directory>
|
||||||
|
|
|
@ -39,15 +39,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -75,6 +69,11 @@
|
||||||
<build>
|
<build>
|
||||||
<sourceDirectory>${module-path}/src/java</sourceDirectory>
|
<sourceDirectory>${module-path}/src/java</sourceDirectory>
|
||||||
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
|
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>${module-path}/src/resources</directory>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
<testResources>
|
<testResources>
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${project.build.testSourceDirectory}</directory>
|
<directory>${project.build.testSourceDirectory}</directory>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -41,15 +41,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -120,41 +114,5 @@
|
||||||
</includes>
|
</includes>
|
||||||
</testResource>
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<extraJvmArguments>-Xmx128M</extraJvmArguments>
|
|
||||||
<repositoryLayout>flat</repositoryLayout>
|
|
||||||
<platforms>
|
|
||||||
<platform>windows</platform>
|
|
||||||
<platform>unix</platform>
|
|
||||||
</platforms>
|
|
||||||
<programs>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.benchmark.byTask.Benchmark</mainClass>
|
|
||||||
<name>Benchmark</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.benchmark.quality.trec.QueryDriver</mainClass>
|
|
||||||
<name>QueryDriver</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder</mainClass>
|
|
||||||
<name>QualityQueriesFinder</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.benchmark.utils.ExtractReuters</mainClass>
|
|
||||||
<name>ExtractReuters</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.benchmark.utils.ExtractWikipedia</mainClass>
|
|
||||||
<name>ExtractWikipedia</name>
|
|
||||||
</program>
|
|
||||||
</programs>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -104,40 +98,6 @@
|
||||||
</systemPropertyVariables>
|
</systemPropertyVariables>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<extraJvmArguments>-Xmx128M</extraJvmArguments>
|
|
||||||
<repositoryLayout>flat</repositoryLayout>
|
|
||||||
<platforms>
|
|
||||||
<platform>windows</platform>
|
|
||||||
<platform>unix</platform>
|
|
||||||
</platforms>
|
|
||||||
<programs>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.index.CheckIndex</mainClass>
|
|
||||||
<name>CheckIndex</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.index.IndexReader</mainClass>
|
|
||||||
<name>IndexReader</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.store.LockStressTest</mainClass>
|
|
||||||
<name>LockStressTest</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.store.LockVerifyServer</mainClass>
|
|
||||||
<name>LockVerifyServer</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.util.English</mainClass>
|
|
||||||
<name>English</name>
|
|
||||||
</program>
|
|
||||||
</programs>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
<artifactId>build-helper-maven-plugin</artifactId>
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -87,30 +81,5 @@
|
||||||
</excludes>
|
</excludes>
|
||||||
</testResource>
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<extraJvmArguments>-Xmx128M</extraJvmArguments>
|
|
||||||
<repositoryLayout>flat</repositoryLayout>
|
|
||||||
<assembleDirectory>${build-directory}</assembleDirectory>
|
|
||||||
<platforms>
|
|
||||||
<platform>windows</platform>
|
|
||||||
<platform>unix</platform>
|
|
||||||
</platforms>
|
|
||||||
<programs>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.demo.IndexFiles</mainClass>
|
|
||||||
<name>IndexFiles</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.demo.SearchFiles</mainClass>
|
|
||||||
<name>SearchFiles</name>
|
|
||||||
</program>
|
|
||||||
</programs>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -39,15 +39,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -39,15 +39,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -39,15 +39,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -72,49 +66,5 @@
|
||||||
</excludes>
|
</excludes>
|
||||||
</testResource>
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<extraJvmArguments>-Xmx128M</extraJvmArguments>
|
|
||||||
<repositoryLayout>flat</repositoryLayout>
|
|
||||||
<platforms>
|
|
||||||
<platform>windows</platform>
|
|
||||||
<platform>unix</platform>
|
|
||||||
</platforms>
|
|
||||||
<programs>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.index.FieldNormModifier</mainClass>
|
|
||||||
<name>FieldNormModifier</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.index.IndexSplitter</mainClass>
|
|
||||||
<name>IndexSplitter</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.index.MultiPassIndexSplitter</mainClass>
|
|
||||||
<name>MultiPassIndexSplitter</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.misc.GetTermInfo</mainClass>
|
|
||||||
<name>GetTermInfo</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.misc.HighFreqTerms</mainClass>
|
|
||||||
<name>HighFreqTerms</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.misc.IndexMergeTool</mainClass>
|
|
||||||
<name>IndexMergeTool</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.lucene.misc.LengthNormModifier</mainClass>
|
|
||||||
<name>LengthNormModifier</name>
|
|
||||||
</program>
|
|
||||||
</programs>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,15 +35,9 @@
|
||||||
<module-directory>lucene</module-directory>
|
<module-directory>lucene</module-directory>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<modules>
|
<modules>
|
||||||
<module>core</module>
|
<module>core</module>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -31,15 +31,18 @@
|
||||||
<version>@version@</version>
|
<version>@version@</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>Grandparent POM for Apache Lucene Core and Apache Solr</name>
|
<name>Grandparent POM for Apache Lucene Core and Apache Solr</name>
|
||||||
<description>Parent POM for Apache Lucene Core and Apache Solr</description>
|
<description>Grandparent POM for Apache Lucene Core and Apache Solr</description>
|
||||||
<url>http://lucene.apache.org/java</url>
|
<url>http://lucene.apache.org</url>
|
||||||
<modules>
|
<modules>
|
||||||
<module>lucene</module>
|
<module>lucene</module>
|
||||||
<module>solr</module>
|
<module>solr</module>
|
||||||
</modules>
|
</modules>
|
||||||
<properties>
|
<properties>
|
||||||
<top-level>..</top-level>
|
<top-level>..</top-level>
|
||||||
<base.specification.version>4.0.0</base.specification.version>
|
<vc-anonymous-base-url>http://svn.apache.org/repos/asf/lucene/dev/trunk</vc-anonymous-base-url>
|
||||||
|
<vc-dev-base-url>https://svn.apache.org/repos/asf/lucene/dev/trunk</vc-dev-base-url>
|
||||||
|
<vc-browse-base-url>http://svn.apache.org/viewvc/lucene/dev/trunk</vc-browse-base-url>
|
||||||
|
<base.specification.version>5.0.0</base.specification.version>
|
||||||
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
|
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
|
||||||
<java.compat.version>1.6</java.compat.version>
|
<java.compat.version>1.6</java.compat.version>
|
||||||
<jetty.version>8.1.2.v20120308</jetty.version>
|
<jetty.version>8.1.2.v20120308</jetty.version>
|
||||||
|
@ -69,11 +72,11 @@
|
||||||
</properties>
|
</properties>
|
||||||
<issueManagement>
|
<issueManagement>
|
||||||
<system>JIRA</system>
|
<system>JIRA</system>
|
||||||
<url>http://issues.apache.org/jira/browse/LUCENE</url>
|
<url>https://issues.apache.org/jira/browse/LUCENE</url>
|
||||||
</issueManagement>
|
</issueManagement>
|
||||||
<ciManagement>
|
<ciManagement>
|
||||||
<system>Hudson</system>
|
<system>Jenkins</system>
|
||||||
<url>http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/</url>
|
<url>https://builds.apache.org/computer/lucene/</url>
|
||||||
</ciManagement>
|
</ciManagement>
|
||||||
<mailingLists>
|
<mailingLists>
|
||||||
<mailingList>
|
<mailingList>
|
||||||
|
@ -109,15 +112,9 @@
|
||||||
</mailingLists>
|
</mailingLists>
|
||||||
<inceptionYear>2000</inceptionYear>
|
<inceptionYear>2000</inceptionYear>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk
|
<developerConnection>scm:svn:${vc-dev-base-url}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<licenses>
|
<licenses>
|
||||||
<license>
|
<license>
|
||||||
|
@ -388,7 +385,7 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.carrotsearch.randomizedtesting</groupId>
|
<groupId>com.carrotsearch.randomizedtesting</groupId>
|
||||||
<artifactId>randomizedtesting-runner</artifactId>
|
<artifactId>randomizedtesting-runner</artifactId>
|
||||||
<version>1.6.0</version>
|
<version>2.0.0.rc5</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
@ -549,11 +546,6 @@
|
||||||
</archive>
|
</archive>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<version>1.2.1</version>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
<artifactId>build-helper-maven-plugin</artifactId>
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/contrib/analysis-extras</module-directory>
|
<module-directory>solr/contrib/analysis-extras</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -101,17 +94,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/contrib/clustering</module-directory>
|
<module-directory>solr/contrib/clustering</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -106,17 +99,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/contrib/dataimporthandler-extras</module-directory>
|
<module-directory>solr/contrib/dataimporthandler-extras</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -104,17 +97,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/contrib/dataimporthandler</module-directory>
|
<module-directory>solr/contrib/dataimporthandler</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -90,6 +83,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
|
@ -103,15 +102,6 @@
|
||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -38,18 +38,11 @@
|
||||||
<module-directory>solr/contrib/extraction</module-directory>
|
<module-directory>solr/contrib/extraction</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -102,17 +95,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -39,18 +39,11 @@
|
||||||
<module-directory>solr/contrib/langid</module-directory>
|
<module-directory>solr/contrib/langid</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -107,17 +100,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/contrib/uima</module-directory>
|
<module-directory>solr/contrib/uima</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -121,17 +114,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${module-path}/src/test-files</directory>
|
<directory>${module-path}/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/contrib/velocity</module-directory>
|
<module-directory>solr/contrib/velocity</module-directory>
|
||||||
<top-level>../../../..</top-level>
|
<top-level>../../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -142,17 +135,12 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/core/src/test-files</directory>
|
<directory>${top-level}/solr/core/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -35,18 +35,11 @@
|
||||||
<module-directory>solr/core</module-directory>
|
<module-directory>solr/core</module-directory>
|
||||||
<top-level>../../..</top-level>
|
<top-level>../../..</top-level>
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
<surefire-top-level>${top-level}/../..</surefire-top-level>
|
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -243,48 +236,14 @@
|
||||||
<testResource>
|
<testResource>
|
||||||
<directory>${top-level}/solr/solrj/src/test-files</directory>
|
<directory>${top-level}/solr/solrj/src/test-files</directory>
|
||||||
</testResource>
|
</testResource>
|
||||||
|
<testResource>
|
||||||
|
<directory>${top-level}/dev-tools/maven/solr</directory>
|
||||||
|
<includes>
|
||||||
|
<include>maven.testlogging.properties</include>
|
||||||
|
</includes>
|
||||||
|
</testResource>
|
||||||
</testResources>
|
</testResources>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<systemPropertyVariables>
|
|
||||||
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
|
|
||||||
</systemPropertyVariables>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>appassembler-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<extraJvmArguments>-Xmx128M</extraJvmArguments>
|
|
||||||
<repositoryLayout>flat</repositoryLayout>
|
|
||||||
<platforms>
|
|
||||||
<platform>windows</platform>
|
|
||||||
<platform>unix</platform>
|
|
||||||
</platforms>
|
|
||||||
<programs>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
|
|
||||||
<name>JettySolrRunner</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.solr.util.BitSetPerf</mainClass>
|
|
||||||
<name>BitSetPerf</name>
|
|
||||||
<extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.solr.util.SimplePostTool</mainClass>
|
|
||||||
<name>SimplePostTool</name>
|
|
||||||
</program>
|
|
||||||
<program>
|
|
||||||
<mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
|
|
||||||
<name>SuggestMissingFactories</name>
|
|
||||||
</program>
|
|
||||||
</programs>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
<artifactId>build-helper-maven-plugin</artifactId>
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
handlers=java.util.logging.ConsoleHandler
|
||||||
|
.level=SEVERE
|
|
@ -43,26 +43,14 @@
|
||||||
<module-directory>solr</module-directory>
|
<module-directory>solr</module-directory>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<issueManagement>
|
<issueManagement>
|
||||||
<system>JIRA</system>
|
<system>JIRA</system>
|
||||||
<url>http://issues.apache.org/jira/browse/SOLR</url>
|
<url>https://issues.apache.org/jira/browse/SOLR</url>
|
||||||
</issueManagement>
|
</issueManagement>
|
||||||
<ciManagement>
|
|
||||||
<system>Hudson</system>
|
|
||||||
<url>
|
|
||||||
http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
|
|
||||||
</url>
|
|
||||||
</ciManagement>
|
|
||||||
<mailingLists>
|
<mailingLists>
|
||||||
<mailingList>
|
<mailingList>
|
||||||
<name>Solr User List</name>
|
<name>Solr User List</name>
|
||||||
|
@ -111,6 +99,15 @@
|
||||||
<doctitle>${project.name} ${project.version} API (${now.version})</doctitle>
|
<doctitle>${project.name} ${project.version} API (${now.version})</doctitle>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<systemPropertyVariables>
|
||||||
|
<java.util.logging.config.file>../test-classes/maven.testlogging.properties</java.util.logging.config.file>
|
||||||
|
</systemPropertyVariables>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
</build>
|
</build>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<!-- These dependencies are compile scope because this is a test framework. -->
|
<!-- These dependencies are compile scope because this is a test framework. -->
|
||||||
|
@ -60,20 +54,29 @@
|
||||||
<artifactId>solr-core</artifactId>
|
<artifactId>solr-core</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
<!-- SOLR-3263: Provided scope is required to avoid jar signing conflicts -->
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
|
|
||||||
you can exclude the three Jetty dependencies below. -->
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.eclipse.jetty</groupId>
|
<groupId>org.eclipse.jetty</groupId>
|
||||||
<artifactId>jetty-server</artifactId>
|
<artifactId>jetty-servlet</artifactId>
|
||||||
<scope>runtime</scope>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.eclipse.jetty</groupId>
|
<groupId>org.eclipse.jetty</groupId>
|
||||||
<artifactId>jetty-util</artifactId>
|
<artifactId>jetty-util</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
|
||||||
|
you can exclude the two Jetty dependencies below. -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.eclipse.jetty</groupId>
|
||||||
|
<artifactId>jetty-server</artifactId>
|
||||||
<scope>runtime</scope>
|
<scope>runtime</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -37,15 +37,9 @@
|
||||||
<module-path>${top-level}/${module-directory}</module-path>
|
<module-path>${top-level}/${module-directory}</module-path>
|
||||||
</properties>
|
</properties>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>
|
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
|
||||||
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
|
||||||
</connection>
|
<url>${vc-browse-base-url}/${module-directory}</url>
|
||||||
<developerConnection>
|
|
||||||
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
|
|
||||||
</developerConnection>
|
|
||||||
<url>
|
|
||||||
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
|
|
||||||
</url>
|
|
||||||
</scm>
|
</scm>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -58,7 +58,7 @@ def javaExe(version):
|
||||||
|
|
||||||
def verifyJavaVersion(version):
|
def verifyJavaVersion(version):
|
||||||
s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
|
s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
|
||||||
if s.find('java version "%s.' % version) == -1:
|
if s.find(' version "%s.' % version) == -1:
|
||||||
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
|
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
|
||||||
|
|
||||||
# http://s.apache.org/lusolr32rc2
|
# http://s.apache.org/lusolr32rc2
|
||||||
|
@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, tmpDir):
|
||||||
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
|
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
|
||||||
|
|
||||||
def getDirEntries(urlString):
|
def getDirEntries(urlString):
|
||||||
|
if urlString.startswith('file:/') and not urlString.startswith('file://'):
|
||||||
|
# stupid bogus ant URI
|
||||||
|
urlString = "file:///" + urlString[6:]
|
||||||
|
|
||||||
if urlString.startswith('file://'):
|
if urlString.startswith('file://'):
|
||||||
path = urlString[7:]
|
path = urlString[7:]
|
||||||
if path.endswith('/'):
|
if path.endswith('/'):
|
||||||
|
@ -1026,7 +1030,7 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
if len(sys.argv) != 4:
|
if len(sys.argv) < 4:
|
||||||
print()
|
print()
|
||||||
print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
|
print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
|
||||||
print()
|
print()
|
||||||
|
@ -1035,8 +1039,11 @@ def main():
|
||||||
baseURL = sys.argv[1]
|
baseURL = sys.argv[1]
|
||||||
version = sys.argv[2]
|
version = sys.argv[2]
|
||||||
tmpDir = os.path.abspath(sys.argv[3])
|
tmpDir = os.path.abspath(sys.argv[3])
|
||||||
|
isSigned = True
|
||||||
|
if len(sys.argv) == 5:
|
||||||
|
isSigned = (sys.argv[4] == "True")
|
||||||
|
|
||||||
smokeTest(baseURL, version, tmpDir, True)
|
smokeTest(baseURL, version, tmpDir, isSigned)
|
||||||
|
|
||||||
def smokeTest(baseURL, version, tmpDir, isSigned):
|
def smokeTest(baseURL, version, tmpDir, isSigned):
|
||||||
|
|
||||||
|
@ -1090,4 +1097,5 @@ if __name__ == '__main__':
|
||||||
except:
|
except:
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
sys.exit(1)
|
||||||
|
sys.exit(0)
|
||||||
|
|
|
@ -6,6 +6,56 @@ http://s.apache.org/luceneversions
|
||||||
|
|
||||||
======================= Lucene 5.0.0 =======================
|
======================= Lucene 5.0.0 =======================
|
||||||
|
|
||||||
|
======================= Lucene 4.0.0 =======================
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
* LUCENE-1888: Added the option to store payloads in the term
|
||||||
|
vectors (IndexableFieldType.storeTermVectorPayloads()). Note
|
||||||
|
that you must store term vector positions to store payloads.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
|
||||||
|
Previously you had no real way to know that a term vector field
|
||||||
|
had positions or offsets, since this can be configured on a
|
||||||
|
per-field-per-document basis. (Robert Muir)
|
||||||
|
|
||||||
|
* Removed DocsAndPositionsEnum.hasPayload() and simplified the
|
||||||
|
contract of getPayload(). It returns null if there is no payload,
|
||||||
|
otherwise returns the current payload. You can now call it multiple
|
||||||
|
times per position if you want. (Robert Muir)
|
||||||
|
|
||||||
|
* Removed FieldsEnum. Fields API instead implements Iterable<String>
|
||||||
|
and exposes Iterator, so you can iterate over field names with
|
||||||
|
for (String field : fields) instead. (Robert Muir)
|
||||||
|
|
||||||
|
Bug Fixes
|
||||||
|
|
||||||
|
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
|
||||||
|
twice for conjunctions: for most users this is no problem, but
|
||||||
|
if you had a customized Similarity that returned something other
|
||||||
|
than 1 when overlap == maxOverlap (always the case for conjunctions),
|
||||||
|
then the score would be incorrect. (Pascal Chollet, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
|
||||||
|
did not work at all, it would infinitely recurse.
|
||||||
|
(Alberto Paro via Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
|
||||||
|
had a custom Similarity where coord(1,1) != 1F, then the rewritten
|
||||||
|
query would be scored differently. (Robert Muir)
|
||||||
|
|
||||||
|
* Don't allow negatives in the positions file. If you have an index
|
||||||
|
from 2.4.0 or earlier with such negative positions, and you already
|
||||||
|
upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
|
||||||
|
CheckIndex. If it fails, then you need to upgrade again to 4.0 (Robert Muir)
|
||||||
|
|
||||||
|
Build
|
||||||
|
|
||||||
|
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
|
||||||
|
thread leak detection. Added support for suite timeouts. (Dawid Weiss)
|
||||||
|
|
||||||
======================= Lucene 4.0.0-BETA =======================
|
======================= Lucene 4.0.0-BETA =======================
|
||||||
|
|
||||||
|
@ -48,6 +98,11 @@ New features
|
||||||
reader is an NRT reader, and the segment has not yet been merged
|
reader is an NRT reader, and the segment has not yet been merged
|
||||||
away (Mike McCandless).
|
away (Mike McCandless).
|
||||||
|
|
||||||
|
* LUCENE-4286: Added option to CJKBigramFilter to always also output
|
||||||
|
unigrams. This can be used for a unigram+bigram approach, or at
|
||||||
|
index-time only for better support of short queries.
|
||||||
|
(Tom Burton-West, Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
|
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
|
||||||
|
@ -115,6 +170,10 @@ Optimizations
|
||||||
making them substantially more lightweight. Behavior is unchanged.
|
making them substantially more lightweight. Behavior is unchanged.
|
||||||
(Robert Muir)
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4291: Reduced internal buffer size for JFlex-based tokenizers
|
||||||
|
such as StandardTokenizer from 32kb to 8kb.
|
||||||
|
(Raintung Li, Steven Rowe, Robert Muir)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-4109: BooleanQueries are not parsed correctly with the
|
* LUCENE-4109: BooleanQueries are not parsed correctly with the
|
||||||
|
@ -164,6 +223,9 @@ Bug Fixes
|
||||||
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
|
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
|
||||||
(Johannes Christen, Uwe Schindler, Robert Muir)
|
(Johannes Christen, Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
|
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
|
||||||
|
|
|
@ -9,7 +9,7 @@ enumeration APIs. Here are the major changes:
|
||||||
by the BytesRef class (which provides an offset + length "slice"
|
by the BytesRef class (which provides an offset + length "slice"
|
||||||
into an existing byte[]).
|
into an existing byte[]).
|
||||||
|
|
||||||
* Fields are separately enumerated (FieldsEnum) from the terms
|
* Fields are separately enumerated (Fields.iterator()) from the terms
|
||||||
within each field (TermEnum). So instead of this:
|
within each field (TermEnum). So instead of this:
|
||||||
|
|
||||||
TermEnum termsEnum = ...;
|
TermEnum termsEnum = ...;
|
||||||
|
@ -20,10 +20,8 @@ enumeration APIs. Here are the major changes:
|
||||||
|
|
||||||
Do this:
|
Do this:
|
||||||
|
|
||||||
FieldsEnum fieldsEnum = ...;
|
for(String field : fields) {
|
||||||
String field;
|
TermsEnum termsEnum = fields.terms(field);
|
||||||
while((field = fieldsEnum.next()) != null) {
|
|
||||||
TermsEnum termsEnum = fieldsEnum.terms();
|
|
||||||
BytesRef text;
|
BytesRef text;
|
||||||
while((text = termsEnum.next()) != null) {
|
while((text = termsEnum.next()) != null) {
|
||||||
System.out.println("field=" + field + "; text=" + text.utf8ToString());
|
System.out.println("field=" + field + "; text=" + text.utf8ToString());
|
||||||
|
@ -316,11 +314,12 @@ an AtomicReader. Note: using "atomicity emulators" can cause serious
|
||||||
slowdowns due to the need to merge terms, postings, DocValues, and
|
slowdowns due to the need to merge terms, postings, DocValues, and
|
||||||
FieldCache, use them with care!
|
FieldCache, use them with care!
|
||||||
|
|
||||||
## LUCENE-2413: Analyzer package changes
|
## LUCENE-2413,LUCENE-3396: Analyzer package changes
|
||||||
|
|
||||||
Lucene's core and contrib analyzers, along with Solr's analyzers,
|
Lucene's core and contrib analyzers, along with Solr's analyzers,
|
||||||
were consolidated into lucene/analysis. During the refactoring some
|
were consolidated into lucene/analysis. During the refactoring some
|
||||||
package names have changed:
|
package names have changed, and ReusableAnalyzerBase was renamed to
|
||||||
|
Analyzer:
|
||||||
|
|
||||||
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
|
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
|
||||||
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
|
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
|
||||||
|
@ -345,7 +344,7 @@ package names have changed:
|
||||||
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
|
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
|
||||||
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
|
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
|
||||||
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
|
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
|
||||||
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
|
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
|
||||||
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
|
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
|
||||||
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
|
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
|
||||||
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
|
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.charfilter;
|
package org.apache.lucene.analysis.charfilter;
|
||||||
|
|
||||||
|
@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
||||||
/**
|
/**
|
||||||
* This class is a scanner generated by
|
* This class is a scanner generated by
|
||||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||||
* on 7/26/12 6:22 PM from the specification file
|
* on 8/6/12 11:57 AM from the specification file
|
||||||
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
||||||
*/
|
*/
|
||||||
public final class HTMLStripCharFilter extends BaseCharFilter {
|
public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
|
|
||||||
|
@ -31255,6 +31255,93 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
{ yybegin(STYLE);
|
{ yybegin(STYLE);
|
||||||
}
|
}
|
||||||
case 55: break;
|
case 55: break;
|
||||||
|
case 27:
|
||||||
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
||||||
|
// position the correction at (already output length) + (substitution length)
|
||||||
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
|
||||||
|
}
|
||||||
|
case 56: break;
|
||||||
|
case 30:
|
||||||
|
{ int length = yylength();
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, length);
|
||||||
|
entitySegment.clear();
|
||||||
|
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
|
||||||
|
entitySegment.append(ch);
|
||||||
|
outputSegment = entitySegment;
|
||||||
|
yybegin(CHARACTER_REFERENCE_TAIL);
|
||||||
|
}
|
||||||
|
case 57: break;
|
||||||
|
case 48:
|
||||||
|
{ inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
// add (previously matched input length) -- current match and substitution handled below
|
||||||
|
cumulativeDiff += yychar - inputStart;
|
||||||
|
// position the offset correction at (already output length) -- substitution handled below
|
||||||
|
int offsetCorrectionPos = outputCharCount;
|
||||||
|
int returnValue;
|
||||||
|
if (escapeSTYLE) {
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
returnValue = outputSegment.nextChar();
|
||||||
|
} else {
|
||||||
|
// add (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += yylength() - 1;
|
||||||
|
// add (substitution length)
|
||||||
|
++offsetCorrectionPos;
|
||||||
|
returnValue = STYLE_REPLACEMENT;
|
||||||
|
}
|
||||||
|
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
||||||
|
return returnValue;
|
||||||
|
}
|
||||||
|
case 58: break;
|
||||||
|
case 8:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
if (null != escapedTags
|
||||||
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
||||||
|
yybegin(START_TAG_TAIL_INCLUDE);
|
||||||
|
} else {
|
||||||
|
yybegin(START_TAG_TAIL_SUBSTITUTE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 59: break;
|
||||||
|
case 2:
|
||||||
|
{ inputStart = yychar;
|
||||||
|
inputSegment.clear();
|
||||||
|
inputSegment.append('<');
|
||||||
|
yybegin(LEFT_ANGLE_BRACKET);
|
||||||
|
}
|
||||||
|
case 60: break;
|
||||||
|
case 44:
|
||||||
|
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
||||||
|
}
|
||||||
|
case 61: break;
|
||||||
|
case 21:
|
||||||
|
{ previousRestoreState = restoreState;
|
||||||
|
restoreState = SERVER_SIDE_INCLUDE;
|
||||||
|
yybegin(SINGLE_QUOTED_STRING);
|
||||||
|
}
|
||||||
|
case 62: break;
|
||||||
|
case 11:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
yybegin(LEFT_ANGLE_BRACKET_SPACE);
|
||||||
|
}
|
||||||
|
case 63: break;
|
||||||
|
case 35:
|
||||||
|
{ yybegin(SCRIPT);
|
||||||
|
}
|
||||||
|
case 64: break;
|
||||||
|
case 42:
|
||||||
|
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
||||||
|
}
|
||||||
|
case 65: break;
|
||||||
|
case 10:
|
||||||
|
{ inputSegment.append('!'); yybegin(BANG);
|
||||||
|
}
|
||||||
|
case 66: break;
|
||||||
case 51:
|
case 51:
|
||||||
{ // Handle paired UTF-16 surrogates.
|
{ // Handle paired UTF-16 surrogates.
|
||||||
String surrogatePair = yytext();
|
String surrogatePair = yytext();
|
||||||
|
@ -31288,13 +31375,331 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
inputSegment.append('#');
|
inputSegment.append('#');
|
||||||
yybegin(NUMERIC_CHARACTER);
|
yybegin(NUMERIC_CHARACTER);
|
||||||
}
|
}
|
||||||
case 56: break;
|
case 67: break;
|
||||||
case 21:
|
case 4:
|
||||||
|
{ yypushback(1);
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
outputSegment.restart();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
case 68: break;
|
||||||
|
case 43:
|
||||||
|
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
||||||
|
}
|
||||||
|
case 69: break;
|
||||||
|
case 52:
|
||||||
|
{ // Handle paired UTF-16 surrogates.
|
||||||
|
String surrogatePair = yytext();
|
||||||
|
char highSurrogate = '\u0000';
|
||||||
|
try { // High surrogates are in decimal range [55296, 56319]
|
||||||
|
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
|
||||||
|
} catch(Exception e) { // should never happen
|
||||||
|
assert false: "Exception parsing high surrogate '"
|
||||||
|
+ surrogatePair.substring(1, 6) + "'";
|
||||||
|
}
|
||||||
|
if (Character.isHighSurrogate(highSurrogate)) {
|
||||||
|
outputSegment = entitySegment;
|
||||||
|
outputSegment.clear();
|
||||||
|
try {
|
||||||
|
outputSegment.unsafeWrite
|
||||||
|
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
||||||
|
} catch(Exception e) { // should never happen
|
||||||
|
assert false: "Exception parsing low surrogate '"
|
||||||
|
+ surrogatePair.substring(10, 14) + "'";
|
||||||
|
}
|
||||||
|
// add (previously matched input length) + (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
||||||
|
// position the correction at (already output length) + (substitution length)
|
||||||
|
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return highSurrogate;
|
||||||
|
}
|
||||||
|
yypushback(surrogatePair.length() - 1); // Consume only '#'
|
||||||
|
inputSegment.append('#');
|
||||||
|
yybegin(NUMERIC_CHARACTER);
|
||||||
|
}
|
||||||
|
case 70: break;
|
||||||
|
case 28:
|
||||||
|
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
||||||
|
}
|
||||||
|
case 71: break;
|
||||||
|
case 50:
|
||||||
|
{ // Handle paired UTF-16 surrogates.
|
||||||
|
outputSegment = entitySegment;
|
||||||
|
outputSegment.clear();
|
||||||
|
String surrogatePair = yytext();
|
||||||
|
char highSurrogate = '\u0000';
|
||||||
|
try {
|
||||||
|
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
|
||||||
|
} catch(Exception e) { // should never happen
|
||||||
|
assert false: "Exception parsing high surrogate '"
|
||||||
|
+ surrogatePair.substring(2, 6) + "'";
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
outputSegment.unsafeWrite
|
||||||
|
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
||||||
|
} catch(Exception e) { // should never happen
|
||||||
|
assert false: "Exception parsing low surrogate '"
|
||||||
|
+ surrogatePair.substring(10, 14) + "'";
|
||||||
|
}
|
||||||
|
// add (previously matched input length) + (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
||||||
|
// position the correction at (already output length) + (substitution length)
|
||||||
|
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return highSurrogate;
|
||||||
|
}
|
||||||
|
case 72: break;
|
||||||
|
case 16:
|
||||||
|
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
||||||
|
}
|
||||||
|
case 73: break;
|
||||||
|
case 22:
|
||||||
{ previousRestoreState = restoreState;
|
{ previousRestoreState = restoreState;
|
||||||
restoreState = SERVER_SIDE_INCLUDE;
|
restoreState = SERVER_SIDE_INCLUDE;
|
||||||
yybegin(SINGLE_QUOTED_STRING);
|
yybegin(DOUBLE_QUOTED_STRING);
|
||||||
}
|
}
|
||||||
case 57: break;
|
case 74: break;
|
||||||
|
case 26:
|
||||||
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength();
|
||||||
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||||
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
}
|
||||||
|
case 75: break;
|
||||||
|
case 20:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
}
|
||||||
|
case 76: break;
|
||||||
|
case 47:
|
||||||
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength();
|
||||||
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||||
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(CDATA);
|
||||||
|
}
|
||||||
|
case 77: break;
|
||||||
|
case 33:
|
||||||
|
{ yybegin(YYINITIAL);
|
||||||
|
if (escapeBR) {
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
} else {
|
||||||
|
// add (previously matched input length) + (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
||||||
|
// position the correction at (already output length) + (substitution length)
|
||||||
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
||||||
|
inputSegment.reset();
|
||||||
|
return BR_START_TAG_REPLACEMENT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 78: break;
|
||||||
|
case 23:
|
||||||
|
{ yybegin(restoreState); restoreState = previousRestoreState;
|
||||||
|
}
|
||||||
|
case 79: break;
|
||||||
|
case 32:
|
||||||
|
{ yybegin(COMMENT);
|
||||||
|
}
|
||||||
|
case 80: break;
|
||||||
|
case 24:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
case 81: break;
|
||||||
|
case 3:
|
||||||
|
{ inputStart = yychar;
|
||||||
|
inputSegment.clear();
|
||||||
|
inputSegment.append('&');
|
||||||
|
yybegin(AMPERSAND);
|
||||||
|
}
|
||||||
|
case 82: break;
|
||||||
|
case 46:
|
||||||
|
{ yybegin(SCRIPT);
|
||||||
|
if (escapeSCRIPT) {
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
inputStart += 1 + yylength();
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 83: break;
|
||||||
|
case 14:
|
||||||
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength();
|
||||||
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||||
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
}
|
||||||
|
case 84: break;
|
||||||
|
case 6:
|
||||||
|
{ int matchLength = yylength();
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
||||||
|
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
|
||||||
|
String decimalCharRef = yytext();
|
||||||
|
int codePoint = 0;
|
||||||
|
try {
|
||||||
|
codePoint = Integer.parseInt(decimalCharRef);
|
||||||
|
} catch(Exception e) {
|
||||||
|
assert false: "Exception parsing code point '" + decimalCharRef + "'";
|
||||||
|
}
|
||||||
|
if (codePoint <= 0x10FFFF) {
|
||||||
|
outputSegment = entitySegment;
|
||||||
|
outputSegment.clear();
|
||||||
|
if (codePoint >= Character.MIN_SURROGATE
|
||||||
|
&& codePoint <= Character.MAX_SURROGATE) {
|
||||||
|
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
|
||||||
|
} else {
|
||||||
|
outputSegment.setLength
|
||||||
|
(Character.toChars(codePoint, outputSegment.getArray(), 0));
|
||||||
|
}
|
||||||
|
yybegin(CHARACTER_REFERENCE_TAIL);
|
||||||
|
} else {
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 85: break;
|
||||||
|
case 34:
|
||||||
|
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
|
||||||
|
cumulativeDiff += yychar - inputStart + yylength();
|
||||||
|
// position the correction at (already output length) [ + (substitution length) = 0]
|
||||||
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
}
|
||||||
|
case 86: break;
|
||||||
|
case 5:
|
||||||
|
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
|
||||||
|
}
|
||||||
|
case 87: break;
|
||||||
|
case 13:
|
||||||
|
{ inputSegment.append(zzBuffer[zzStartRead]);
|
||||||
|
}
|
||||||
|
case 88: break;
|
||||||
|
case 18:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
if (null != escapedTags
|
||||||
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
||||||
|
yybegin(END_TAG_TAIL_INCLUDE);
|
||||||
|
} else {
|
||||||
|
yybegin(END_TAG_TAIL_SUBSTITUTE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 89: break;
|
||||||
|
case 40:
|
||||||
|
{ yybegin(SCRIPT_COMMENT);
|
||||||
|
}
|
||||||
|
case 90: break;
|
||||||
|
case 37:
|
||||||
|
{ // add (this match length) [ - (substitution length) = 0 ]
|
||||||
|
cumulativeDiff += yylength();
|
||||||
|
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
||||||
|
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
}
|
||||||
|
case 91: break;
|
||||||
|
case 12:
|
||||||
|
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
|
||||||
|
}
|
||||||
|
case 92: break;
|
||||||
|
case 9:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
if (null != escapedTags
|
||||||
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
||||||
|
yybegin(START_TAG_TAIL_INCLUDE);
|
||||||
|
} else {
|
||||||
|
yybegin(START_TAG_TAIL_EXCLUDE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 93: break;
|
||||||
|
case 49:
|
||||||
|
{ inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
// add (previously matched input length) -- current match and substitution handled below
|
||||||
|
cumulativeDiff += yychar - inputStart;
|
||||||
|
// position at (already output length) -- substitution handled below
|
||||||
|
int offsetCorrectionPos = outputCharCount;
|
||||||
|
int returnValue;
|
||||||
|
if (escapeSCRIPT) {
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
returnValue = outputSegment.nextChar();
|
||||||
|
} else {
|
||||||
|
// add (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += yylength() - 1;
|
||||||
|
// add (substitution length)
|
||||||
|
++offsetCorrectionPos;
|
||||||
|
returnValue = SCRIPT_REPLACEMENT;
|
||||||
|
}
|
||||||
|
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
||||||
|
return returnValue;
|
||||||
|
}
|
||||||
|
case 94: break;
|
||||||
|
case 29:
|
||||||
|
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
||||||
|
}
|
||||||
|
case 95: break;
|
||||||
|
case 17:
|
||||||
|
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
||||||
|
}
|
||||||
|
case 96: break;
|
||||||
|
case 45:
|
||||||
|
{ yybegin(STYLE);
|
||||||
|
if (escapeSTYLE) {
|
||||||
|
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
outputSegment = inputSegment;
|
||||||
|
inputStart += 1 + yylength();
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 97: break;
|
||||||
|
case 7:
|
||||||
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
|
||||||
|
// position the correction at (already output length) + (substitution length)
|
||||||
|
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return outputSegment.nextChar();
|
||||||
|
}
|
||||||
|
case 98: break;
|
||||||
|
case 19:
|
||||||
|
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
||||||
|
if (null != escapedTags
|
||||||
|
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
||||||
|
yybegin(END_TAG_TAIL_INCLUDE);
|
||||||
|
} else {
|
||||||
|
yybegin(END_TAG_TAIL_EXCLUDE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 99: break;
|
||||||
|
case 25:
|
||||||
|
{ // add (previously matched input length) + (this match length) - (substitution length)
|
||||||
|
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
||||||
|
// position the correction at (already output length) + (substitution length)
|
||||||
|
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
||||||
|
inputSegment.clear();
|
||||||
|
yybegin(YYINITIAL);
|
||||||
|
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
|
||||||
|
}
|
||||||
|
case 100: break;
|
||||||
case 31:
|
case 31:
|
||||||
{ int matchLength = yylength();
|
{ int matchLength = yylength();
|
||||||
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
||||||
|
@ -31329,66 +31734,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
return outputSegment.nextChar();
|
return outputSegment.nextChar();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case 58: break;
|
case 101: break;
|
||||||
case 19:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
if (null != escapedTags
|
|
||||||
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
||||||
yybegin(END_TAG_TAIL_INCLUDE);
|
|
||||||
} else {
|
|
||||||
yybegin(END_TAG_TAIL_EXCLUDE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 59: break;
|
|
||||||
case 2:
|
|
||||||
{ inputStart = yychar;
|
|
||||||
inputSegment.clear();
|
|
||||||
inputSegment.append('<');
|
|
||||||
yybegin(LEFT_ANGLE_BRACKET);
|
|
||||||
}
|
|
||||||
case 60: break;
|
|
||||||
case 27:
|
|
||||||
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
||||||
// position the correction at (already output length) + (substitution length)
|
|
||||||
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
|
|
||||||
}
|
|
||||||
case 61: break;
|
|
||||||
case 44:
|
|
||||||
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
||||||
}
|
|
||||||
case 62: break;
|
|
||||||
case 35:
|
|
||||||
{ yybegin(SCRIPT);
|
|
||||||
}
|
|
||||||
case 63: break;
|
|
||||||
case 42:
|
|
||||||
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
||||||
}
|
|
||||||
case 64: break;
|
|
||||||
case 10:
|
|
||||||
{ inputSegment.append('!'); yybegin(BANG);
|
|
||||||
}
|
|
||||||
case 65: break;
|
|
||||||
case 33:
|
|
||||||
{ yybegin(YYINITIAL);
|
|
||||||
if (escapeBR) {
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
} else {
|
|
||||||
// add (previously matched input length) + (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
||||||
// position the correction at (already output length) + (substitution length)
|
|
||||||
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
||||||
inputSegment.reset();
|
|
||||||
return BR_START_TAG_REPLACEMENT;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 66: break;
|
|
||||||
case 53:
|
case 53:
|
||||||
{ // Handle paired UTF-16 surrogates.
|
{ // Handle paired UTF-16 surrogates.
|
||||||
String surrogatePair = yytext();
|
String surrogatePair = yytext();
|
||||||
|
@ -31424,288 +31770,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
inputSegment.append('#');
|
inputSegment.append('#');
|
||||||
yybegin(NUMERIC_CHARACTER);
|
yybegin(NUMERIC_CHARACTER);
|
||||||
}
|
}
|
||||||
case 67: break;
|
case 102: break;
|
||||||
case 43:
|
|
||||||
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
|
|
||||||
}
|
|
||||||
case 68: break;
|
|
||||||
case 30:
|
|
||||||
{ int length = yylength();
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, length);
|
|
||||||
entitySegment.clear();
|
|
||||||
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
|
|
||||||
entitySegment.append(ch);
|
|
||||||
outputSegment = entitySegment;
|
|
||||||
yybegin(CHARACTER_REFERENCE_TAIL);
|
|
||||||
}
|
|
||||||
case 69: break;
|
|
||||||
case 28:
|
|
||||||
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
|
||||||
}
|
|
||||||
case 70: break;
|
|
||||||
case 3:
|
|
||||||
{ inputStart = yychar;
|
|
||||||
inputSegment.clear();
|
|
||||||
inputSegment.append('&');
|
|
||||||
yybegin(AMPERSAND);
|
|
||||||
}
|
|
||||||
case 71: break;
|
|
||||||
case 16:
|
|
||||||
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
|
|
||||||
}
|
|
||||||
case 72: break;
|
|
||||||
case 52:
|
|
||||||
{ // Handle paired UTF-16 surrogates.
|
|
||||||
String surrogatePair = yytext();
|
|
||||||
char highSurrogate = '\u0000';
|
|
||||||
try { // High surrogates are in decimal range [55296, 56319]
|
|
||||||
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
|
|
||||||
} catch(Exception e) { // should never happen
|
|
||||||
assert false: "Exception parsing high surrogate '"
|
|
||||||
+ surrogatePair.substring(1, 6) + "'";
|
|
||||||
}
|
|
||||||
if (Character.isHighSurrogate(highSurrogate)) {
|
|
||||||
outputSegment = entitySegment;
|
|
||||||
outputSegment.clear();
|
|
||||||
try {
|
|
||||||
outputSegment.unsafeWrite
|
|
||||||
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
|
||||||
} catch(Exception e) { // should never happen
|
|
||||||
assert false: "Exception parsing low surrogate '"
|
|
||||||
+ surrogatePair.substring(10, 14) + "'";
|
|
||||||
}
|
|
||||||
// add (previously matched input length) + (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
|
||||||
// position the correction at (already output length) + (substitution length)
|
|
||||||
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return highSurrogate;
|
|
||||||
}
|
|
||||||
yypushback(surrogatePair.length() - 1); // Consume only '#'
|
|
||||||
inputSegment.append('#');
|
|
||||||
yybegin(NUMERIC_CHARACTER);
|
|
||||||
}
|
|
||||||
case 73: break;
|
|
||||||
case 6:
|
|
||||||
{ int matchLength = yylength();
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, matchLength);
|
|
||||||
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
|
|
||||||
String decimalCharRef = yytext();
|
|
||||||
int codePoint = 0;
|
|
||||||
try {
|
|
||||||
codePoint = Integer.parseInt(decimalCharRef);
|
|
||||||
} catch(Exception e) {
|
|
||||||
assert false: "Exception parsing code point '" + decimalCharRef + "'";
|
|
||||||
}
|
|
||||||
if (codePoint <= 0x10FFFF) {
|
|
||||||
outputSegment = entitySegment;
|
|
||||||
outputSegment.clear();
|
|
||||||
if (codePoint >= Character.MIN_SURROGATE
|
|
||||||
&& codePoint <= Character.MAX_SURROGATE) {
|
|
||||||
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
|
|
||||||
} else {
|
|
||||||
outputSegment.setLength
|
|
||||||
(Character.toChars(codePoint, outputSegment.getArray(), 0));
|
|
||||||
}
|
|
||||||
yybegin(CHARACTER_REFERENCE_TAIL);
|
|
||||||
} else {
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 74: break;
|
|
||||||
case 37:
|
|
||||||
{ // add (this match length) [ - (substitution length) = 0 ]
|
|
||||||
cumulativeDiff += yylength();
|
|
||||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
||||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
}
|
|
||||||
case 75: break;
|
|
||||||
case 8:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
if (null != escapedTags
|
|
||||||
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
||||||
yybegin(START_TAG_TAIL_INCLUDE);
|
|
||||||
} else {
|
|
||||||
yybegin(START_TAG_TAIL_SUBSTITUTE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 76: break;
|
|
||||||
case 46:
|
|
||||||
{ yybegin(SCRIPT);
|
|
||||||
if (escapeSCRIPT) {
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
inputStart += 1 + yylength();
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 77: break;
|
|
||||||
case 11:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
yybegin(LEFT_ANGLE_BRACKET_SPACE);
|
|
||||||
}
|
|
||||||
case 78: break;
|
|
||||||
case 20:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
}
|
|
||||||
case 79: break;
|
|
||||||
case 34:
|
|
||||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
|
|
||||||
cumulativeDiff += yychar - inputStart + yylength();
|
|
||||||
// position the correction at (already output length) [ + (substitution length) = 0]
|
|
||||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
}
|
|
||||||
case 80: break;
|
|
||||||
case 23:
|
|
||||||
{ yybegin(restoreState); restoreState = previousRestoreState;
|
|
||||||
}
|
|
||||||
case 81: break;
|
|
||||||
case 32:
|
|
||||||
{ yybegin(COMMENT);
|
|
||||||
}
|
|
||||||
case 82: break;
|
|
||||||
case 14:
|
|
||||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength();
|
|
||||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
||||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
}
|
|
||||||
case 83: break;
|
|
||||||
case 18:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
if (null != escapedTags
|
|
||||||
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
||||||
yybegin(END_TAG_TAIL_INCLUDE);
|
|
||||||
} else {
|
|
||||||
yybegin(END_TAG_TAIL_SUBSTITUTE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 84: break;
|
|
||||||
case 25:
|
|
||||||
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength() - 1;
|
|
||||||
// position the correction at (already output length) + (substitution length)
|
|
||||||
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
|
|
||||||
}
|
|
||||||
case 85: break;
|
|
||||||
case 7:
|
|
||||||
{ // add (previously matched input length) + (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
|
|
||||||
// position the correction at (already output length) + (substitution length)
|
|
||||||
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
case 86: break;
|
|
||||||
case 48:
|
|
||||||
{ inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
// add (previously matched input length) -- current match and substitution handled below
|
|
||||||
cumulativeDiff += yychar - inputStart;
|
|
||||||
// position the offset correction at (already output length) -- substitution handled below
|
|
||||||
int offsetCorrectionPos = outputCharCount;
|
|
||||||
int returnValue;
|
|
||||||
if (escapeSTYLE) {
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
returnValue = outputSegment.nextChar();
|
|
||||||
} else {
|
|
||||||
// add (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += yylength() - 1;
|
|
||||||
// add (substitution length)
|
|
||||||
++offsetCorrectionPos;
|
|
||||||
returnValue = STYLE_REPLACEMENT;
|
|
||||||
}
|
|
||||||
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
|
||||||
return returnValue;
|
|
||||||
}
|
|
||||||
case 87: break;
|
|
||||||
case 5:
|
|
||||||
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
|
|
||||||
}
|
|
||||||
case 88: break;
|
|
||||||
case 26:
|
|
||||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength();
|
|
||||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
||||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
}
|
|
||||||
case 89: break;
|
|
||||||
case 13:
|
|
||||||
{ inputSegment.append(zzBuffer[zzStartRead]);
|
|
||||||
}
|
|
||||||
case 90: break;
|
|
||||||
case 50:
|
|
||||||
{ // Handle paired UTF-16 surrogates.
|
|
||||||
outputSegment = entitySegment;
|
|
||||||
outputSegment.clear();
|
|
||||||
String surrogatePair = yytext();
|
|
||||||
char highSurrogate = '\u0000';
|
|
||||||
try {
|
|
||||||
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
|
|
||||||
} catch(Exception e) { // should never happen
|
|
||||||
assert false: "Exception parsing high surrogate '"
|
|
||||||
+ surrogatePair.substring(2, 6) + "'";
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
outputSegment.unsafeWrite
|
|
||||||
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
|
|
||||||
} catch(Exception e) { // should never happen
|
|
||||||
assert false: "Exception parsing low surrogate '"
|
|
||||||
+ surrogatePair.substring(10, 14) + "'";
|
|
||||||
}
|
|
||||||
// add (previously matched input length) + (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength() - 2;
|
|
||||||
// position the correction at (already output length) + (substitution length)
|
|
||||||
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return highSurrogate;
|
|
||||||
}
|
|
||||||
case 91: break;
|
|
||||||
case 40:
|
|
||||||
{ yybegin(SCRIPT_COMMENT);
|
|
||||||
}
|
|
||||||
case 92: break;
|
|
||||||
case 45:
|
|
||||||
{ yybegin(STYLE);
|
|
||||||
if (escapeSTYLE) {
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
inputStart += 1 + yylength();
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 93: break;
|
|
||||||
case 22:
|
|
||||||
{ previousRestoreState = restoreState;
|
|
||||||
restoreState = SERVER_SIDE_INCLUDE;
|
|
||||||
yybegin(DOUBLE_QUOTED_STRING);
|
|
||||||
}
|
|
||||||
case 94: break;
|
|
||||||
case 12:
|
|
||||||
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
|
|
||||||
}
|
|
||||||
case 95: break;
|
|
||||||
case 36:
|
case 36:
|
||||||
{ yybegin(YYINITIAL);
|
{ yybegin(YYINITIAL);
|
||||||
if (escapeBR) {
|
if (escapeBR) {
|
||||||
|
@ -31721,83 +31786,18 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
return BR_END_TAG_REPLACEMENT;
|
return BR_END_TAG_REPLACEMENT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case 96: break;
|
case 103: break;
|
||||||
case 24:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
case 97: break;
|
|
||||||
case 47:
|
|
||||||
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
|
|
||||||
cumulativeDiff += inputSegment.length() + yylength();
|
|
||||||
// position the correction at (already output length) [ + (substitution length) = 0 ]
|
|
||||||
addOffCorrectMap(outputCharCount, cumulativeDiff);
|
|
||||||
inputSegment.clear();
|
|
||||||
yybegin(CDATA);
|
|
||||||
}
|
|
||||||
case 98: break;
|
|
||||||
case 29:
|
|
||||||
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
|
||||||
}
|
|
||||||
case 99: break;
|
|
||||||
case 17:
|
|
||||||
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
|
|
||||||
}
|
|
||||||
case 100: break;
|
|
||||||
case 9:
|
|
||||||
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
if (null != escapedTags
|
|
||||||
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
|
|
||||||
yybegin(START_TAG_TAIL_INCLUDE);
|
|
||||||
} else {
|
|
||||||
yybegin(START_TAG_TAIL_EXCLUDE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case 101: break;
|
|
||||||
case 49:
|
|
||||||
{ inputSegment.clear();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
// add (previously matched input length) -- current match and substitution handled below
|
|
||||||
cumulativeDiff += yychar - inputStart;
|
|
||||||
// position at (already output length) -- substitution handled below
|
|
||||||
int offsetCorrectionPos = outputCharCount;
|
|
||||||
int returnValue;
|
|
||||||
if (escapeSCRIPT) {
|
|
||||||
inputSegment.write(zzBuffer, zzStartRead, yylength());
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
returnValue = outputSegment.nextChar();
|
|
||||||
} else {
|
|
||||||
// add (this match length) - (substitution length)
|
|
||||||
cumulativeDiff += yylength() - 1;
|
|
||||||
// add (substitution length)
|
|
||||||
++offsetCorrectionPos;
|
|
||||||
returnValue = SCRIPT_REPLACEMENT;
|
|
||||||
}
|
|
||||||
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
|
|
||||||
return returnValue;
|
|
||||||
}
|
|
||||||
case 102: break;
|
|
||||||
case 38:
|
case 38:
|
||||||
{ yybegin(restoreState);
|
{ yybegin(restoreState);
|
||||||
}
|
}
|
||||||
case 103: break;
|
case 104: break;
|
||||||
case 41:
|
case 41:
|
||||||
{ yybegin(STYLE_COMMENT);
|
{ yybegin(STYLE_COMMENT);
|
||||||
}
|
}
|
||||||
case 104: break;
|
case 105: break;
|
||||||
case 1:
|
case 1:
|
||||||
{ return zzBuffer[zzStartRead];
|
{ return zzBuffer[zzStartRead];
|
||||||
}
|
}
|
||||||
case 105: break;
|
|
||||||
case 4:
|
|
||||||
{ yypushback(1);
|
|
||||||
outputSegment = inputSegment;
|
|
||||||
outputSegment.restart();
|
|
||||||
yybegin(YYINITIAL);
|
|
||||||
return outputSegment.nextChar();
|
|
||||||
}
|
|
||||||
case 106: break;
|
case 106: break;
|
||||||
default:
|
default:
|
||||||
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
||||||
|
|
|
@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU] |
|
||||||
[vV][aA][rR] )
|
[vV][aA][rR] )
|
||||||
|
|
||||||
|
|
||||||
%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
|
%include HTMLCharacterEntities.jflex
|
||||||
|
|
||||||
%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
|
%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
|
||||||
|
|
||||||
%{
|
%{
|
||||||
private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;
|
private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;
|
||||||
|
|
|
@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
|
||||||
|
@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
* {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
|
* {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
|
||||||
* of the CJK scripts are turned into bigrams.
|
* of the CJK scripts are turned into bigrams.
|
||||||
* <p>
|
* <p>
|
||||||
|
* By default, when a CJK character has no adjacent characters to form
|
||||||
|
* a bigram, it is output in unigram form. If you want to always output
|
||||||
|
* both unigrams and bigrams, set the <code>outputUnigrams</code>
|
||||||
|
* flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
|
||||||
|
* This can be used for a combined unigram+bigram approach.
|
||||||
|
* <p>
|
||||||
* In all cases, all non-CJK input is passed thru unmodified.
|
* In all cases, all non-CJK input is passed thru unmodified.
|
||||||
*/
|
*/
|
||||||
public final class CJKBigramFilter extends TokenFilter {
|
public final class CJKBigramFilter extends TokenFilter {
|
||||||
|
@ -68,9 +76,15 @@ public final class CJKBigramFilter extends TokenFilter {
|
||||||
private final Object doKatakana;
|
private final Object doKatakana;
|
||||||
private final Object doHangul;
|
private final Object doHangul;
|
||||||
|
|
||||||
|
// true if we should output unigram tokens always
|
||||||
|
private final boolean outputUnigrams;
|
||||||
|
private boolean ngramState; // false = output unigram, true = output bigram
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||||
|
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
|
private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
|
||||||
|
|
||||||
// buffers containing codepoint and offsets in parallel
|
// buffers containing codepoint and offsets in parallel
|
||||||
int buffer[] = new int[8];
|
int buffer[] = new int[8];
|
||||||
|
@ -88,23 +102,36 @@ public final class CJKBigramFilter extends TokenFilter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
|
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
|
||||||
* CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
|
* CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
|
||||||
*/
|
*/
|
||||||
public CJKBigramFilter(TokenStream in) {
|
public CJKBigramFilter(TokenStream in) {
|
||||||
this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
|
this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
|
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
|
||||||
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
|
* CJKBigramFilter(in, flags, false)}
|
||||||
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
|
|
||||||
*/
|
*/
|
||||||
public CJKBigramFilter(TokenStream in, int flags) {
|
public CJKBigramFilter(TokenStream in, int flags) {
|
||||||
|
this(in, flags, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
|
||||||
|
* and whether or not unigrams should also be output.
|
||||||
|
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
|
||||||
|
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
|
||||||
|
* @param outputUnigrams true if unigrams for the selected writing systems should also be output.
|
||||||
|
* When this is false, unigrams are only output when there are no adjacent characters to form
|
||||||
|
* a bigram.
|
||||||
|
*/
|
||||||
|
public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
|
||||||
super(in);
|
super(in);
|
||||||
doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
|
doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
|
||||||
doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
|
doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
|
||||||
doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
|
doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
|
||||||
doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
|
doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
|
||||||
|
this.outputUnigrams = outputUnigrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -120,7 +147,24 @@ public final class CJKBigramFilter extends TokenFilter {
|
||||||
// case 1: we have multiple remaining codepoints buffered,
|
// case 1: we have multiple remaining codepoints buffered,
|
||||||
// so we can emit a bigram here.
|
// so we can emit a bigram here.
|
||||||
|
|
||||||
|
if (outputUnigrams) {
|
||||||
|
|
||||||
|
// when also outputting unigrams, we output the unigram first,
|
||||||
|
// then rewind back to revisit the bigram.
|
||||||
|
// so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
|
||||||
|
// the logic in hasBufferedUnigram ensures we output the C,
|
||||||
|
// even though it did actually have adjacent CJK characters.
|
||||||
|
|
||||||
|
if (ngramState) {
|
||||||
flushBigram();
|
flushBigram();
|
||||||
|
} else {
|
||||||
|
flushUnigram();
|
||||||
|
index--;
|
||||||
|
}
|
||||||
|
ngramState = !ngramState;
|
||||||
|
} else {
|
||||||
|
flushBigram();
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
} else if (doNext()) {
|
} else if (doNext()) {
|
||||||
|
|
||||||
|
@ -260,6 +304,11 @@ public final class CJKBigramFilter extends TokenFilter {
|
||||||
termAtt.setLength(len2);
|
termAtt.setLength(len2);
|
||||||
offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
|
offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
|
||||||
typeAtt.setType(DOUBLE_TYPE);
|
typeAtt.setType(DOUBLE_TYPE);
|
||||||
|
// when outputting unigrams, all bigrams are synonyms that span two unigrams
|
||||||
|
if (outputUnigrams) {
|
||||||
|
posIncAtt.setPositionIncrement(0);
|
||||||
|
posLengthAtt.setPositionLength(2);
|
||||||
|
}
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,8 +341,14 @@ public final class CJKBigramFilter extends TokenFilter {
|
||||||
* inputs.
|
* inputs.
|
||||||
*/
|
*/
|
||||||
private boolean hasBufferedUnigram() {
|
private boolean hasBufferedUnigram() {
|
||||||
|
if (outputUnigrams) {
|
||||||
|
// when outputting unigrams always
|
||||||
|
return bufferLen - index == 1;
|
||||||
|
} else {
|
||||||
|
// otherwise it's only when we have a lone CJK character
|
||||||
return bufferLen == 1 && index == 0;
|
return bufferLen == 1 && index == 0;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
|
@ -303,5 +358,6 @@ public final class CJKBigramFilter extends TokenFilter {
|
||||||
lastEndOffset = 0;
|
lastEndOffset = 0;
|
||||||
loneState = null;
|
loneState = null;
|
||||||
exhausted = false;
|
exhausted = false;
|
||||||
|
ngramState = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||||
* <filter class="solr.CJKBigramFilterFactory"
|
* <filter class="solr.CJKBigramFilterFactory"
|
||||||
* han="true" hiragana="true"
|
* han="true" hiragana="true"
|
||||||
* katakana="true" hangul="true" />
|
* katakana="true" hangul="true" outputUnigrams="false" />
|
||||||
* </analyzer>
|
* </analyzer>
|
||||||
* </fieldType></pre>
|
* </fieldType></pre>
|
||||||
*/
|
*/
|
||||||
public class CJKBigramFilterFactory extends TokenFilterFactory {
|
public class CJKBigramFilterFactory extends TokenFilterFactory {
|
||||||
int flags;
|
int flags;
|
||||||
|
boolean outputUnigrams;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(Map<String,String> args) {
|
public void init(Map<String,String> args) {
|
||||||
|
@ -56,10 +57,11 @@ public class CJKBigramFilterFactory extends TokenFilterFactory {
|
||||||
if (getBoolean("hangul", true)) {
|
if (getBoolean("hangul", true)) {
|
||||||
flags |= CJKBigramFilter.HANGUL;
|
flags |= CJKBigramFilter.HANGUL;
|
||||||
}
|
}
|
||||||
|
outputUnigrams = getBoolean("outputUnigrams", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream input) {
|
public TokenStream create(TokenStream input) {
|
||||||
return new CJKBigramFilter(input, flags);
|
return new CJKBigramFilter(input, flags, outputUnigrams);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.standard;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
|
@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
/**
|
/**
|
||||||
* This class is a scanner generated by
|
* This class is a scanner generated by
|
||||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||||
* on 7/15/12 1:57 AM from the specification file
|
* on 8/6/12 11:57 AM from the specification file
|
||||||
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
|
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
|
||||||
*/
|
*/
|
||||||
class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
|
||||||
/** initial size of the lookahead buffer */
|
/** initial size of the lookahead buffer */
|
||||||
private static final int ZZ_BUFFERSIZE = 16384;
|
private static final int ZZ_BUFFERSIZE = 4096;
|
||||||
|
|
||||||
/** lexical states */
|
/** lexical states */
|
||||||
public static final int YYINITIAL = 0;
|
public static final int YYINITIAL = 0;
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%pack
|
%pack
|
||||||
%char
|
%char
|
||||||
|
%buffer 4096
|
||||||
|
|
||||||
%{
|
%{
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
|
// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
|
||||||
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.standard;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
|
||||||
/** initial size of the lookahead buffer */
|
/** initial size of the lookahead buffer */
|
||||||
private static final int ZZ_BUFFERSIZE = 16384;
|
private static final int ZZ_BUFFERSIZE = 4096;
|
||||||
|
|
||||||
/** lexical states */
|
/** lexical states */
|
||||||
public static final int YYINITIAL = 0;
|
public static final int YYINITIAL = 0;
|
||||||
|
|
|
@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%implements StandardTokenizerInterface
|
%implements StandardTokenizerInterface
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%char
|
%char
|
||||||
|
%buffer 4096
|
||||||
|
|
||||||
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
|
%include SUPPLEMENTARY.jflex-macro
|
||||||
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
||||||
Format = ([\p{WB:Format}] | {FormatSupp})
|
Format = ([\p{WB:Format}] | {FormatSupp})
|
||||||
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.standard;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
|
||||||
/** initial size of the lookahead buffer */
|
/** initial size of the lookahead buffer */
|
||||||
private static final int ZZ_BUFFERSIZE = 16384;
|
private static final int ZZ_BUFFERSIZE = 4096;
|
||||||
|
|
||||||
/** lexical states */
|
/** lexical states */
|
||||||
public static final int YYINITIAL = 0;
|
public static final int YYINITIAL = 0;
|
||||||
|
|
|
@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%implements StandardTokenizerInterface
|
%implements StandardTokenizerInterface
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%char
|
%char
|
||||||
|
%buffer 4096
|
||||||
|
|
||||||
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
|
%include SUPPLEMENTARY.jflex-macro
|
||||||
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
||||||
Format = ([\p{WB:Format}] | {FormatSupp})
|
Format = ([\p{WB:Format}] | {FormatSupp})
|
||||||
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
||||||
|
@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
// RFC-5321: Simple Mail Transfer Protocol
|
// RFC-5321: Simple Mail Transfer Protocol
|
||||||
// RFC-5322: Internet Message Format
|
// RFC-5322: Internet Message Format
|
||||||
|
|
||||||
%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
|
%include ASCIITLD.jflex-macro
|
||||||
|
|
||||||
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
|
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
|
||||||
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
|
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.wikipedia;
|
package org.apache.lucene.analysis.wikipedia;
|
||||||
|
|
||||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
/**
|
/**
|
||||||
* This class is a scanner generated by
|
* This class is a scanner generated by
|
||||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||||
* on 7/15/12 1:57 AM from the specification file
|
* on 8/6/12 11:57 AM from the specification file
|
||||||
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
|
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
|
||||||
*/
|
*/
|
||||||
class WikipediaTokenizerImpl {
|
class WikipediaTokenizerImpl {
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
|
||||||
/** initial size of the lookahead buffer */
|
/** initial size of the lookahead buffer */
|
||||||
private static final int ZZ_BUFFERSIZE = 16384;
|
private static final int ZZ_BUFFERSIZE = 4096;
|
||||||
|
|
||||||
/** lexical states */
|
/** lexical states */
|
||||||
public static final int THREE_SINGLE_QUOTES_STATE = 10;
|
public static final int THREE_SINGLE_QUOTES_STATE = 10;
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%pack
|
%pack
|
||||||
%char
|
%char
|
||||||
|
%buffer 4096
|
||||||
|
|
||||||
%{
|
%{
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
@ -33,6 +34,15 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Analyzer unibiAnalyzer = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
|
return new TokenStreamComponents(t,
|
||||||
|
new CJKBigramFilter(t, 0xff, true));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
public void testHuge() throws Exception {
|
public void testHuge() throws Exception {
|
||||||
assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
|
assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
|
||||||
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
|
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
|
||||||
|
@ -62,6 +72,96 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
|
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
|
||||||
new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" });
|
new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" },
|
||||||
|
new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
|
||||||
|
new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
|
||||||
|
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>",
|
||||||
|
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
|
||||||
|
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
|
||||||
|
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testAllScripts() throws Exception {
|
||||||
|
Analyzer a = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
|
return new TokenStreamComponents(t,
|
||||||
|
new CJKBigramFilter(t, 0xff, false));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
|
||||||
|
new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnigramsAndBigramsAllScripts() throws Exception {
|
||||||
|
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。",
|
||||||
|
new String[] {
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生",
|
||||||
|
"生が", "が", "が試", "試", "試験", "験", "験に", "に",
|
||||||
|
"に落", "落", "落ち", "ち", "ちた", "た"
|
||||||
|
},
|
||||||
|
new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
|
||||||
|
6, 7, 7, 8, 8, 9, 9, 10, 10, 11 },
|
||||||
|
new int[] { 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
|
||||||
|
8, 8, 9, 9, 10, 10, 11, 11, 12, 12 },
|
||||||
|
new String[] { "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
|
||||||
|
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
|
||||||
|
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>" },
|
||||||
|
new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||||
|
0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
|
||||||
|
new int[] { 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
|
||||||
|
2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnigramsAndBigramsHanOnly() throws Exception {
|
||||||
|
Analyzer a = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
|
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
|
return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
|
||||||
|
new String[] { "多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た" },
|
||||||
|
new int[] { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11 },
|
||||||
|
new int[] { 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12 },
|
||||||
|
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>",
|
||||||
|
"<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>",
|
||||||
|
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
|
||||||
|
new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
|
||||||
|
new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnigramsAndBigramsHuge() throws Exception {
|
||||||
|
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
|
||||||
|
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
|
||||||
|
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
|
||||||
|
new String[] {
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
|
||||||
|
"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た"
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** blast some random strings through the analyzer */
|
||||||
|
public void testRandomUnibiStrings() throws Exception {
|
||||||
|
checkRandomData(random(), unibiAnalyzer, 1000*RANDOM_MULTIPLIER);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** blast some huge random strings through the analyzer */
|
||||||
|
public void testRandomUnibiHugeStrings() throws Exception {
|
||||||
|
Random random = random();
|
||||||
|
checkRandomData(random, unibiAnalyzer, 100*RANDOM_MULTIPLIER, 8192);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,4 +52,16 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamTestCase {
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" });
|
new String[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testHanOnlyUnigrams() throws Exception {
|
||||||
|
Reader reader = new StringReader("多くの学生が試験に落ちた。");
|
||||||
|
CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
|
||||||
|
Map<String,String> args = new HashMap<String,String>();
|
||||||
|
args.put("hiragana", "false");
|
||||||
|
args.put("outputUnigrams", "true");
|
||||||
|
factory.init(args);
|
||||||
|
TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
|
||||||
|
assertTokenStreamContents(stream,
|
||||||
|
new String[] { "多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た" });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -100,8 +100,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
|
||||||
private static final ResourceLoader loader = new StringMockResourceLoader("");
|
private static final ResourceLoader loader = new StringMockResourceLoader("");
|
||||||
|
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
List<Class<?>> analysisClasses = new ArrayList<Class<?>>();
|
List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
|
||||||
TestRandomChains.getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
|
|
||||||
|
|
||||||
for (final Class<?> c : analysisClasses) {
|
for (final Class<?> c : analysisClasses) {
|
||||||
final int modifiers = c.getModifiers();
|
final int modifiers = c.getModifiers();
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.io.StringReader;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.lang.reflect.Modifier;
|
import java.lang.reflect.Modifier;
|
||||||
|
import java.net.URI;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -165,8 +166,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClass() throws Exception {
|
public static void beforeClass() throws Exception {
|
||||||
List<Class<?>> analysisClasses = new ArrayList<Class<?>>();
|
List<Class<?>> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
|
||||||
getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
|
|
||||||
tokenizers = new ArrayList<Constructor<? extends Tokenizer>>();
|
tokenizers = new ArrayList<Constructor<? extends Tokenizer>>();
|
||||||
tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>();
|
tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>();
|
||||||
charfilters = new ArrayList<Constructor<? extends CharFilter>>();
|
charfilters = new ArrayList<Constructor<? extends CharFilter>>();
|
||||||
|
@ -235,19 +235,30 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
|
private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
|
||||||
return (Constructor<T>) ctor;
|
return (Constructor<T>) ctor;
|
||||||
}
|
}
|
||||||
static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
|
|
||||||
|
public static List<Class<?>> getClassesForPackage(String pckgname) throws Exception {
|
||||||
|
final List<Class<?>> classes = new ArrayList<Class<?>>();
|
||||||
|
collectClassesForPackage(pckgname, classes);
|
||||||
|
assertFalse("No classes found in package '"+pckgname+"'; maybe your test classes are packaged as JAR file?", classes.isEmpty());
|
||||||
|
return classes;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void collectClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
|
||||||
final ClassLoader cld = TestRandomChains.class.getClassLoader();
|
final ClassLoader cld = TestRandomChains.class.getClassLoader();
|
||||||
final String path = pckgname.replace('.', '/');
|
final String path = pckgname.replace('.', '/');
|
||||||
final Enumeration<URL> resources = cld.getResources(path);
|
final Enumeration<URL> resources = cld.getResources(path);
|
||||||
while (resources.hasMoreElements()) {
|
while (resources.hasMoreElements()) {
|
||||||
final File directory = new File(resources.nextElement().toURI());
|
final URI uri = resources.nextElement().toURI();
|
||||||
|
if (!"file".equalsIgnoreCase(uri.getScheme()))
|
||||||
|
continue;
|
||||||
|
final File directory = new File(uri);
|
||||||
if (directory.exists()) {
|
if (directory.exists()) {
|
||||||
String[] files = directory.list();
|
String[] files = directory.list();
|
||||||
for (String file : files) {
|
for (String file : files) {
|
||||||
if (new File(directory, file).isDirectory()) {
|
if (new File(directory, file).isDirectory()) {
|
||||||
// recurse
|
// recurse
|
||||||
String subPackage = pckgname + "." + file;
|
String subPackage = pckgname + "." + file;
|
||||||
getClassesForPackage(subPackage, classes);
|
collectClassesForPackage(subPackage, classes);
|
||||||
}
|
}
|
||||||
if (file.endsWith(".class")) {
|
if (file.endsWith(".class")) {
|
||||||
String clazzName = file.substring(0, file.length() - 6);
|
String clazzName = file.substring(0, file.length() - 6);
|
||||||
|
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
|
@ -156,7 +155,12 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
||||||
|
|
||||||
CountingSearchTestTask.numSearches = 0;
|
CountingSearchTestTask.numSearches = 0;
|
||||||
execBenchmark(algLines);
|
execBenchmark(algLines);
|
||||||
assertTrue(CountingSearchTestTask.numSearches > 0);
|
|
||||||
|
// NOTE: cannot assert this, because on a super-slow
|
||||||
|
// system, it could be after waiting 0.5 seconds that
|
||||||
|
// the search threads hadn't yet succeeded in starting
|
||||||
|
// up and then they start up and do no searching:
|
||||||
|
//assertTrue(CountingSearchTestTask.numSearches > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testHighlighting() throws Exception {
|
public void testHighlighting() throws Exception {
|
||||||
|
@ -201,6 +205,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
||||||
// 1. alg definition (required in every "logic" test)
|
// 1. alg definition (required in every "logic" test)
|
||||||
String algLines[] = {
|
String algLines[] = {
|
||||||
"doc.stored=true",//doc storage is required in order to have text to highlight
|
"doc.stored=true",//doc storage is required in order to have text to highlight
|
||||||
|
"doc.term.vector=true",
|
||||||
"doc.term.vector.offsets=true",
|
"doc.term.vector.offsets=true",
|
||||||
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
|
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
|
||||||
"docs.file=" + getReuters20LinesFile(),
|
"docs.file=" + getReuters20LinesFile(),
|
||||||
|
@ -487,13 +492,13 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
||||||
|
|
||||||
int totalTokenCount2 = 0;
|
int totalTokenCount2 = 0;
|
||||||
|
|
||||||
FieldsEnum fields = MultiFields.getFields(reader).iterator();
|
Fields fields = MultiFields.getFields(reader);
|
||||||
String fieldName = null;
|
|
||||||
while((fieldName = fields.next()) != null) {
|
for (String fieldName : fields) {
|
||||||
if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
|
if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Terms terms = fields.terms();
|
Terms terms = fields.terms(fieldName);
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,29 +139,6 @@
|
||||||
|
|
||||||
<target name="compile-core" depends="compile-lucene-core"/>
|
<target name="compile-core" depends="compile-lucene-core"/>
|
||||||
|
|
||||||
<!--
|
|
||||||
Run after Junit tests.
|
|
||||||
-->
|
|
||||||
<target name="generate-clover-reports" depends="clover">
|
|
||||||
<fail unless="run.clover">Clover not enabled!</fail>
|
|
||||||
<mkdir dir="${clover.report.dir}"/>
|
|
||||||
<fileset dir="build" id="clover.test.result.files">
|
|
||||||
<include name="**/test/TEST-*.xml"/>
|
|
||||||
<!-- do not include BW tests -->
|
|
||||||
<exclude name="backwards/**"/>
|
|
||||||
</fileset>
|
|
||||||
<clover-report>
|
|
||||||
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
|
|
||||||
<format type="html" filter="assert"/>
|
|
||||||
<testresults refid="clover.test.result.files"/>
|
|
||||||
</current>
|
|
||||||
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
|
|
||||||
<format type="xml" filter="assert"/>
|
|
||||||
<testresults refid="clover.test.result.files"/>
|
|
||||||
</current>
|
|
||||||
</clover-report>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- Validation (license/notice/api checks). -->
|
<!-- Validation (license/notice/api checks). -->
|
||||||
<target name="validate" depends="check-licenses,rat-sources,check-forbidden-apis" description="Validate stuff." />
|
<target name="validate" depends="check-licenses,rat-sources,check-forbidden-apis" description="Validate stuff." />
|
||||||
|
|
||||||
|
@ -176,6 +153,7 @@
|
||||||
<apiFileSet dir="${custom-tasks.dir}/forbiddenApis">
|
<apiFileSet dir="${custom-tasks.dir}/forbiddenApis">
|
||||||
<include name="jdk.txt" />
|
<include name="jdk.txt" />
|
||||||
<include name="jdk-deprecated.txt" />
|
<include name="jdk-deprecated.txt" />
|
||||||
|
<include name="executors.txt" />
|
||||||
</apiFileSet>
|
</apiFileSet>
|
||||||
<fileset dir="${basedir}/build" includes="**/*.class" />
|
<fileset dir="${basedir}/build" includes="**/*.class" />
|
||||||
</forbidden-apis>
|
</forbidden-apis>
|
||||||
|
|
|
@ -88,7 +88,7 @@
|
||||||
<property name="tests.timezone" value="random" />
|
<property name="tests.timezone" value="random" />
|
||||||
<property name="tests.directory" value="random" />
|
<property name="tests.directory" value="random" />
|
||||||
<property name="tests.linedocsfile" value="europarl.lines.txt.gz" />
|
<property name="tests.linedocsfile" value="europarl.lines.txt.gz" />
|
||||||
<property name="tests.loggingfile" value="/dev/null"/>
|
<property name="tests.loggingfile" value="${common.dir}/tools/junit4/logging.properties"/>
|
||||||
<property name="tests.nightly" value="false" />
|
<property name="tests.nightly" value="false" />
|
||||||
<property name="tests.weekly" value="false" />
|
<property name="tests.weekly" value="false" />
|
||||||
<property name="tests.slow" value="true" />
|
<property name="tests.slow" value="true" />
|
||||||
|
@ -700,15 +700,22 @@
|
||||||
<condition property="tests.method" value="${testmethod}*">
|
<condition property="tests.method" value="${testmethod}*">
|
||||||
<isset property="testmethod" />
|
<isset property="testmethod" />
|
||||||
</condition>
|
</condition>
|
||||||
|
|
||||||
<condition property="tests.showSuccess" value="true">
|
<condition property="tests.showSuccess" value="true">
|
||||||
<or>
|
<or>
|
||||||
<isset property="tests.class" />
|
<isset property="tests.class" />
|
||||||
<isset property="tests.method" />
|
<isset property="tests.method" />
|
||||||
</or>
|
</or>
|
||||||
</condition>
|
</condition>
|
||||||
<!-- default -->
|
|
||||||
<property name="tests.showSuccess" value="false"/>
|
<property name="tests.showSuccess" value="false"/>
|
||||||
|
|
||||||
|
<condition property="tests.showOutput" value="always">
|
||||||
|
<or>
|
||||||
|
<isset property="tests.class" />
|
||||||
|
<isset property="tests.method" />
|
||||||
|
</or>
|
||||||
|
</condition>
|
||||||
|
<property name="tests.showOutput" value="onerror"/>
|
||||||
|
|
||||||
<!-- Test macro using junit4. -->
|
<!-- Test macro using junit4. -->
|
||||||
<macrodef name="test-macro" description="Executes junit tests.">
|
<macrodef name="test-macro" description="Executes junit tests.">
|
||||||
|
@ -854,6 +861,7 @@
|
||||||
<syspropertyset>
|
<syspropertyset>
|
||||||
<propertyref prefix="tests.maxfailures" />
|
<propertyref prefix="tests.maxfailures" />
|
||||||
<propertyref prefix="tests.failfast" />
|
<propertyref prefix="tests.failfast" />
|
||||||
|
<propertyref prefix="tests.badapples" />
|
||||||
</syspropertyset>
|
</syspropertyset>
|
||||||
|
|
||||||
<!-- Pass randomized settings to the forked JVM. -->
|
<!-- Pass randomized settings to the forked JVM. -->
|
||||||
|
@ -875,8 +883,7 @@
|
||||||
<junit4:report-text
|
<junit4:report-text
|
||||||
showThrowable="true"
|
showThrowable="true"
|
||||||
showStackTraces="true"
|
showStackTraces="true"
|
||||||
showOutputStream="true"
|
showOutput="${tests.showOutput}"
|
||||||
showErrorStream="true"
|
|
||||||
|
|
||||||
showStatusOk="${tests.showSuccess}"
|
showStatusOk="${tests.showSuccess}"
|
||||||
showStatusError="${tests.showError}"
|
showStatusError="${tests.showError}"
|
||||||
|
@ -896,8 +903,7 @@
|
||||||
file="@{junit.output.dir}/tests-report.txt"
|
file="@{junit.output.dir}/tests-report.txt"
|
||||||
showThrowable="true"
|
showThrowable="true"
|
||||||
showStackTraces="true"
|
showStackTraces="true"
|
||||||
showOutputStream="true"
|
showOutput="always"
|
||||||
showErrorStream="true"
|
|
||||||
|
|
||||||
showStatusOk="true"
|
showStatusOk="true"
|
||||||
showStatusError="true"
|
showStatusError="true"
|
||||||
|
@ -913,8 +919,7 @@
|
||||||
file="@{junit.output.dir}/tests-failures.txt"
|
file="@{junit.output.dir}/tests-failures.txt"
|
||||||
showThrowable="true"
|
showThrowable="true"
|
||||||
showStackTraces="true"
|
showStackTraces="true"
|
||||||
showOutputStream="true"
|
showOutput="onerror"
|
||||||
showErrorStream="true"
|
|
||||||
|
|
||||||
showStatusOk="false"
|
showStatusOk="false"
|
||||||
showStatusError="true"
|
showStatusError="true"
|
||||||
|
@ -929,8 +934,13 @@
|
||||||
the slowest tests or for reuse in balancing). -->
|
the slowest tests or for reuse in balancing). -->
|
||||||
<junit4:report-execution-times file="@{junit.output.dir}/tests-timehints.txt" historyLength="5" />
|
<junit4:report-execution-times file="@{junit.output.dir}/tests-timehints.txt" historyLength="5" />
|
||||||
|
|
||||||
<junit4:report-ant-xml dir="@{junit.output.dir}" />
|
<!-- ANT-compatible XMLs for jenkins records etc. -->
|
||||||
<junit4:report-json file="@{junit.output.dir}/tests-report-${ant.project.name}/index.html" />
|
<junit4:report-ant-xml dir="@{junit.output.dir}" outputStreams="no" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Enable if you wish to have a nice HTML5 report.
|
||||||
|
<junit4:report-json file="@{junit.output.dir}/tests-report-${ant.project.name}/index.html" outputStreams="no" />
|
||||||
|
-->
|
||||||
</listeners>
|
</listeners>
|
||||||
|
|
||||||
<!-- Input test classes. -->
|
<!-- Input test classes. -->
|
||||||
|
|
|
@ -480,7 +480,7 @@ public class MyAnalyzer extends Analyzer {
|
||||||
System.out.println(termAtt.toString());
|
System.out.println(termAtt.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
stream.end()
|
stream.end();
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
|
@ -509,7 +509,7 @@ easily by adding a LengthFilter to the chain. Only the
|
||||||
{@literal @Override}
|
{@literal @Override}
|
||||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
|
final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
|
||||||
TokenStream result = new LengthFilter(source, 3, Integer.MAX_VALUE);
|
TokenStream result = new LengthFilter(true, source, 3, Integer.MAX_VALUE);
|
||||||
return new TokenStreamComponents(source, result);
|
return new TokenStreamComponents(source, result);
|
||||||
}
|
}
|
||||||
</pre>
|
</pre>
|
||||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -40,6 +39,7 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.DoubleBarrelLRUCache;
|
import org.apache.lucene.util.DoubleBarrelLRUCache;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
|
|
||||||
/** Handles a terms dict, but decouples all details of
|
/** Handles a terms dict, but decouples all details of
|
||||||
* doc/freqs/positions reading to an instance of {@link
|
* doc/freqs/positions reading to an instance of {@link
|
||||||
|
@ -184,8 +184,8 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() {
|
public Iterator<String> iterator() {
|
||||||
return new TermFieldsEnum();
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -199,32 +199,6 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
return fields.size();
|
return fields.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterates through all fields
|
|
||||||
private class TermFieldsEnum extends FieldsEnum {
|
|
||||||
final Iterator<FieldReader> it;
|
|
||||||
FieldReader current;
|
|
||||||
|
|
||||||
TermFieldsEnum() {
|
|
||||||
it = fields.values().iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
if (it.hasNext()) {
|
|
||||||
current = it.next();
|
|
||||||
return current.fieldInfo.name;
|
|
||||||
} else {
|
|
||||||
current = null;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() throws IOException {
|
|
||||||
return current;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private class FieldReader extends Terms {
|
private class FieldReader extends Terms {
|
||||||
final long numTerms;
|
final long numTerms;
|
||||||
final FieldInfo fieldInfo;
|
final FieldInfo fieldInfo;
|
||||||
|
@ -253,6 +227,21 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
return new SegmentTermsEnum();
|
return new SegmentTermsEnum();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return fieldInfo.hasPayloads();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long size() {
|
public long size() {
|
||||||
return numTerms;
|
return numTerms;
|
||||||
|
|
|
@ -31,7 +31,6 @@ import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -46,6 +45,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
import org.apache.lucene.util.automaton.RunAutomaton;
|
import org.apache.lucene.util.automaton.RunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.Transition;
|
import org.apache.lucene.util.automaton.Transition;
|
||||||
|
@ -199,8 +199,8 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() {
|
public Iterator<String> iterator() {
|
||||||
return new TermFieldsEnum();
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -214,32 +214,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
return fields.size();
|
return fields.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterates through all fields
|
|
||||||
private class TermFieldsEnum extends FieldsEnum {
|
|
||||||
final Iterator<FieldReader> it;
|
|
||||||
FieldReader current;
|
|
||||||
|
|
||||||
TermFieldsEnum() {
|
|
||||||
it = fields.values().iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
if (it.hasNext()) {
|
|
||||||
current = it.next();
|
|
||||||
return current.fieldInfo.name;
|
|
||||||
} else {
|
|
||||||
current = null;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() throws IOException {
|
|
||||||
return current;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// for debugging
|
// for debugging
|
||||||
String brToString(BytesRef b) {
|
String brToString(BytesRef b) {
|
||||||
if (b == null) {
|
if (b == null) {
|
||||||
|
@ -456,6 +430,21 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return fieldInfo.hasPayloads();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
return new SegmentTermsEnum();
|
return new SegmentTermsEnum();
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.MergeState;
|
import org.apache.lucene.index.MergeState;
|
||||||
import org.apache.lucene.index.SegmentWriteState; // javadocs
|
import org.apache.lucene.index.SegmentWriteState; // javadocs
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -53,13 +52,10 @@ public abstract class FieldsConsumer implements Closeable {
|
||||||
public abstract void close() throws IOException;
|
public abstract void close() throws IOException;
|
||||||
|
|
||||||
public void merge(MergeState mergeState, Fields fields) throws IOException {
|
public void merge(MergeState mergeState, Fields fields) throws IOException {
|
||||||
FieldsEnum fieldsEnum = fields.iterator();
|
for (String field : fields) {
|
||||||
assert fieldsEnum != null;
|
|
||||||
String field;
|
|
||||||
while((field = fieldsEnum.next()) != null) {
|
|
||||||
mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
|
mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
|
||||||
assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field;
|
assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field;
|
||||||
Terms terms = fieldsEnum.terms();
|
Terms terms = fields.terms(field);
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
|
final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
|
||||||
termsConsumer.merge(mergeState, terms.iterator(null));
|
termsConsumer.merge(mergeState, terms.iterator(null));
|
||||||
|
|
|
@ -124,15 +124,17 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() throws IOException {
|
public BytesRef getPayload() throws IOException {
|
||||||
BytesRef payload = current.getPayload();
|
BytesRef payload = current.getPayload();
|
||||||
if (mergeState.currentPayloadProcessor[upto] != null) {
|
if (mergeState.currentPayloadProcessor[upto] != null && payload != null) {
|
||||||
|
// to not violate the D&P api, we must give the processor a private copy
|
||||||
|
// TODO: reuse a BytesRef if there is a PPP
|
||||||
|
payload = BytesRef.deepCopyOf(payload);
|
||||||
mergeState.currentPayloadProcessor[upto].processPayload(payload);
|
mergeState.currentPayloadProcessor[upto].processPayload(payload);
|
||||||
|
if (payload.length == 0) {
|
||||||
|
// don't let PayloadProcessors corrupt the index
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return current.hasPayload();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -112,12 +112,7 @@ public abstract class PostingsConsumer {
|
||||||
totTF += freq;
|
totTF += freq;
|
||||||
for(int i=0;i<freq;i++) {
|
for(int i=0;i<freq;i++) {
|
||||||
final int position = postingsEnum.nextPosition();
|
final int position = postingsEnum.nextPosition();
|
||||||
final BytesRef payload;
|
final BytesRef payload = postingsEnum.getPayload();
|
||||||
if (postingsEnum.hasPayload()) {
|
|
||||||
payload = postingsEnum.getPayload();
|
|
||||||
} else {
|
|
||||||
payload = null;
|
|
||||||
}
|
|
||||||
this.addPosition(position, payload, -1, -1);
|
this.addPosition(position, payload, -1, -1);
|
||||||
}
|
}
|
||||||
this.finishDoc();
|
this.finishDoc();
|
||||||
|
@ -137,12 +132,7 @@ public abstract class PostingsConsumer {
|
||||||
totTF += freq;
|
totTF += freq;
|
||||||
for(int i=0;i<freq;i++) {
|
for(int i=0;i<freq;i++) {
|
||||||
final int position = postingsEnum.nextPosition();
|
final int position = postingsEnum.nextPosition();
|
||||||
final BytesRef payload;
|
final BytesRef payload = postingsEnum.getPayload();
|
||||||
if (postingsEnum.hasPayload()) {
|
|
||||||
payload = postingsEnum.getPayload();
|
|
||||||
} else {
|
|
||||||
payload = null;
|
|
||||||
}
|
|
||||||
this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
|
this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
|
||||||
}
|
}
|
||||||
this.finishDoc();
|
this.finishDoc();
|
||||||
|
|
|
@ -26,8 +26,9 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.MergeState;
|
import org.apache.lucene.index.MergeState;
|
||||||
|
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
|
||||||
|
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
@ -41,14 +42,14 @@ import org.apache.lucene.util.BytesRef;
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li>For every document, {@link #startDocument(int)} is called,
|
* <li>For every document, {@link #startDocument(int)} is called,
|
||||||
* informing the Codec how many fields will be written.
|
* informing the Codec how many fields will be written.
|
||||||
* <li>{@link #startField(FieldInfo, int, boolean, boolean)} is called for
|
* <li>{@link #startField(FieldInfo, int, boolean, boolean, boolean)} is called for
|
||||||
* each field in the document, informing the codec how many terms
|
* each field in the document, informing the codec how many terms
|
||||||
* will be written for that field, and whether or not positions
|
* will be written for that field, and whether or not positions,
|
||||||
* or offsets are enabled.
|
* offsets, or payloads are enabled.
|
||||||
* <li>Within each field, {@link #startTerm(BytesRef, int)} is called
|
* <li>Within each field, {@link #startTerm(BytesRef, int)} is called
|
||||||
* for each term.
|
* for each term.
|
||||||
* <li>If offsets and/or positions are enabled, then
|
* <li>If offsets and/or positions are enabled, then
|
||||||
* {@link #addPosition(int, int, int)} will be called for each term
|
* {@link #addPosition(int, int, int, BytesRef)} will be called for each term
|
||||||
* occurrence.
|
* occurrence.
|
||||||
* <li>After all documents have been written, {@link #finish(FieldInfos, int)}
|
* <li>After all documents have been written, {@link #finish(FieldInfos, int)}
|
||||||
* is called for verification/sanity-checks.
|
* is called for verification/sanity-checks.
|
||||||
|
@ -60,7 +61,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
public abstract class TermVectorsWriter implements Closeable {
|
public abstract class TermVectorsWriter implements Closeable {
|
||||||
|
|
||||||
/** Called before writing the term vectors of the document.
|
/** Called before writing the term vectors of the document.
|
||||||
* {@link #startField(FieldInfo, int, boolean, boolean)} will
|
* {@link #startField(FieldInfo, int, boolean, boolean, boolean)} will
|
||||||
* be called <code>numVectorFields</code> times. Note that if term
|
* be called <code>numVectorFields</code> times. Note that if term
|
||||||
* vectors are enabled, this is called even if the document
|
* vectors are enabled, this is called even if the document
|
||||||
* has no vector fields, in this case <code>numVectorFields</code>
|
* has no vector fields, in this case <code>numVectorFields</code>
|
||||||
|
@ -69,17 +70,17 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
|
|
||||||
/** Called before writing the terms of the field.
|
/** Called before writing the terms of the field.
|
||||||
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
|
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
|
||||||
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException;
|
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
|
||||||
|
|
||||||
/** Adds a term and its term frequency <code>freq</code>.
|
/** Adds a term and its term frequency <code>freq</code>.
|
||||||
* If this field has positions and/or offsets enabled, then
|
* If this field has positions and/or offsets enabled, then
|
||||||
* {@link #addPosition(int, int, int)} will be called
|
* {@link #addPosition(int, int, int, BytesRef)} will be called
|
||||||
* <code>freq</code> times respectively.
|
* <code>freq</code> times respectively.
|
||||||
*/
|
*/
|
||||||
public abstract void startTerm(BytesRef term, int freq) throws IOException;
|
public abstract void startTerm(BytesRef term, int freq) throws IOException;
|
||||||
|
|
||||||
/** Adds a term position and offsets */
|
/** Adds a term position and offsets */
|
||||||
public abstract void addPosition(int position, int startOffset, int endOffset) throws IOException;
|
public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
|
||||||
|
|
||||||
/** Aborts writing entirely, implementation should remove
|
/** Aborts writing entirely, implementation should remove
|
||||||
* any partially-written files, etc. */
|
* any partially-written files, etc. */
|
||||||
|
@ -99,7 +100,7 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
* This is an expert API that allows the codec to consume
|
* This is an expert API that allows the codec to consume
|
||||||
* positions and offsets directly from the indexer.
|
* positions and offsets directly from the indexer.
|
||||||
* <p>
|
* <p>
|
||||||
* The default implementation calls {@link #addPosition(int, int, int)},
|
* The default implementation calls {@link #addPosition(int, int, int, BytesRef)},
|
||||||
* but subclasses can override this if they want to efficiently write
|
* but subclasses can override this if they want to efficiently write
|
||||||
* all the positions, then all the offsets, for example.
|
* all the positions, then all the offsets, for example.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -111,15 +112,36 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
|
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
|
||||||
int position = 0;
|
int position = 0;
|
||||||
int lastOffset = 0;
|
int lastOffset = 0;
|
||||||
|
BytesRef payload = null;
|
||||||
|
|
||||||
for (int i = 0; i < numProx; i++) {
|
for (int i = 0; i < numProx; i++) {
|
||||||
final int startOffset;
|
final int startOffset;
|
||||||
final int endOffset;
|
final int endOffset;
|
||||||
|
final BytesRef thisPayload;
|
||||||
|
|
||||||
if (positions == null) {
|
if (positions == null) {
|
||||||
position = -1;
|
position = -1;
|
||||||
|
thisPayload = null;
|
||||||
} else {
|
} else {
|
||||||
position += positions.readVInt();
|
int code = positions.readVInt();
|
||||||
|
position += code >>> 1;
|
||||||
|
if ((code & 1) != 0) {
|
||||||
|
// This position has a payload
|
||||||
|
final int payloadLength = positions.readVInt();
|
||||||
|
|
||||||
|
if (payload == null) {
|
||||||
|
payload = new BytesRef();
|
||||||
|
payload.bytes = new byte[payloadLength];
|
||||||
|
} else if (payload.bytes.length < payloadLength) {
|
||||||
|
payload.grow(payloadLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
positions.readBytes(payload.bytes, 0, payloadLength);
|
||||||
|
payload.length = payloadLength;
|
||||||
|
thisPayload = payload;
|
||||||
|
} else {
|
||||||
|
thisPayload = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (offsets == null) {
|
if (offsets == null) {
|
||||||
|
@ -129,24 +151,31 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
endOffset = startOffset + offsets.readVInt();
|
endOffset = startOffset + offsets.readVInt();
|
||||||
lastOffset = endOffset;
|
lastOffset = endOffset;
|
||||||
}
|
}
|
||||||
addPosition(position, startOffset, endOffset);
|
addPosition(position, startOffset, endOffset, thisPayload);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Merges in the term vectors from the readers in
|
/** Merges in the term vectors from the readers in
|
||||||
* <code>mergeState</code>. The default implementation skips
|
* <code>mergeState</code>. The default implementation skips
|
||||||
* over deleted documents, and uses {@link #startDocument(int)},
|
* over deleted documents, and uses {@link #startDocument(int)},
|
||||||
* {@link #startField(FieldInfo, int, boolean, boolean)},
|
* {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
|
||||||
* {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int)},
|
* {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
|
||||||
* and {@link #finish(FieldInfos, int)},
|
* and {@link #finish(FieldInfos, int)},
|
||||||
* returning the number of documents that were written.
|
* returning the number of documents that were written.
|
||||||
* Implementations can override this method for more sophisticated
|
* Implementations can override this method for more sophisticated
|
||||||
* merging (bulk-byte copying, etc). */
|
* merging (bulk-byte copying, etc). */
|
||||||
public int merge(MergeState mergeState) throws IOException {
|
public int merge(MergeState mergeState) throws IOException {
|
||||||
int docCount = 0;
|
int docCount = 0;
|
||||||
for (AtomicReader reader : mergeState.readers) {
|
for (int i = 0; i < mergeState.readers.size(); i++) {
|
||||||
|
final AtomicReader reader = mergeState.readers.get(i);
|
||||||
final int maxDoc = reader.maxDoc();
|
final int maxDoc = reader.maxDoc();
|
||||||
final Bits liveDocs = reader.getLiveDocs();
|
final Bits liveDocs = reader.getLiveDocs();
|
||||||
|
// set PayloadProcessor
|
||||||
|
if (mergeState.payloadProcessorProvider != null) {
|
||||||
|
mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
|
||||||
|
} else {
|
||||||
|
mergeState.currentReaderPayloadProcessor = null;
|
||||||
|
}
|
||||||
for (int docID = 0; docID < maxDoc; docID++) {
|
for (int docID = 0; docID < maxDoc; docID++) {
|
||||||
if (liveDocs != null && !liveDocs.get(docID)) {
|
if (liveDocs != null && !liveDocs.get(docID)) {
|
||||||
// skip deleted docs
|
// skip deleted docs
|
||||||
|
@ -155,7 +184,7 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
// NOTE: it's very important to first assign to vectors then pass it to
|
// NOTE: it's very important to first assign to vectors then pass it to
|
||||||
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
|
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
|
||||||
Fields vectors = reader.getTermVectors(docID);
|
Fields vectors = reader.getTermVectors(docID);
|
||||||
addAllDocVectors(vectors, mergeState.fieldInfos);
|
addAllDocVectors(vectors, mergeState);
|
||||||
docCount++;
|
docCount++;
|
||||||
mergeState.checkAbort.work(300);
|
mergeState.checkAbort.work(300);
|
||||||
}
|
}
|
||||||
|
@ -169,7 +198,7 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
* implementation requires that the vectors implement
|
* implementation requires that the vectors implement
|
||||||
* both Fields.size and
|
* both Fields.size and
|
||||||
* Terms.size. */
|
* Terms.size. */
|
||||||
protected final void addAllDocVectors(Fields vectors, FieldInfos fieldInfos) throws IOException {
|
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
|
||||||
if (vectors == null) {
|
if (vectors == null) {
|
||||||
startDocument(0);
|
startDocument(0);
|
||||||
return;
|
return;
|
||||||
|
@ -181,35 +210,38 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
}
|
}
|
||||||
startDocument(numFields);
|
startDocument(numFields);
|
||||||
|
|
||||||
final FieldsEnum fieldsEnum = vectors.iterator();
|
|
||||||
String fieldName;
|
|
||||||
String lastFieldName = null;
|
String lastFieldName = null;
|
||||||
|
|
||||||
while((fieldName = fieldsEnum.next()) != null) {
|
TermsEnum termsEnum = null;
|
||||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
|
DocsAndPositionsEnum docsAndPositionsEnum = null;
|
||||||
|
|
||||||
|
final ReaderPayloadProcessor readerPayloadProcessor = mergeState.currentReaderPayloadProcessor;
|
||||||
|
PayloadProcessor payloadProcessor = null;
|
||||||
|
|
||||||
|
for(String fieldName : vectors) {
|
||||||
|
final FieldInfo fieldInfo = mergeState.fieldInfos.fieldInfo(fieldName);
|
||||||
|
|
||||||
assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
|
assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
|
||||||
lastFieldName = fieldName;
|
lastFieldName = fieldName;
|
||||||
|
|
||||||
final Terms terms = fieldsEnum.terms();
|
final Terms terms = vectors.terms(fieldName);
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
// FieldsEnum shouldn't lie...
|
// FieldsEnum shouldn't lie...
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final boolean hasPositions = terms.hasPositions();
|
||||||
|
final boolean hasOffsets = terms.hasOffsets();
|
||||||
|
final boolean hasPayloads = terms.hasPayloads();
|
||||||
|
assert !hasPayloads || hasPositions;
|
||||||
|
|
||||||
final int numTerms = (int) terms.size();
|
final int numTerms = (int) terms.size();
|
||||||
if (numTerms == -1) {
|
if (numTerms == -1) {
|
||||||
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
|
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
|
||||||
}
|
}
|
||||||
final TermsEnum termsEnum = terms.iterator(null);
|
|
||||||
|
|
||||||
DocsAndPositionsEnum docsAndPositionsEnum = null;
|
startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
|
||||||
|
termsEnum = terms.iterator(termsEnum);
|
||||||
boolean startedField = false;
|
|
||||||
|
|
||||||
// NOTE: this is tricky, because TermVectors allow
|
|
||||||
// indexing offsets but NOT positions. So we must
|
|
||||||
// lazily init the field by checking whether first
|
|
||||||
// position we see is -1 or not.
|
|
||||||
|
|
||||||
int termCount = 0;
|
int termCount = 0;
|
||||||
while(termsEnum.next() != null) {
|
while(termsEnum.next() != null) {
|
||||||
|
@ -217,18 +249,16 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
|
|
||||||
final int freq = (int) termsEnum.totalTermFreq();
|
final int freq = (int) termsEnum.totalTermFreq();
|
||||||
|
|
||||||
if (startedField) {
|
|
||||||
startTerm(termsEnum.term(), freq);
|
startTerm(termsEnum.term(), freq);
|
||||||
|
|
||||||
|
if (hasPayloads && readerPayloadProcessor != null) {
|
||||||
|
payloadProcessor = readerPayloadProcessor.getProcessor(fieldName, termsEnum.term());
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we need a "query" API where we can ask (via
|
if (hasPositions || hasOffsets) {
|
||||||
// flex API) what this term was indexed with...
|
docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
|
||||||
// Both positions & offsets:
|
assert docsAndPositionsEnum != null;
|
||||||
docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
|
|
||||||
boolean hasOffsets = false;
|
|
||||||
boolean hasPositions = false;
|
|
||||||
|
|
||||||
if (docsAndPositionsEnum != null) {
|
|
||||||
final int docID = docsAndPositionsEnum.nextDoc();
|
final int docID = docsAndPositionsEnum.nextDoc();
|
||||||
assert docID != DocIdSetIterator.NO_MORE_DOCS;
|
assert docID != DocIdSetIterator.NO_MORE_DOCS;
|
||||||
assert docsAndPositionsEnum.freq() == freq;
|
assert docsAndPositionsEnum.freq() == freq;
|
||||||
|
@ -237,27 +267,21 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
final int pos = docsAndPositionsEnum.nextPosition();
|
final int pos = docsAndPositionsEnum.nextPosition();
|
||||||
final int startOffset = docsAndPositionsEnum.startOffset();
|
final int startOffset = docsAndPositionsEnum.startOffset();
|
||||||
final int endOffset = docsAndPositionsEnum.endOffset();
|
final int endOffset = docsAndPositionsEnum.endOffset();
|
||||||
if (!startedField) {
|
|
||||||
assert numTerms > 0;
|
BytesRef payload = docsAndPositionsEnum.getPayload();
|
||||||
hasPositions = pos != -1;
|
|
||||||
hasOffsets = startOffset != -1;
|
if (payloadProcessor != null && payload != null) {
|
||||||
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
|
// to not violate the D&P api, we must give the processor a private copy
|
||||||
startTerm(termsEnum.term(), freq);
|
payload = BytesRef.deepCopyOf(payload);
|
||||||
startedField = true;
|
payloadProcessor.processPayload(payload);
|
||||||
|
if (payload.length == 0) {
|
||||||
|
// don't let PayloadProcessors corrupt the index
|
||||||
|
payload = null;
|
||||||
}
|
}
|
||||||
if (hasOffsets) {
|
|
||||||
assert startOffset != -1;
|
|
||||||
assert endOffset != -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assert !hasPositions || pos >= 0;
|
assert !hasPositions || pos >= 0;
|
||||||
addPosition(pos, startOffset, endOffset);
|
addPosition(pos, startOffset, endOffset, payload);
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!startedField) {
|
|
||||||
assert numTerms > 0;
|
|
||||||
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
|
|
||||||
startTerm(termsEnum.term(), freq);
|
|
||||||
startedField = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -954,11 +954,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
return null;
|
return null;
|
||||||
|
@ -1226,10 +1221,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println(" FPR.nextDoc");
|
System.out.println(" FPR.nextDoc");
|
||||||
}
|
}
|
||||||
if (indexHasPayloads) {
|
|
||||||
payloadByteUpto += payloadLength;
|
|
||||||
payloadLength = 0;
|
|
||||||
}
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
|
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
|
||||||
|
@ -1255,7 +1246,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
|
System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
|
||||||
}
|
}
|
||||||
position = 0;
|
position = 0;
|
||||||
payloadLength = 0;
|
|
||||||
lastStartOffset = 0;
|
lastStartOffset = 0;
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
@ -1355,12 +1345,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println(" return doc=" + accum);
|
System.out.println(" return doc=" + accum);
|
||||||
}
|
}
|
||||||
if (indexHasPayloads) {
|
|
||||||
payloadByteUpto += payloadLength;
|
|
||||||
payloadLength = 0;
|
|
||||||
}
|
|
||||||
position = 0;
|
position = 0;
|
||||||
payloadLength = 0;
|
|
||||||
lastStartOffset = 0;
|
lastStartOffset = 0;
|
||||||
return doc = accum;
|
return doc = accum;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1433,7 +1418,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
position = 0;
|
position = 0;
|
||||||
payloadLength = 0;
|
|
||||||
lastStartOffset = 0;
|
lastStartOffset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1461,16 +1445,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
posBufferUpto = BLOCK_SIZE;
|
posBufferUpto = BLOCK_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (indexHasPayloads) {
|
|
||||||
if (DEBUG) {
|
|
||||||
if (payloadLength != 0) {
|
|
||||||
System.out.println(" skip unread payload length=" + payloadLength);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
payloadByteUpto += payloadLength;
|
|
||||||
payloadLength = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (posPendingCount > freq) {
|
if (posPendingCount > freq) {
|
||||||
skipPositions();
|
skipPositions();
|
||||||
posPendingCount = freq;
|
posPendingCount = freq;
|
||||||
|
@ -1484,6 +1458,10 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
|
|
||||||
if (indexHasPayloads) {
|
if (indexHasPayloads) {
|
||||||
payloadLength = payloadLengthBuffer[posBufferUpto];
|
payloadLength = payloadLengthBuffer[posBufferUpto];
|
||||||
|
payload.bytes = payloadBytes;
|
||||||
|
payload.offset = payloadByteUpto;
|
||||||
|
payload.length = payloadLength;
|
||||||
|
payloadByteUpto += payloadLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (indexHasOffsets) {
|
if (indexHasOffsets) {
|
||||||
|
@ -1510,22 +1488,16 @@ public final class BlockPostingsReader extends PostingsReaderBase {
|
||||||
return endOffset;
|
return endOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return payloadLength != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto);
|
System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto);
|
||||||
}
|
}
|
||||||
payload.bytes = payloadBytes;
|
if (payloadLength == 0) {
|
||||||
payload.offset = payloadByteUpto;
|
return null;
|
||||||
payload.length = payloadLength;
|
} else {
|
||||||
payloadByteUpto += payloadLength;
|
|
||||||
payloadLength = 0;
|
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
@ -35,7 +36,6 @@ import org.apache.lucene.codecs.TermsConsumer;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -44,7 +44,6 @@ import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FuzzySet;
|
import org.apache.lucene.util.FuzzySet;
|
||||||
|
@ -187,9 +186,8 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public FieldsEnum iterator() throws IOException {
|
public Iterator<String> iterator() {
|
||||||
return new BloomFilteredFieldsEnum(delegateFieldsProducer.iterator(),
|
return delegateFieldsProducer.iterator();
|
||||||
bloomsByFieldName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
@ -217,44 +215,6 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
|
||||||
return delegateFieldsProducer.getUniqueTermCount();
|
return delegateFieldsProducer.getUniqueTermCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not all fields in a segment may be subject to a bloom filter. This class
|
|
||||||
// wraps Terms objects appropriately if a filtering request is present
|
|
||||||
class BloomFilteredFieldsEnum extends FieldsEnum {
|
|
||||||
private FieldsEnum delegateFieldsEnum;
|
|
||||||
private HashMap<String,FuzzySet> bloomsByFieldName;
|
|
||||||
private String currentFieldName;
|
|
||||||
|
|
||||||
public BloomFilteredFieldsEnum(FieldsEnum iterator,
|
|
||||||
HashMap<String,FuzzySet> bloomsByFieldName) {
|
|
||||||
this.delegateFieldsEnum = iterator;
|
|
||||||
this.bloomsByFieldName = bloomsByFieldName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public AttributeSource attributes() {
|
|
||||||
return delegateFieldsEnum.attributes();
|
|
||||||
}
|
|
||||||
|
|
||||||
public String next() throws IOException {
|
|
||||||
currentFieldName = delegateFieldsEnum.next();
|
|
||||||
return currentFieldName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Terms terms() throws IOException {
|
|
||||||
FuzzySet filter = bloomsByFieldName.get(currentFieldName);
|
|
||||||
if (filter == null) {
|
|
||||||
return delegateFieldsEnum.terms();
|
|
||||||
} else {
|
|
||||||
Terms result = delegateFieldsEnum.terms();
|
|
||||||
if (result == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
// wrap the terms object with a bloom filter
|
|
||||||
return new BloomFilteredTerms(result, filter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
class BloomFilteredTerms extends Terms {
|
class BloomFilteredTerms extends Terms {
|
||||||
private Terms delegateTerms;
|
private Terms delegateTerms;
|
||||||
private FuzzySet filter;
|
private FuzzySet filter;
|
||||||
|
@ -314,6 +274,21 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
return delegateTerms.getDocCount();
|
return delegateTerms.getDocCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return delegateTerms.hasOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return delegateTerms.hasPositions();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return delegateTerms.hasPayloads();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class BloomFilteredTermsEnum extends TermsEnum {
|
class BloomFilteredTermsEnum extends TermsEnum {
|
||||||
|
|
|
@ -873,12 +873,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
||||||
* payload was indexed. */
|
* payload was indexed. */
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() throws IOException {
|
public BytesRef getPayload() throws IOException {
|
||||||
throw new IOException("No payloads exist for this field!");
|
return null;
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1152,11 +1147,13 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() throws IOException {
|
public BytesRef getPayload() throws IOException {
|
||||||
if (storePayloads) {
|
if (storePayloads) {
|
||||||
|
if (payloadLength <= 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
assert lazyProxPointer == -1;
|
assert lazyProxPointer == -1;
|
||||||
assert posPendingCount < freq;
|
assert posPendingCount < freq;
|
||||||
if (!payloadPending) {
|
|
||||||
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
|
if (payloadPending) {
|
||||||
}
|
|
||||||
if (payloadLength > payload.bytes.length) {
|
if (payloadLength > payload.bytes.length) {
|
||||||
payload.grow(payloadLength);
|
payload.grow(payloadLength);
|
||||||
}
|
}
|
||||||
|
@ -1164,16 +1161,12 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
||||||
proxIn.readBytes(payload.bytes, 0, payloadLength);
|
proxIn.readBytes(payload.bytes, 0, payloadLength);
|
||||||
payload.length = payloadLength;
|
payload.length = payloadLength;
|
||||||
payloadPending = false;
|
payloadPending = false;
|
||||||
|
}
|
||||||
|
|
||||||
return payload;
|
return payload;
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("No payloads exist for this field!");
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return payloadPending && payloadLength > 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,33 +67,46 @@ import org.apache.lucene.store.IOContext;
|
||||||
* <li><a name="tvf" id="tvf"></a>
|
* <li><a name="tvf" id="tvf"></a>
|
||||||
* <p>The Field or .tvf file.</p>
|
* <p>The Field or .tvf file.</p>
|
||||||
* <p>This file contains, for each field that has a term vector stored, a list of
|
* <p>This file contains, for each field that has a term vector stored, a list of
|
||||||
* the terms, their frequencies and, optionally, position and offset
|
* the terms, their frequencies and, optionally, position, offset, and payload
|
||||||
* information.</p>
|
* information.</p>
|
||||||
* <p>Field (.tvf) --> Header,<NumTerms, Position/Offset, TermFreqs>
|
* <p>Field (.tvf) --> Header,<NumTerms, Flags, TermFreqs>
|
||||||
* <sup>NumFields</sup></p>
|
* <sup>NumFields</sup></p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||||
* <li>NumTerms --> {@link DataOutput#writeVInt VInt}</li>
|
* <li>NumTerms --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
* <li>Position/Offset --> {@link DataOutput#writeByte Byte}</li>
|
* <li>Flags --> {@link DataOutput#writeByte Byte}</li>
|
||||||
* <li>TermFreqs --> <TermText, TermFreq, Positions?, Offsets?>
|
* <li>TermFreqs --> <TermText, TermFreq, Positions?, PayloadData?, Offsets?>
|
||||||
* <sup>NumTerms</sup></li>
|
* <sup>NumTerms</sup></li>
|
||||||
* <li>TermText --> <PrefixLength, Suffix></li>
|
* <li>TermText --> <PrefixLength, Suffix></li>
|
||||||
* <li>PrefixLength --> {@link DataOutput#writeVInt VInt}</li>
|
* <li>PrefixLength --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
* <li>Suffix --> {@link DataOutput#writeString String}</li>
|
* <li>Suffix --> {@link DataOutput#writeString String}</li>
|
||||||
* <li>TermFreq --> {@link DataOutput#writeVInt VInt}</li>
|
* <li>TermFreq --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
* <li>Positions --> <{@link DataOutput#writeVInt VInt}><sup>TermFreq</sup></li>
|
* <li>Positions --> <PositionDelta PayloadLength?><sup>TermFreq</sup></li>
|
||||||
|
* <li>PositionDelta --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
|
* <li>PayloadLength --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
|
* <li>PayloadData --> {@link DataOutput#writeByte Byte}<sup>NumPayloadBytes</sup></li>
|
||||||
* <li>Offsets --> <{@link DataOutput#writeVInt VInt}, {@link DataOutput#writeVInt VInt}><sup>TermFreq</sup></li>
|
* <li>Offsets --> <{@link DataOutput#writeVInt VInt}, {@link DataOutput#writeVInt VInt}><sup>TermFreq</sup></li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>Notes:</p>
|
* <p>Notes:</p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Position/Offset byte stores whether this term vector has position or offset
|
* <li>Flags byte stores whether this term vector has position, offset, or payload
|
||||||
* information stored.</li>
|
* information stored.</li>
|
||||||
* <li>Term byte prefixes are shared. The PrefixLength is the number of initial
|
* <li>Term byte prefixes are shared. The PrefixLength is the number of initial
|
||||||
* bytes from the previous term which must be pre-pended to a term's suffix
|
* bytes from the previous term which must be pre-pended to a term's suffix
|
||||||
* in order to form the term's bytes. Thus, if the previous term's text was "bone"
|
* in order to form the term's bytes. Thus, if the previous term's text was "bone"
|
||||||
* and the term is "boy", the PrefixLength is two and the suffix is "y".</li>
|
* and the term is "boy", the PrefixLength is two and the suffix is "y".</li>
|
||||||
* <li>Positions are stored as delta encoded VInts. This means we only store the
|
* <li>PositionDelta is, if payloads are disabled for the term's field, the
|
||||||
* difference of the current position from the last position</li>
|
* difference between the position of the current occurrence in the document and
|
||||||
|
* the previous occurrence (or zero, if this is the first occurrence in this
|
||||||
|
* document). If payloads are enabled for the term's field, then PositionDelta/2
|
||||||
|
* is the difference between the current and the previous position. If payloads
|
||||||
|
* are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
|
||||||
|
* the length of the payload at the current term position.</li>
|
||||||
|
* <li>PayloadData is metadata associated with a term position. If
|
||||||
|
* PayloadLength is stored at the current position, then it indicates the length
|
||||||
|
* of this payload. If PayloadLength is not stored, then this payload has the same
|
||||||
|
* length as the payload at the previous position. PayloadData encodes the
|
||||||
|
* concatenated bytes for all of a term's occurrences.</li>
|
||||||
* <li>Offsets are stored as delta encoded VInts. The first VInt is the
|
* <li>Offsets are stored as delta encoded VInts. The first VInt is the
|
||||||
* startOffset, the second is the endOffset.</li>
|
* startOffset, the second is the endOffset.</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
|
|
|
@ -21,7 +21,9 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
|
@ -30,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -55,6 +56,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
|
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
|
||||||
|
|
||||||
|
static final byte STORE_PAYLOAD_WITH_TERMVECTOR = 0x4;
|
||||||
|
|
||||||
/** Extension of vectors fields file */
|
/** Extension of vectors fields file */
|
||||||
static final String VECTORS_FIELDS_EXTENSION = "tvf";
|
static final String VECTORS_FIELDS_EXTENSION = "tvf";
|
||||||
|
|
||||||
|
@ -68,8 +71,10 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
|
static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
|
||||||
static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
|
static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
|
||||||
|
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_NO_PAYLOADS = 0;
|
||||||
static final int VERSION_CURRENT = VERSION_START;
|
static final int VERSION_PAYLOADS = 1;
|
||||||
|
static final int VERSION_START = VERSION_NO_PAYLOADS;
|
||||||
|
static final int VERSION_CURRENT = VERSION_PAYLOADS;
|
||||||
|
|
||||||
static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
|
static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
|
||||||
static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
|
static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
|
||||||
|
@ -245,9 +250,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() throws IOException {
|
public Iterator<String> iterator() {
|
||||||
|
return new Iterator<String>() {
|
||||||
return new FieldsEnum() {
|
|
||||||
private int fieldUpto;
|
private int fieldUpto;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -255,13 +259,18 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
|
if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
|
||||||
return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
|
return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
|
||||||
} else {
|
} else {
|
||||||
return null;
|
throw new NoSuchElementException();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Terms terms() throws IOException {
|
public boolean hasNext() {
|
||||||
return TVFields.this.terms(fieldInfos.fieldInfo(fieldNumbers[fieldUpto-1]).name);
|
return fieldNumbers != null && fieldUpto < fieldNumbers.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -296,10 +305,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
private class TVTerms extends Terms {
|
private class TVTerms extends Terms {
|
||||||
private final int numTerms;
|
private final int numTerms;
|
||||||
private final long tvfFPStart;
|
private final long tvfFPStart;
|
||||||
|
private final boolean storePositions;
|
||||||
|
private final boolean storeOffsets;
|
||||||
|
private final boolean storePayloads;
|
||||||
|
|
||||||
public TVTerms(long tvfFP) throws IOException {
|
public TVTerms(long tvfFP) throws IOException {
|
||||||
tvf.seek(tvfFP);
|
tvf.seek(tvfFP);
|
||||||
numTerms = tvf.readVInt();
|
numTerms = tvf.readVInt();
|
||||||
|
final byte bits = tvf.readByte();
|
||||||
|
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
||||||
|
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||||
|
storePayloads = (bits & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
|
||||||
tvfFPStart = tvf.getFilePointer();
|
tvfFPStart = tvf.getFilePointer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,7 +330,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
termsEnum = new TVTermsEnum();
|
termsEnum = new TVTermsEnum();
|
||||||
}
|
}
|
||||||
termsEnum.reset(numTerms, tvfFPStart);
|
termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets, storePayloads);
|
||||||
return termsEnum;
|
return termsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,6 +361,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
// this...? I guess codec could buffer and re-sort...
|
// this...? I guess codec could buffer and re-sort...
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return storeOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return storePositions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return storePayloads;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TVTermsEnum extends TermsEnum {
|
private class TVTermsEnum extends TermsEnum {
|
||||||
|
@ -357,12 +388,18 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
private BytesRef term = new BytesRef();
|
private BytesRef term = new BytesRef();
|
||||||
private boolean storePositions;
|
private boolean storePositions;
|
||||||
private boolean storeOffsets;
|
private boolean storeOffsets;
|
||||||
|
private boolean storePayloads;
|
||||||
private long tvfFP;
|
private long tvfFP;
|
||||||
|
|
||||||
private int[] positions;
|
private int[] positions;
|
||||||
private int[] startOffsets;
|
private int[] startOffsets;
|
||||||
private int[] endOffsets;
|
private int[] endOffsets;
|
||||||
|
|
||||||
|
// one shared byte[] for any term's payloads
|
||||||
|
private int[] payloadOffsets;
|
||||||
|
private int lastPayloadLength;
|
||||||
|
private byte[] payloadData;
|
||||||
|
|
||||||
// NOTE: tvf is pre-positioned by caller
|
// NOTE: tvf is pre-positioned by caller
|
||||||
public TVTermsEnum() {
|
public TVTermsEnum() {
|
||||||
this.origTVF = Lucene40TermVectorsReader.this.tvf;
|
this.origTVF = Lucene40TermVectorsReader.this.tvf;
|
||||||
|
@ -373,17 +410,20 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
return tvf == origTVF;
|
return tvf == origTVF;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(int numTerms, long tvfFPStart) throws IOException {
|
public void reset(int numTerms, long tvfFPStart, boolean storePositions, boolean storeOffsets, boolean storePayloads) throws IOException {
|
||||||
this.numTerms = numTerms;
|
this.numTerms = numTerms;
|
||||||
|
this.storePositions = storePositions;
|
||||||
|
this.storeOffsets = storeOffsets;
|
||||||
|
this.storePayloads = storePayloads;
|
||||||
nextTerm = 0;
|
nextTerm = 0;
|
||||||
tvf.seek(tvfFPStart);
|
tvf.seek(tvfFPStart);
|
||||||
final byte bits = tvf.readByte();
|
|
||||||
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
|
||||||
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
|
||||||
tvfFP = 1+tvfFPStart;
|
tvfFP = 1+tvfFPStart;
|
||||||
positions = null;
|
positions = null;
|
||||||
startOffsets = null;
|
startOffsets = null;
|
||||||
endOffsets = null;
|
endOffsets = null;
|
||||||
|
payloadOffsets = null;
|
||||||
|
payloadData = null;
|
||||||
|
lastPayloadLength = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: slow! (linear scan)
|
// NOTE: slow! (linear scan)
|
||||||
|
@ -430,7 +470,26 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
tvf.readBytes(term.bytes, start, deltaLen);
|
tvf.readBytes(term.bytes, start, deltaLen);
|
||||||
freq = tvf.readVInt();
|
freq = tvf.readVInt();
|
||||||
|
|
||||||
if (storePositions) {
|
if (storePayloads) {
|
||||||
|
positions = new int[freq];
|
||||||
|
payloadOffsets = new int[freq];
|
||||||
|
int totalPayloadLength = 0;
|
||||||
|
int pos = 0;
|
||||||
|
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||||
|
int code = tvf.readVInt();
|
||||||
|
pos += code >>> 1;
|
||||||
|
positions[posUpto] = pos;
|
||||||
|
if ((code & 1) != 0) {
|
||||||
|
// length change
|
||||||
|
lastPayloadLength = tvf.readVInt();
|
||||||
|
}
|
||||||
|
payloadOffsets[posUpto] = totalPayloadLength;
|
||||||
|
totalPayloadLength += lastPayloadLength;
|
||||||
|
assert totalPayloadLength >= 0;
|
||||||
|
}
|
||||||
|
payloadData = new byte[totalPayloadLength];
|
||||||
|
tvf.readBytes(payloadData, 0, payloadData.length);
|
||||||
|
} else if (storePositions /* no payloads */) {
|
||||||
// TODO: we could maybe reuse last array, if we can
|
// TODO: we could maybe reuse last array, if we can
|
||||||
// somehow be careful about consumer never using two
|
// somehow be careful about consumer never using two
|
||||||
// D&PEnums at once...
|
// D&PEnums at once...
|
||||||
|
@ -502,14 +561,12 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
docsAndPositionsEnum = new TVDocsAndPositionsEnum();
|
docsAndPositionsEnum = new TVDocsAndPositionsEnum();
|
||||||
}
|
}
|
||||||
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
|
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets, payloadOffsets, payloadData);
|
||||||
return docsAndPositionsEnum;
|
return docsAndPositionsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
// TODO: really indexer hardwires
|
|
||||||
// this...? I guess codec could buffer and re-sort...
|
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -567,6 +624,9 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
private int[] positions;
|
private int[] positions;
|
||||||
private int[] startOffsets;
|
private int[] startOffsets;
|
||||||
private int[] endOffsets;
|
private int[] endOffsets;
|
||||||
|
private int[] payloadOffsets;
|
||||||
|
private BytesRef payload = new BytesRef();
|
||||||
|
private byte[] payloadBytes;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int freq() throws IOException {
|
public int freq() throws IOException {
|
||||||
|
@ -602,11 +662,13 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
|
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, int[] payloadLengths, byte[] payloadBytes) {
|
||||||
this.liveDocs = liveDocs;
|
this.liveDocs = liveDocs;
|
||||||
this.positions = positions;
|
this.positions = positions;
|
||||||
this.startOffsets = startOffsets;
|
this.startOffsets = startOffsets;
|
||||||
this.endOffsets = endOffsets;
|
this.endOffsets = endOffsets;
|
||||||
|
this.payloadOffsets = payloadLengths;
|
||||||
|
this.payloadBytes = payloadBytes;
|
||||||
this.doc = -1;
|
this.doc = -1;
|
||||||
didNext = false;
|
didNext = false;
|
||||||
nextPos = 0;
|
nextPos = 0;
|
||||||
|
@ -614,12 +676,19 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
|
if (payloadOffsets == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
int off = payloadOffsets[nextPos-1];
|
||||||
|
int end = nextPos == payloadOffsets.length ? payloadBytes.length : payloadOffsets[nextPos];
|
||||||
|
if (end - off == 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
payload.bytes = payloadBytes;
|
||||||
@Override
|
payload.offset = off;
|
||||||
public boolean hasPayload() {
|
payload.length = end - off;
|
||||||
return false;
|
return payload;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -106,12 +106,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
private String lastFieldName;
|
private String lastFieldName;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
|
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
|
||||||
assert lastFieldName == null || info.name.compareTo(lastFieldName) > 0: "fieldName=" + info.name + " lastFieldName=" + lastFieldName;
|
assert lastFieldName == null || info.name.compareTo(lastFieldName) > 0: "fieldName=" + info.name + " lastFieldName=" + lastFieldName;
|
||||||
lastFieldName = info.name;
|
lastFieldName = info.name;
|
||||||
this.positions = positions;
|
this.positions = positions;
|
||||||
this.offsets = offsets;
|
this.offsets = offsets;
|
||||||
|
this.payloads = payloads;
|
||||||
lastTerm.length = 0;
|
lastTerm.length = 0;
|
||||||
|
lastPayloadLength = -1; // force first payload to write its length
|
||||||
fps[fieldCount++] = tvf.getFilePointer();
|
fps[fieldCount++] = tvf.getFilePointer();
|
||||||
tvd.writeVInt(info.number);
|
tvd.writeVInt(info.number);
|
||||||
tvf.writeVInt(numTerms);
|
tvf.writeVInt(numTerms);
|
||||||
|
@ -120,6 +122,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
bits |= Lucene40TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
|
bits |= Lucene40TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
|
||||||
if (offsets)
|
if (offsets)
|
||||||
bits |= Lucene40TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
|
bits |= Lucene40TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
|
||||||
|
if (payloads)
|
||||||
|
bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
|
||||||
tvf.writeByte(bits);
|
tvf.writeByte(bits);
|
||||||
|
|
||||||
assert fieldCount <= numVectorFields;
|
assert fieldCount <= numVectorFields;
|
||||||
|
@ -138,10 +142,12 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
// we also don't buffer during bulk merges.
|
// we also don't buffer during bulk merges.
|
||||||
private int offsetStartBuffer[] = new int[10];
|
private int offsetStartBuffer[] = new int[10];
|
||||||
private int offsetEndBuffer[] = new int[10];
|
private int offsetEndBuffer[] = new int[10];
|
||||||
private int offsetIndex = 0;
|
private BytesRef payloadData = new BytesRef(10);
|
||||||
private int offsetFreq = 0;
|
private int bufferedIndex = 0;
|
||||||
|
private int bufferedFreq = 0;
|
||||||
private boolean positions = false;
|
private boolean positions = false;
|
||||||
private boolean offsets = false;
|
private boolean offsets = false;
|
||||||
|
private boolean payloads = false;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startTerm(BytesRef term, int freq) throws IOException {
|
public void startTerm(BytesRef term, int freq) throws IOException {
|
||||||
|
@ -158,20 +164,40 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
// we might need to buffer if its a non-bulk merge
|
// we might need to buffer if its a non-bulk merge
|
||||||
offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
|
offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
|
||||||
offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
|
offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
|
||||||
offsetIndex = 0;
|
|
||||||
offsetFreq = freq;
|
|
||||||
}
|
}
|
||||||
|
bufferedIndex = 0;
|
||||||
|
bufferedFreq = freq;
|
||||||
|
payloadData.length = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastPosition = 0;
|
int lastPosition = 0;
|
||||||
int lastOffset = 0;
|
int lastOffset = 0;
|
||||||
|
int lastPayloadLength = -1; // force first payload to write its length
|
||||||
|
|
||||||
|
BytesRef scratch = new BytesRef(); // used only by this optimized flush below
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
|
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
|
||||||
// TODO: technically we could just copy bytes and not re-encode if we knew the length...
|
if (payloads) {
|
||||||
if (positions != null) {
|
// TODO, maybe overkill and just call super.addProx() in this case?
|
||||||
|
// we do avoid buffering the offsets in RAM though.
|
||||||
for (int i = 0; i < numProx; i++) {
|
for (int i = 0; i < numProx; i++) {
|
||||||
tvf.writeVInt(positions.readVInt());
|
int code = positions.readVInt();
|
||||||
|
if ((code & 1) == 1) {
|
||||||
|
int length = positions.readVInt();
|
||||||
|
scratch.grow(length);
|
||||||
|
scratch.length = length;
|
||||||
|
positions.readBytes(scratch.bytes, scratch.offset, scratch.length);
|
||||||
|
writePosition(code >>> 1, scratch);
|
||||||
|
} else {
|
||||||
|
writePosition(code >>> 1, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
|
||||||
|
} else if (positions != null) {
|
||||||
|
// pure positions, no payloads
|
||||||
|
for (int i = 0; i < numProx; i++) {
|
||||||
|
tvf.writeVInt(positions.readVInt() >>> 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,28 +210,36 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addPosition(int position, int startOffset, int endOffset) throws IOException {
|
public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
|
||||||
if (positions && offsets) {
|
if (positions && (offsets || payloads)) {
|
||||||
// write position delta
|
// write position delta
|
||||||
tvf.writeVInt(position - lastPosition);
|
writePosition(position - lastPosition, payload);
|
||||||
lastPosition = position;
|
lastPosition = position;
|
||||||
|
|
||||||
// buffer offsets
|
// buffer offsets
|
||||||
offsetStartBuffer[offsetIndex] = startOffset;
|
if (offsets) {
|
||||||
offsetEndBuffer[offsetIndex] = endOffset;
|
offsetStartBuffer[bufferedIndex] = startOffset;
|
||||||
offsetIndex++;
|
offsetEndBuffer[bufferedIndex] = endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
bufferedIndex++;
|
||||||
|
|
||||||
// dump buffer if we are done
|
// dump buffer if we are done
|
||||||
if (offsetIndex == offsetFreq) {
|
if (bufferedIndex == bufferedFreq) {
|
||||||
for (int i = 0; i < offsetIndex; i++) {
|
if (payloads) {
|
||||||
|
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < bufferedIndex; i++) {
|
||||||
|
if (offsets) {
|
||||||
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
|
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
|
||||||
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
|
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
|
||||||
lastOffset = offsetEndBuffer[i];
|
lastOffset = offsetEndBuffer[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else if (positions) {
|
} else if (positions) {
|
||||||
// write position delta
|
// write position delta
|
||||||
tvf.writeVInt(position - lastPosition);
|
writePosition(position - lastPosition, payload);
|
||||||
lastPosition = position;
|
lastPosition = position;
|
||||||
} else if (offsets) {
|
} else if (offsets) {
|
||||||
// write offset deltas
|
// write offset deltas
|
||||||
|
@ -215,6 +249,30 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void writePosition(int delta, BytesRef payload) throws IOException {
|
||||||
|
if (payloads) {
|
||||||
|
int payloadLength = payload == null ? 0 : payload.length;
|
||||||
|
|
||||||
|
if (payloadLength != lastPayloadLength) {
|
||||||
|
lastPayloadLength = payloadLength;
|
||||||
|
tvf.writeVInt((delta<<1)|1);
|
||||||
|
tvf.writeVInt(payloadLength);
|
||||||
|
} else {
|
||||||
|
tvf.writeVInt(delta << 1);
|
||||||
|
}
|
||||||
|
if (payloadLength > 0) {
|
||||||
|
if (payloadLength + payloadData.length < 0) {
|
||||||
|
// we overflowed the payload buffer, just throw UOE
|
||||||
|
// having > Integer.MAX_VALUE bytes of payload for a single term in a single doc is nuts.
|
||||||
|
throw new UnsupportedOperationException("A term cannot have more than Integer.MAX_VALUE bytes of payload data in a single document");
|
||||||
|
}
|
||||||
|
payloadData.append(payload);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tvf.writeVInt(delta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void abort() {
|
public void abort() {
|
||||||
try {
|
try {
|
||||||
|
@ -255,7 +313,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
int numDocs = 0;
|
int numDocs = 0;
|
||||||
for (final AtomicReader reader : mergeState.readers) {
|
for (int i = 0; i < mergeState.readers.size(); i++) {
|
||||||
|
final AtomicReader reader = mergeState.readers.get(i);
|
||||||
|
// set PayloadProcessor
|
||||||
|
if (mergeState.payloadProcessorProvider != null) {
|
||||||
|
mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
|
||||||
|
} else {
|
||||||
|
mergeState.currentReaderPayloadProcessor = null;
|
||||||
|
}
|
||||||
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
|
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
|
||||||
Lucene40TermVectorsReader matchingVectorsReader = null;
|
Lucene40TermVectorsReader matchingVectorsReader = null;
|
||||||
if (matchingSegmentReader != null) {
|
if (matchingSegmentReader != null) {
|
||||||
|
@ -288,8 +353,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
final int maxDoc = reader.maxDoc();
|
final int maxDoc = reader.maxDoc();
|
||||||
final Bits liveDocs = reader.getLiveDocs();
|
final Bits liveDocs = reader.getLiveDocs();
|
||||||
int totalNumDocs = 0;
|
int totalNumDocs = 0;
|
||||||
if (matchingVectorsReader != null) {
|
if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
|
||||||
// We can bulk-copy because the fieldInfos are "congruent"
|
// We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
|
||||||
for (int docNum = 0; docNum < maxDoc;) {
|
for (int docNum = 0; docNum < maxDoc;) {
|
||||||
if (!liveDocs.get(docNum)) {
|
if (!liveDocs.get(docNum)) {
|
||||||
// skip deleted docs
|
// skip deleted docs
|
||||||
|
@ -324,7 +389,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
// NOTE: it's very important to first assign to vectors then pass it to
|
// NOTE: it's very important to first assign to vectors then pass it to
|
||||||
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
|
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
|
||||||
Fields vectors = reader.getTermVectors(docNum);
|
Fields vectors = reader.getTermVectors(docNum);
|
||||||
addAllDocVectors(vectors, mergeState.fieldInfos);
|
addAllDocVectors(vectors, mergeState);
|
||||||
totalNumDocs++;
|
totalNumDocs++;
|
||||||
mergeState.checkAbort.work(300);
|
mergeState.checkAbort.work(300);
|
||||||
}
|
}
|
||||||
|
@ -339,8 +404,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
int rawDocLengths2[])
|
int rawDocLengths2[])
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final int maxDoc = reader.maxDoc();
|
final int maxDoc = reader.maxDoc();
|
||||||
if (matchingVectorsReader != null) {
|
if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
|
||||||
// We can bulk-copy because the fieldInfos are "congruent"
|
// We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
|
||||||
int docCount = 0;
|
int docCount = 0;
|
||||||
while (docCount < maxDoc) {
|
while (docCount < maxDoc) {
|
||||||
int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
|
int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
|
||||||
|
@ -354,7 +419,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
// NOTE: it's very important to first assign to vectors then pass it to
|
// NOTE: it's very important to first assign to vectors then pass it to
|
||||||
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
|
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
|
||||||
Fields vectors = reader.getTermVectors(docNum);
|
Fields vectors = reader.getTermVectors(docNum);
|
||||||
addAllDocVectors(vectors, mergeState.fieldInfos);
|
addAllDocVectors(vectors, mergeState);
|
||||||
mergeState.checkAbort.work(300);
|
mergeState.checkAbort.work(300);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -366,7 +366,7 @@ the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
|
||||||
factors need no longer be a single byte, they can be any DocValues
|
factors need no longer be a single byte, they can be any DocValues
|
||||||
{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode
|
{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode
|
||||||
strings, they can be any byte sequence. Term offsets can optionally be indexed
|
strings, they can be any byte sequence. Term offsets can optionally be indexed
|
||||||
into the postings lists.</li>
|
into the postings lists. Payloads can be stored in the term vectors.</li>
|
||||||
</ul>
|
</ul>
|
||||||
<a name="Limitations" id="Limitations"></a>
|
<a name="Limitations" id="Limitations"></a>
|
||||||
<h2>Limitations</h2>
|
<h2>Limitations</h2>
|
||||||
|
|
|
@ -32,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.OrdTermState;
|
import org.apache.lucene.index.OrdTermState;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -44,6 +43,7 @@ import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
import org.apache.lucene.util.automaton.RunAutomaton;
|
import org.apache.lucene.util.automaton.RunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.Transition;
|
import org.apache.lucene.util.automaton.Transition;
|
||||||
|
@ -124,36 +124,14 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
private final Map<String,DirectField> fields = new TreeMap<String,DirectField>();
|
private final Map<String,DirectField> fields = new TreeMap<String,DirectField>();
|
||||||
|
|
||||||
public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff) throws IOException {
|
public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff) throws IOException {
|
||||||
FieldsEnum fieldsEnum = fields.iterator();
|
for (String field : fields) {
|
||||||
String field;
|
this.fields.put(field, new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff));
|
||||||
while ((field = fieldsEnum.next()) != null) {
|
|
||||||
this.fields.put(field, new DirectField(state, field, fieldsEnum.terms(), minSkipCount, lowFreqCutoff));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() {
|
public Iterator<String> iterator() {
|
||||||
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
final Iterator<Map.Entry<String,DirectField>> iter = fields.entrySet().iterator();
|
|
||||||
|
|
||||||
return new FieldsEnum() {
|
|
||||||
Map.Entry<String,DirectField> current;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
if (iter.hasNext()) {
|
|
||||||
current = iter.next();
|
|
||||||
return current.getKey();
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() {
|
|
||||||
return current.getValue();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -348,9 +326,8 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
scratch.add(docsAndPositionsEnum.endOffset());
|
scratch.add(docsAndPositionsEnum.endOffset());
|
||||||
}
|
}
|
||||||
if (hasPayloads) {
|
if (hasPayloads) {
|
||||||
final BytesRef payload;
|
final BytesRef payload = docsAndPositionsEnum.getPayload();
|
||||||
if (docsAndPositionsEnum.hasPayload()) {
|
if (payload != null) {
|
||||||
payload = docsAndPositionsEnum.getPayload();
|
|
||||||
scratch.add(payload.length);
|
scratch.add(payload.length);
|
||||||
ros.writeBytes(payload.bytes, payload.offset, payload.length);
|
ros.writeBytes(payload.bytes, payload.offset, payload.length);
|
||||||
} else {
|
} else {
|
||||||
|
@ -421,9 +398,8 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
for(int pos=0;pos<freq;pos++) {
|
for(int pos=0;pos<freq;pos++) {
|
||||||
positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
|
positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
|
||||||
if (hasPayloads) {
|
if (hasPayloads) {
|
||||||
if (docsAndPositionsEnum.hasPayload()) {
|
|
||||||
BytesRef payload = docsAndPositionsEnum.getPayload();
|
BytesRef payload = docsAndPositionsEnum.getPayload();
|
||||||
assert payload != null;
|
if (payload != null) {
|
||||||
byte[] payloadBytes = new byte[payload.length];
|
byte[] payloadBytes = new byte[payload.length];
|
||||||
System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
|
System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
|
||||||
payloads[upto][pos] = payloadBytes;
|
payloads[upto][pos] = payloadBytes;
|
||||||
|
@ -635,6 +611,21 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return hasOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return hasPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return hasPayloads;
|
||||||
|
}
|
||||||
|
|
||||||
private final class DirectTermsEnum extends TermsEnum {
|
private final class DirectTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
private final BytesRef scratch = new BytesRef();
|
private final BytesRef scratch = new BytesRef();
|
||||||
|
@ -1791,18 +1782,12 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return payloadLength > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
if (payloadLength > 0) {
|
if (payloadLength > 0) {
|
||||||
payload.bytes = payloadBytes;
|
payload.bytes = payloadBytes;
|
||||||
payload.offset = lastPayloadOffset;
|
payload.offset = lastPayloadOffset;
|
||||||
payload.length = payloadLength;
|
payload.length = payloadLength;
|
||||||
payloadLength = 0;
|
|
||||||
return payload;
|
return payload;
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
|
@ -1995,7 +1980,6 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
private int upto;
|
private int upto;
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private int posUpto;
|
private int posUpto;
|
||||||
private boolean gotPayload;
|
|
||||||
private int[] curPositions;
|
private int[] curPositions;
|
||||||
|
|
||||||
public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
|
public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
|
||||||
|
@ -2065,7 +2049,6 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
@Override
|
@Override
|
||||||
public int nextPosition() {
|
public int nextPosition() {
|
||||||
posUpto += posJump;
|
posUpto += posJump;
|
||||||
gotPayload = false;
|
|
||||||
return curPositions[posUpto];
|
return curPositions[posUpto];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2199,21 +2182,22 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return !gotPayload && payloads != null && payloads[upto][posUpto/(hasOffsets ? 3 : 1)] != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final BytesRef payload = new BytesRef();
|
private final BytesRef payload = new BytesRef();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
|
if (payloads == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
|
final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
|
||||||
|
if (payloadBytes == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
payload.bytes = payloadBytes;
|
payload.bytes = payloadBytes;
|
||||||
payload.length = payloadBytes.length;
|
payload.length = payloadBytes.length;
|
||||||
payload.offset = 0;
|
payload.offset = 0;
|
||||||
gotPayload = true;
|
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,6 @@ import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
@ -49,6 +48,7 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
import org.apache.lucene.util.fst.Builder;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||||
|
@ -446,7 +446,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
private int numDocs;
|
private int numDocs;
|
||||||
private int posPending;
|
private int posPending;
|
||||||
private int payloadLength;
|
private int payloadLength;
|
||||||
private boolean payloadRetrieved;
|
|
||||||
final boolean storeOffsets;
|
final boolean storeOffsets;
|
||||||
int offsetLength;
|
int offsetLength;
|
||||||
int startOffset;
|
int startOffset;
|
||||||
|
@ -484,7 +483,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
payloadLength = 0;
|
payloadLength = 0;
|
||||||
this.numDocs = numDocs;
|
this.numDocs = numDocs;
|
||||||
posPending = 0;
|
posPending = 0;
|
||||||
payloadRetrieved = false;
|
|
||||||
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
|
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
|
||||||
offsetLength = 0;
|
offsetLength = 0;
|
||||||
return this;
|
return this;
|
||||||
|
@ -577,10 +575,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
payload.offset = in.getPosition();
|
payload.offset = in.getPosition();
|
||||||
in.skipBytes(payloadLength);
|
in.skipBytes(payloadLength);
|
||||||
payload.length = payloadLength;
|
payload.length = payloadLength;
|
||||||
// Necessary, in case caller changed the
|
|
||||||
// payload.bytes from prior call:
|
|
||||||
payload.bytes = buffer;
|
|
||||||
payloadRetrieved = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.println(" pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
|
//System.out.println(" pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
|
||||||
|
@ -599,13 +593,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
payloadRetrieved = true;
|
return payload.length > 0 ? payload : null;
|
||||||
return payload;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return !payloadRetrieved && payload.length > 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -834,6 +822,21 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return field.hasPayloads();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -859,24 +862,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
return new FieldsProducer() {
|
return new FieldsProducer() {
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() {
|
public Iterator<String> iterator() {
|
||||||
final Iterator<TermsReader> iter = fields.values().iterator();
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
|
|
||||||
return new FieldsEnum() {
|
|
||||||
|
|
||||||
private TermsReader current;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
current = iter.next();
|
|
||||||
return current.field.name;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() {
|
|
||||||
return current;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -30,11 +30,11 @@ import org.apache.lucene.codecs.FieldsProducer;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.TermsConsumer;
|
import org.apache.lucene.codecs.TermsConsumer;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enables per field format support.
|
* Enables per field format support.
|
||||||
|
@ -197,34 +197,9 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final class FieldsIterator extends FieldsEnum {
|
|
||||||
private final Iterator<String> it;
|
|
||||||
private String current;
|
|
||||||
|
|
||||||
public FieldsIterator() {
|
|
||||||
it = fields.keySet().iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String next() {
|
public Iterator<String> iterator() {
|
||||||
if (it.hasNext()) {
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
current = it.next();
|
|
||||||
} else {
|
|
||||||
current = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return current;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() throws IOException {
|
|
||||||
return fields.get(current).terms(current);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FieldsEnum iterator() throws IOException {
|
|
||||||
return new FieldsIterator();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -532,19 +532,13 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return storePayloads && !payloadRetrieved && payloadLength > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() throws IOException {
|
public BytesRef getPayload() throws IOException {
|
||||||
//System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this);
|
//System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this);
|
||||||
if (payloadRetrieved) {
|
if (payloadRetrieved) {
|
||||||
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
|
return payload;
|
||||||
}
|
} else if (storePayloads && payloadLength > 0) {
|
||||||
payloadRetrieved = true;
|
payloadRetrieved = true;
|
||||||
if (payloadLength > 0) {
|
|
||||||
if (payload == null) {
|
if (payload == null) {
|
||||||
payload = new BytesRef(payloadLength);
|
payload = new BytesRef(payloadLength);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -714,7 +714,11 @@ public class SepPostingsReader extends PostingsReaderBase {
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() throws IOException {
|
public BytesRef getPayload() throws IOException {
|
||||||
if (!payloadPending) {
|
if (!payloadPending) {
|
||||||
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pendingPayloadBytes == 0) {
|
||||||
|
return payload;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert pendingPayloadBytes >= payloadLength;
|
assert pendingPayloadBytes >= payloadLength;
|
||||||
|
@ -731,15 +735,9 @@ public class SepPostingsReader extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
payloadIn.readBytes(payload.bytes, 0, payloadLength);
|
payloadIn.readBytes(payload.bytes, 0, payloadLength);
|
||||||
payloadPending = false;
|
|
||||||
payload.length = payloadLength;
|
payload.length = payloadLength;
|
||||||
pendingPayloadBytes = 0;
|
pendingPayloadBytes = 0;
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return payloadPending && payloadLength > 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,14 +20,17 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.FieldsProducer;
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
@ -40,6 +43,7 @@ import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.OpenBitSet;
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
import org.apache.lucene.util.fst.Builder;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
|
@ -48,7 +52,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||||
import org.apache.lucene.util.fst.Util;
|
import org.apache.lucene.util.fst.Util;
|
||||||
|
|
||||||
class SimpleTextFieldsReader extends FieldsProducer {
|
class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
private final TreeMap<String,Long> fields;
|
||||||
private final IndexInput in;
|
private final IndexInput in;
|
||||||
private final FieldInfos fieldInfos;
|
private final FieldInfos fieldInfos;
|
||||||
|
|
||||||
|
@ -66,34 +70,21 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
|
in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
|
||||||
|
|
||||||
fieldInfos = state.fieldInfos;
|
fieldInfos = state.fieldInfos;
|
||||||
|
fields = readFields((IndexInput)in.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SimpleTextFieldsEnum extends FieldsEnum {
|
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
|
||||||
private final IndexInput in;
|
BytesRef scratch = new BytesRef(10);
|
||||||
private final BytesRef scratch = new BytesRef(10);
|
TreeMap<String,Long> fields = new TreeMap<String,Long>();
|
||||||
private String current;
|
|
||||||
|
|
||||||
public SimpleTextFieldsEnum() {
|
while (true) {
|
||||||
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() throws IOException {
|
|
||||||
while(true) {
|
|
||||||
SimpleTextUtil.readLine(in, scratch);
|
SimpleTextUtil.readLine(in, scratch);
|
||||||
if (scratch.equals(END)) {
|
if (scratch.equals(END)) {
|
||||||
current = null;
|
return fields;
|
||||||
return null;
|
} else if (StringHelper.startsWith(scratch, FIELD)) {
|
||||||
|
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
|
||||||
|
fields.put(fieldName, in.getFilePointer());
|
||||||
}
|
}
|
||||||
if (StringHelper.startsWith(scratch, FIELD)) {
|
|
||||||
return current = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() throws IOException {
|
|
||||||
return SimpleTextFieldsReader.this.terms(current);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -471,18 +462,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
// Some tests rely on only being able to retrieve the
|
|
||||||
// payload once
|
|
||||||
try {
|
|
||||||
return payload;
|
return payload;
|
||||||
} finally {
|
|
||||||
payload = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return payload != null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -498,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
private class SimpleTextTerms extends Terms {
|
private class SimpleTextTerms extends Terms {
|
||||||
private final long termsStart;
|
private final long termsStart;
|
||||||
private final IndexOptions indexOptions;
|
private final FieldInfo fieldInfo;
|
||||||
private long sumTotalTermFreq;
|
private long sumTotalTermFreq;
|
||||||
private long sumDocFreq;
|
private long sumDocFreq;
|
||||||
private int docCount;
|
private int docCount;
|
||||||
|
@ -509,7 +489,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
||||||
this.termsStart = termsStart;
|
this.termsStart = termsStart;
|
||||||
indexOptions = fieldInfos.fieldInfo(field).getIndexOptions();
|
fieldInfo = fieldInfos.fieldInfo(field);
|
||||||
loadTerms();
|
loadTerms();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -579,7 +559,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
if (fst != null) {
|
if (fst != null) {
|
||||||
return new SimpleTextTermsEnum(fst, indexOptions);
|
return new SimpleTextTermsEnum(fst, fieldInfo.getIndexOptions());
|
||||||
} else {
|
} else {
|
||||||
return TermsEnum.EMPTY;
|
return TermsEnum.EMPTY;
|
||||||
}
|
}
|
||||||
|
@ -597,7 +577,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
|
return fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -609,11 +589,26 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
return docCount;
|
return docCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() throws IOException {
|
public boolean hasPositions() {
|
||||||
return new SimpleTextFieldsEnum();
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return fieldInfo.hasPayloads();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<String> iterator() {
|
||||||
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
|
private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
|
||||||
|
@ -622,16 +617,14 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
synchronized public Terms terms(String field) throws IOException {
|
synchronized public Terms terms(String field) throws IOException {
|
||||||
Terms terms = termsCache.get(field);
|
Terms terms = termsCache.get(field);
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
|
Long fp = fields.get(field);
|
||||||
String fieldUpto;
|
if (fp == null) {
|
||||||
while((fieldUpto = fe.next()) != null) {
|
return null;
|
||||||
if (fieldUpto.equals(field)) {
|
} else {
|
||||||
terms = new SimpleTextTerms(field, fe.in.getFilePointer());
|
terms = new SimpleTextTerms(field, fp);
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
termsCache.put(field, terms);
|
termsCache.put(field, terms);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return terms;
|
return terms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.FieldsEnum;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -45,6 +44,7 @@ import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
|
import org.apache.lucene.util.UnmodifiableIterator;
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
|
||||||
|
|
||||||
|
@ -126,11 +126,15 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
assert StringHelper.startsWith(scratch, FIELDOFFSETS);
|
assert StringHelper.startsWith(scratch, FIELDOFFSETS);
|
||||||
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
|
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
|
||||||
|
|
||||||
|
readLine();
|
||||||
|
assert StringHelper.startsWith(scratch, FIELDPAYLOADS);
|
||||||
|
boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));
|
||||||
|
|
||||||
readLine();
|
readLine();
|
||||||
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
|
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
|
||||||
int termCount = parseIntAt(FIELDTERMCOUNT.length);
|
int termCount = parseIntAt(FIELDTERMCOUNT.length);
|
||||||
|
|
||||||
SimpleTVTerms terms = new SimpleTVTerms();
|
SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
|
||||||
fields.put(fieldName, terms);
|
fields.put(fieldName, terms);
|
||||||
|
|
||||||
for (int j = 0; j < termCount; j++) {
|
for (int j = 0; j < termCount; j++) {
|
||||||
|
@ -152,6 +156,9 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
if (positions || offsets) {
|
if (positions || offsets) {
|
||||||
if (positions) {
|
if (positions) {
|
||||||
postings.positions = new int[postings.freq];
|
postings.positions = new int[postings.freq];
|
||||||
|
if (payloads) {
|
||||||
|
postings.payloads = new BytesRef[postings.freq];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (offsets) {
|
if (offsets) {
|
||||||
|
@ -164,6 +171,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
readLine();
|
readLine();
|
||||||
assert StringHelper.startsWith(scratch, POSITION);
|
assert StringHelper.startsWith(scratch, POSITION);
|
||||||
postings.positions[k] = parseIntAt(POSITION.length);
|
postings.positions[k] = parseIntAt(POSITION.length);
|
||||||
|
if (payloads) {
|
||||||
|
readLine();
|
||||||
|
assert StringHelper.startsWith(scratch, PAYLOAD);
|
||||||
|
if (scratch.length - PAYLOAD.length == 0) {
|
||||||
|
postings.payloads[k] = null;
|
||||||
|
} else {
|
||||||
|
byte payloadBytes[] = new byte[scratch.length - PAYLOAD.length];
|
||||||
|
System.arraycopy(scratch.bytes, scratch.offset+PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
|
||||||
|
postings.payloads[k] = new BytesRef(payloadBytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (offsets) {
|
if (offsets) {
|
||||||
|
@ -222,26 +240,8 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() throws IOException {
|
public Iterator<String> iterator() {
|
||||||
return new FieldsEnum() {
|
return new UnmodifiableIterator<String>(fields.keySet().iterator());
|
||||||
private Iterator<Map.Entry<String,SimpleTVTerms>> iterator = fields.entrySet().iterator();
|
|
||||||
private Map.Entry<String,SimpleTVTerms> current = null;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
if (!iterator.hasNext()) {
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
current = iterator.next();
|
|
||||||
return current.getKey();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() {
|
|
||||||
return current.getValue();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -257,8 +257,14 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
private static class SimpleTVTerms extends Terms {
|
private static class SimpleTVTerms extends Terms {
|
||||||
final SortedMap<BytesRef,SimpleTVPostings> terms;
|
final SortedMap<BytesRef,SimpleTVPostings> terms;
|
||||||
|
final boolean hasOffsets;
|
||||||
|
final boolean hasPositions;
|
||||||
|
final boolean hasPayloads;
|
||||||
|
|
||||||
SimpleTVTerms() {
|
SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
|
||||||
|
this.hasOffsets = hasOffsets;
|
||||||
|
this.hasPositions = hasPositions;
|
||||||
|
this.hasPayloads = hasPayloads;
|
||||||
terms = new TreeMap<BytesRef,SimpleTVPostings>();
|
terms = new TreeMap<BytesRef,SimpleTVPostings>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,6 +298,21 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return hasOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return hasPositions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return hasPayloads;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SimpleTVPostings {
|
private static class SimpleTVPostings {
|
||||||
|
@ -299,6 +320,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
private int positions[];
|
private int positions[];
|
||||||
private int startOffsets[];
|
private int startOffsets[];
|
||||||
private int endOffsets[];
|
private int endOffsets[];
|
||||||
|
private BytesRef payloads[];
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SimpleTVTermsEnum extends TermsEnum {
|
private static class SimpleTVTermsEnum extends TermsEnum {
|
||||||
|
@ -372,7 +394,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
// TODO: reuse
|
// TODO: reuse
|
||||||
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
|
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
|
||||||
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
|
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -433,6 +455,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
private int nextPos;
|
private int nextPos;
|
||||||
private Bits liveDocs;
|
private Bits liveDocs;
|
||||||
private int[] positions;
|
private int[] positions;
|
||||||
|
private BytesRef[] payloads;
|
||||||
private int[] startOffsets;
|
private int[] startOffsets;
|
||||||
private int[] endOffsets;
|
private int[] endOffsets;
|
||||||
|
|
||||||
|
@ -470,11 +493,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
|
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef payloads[]) {
|
||||||
this.liveDocs = liveDocs;
|
this.liveDocs = liveDocs;
|
||||||
this.positions = positions;
|
this.positions = positions;
|
||||||
this.startOffsets = startOffsets;
|
this.startOffsets = startOffsets;
|
||||||
this.endOffsets = endOffsets;
|
this.endOffsets = endOffsets;
|
||||||
|
this.payloads = payloads;
|
||||||
this.doc = -1;
|
this.doc = -1;
|
||||||
didNext = false;
|
didNext = false;
|
||||||
nextPos = 0;
|
nextPos = 0;
|
||||||
|
@ -482,12 +506,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef getPayload() {
|
public BytesRef getPayload() {
|
||||||
return null;
|
return payloads == null ? null : payloads[nextPos-1];
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -45,10 +45,12 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||||
static final BytesRef FIELDNAME = new BytesRef(" name ");
|
static final BytesRef FIELDNAME = new BytesRef(" name ");
|
||||||
static final BytesRef FIELDPOSITIONS = new BytesRef(" positions ");
|
static final BytesRef FIELDPOSITIONS = new BytesRef(" positions ");
|
||||||
static final BytesRef FIELDOFFSETS = new BytesRef(" offsets ");
|
static final BytesRef FIELDOFFSETS = new BytesRef(" offsets ");
|
||||||
|
static final BytesRef FIELDPAYLOADS = new BytesRef(" payloads ");
|
||||||
static final BytesRef FIELDTERMCOUNT = new BytesRef(" numterms ");
|
static final BytesRef FIELDTERMCOUNT = new BytesRef(" numterms ");
|
||||||
static final BytesRef TERMTEXT = new BytesRef(" term ");
|
static final BytesRef TERMTEXT = new BytesRef(" term ");
|
||||||
static final BytesRef TERMFREQ = new BytesRef(" freq ");
|
static final BytesRef TERMFREQ = new BytesRef(" freq ");
|
||||||
static final BytesRef POSITION = new BytesRef(" position ");
|
static final BytesRef POSITION = new BytesRef(" position ");
|
||||||
|
static final BytesRef PAYLOAD = new BytesRef(" payload ");
|
||||||
static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
|
static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
|
||||||
static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
|
static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
|
||||||
|
|
||||||
|
@ -61,6 +63,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||||
private final BytesRef scratch = new BytesRef();
|
private final BytesRef scratch = new BytesRef();
|
||||||
private boolean offsets;
|
private boolean offsets;
|
||||||
private boolean positions;
|
private boolean positions;
|
||||||
|
private boolean payloads;
|
||||||
|
|
||||||
public SimpleTextTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
public SimpleTextTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
||||||
this.directory = directory;
|
this.directory = directory;
|
||||||
|
@ -89,7 +92,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
|
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
|
||||||
write(FIELD);
|
write(FIELD);
|
||||||
write(Integer.toString(info.number));
|
write(Integer.toString(info.number));
|
||||||
newLine();
|
newLine();
|
||||||
|
@ -106,12 +109,17 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||||
write(Boolean.toString(offsets));
|
write(Boolean.toString(offsets));
|
||||||
newLine();
|
newLine();
|
||||||
|
|
||||||
|
write(FIELDPAYLOADS);
|
||||||
|
write(Boolean.toString(payloads));
|
||||||
|
newLine();
|
||||||
|
|
||||||
write(FIELDTERMCOUNT);
|
write(FIELDTERMCOUNT);
|
||||||
write(Integer.toString(numTerms));
|
write(Integer.toString(numTerms));
|
||||||
newLine();
|
newLine();
|
||||||
|
|
||||||
this.positions = positions;
|
this.positions = positions;
|
||||||
this.offsets = offsets;
|
this.offsets = offsets;
|
||||||
|
this.payloads = payloads;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -126,13 +134,22 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addPosition(int position, int startOffset, int endOffset) throws IOException {
|
public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
|
||||||
assert positions || offsets;
|
assert positions || offsets;
|
||||||
|
|
||||||
if (positions) {
|
if (positions) {
|
||||||
write(POSITION);
|
write(POSITION);
|
||||||
write(Integer.toString(position));
|
write(Integer.toString(position));
|
||||||
newLine();
|
newLine();
|
||||||
|
|
||||||
|
if (payloads) {
|
||||||
|
write(PAYLOAD);
|
||||||
|
if (payload != null) {
|
||||||
|
assert payload.length > 0;
|
||||||
|
write(payload);
|
||||||
|
}
|
||||||
|
newLine();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (offsets) {
|
if (offsets) {
|
||||||
|
|
|
@ -39,6 +39,7 @@ public class FieldType implements IndexableFieldType {
|
||||||
private boolean storeTermVectors;
|
private boolean storeTermVectors;
|
||||||
private boolean storeTermVectorOffsets;
|
private boolean storeTermVectorOffsets;
|
||||||
private boolean storeTermVectorPositions;
|
private boolean storeTermVectorPositions;
|
||||||
|
private boolean storeTermVectorPayloads;
|
||||||
private boolean omitNorms;
|
private boolean omitNorms;
|
||||||
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
private DocValues.Type docValueType;
|
private DocValues.Type docValueType;
|
||||||
|
@ -53,6 +54,7 @@ public class FieldType implements IndexableFieldType {
|
||||||
this.storeTermVectors = ref.storeTermVectors();
|
this.storeTermVectors = ref.storeTermVectors();
|
||||||
this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
|
this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
|
||||||
this.storeTermVectorPositions = ref.storeTermVectorPositions();
|
this.storeTermVectorPositions = ref.storeTermVectorPositions();
|
||||||
|
this.storeTermVectorPayloads = ref.storeTermVectorPayloads();
|
||||||
this.omitNorms = ref.omitNorms();
|
this.omitNorms = ref.omitNorms();
|
||||||
this.indexOptions = ref.indexOptions();
|
this.indexOptions = ref.indexOptions();
|
||||||
this.docValueType = ref.docValueType();
|
this.docValueType = ref.docValueType();
|
||||||
|
@ -132,6 +134,15 @@ public class FieldType implements IndexableFieldType {
|
||||||
this.storeTermVectorPositions = value;
|
this.storeTermVectorPositions = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean storeTermVectorPayloads() {
|
||||||
|
return this.storeTermVectorPayloads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStoreTermVectorPayloads(boolean value) {
|
||||||
|
checkIfFrozen();
|
||||||
|
this.storeTermVectorPayloads = value;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean omitNorms() {
|
public boolean omitNorms() {
|
||||||
return this.omitNorms;
|
return this.omitNorms;
|
||||||
}
|
}
|
||||||
|
@ -198,24 +209,19 @@ public class FieldType implements IndexableFieldType {
|
||||||
result.append(",");
|
result.append(",");
|
||||||
result.append("indexed");
|
result.append("indexed");
|
||||||
if (tokenized()) {
|
if (tokenized()) {
|
||||||
if (result.length() > 0)
|
result.append(",tokenized");
|
||||||
result.append(",");
|
|
||||||
result.append("tokenized");
|
|
||||||
}
|
}
|
||||||
if (storeTermVectors()) {
|
if (storeTermVectors()) {
|
||||||
if (result.length() > 0)
|
result.append(",termVector");
|
||||||
result.append(",");
|
|
||||||
result.append("termVector");
|
|
||||||
}
|
}
|
||||||
if (storeTermVectorOffsets()) {
|
if (storeTermVectorOffsets()) {
|
||||||
if (result.length() > 0)
|
result.append(",termVectorOffsets");
|
||||||
result.append(",");
|
|
||||||
result.append("termVectorOffsets");
|
|
||||||
}
|
}
|
||||||
if (storeTermVectorPositions()) {
|
if (storeTermVectorPositions()) {
|
||||||
if (result.length() > 0)
|
result.append(",termVectorPosition");
|
||||||
result.append(",");
|
if (storeTermVectorPayloads()) {
|
||||||
result.append("termVectorPosition");
|
result.append(",termVectorPayloads");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (omitNorms()) {
|
if (omitNorms()) {
|
||||||
result.append(",omitNorms");
|
result.append(",omitNorms");
|
||||||
|
@ -232,7 +238,9 @@ public class FieldType implements IndexableFieldType {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (docValueType != null) {
|
if (docValueType != null) {
|
||||||
result.append(",docValueType=");
|
if (result.length() > 0)
|
||||||
|
result.append(",");
|
||||||
|
result.append("docValueType=");
|
||||||
result.append(docValueType);
|
result.append(docValueType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -685,12 +685,7 @@ public class CheckIndex {
|
||||||
DocsAndPositionsEnum postings = null;
|
DocsAndPositionsEnum postings = null;
|
||||||
|
|
||||||
String lastField = null;
|
String lastField = null;
|
||||||
final FieldsEnum fieldsEnum = fields.iterator();
|
for (String field : fields) {
|
||||||
while(true) {
|
|
||||||
final String field = fieldsEnum.next();
|
|
||||||
if (field == null) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// MultiFieldsEnum relies upon this order...
|
// MultiFieldsEnum relies upon this order...
|
||||||
if (lastField != null && field.compareTo(lastField) <= 0) {
|
if (lastField != null && field.compareTo(lastField) <= 0) {
|
||||||
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
|
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
|
||||||
|
@ -713,11 +708,16 @@ public class CheckIndex {
|
||||||
// assert fields.terms(field) != null;
|
// assert fields.terms(field) != null;
|
||||||
computedFieldCount++;
|
computedFieldCount++;
|
||||||
|
|
||||||
final Terms terms = fieldsEnum.terms();
|
final Terms terms = fields.terms(field);
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final boolean hasPositions = terms.hasPositions();
|
||||||
|
final boolean hasOffsets = terms.hasOffsets();
|
||||||
|
// term vectors cannot omit TF
|
||||||
|
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||||
|
|
||||||
final TermsEnum termsEnum = terms.iterator(null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
|
||||||
boolean hasOrd = true;
|
boolean hasOrd = true;
|
||||||
|
@ -777,17 +777,10 @@ public class CheckIndex {
|
||||||
status.termCount++;
|
status.termCount++;
|
||||||
|
|
||||||
final DocsEnum docs2;
|
final DocsEnum docs2;
|
||||||
final boolean hasPositions;
|
|
||||||
// if we are checking vectors, we have freqs implicitly
|
|
||||||
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
|
||||||
// if we are checking vectors, offsets are a free-for-all anyway
|
|
||||||
final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
|
||||||
if (postings != null) {
|
if (postings != null) {
|
||||||
docs2 = postings;
|
docs2 = postings;
|
||||||
hasPositions = true;
|
|
||||||
} else {
|
} else {
|
||||||
docs2 = docs;
|
docs2 = docs;
|
||||||
hasPositions = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastDoc = -1;
|
int lastDoc = -1;
|
||||||
|
@ -824,22 +817,17 @@ public class CheckIndex {
|
||||||
if (hasPositions) {
|
if (hasPositions) {
|
||||||
for(int j=0;j<freq;j++) {
|
for(int j=0;j<freq;j++) {
|
||||||
final int pos = postings.nextPosition();
|
final int pos = postings.nextPosition();
|
||||||
// NOTE: pos=-1 is allowed because of ancient bug
|
|
||||||
// (LUCENE-1542) whereby IndexWriter could
|
if (pos < 0) {
|
||||||
// write pos=-1 when first token's posInc is 0
|
|
||||||
// (separately: analyzers should not give
|
|
||||||
// posInc=0 to first token); also, term
|
|
||||||
// vectors are allowed to return pos=-1 if
|
|
||||||
// they indexed offset but not positions:
|
|
||||||
if (pos < -1) {
|
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
|
||||||
}
|
}
|
||||||
if (pos < lastPos) {
|
if (pos < lastPos) {
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
|
||||||
}
|
}
|
||||||
lastPos = pos;
|
lastPos = pos;
|
||||||
if (postings.hasPayload()) {
|
BytesRef payload = postings.getPayload();
|
||||||
postings.getPayload();
|
if (payload != null && payload.length < 1) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.length);
|
||||||
}
|
}
|
||||||
if (hasOffsets) {
|
if (hasOffsets) {
|
||||||
int startOffset = postings.startOffset();
|
int startOffset = postings.startOffset();
|
||||||
|
@ -924,14 +912,8 @@ public class CheckIndex {
|
||||||
int lastOffset = 0;
|
int lastOffset = 0;
|
||||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||||
final int pos = postings.nextPosition();
|
final int pos = postings.nextPosition();
|
||||||
// NOTE: pos=-1 is allowed because of ancient bug
|
|
||||||
// (LUCENE-1542) whereby IndexWriter could
|
if (pos < 0) {
|
||||||
// write pos=-1 when first token's posInc is 0
|
|
||||||
// (separately: analyzers should not give
|
|
||||||
// posInc=0 to first token); also, term
|
|
||||||
// vectors are allowed to return pos=-1 if
|
|
||||||
// they indexed offset but not positions:
|
|
||||||
if (pos < -1) {
|
|
||||||
throw new RuntimeException("position " + pos + " is out of bounds");
|
throw new RuntimeException("position " + pos + " is out of bounds");
|
||||||
}
|
}
|
||||||
if (pos < lastPosition) {
|
if (pos < lastPosition) {
|
||||||
|
@ -1000,11 +982,7 @@ public class CheckIndex {
|
||||||
// only happen if it's a ghost field (field with
|
// only happen if it's a ghost field (field with
|
||||||
// no terms, eg there used to be terms but all
|
// no terms, eg there used to be terms but all
|
||||||
// docs got deleted and then merged away):
|
// docs got deleted and then merged away):
|
||||||
// make sure TermsEnum is empty:
|
|
||||||
final Terms fieldTerms2 = fieldsEnum.terms();
|
|
||||||
if (fieldTerms2 != null && fieldTerms2.iterator(null).next() != null) {
|
|
||||||
throw new RuntimeException("Fields.terms(field=" + field + ") returned null yet the field appears to have terms");
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
|
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
|
||||||
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
|
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
|
||||||
|
@ -1415,9 +1393,7 @@ public class CheckIndex {
|
||||||
status.docCount++;
|
status.docCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldsEnum fieldsEnum = tfv.iterator();
|
for(String field : tfv) {
|
||||||
String field = null;
|
|
||||||
while((field = fieldsEnum.next()) != null) {
|
|
||||||
if (doStats) {
|
if (doStats) {
|
||||||
status.totVectors++;
|
status.totVectors++;
|
||||||
}
|
}
|
||||||
|
@ -1432,6 +1408,8 @@ public class CheckIndex {
|
||||||
Terms terms = tfv.terms(field);
|
Terms terms = tfv.terms(field);
|
||||||
termsEnum = terms.iterator(termsEnum);
|
termsEnum = terms.iterator(termsEnum);
|
||||||
final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||||
|
final boolean postingsHasPayload = fieldInfo.hasPayloads();
|
||||||
|
final boolean vectorsHasPayload = terms.hasPayloads();
|
||||||
|
|
||||||
Terms postingsTerms = postingsFields.terms(field);
|
Terms postingsTerms = postingsFields.terms(field);
|
||||||
if (postingsTerms == null) {
|
if (postingsTerms == null) {
|
||||||
|
@ -1439,19 +1417,18 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
|
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
|
||||||
|
|
||||||
|
final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
|
||||||
BytesRef term = null;
|
BytesRef term = null;
|
||||||
while ((term = termsEnum.next()) != null) {
|
while ((term = termsEnum.next()) != null) {
|
||||||
|
|
||||||
final boolean hasProx;
|
if (hasProx) {
|
||||||
|
|
||||||
// Try positions:
|
|
||||||
postings = termsEnum.docsAndPositions(null, postings);
|
postings = termsEnum.docsAndPositions(null, postings);
|
||||||
if (postings == null) {
|
assert postings != null;
|
||||||
hasProx = false;
|
docs = null;
|
||||||
// Try docIDs & freqs:
|
|
||||||
docs = termsEnum.docs(null, docs);
|
|
||||||
} else {
|
} else {
|
||||||
hasProx = true;
|
docs = termsEnum.docs(null, docs);
|
||||||
|
assert docs != null;
|
||||||
|
postings = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final DocsEnum docs2;
|
final DocsEnum docs2;
|
||||||
|
@ -1504,7 +1481,7 @@ public class CheckIndex {
|
||||||
int pos = postings.nextPosition();
|
int pos = postings.nextPosition();
|
||||||
if (postingsPostings != null) {
|
if (postingsPostings != null) {
|
||||||
int postingsPos = postingsPostings.nextPosition();
|
int postingsPos = postingsPostings.nextPosition();
|
||||||
if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
|
if (terms.hasPositions() && pos != postingsPos) {
|
||||||
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
|
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1535,6 +1512,34 @@ public class CheckIndex {
|
||||||
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
|
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BytesRef payload = postings.getPayload();
|
||||||
|
|
||||||
|
if (payload != null) {
|
||||||
|
assert vectorsHasPayload;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (postingsHasPayload && vectorsHasPayload) {
|
||||||
|
assert postingsPostings != null;
|
||||||
|
|
||||||
|
if (payload == null) {
|
||||||
|
// we have payloads, but not at this position.
|
||||||
|
// postings has payloads too, it should not have one at this position
|
||||||
|
if (postingsPostings.getPayload() != null) {
|
||||||
|
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.getPayload());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// we have payloads, and one at this position
|
||||||
|
// postings should also have one at this position, with the same bytes.
|
||||||
|
if (postingsPostings.getPayload() == null) {
|
||||||
|
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
|
||||||
|
}
|
||||||
|
BytesRef postingsPayload = postingsPostings.getPayload();
|
||||||
|
if (!payload.equals(postingsPayload)) {
|
||||||
|
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.MergedIterator;
|
||||||
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
|
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
|
||||||
|
|
||||||
class CoalescedDeletes {
|
class CoalescedDeletes {
|
||||||
|
@ -48,13 +48,14 @@ class CoalescedDeletes {
|
||||||
|
|
||||||
public Iterable<Term> termsIterable() {
|
public Iterable<Term> termsIterable() {
|
||||||
return new Iterable<Term>() {
|
return new Iterable<Term>() {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public Iterator<Term> iterator() {
|
public Iterator<Term> iterator() {
|
||||||
ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
|
Iterator<Term> subs[] = new Iterator[iterables.size()];
|
||||||
for (Iterable<Term> iterable : iterables) {
|
for (int i = 0; i < iterables.size(); i++) {
|
||||||
subs.add(iterable.iterator());
|
subs[i] = iterables.get(i).iterator();
|
||||||
}
|
}
|
||||||
return mergedIterator(subs);
|
return new MergedIterator<Term>(subs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -86,106 +87,4 @@ class CoalescedDeletes {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/** provides a merged view across multiple iterators */
|
|
||||||
static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
|
|
||||||
return new Iterator<Term>() {
|
|
||||||
Term current;
|
|
||||||
TermMergeQueue queue = new TermMergeQueue(iterators.size());
|
|
||||||
SubIterator[] top = new SubIterator[iterators.size()];
|
|
||||||
int numTop;
|
|
||||||
|
|
||||||
{
|
|
||||||
int index = 0;
|
|
||||||
for (Iterator<Term> iterator : iterators) {
|
|
||||||
if (iterator.hasNext()) {
|
|
||||||
SubIterator sub = new SubIterator();
|
|
||||||
sub.current = iterator.next();
|
|
||||||
sub.iterator = iterator;
|
|
||||||
sub.index = index++;
|
|
||||||
queue.add(sub);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
if (queue.size() > 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < numTop; i++) {
|
|
||||||
if (top[i].iterator.hasNext()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Term next() {
|
|
||||||
// restore queue
|
|
||||||
pushTop();
|
|
||||||
|
|
||||||
// gather equal top fields
|
|
||||||
if (queue.size() > 0) {
|
|
||||||
pullTop();
|
|
||||||
} else {
|
|
||||||
current = null;
|
|
||||||
}
|
|
||||||
return current;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void pullTop() {
|
|
||||||
// extract all subs from the queue that have the same top term
|
|
||||||
assert numTop == 0;
|
|
||||||
while (true) {
|
|
||||||
top[numTop++] = queue.pop();
|
|
||||||
if (queue.size() == 0
|
|
||||||
|| !(queue.top()).current.equals(top[0].current)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
current = top[0].current;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void pushTop() {
|
|
||||||
// call next() on each top, and put back into queue
|
|
||||||
for (int i = 0; i < numTop; i++) {
|
|
||||||
if (top[i].iterator.hasNext()) {
|
|
||||||
top[i].current = top[i].iterator.next();
|
|
||||||
queue.add(top[i]);
|
|
||||||
} else {
|
|
||||||
// no more terms
|
|
||||||
top[i].current = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
numTop = 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class SubIterator {
|
|
||||||
Iterator<Term> iterator;
|
|
||||||
Term current;
|
|
||||||
int index;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class TermMergeQueue extends PriorityQueue<SubIterator> {
|
|
||||||
TermMergeQueue(int size) {
|
|
||||||
super(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean lessThan(SubIterator a, SubIterator b) {
|
|
||||||
final int cmp = a.current.compareTo(b.current);
|
|
||||||
if (cmp != 0) {
|
|
||||||
return cmp < 0;
|
|
||||||
} else {
|
|
||||||
return a.index < b.index;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,7 +105,7 @@ public abstract class DocValues implements Closeable {
|
||||||
* <p>
|
* <p>
|
||||||
* {@link Source} instances obtained from this method are closed / released
|
* {@link Source} instances obtained from this method are closed / released
|
||||||
* from the cache once this {@link DocValues} instance is closed by the
|
* from the cache once this {@link DocValues} instance is closed by the
|
||||||
* {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
|
* {@link IndexReader}, {@link Fields} or the
|
||||||
* {@link DocValues} was created from.
|
* {@link DocValues} was created from.
|
||||||
*/
|
*/
|
||||||
public Source getSource() throws IOException {
|
public Source getSource() throws IOException {
|
||||||
|
|
|
@ -48,11 +48,8 @@ public abstract class DocsAndPositionsEnum extends DocsEnum {
|
||||||
public abstract int endOffset() throws IOException;
|
public abstract int endOffset() throws IOException;
|
||||||
|
|
||||||
/** Returns the payload at this position, or null if no
|
/** Returns the payload at this position, or null if no
|
||||||
* payload was indexed. Only call this once per
|
* payload was indexed. You should not modify anything
|
||||||
* position. You should not modify anything (neither
|
* (neither members of the returned BytesRef nor bytes
|
||||||
* members of the returned BytesRef nor bytes in the
|
* in the byte[]). */
|
||||||
* byte[]). */
|
|
||||||
public abstract BytesRef getPayload() throws IOException;
|
public abstract BytesRef getPayload() throws IOException;
|
||||||
|
|
||||||
public abstract boolean hasPayload();
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,15 +18,16 @@ package org.apache.lucene.index;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
/** Flex API for access to fields and terms
|
/** Flex API for access to fields and terms
|
||||||
* @lucene.experimental */
|
* @lucene.experimental */
|
||||||
|
|
||||||
public abstract class Fields {
|
public abstract class Fields implements Iterable<String> {
|
||||||
|
|
||||||
/** Returns an iterator that will step through all fields
|
/** Returns an iterator that will step through all fields
|
||||||
* names. This will not return null. */
|
* names. This will not return null. */
|
||||||
public abstract FieldsEnum iterator() throws IOException;
|
public abstract Iterator<String> iterator();
|
||||||
|
|
||||||
/** Get the {@link Terms} for this field. This will return
|
/** Get the {@link Terms} for this field. This will return
|
||||||
* null if the field does not exist. */
|
* null if the field does not exist. */
|
||||||
|
@ -45,12 +46,7 @@ public abstract class Fields {
|
||||||
// TODO: deprecate?
|
// TODO: deprecate?
|
||||||
public long getUniqueTermCount() throws IOException {
|
public long getUniqueTermCount() throws IOException {
|
||||||
long numTerms = 0;
|
long numTerms = 0;
|
||||||
FieldsEnum it = iterator();
|
for (String field : this) {
|
||||||
while(true) {
|
|
||||||
String field = it.next();
|
|
||||||
if (field == null) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Terms terms = terms(field);
|
Terms terms = terms(field);
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
final long termCount = terms.size();
|
final long termCount = terms.size();
|
||||||
|
|
|
@ -1,79 +0,0 @@
|
||||||
package org.apache.lucene.index;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
|
|
||||||
/** Enumerates indexed fields. You must first call {@link
|
|
||||||
* #next} before calling {@link #terms}.
|
|
||||||
*
|
|
||||||
* @lucene.experimental */
|
|
||||||
|
|
||||||
public abstract class FieldsEnum {
|
|
||||||
|
|
||||||
// TODO: maybe allow retrieving FieldInfo for current
|
|
||||||
// field, as optional method?
|
|
||||||
|
|
||||||
private AttributeSource atts = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the related attributes.
|
|
||||||
*/
|
|
||||||
public AttributeSource attributes() {
|
|
||||||
if (atts == null) {
|
|
||||||
atts = new AttributeSource();
|
|
||||||
}
|
|
||||||
return atts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Increments the enumeration to the next field. Returns
|
|
||||||
* null when there are no more fields.*/
|
|
||||||
public abstract String next() throws IOException;
|
|
||||||
|
|
||||||
// TODO: would be nice to require/fix all impls so they
|
|
||||||
// never return null here... we have to fix the writers to
|
|
||||||
// never write 0-terms fields... or maybe allow a non-null
|
|
||||||
// Terms instance in just this case
|
|
||||||
|
|
||||||
/** Get {@link Terms} for the current field. After {@link #next} returns
|
|
||||||
* null this method should not be called. This method may
|
|
||||||
* return null in some cases, which means the provided
|
|
||||||
* field does not have any terms. */
|
|
||||||
public abstract Terms terms() throws IOException;
|
|
||||||
|
|
||||||
// TODO: should we allow pulling Terms as well? not just
|
|
||||||
// the iterator?
|
|
||||||
|
|
||||||
public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0];
|
|
||||||
|
|
||||||
/** Provides zero fields */
|
|
||||||
public final static FieldsEnum EMPTY = new FieldsEnum() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String next() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms() {
|
|
||||||
throw new IllegalStateException("this method should never be called");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
/** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
|
/** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
|
||||||
* uses as its basic source of data, possibly transforming the data along the
|
* uses as its basic source of data, possibly transforming the data along the
|
||||||
|
@ -46,7 +47,7 @@ public class FilterAtomicReader extends AtomicReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsEnum iterator() throws IOException {
|
public Iterator<String> iterator() {
|
||||||
return in.iterator();
|
return in.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,28 +110,20 @@ public class FilterAtomicReader extends AtomicReader {
|
||||||
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
|
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
|
||||||
return in.intersect(automaton, bytes);
|
return in.intersect(automaton, bytes);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/** Base class for filtering {@link TermsEnum} implementations. */
|
@Override
|
||||||
public static class FilterFieldsEnum extends FieldsEnum {
|
public boolean hasOffsets() {
|
||||||
protected final FieldsEnum in;
|
return in.hasOffsets();
|
||||||
public FilterFieldsEnum(FieldsEnum in) {
|
|
||||||
this.in = in;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String next() throws IOException {
|
public boolean hasPositions() {
|
||||||
return in.next();
|
return in.hasPositions();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Terms terms() throws IOException {
|
public boolean hasPayloads() {
|
||||||
return in.terms();
|
return in.hasPayloads();
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AttributeSource attributes() {
|
|
||||||
return in.attributes();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,11 +286,6 @@ public class FilterAtomicReader extends AtomicReader {
|
||||||
return in.getPayload();
|
return in.getPayload();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasPayload() {
|
|
||||||
return in.hasPayload();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AttributeSource attributes() {
|
public AttributeSource attributes() {
|
||||||
return in.attributes();
|
return in.attributes();
|
||||||
|
|
|
@ -173,7 +173,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
postings.lastDocCodes[termID] = docState.docID;
|
postings.lastDocCodes[termID] = docState.docID;
|
||||||
} else {
|
} else {
|
||||||
postings.lastDocCodes[termID] = docState.docID << 1;
|
postings.lastDocCodes[termID] = docState.docID << 1;
|
||||||
postings.docFreqs[termID] = 1;
|
postings.termFreqs[termID] = 1;
|
||||||
if (hasProx) {
|
if (hasProx) {
|
||||||
writeProx(termID, fieldState.position);
|
writeProx(termID, fieldState.position);
|
||||||
if (hasOffsets) {
|
if (hasOffsets) {
|
||||||
|
@ -194,10 +194,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
|
|
||||||
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
|
||||||
|
|
||||||
assert !hasFreq || postings.docFreqs[termID] > 0;
|
assert !hasFreq || postings.termFreqs[termID] > 0;
|
||||||
|
|
||||||
if (!hasFreq) {
|
if (!hasFreq) {
|
||||||
assert postings.docFreqs == null;
|
assert postings.termFreqs == null;
|
||||||
if (docState.docID != postings.lastDocIDs[termID]) {
|
if (docState.docID != postings.lastDocIDs[termID]) {
|
||||||
assert docState.docID > postings.lastDocIDs[termID];
|
assert docState.docID > postings.lastDocIDs[termID];
|
||||||
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
||||||
|
@ -212,13 +212,13 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
|
|
||||||
// Now that we know doc freq for previous doc,
|
// Now that we know doc freq for previous doc,
|
||||||
// write it & lastDocCode
|
// write it & lastDocCode
|
||||||
if (1 == postings.docFreqs[termID]) {
|
if (1 == postings.termFreqs[termID]) {
|
||||||
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
|
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
|
||||||
} else {
|
} else {
|
||||||
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
||||||
termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
|
termsHashPerField.writeVInt(0, postings.termFreqs[termID]);
|
||||||
}
|
}
|
||||||
postings.docFreqs[termID] = 1;
|
postings.termFreqs[termID] = 1;
|
||||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||||
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
||||||
postings.lastDocIDs[termID] = docState.docID;
|
postings.lastDocIDs[termID] = docState.docID;
|
||||||
|
@ -233,7 +233,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
}
|
}
|
||||||
fieldState.uniqueTermCount++;
|
fieldState.uniqueTermCount++;
|
||||||
} else {
|
} else {
|
||||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
|
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
|
||||||
if (hasProx) {
|
if (hasProx) {
|
||||||
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
||||||
}
|
}
|
||||||
|
@ -252,7 +252,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
|
public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
|
||||||
super(size);
|
super(size);
|
||||||
if (writeFreqs) {
|
if (writeFreqs) {
|
||||||
docFreqs = new int[size];
|
termFreqs = new int[size];
|
||||||
}
|
}
|
||||||
lastDocIDs = new int[size];
|
lastDocIDs = new int[size];
|
||||||
lastDocCodes = new int[size];
|
lastDocCodes = new int[size];
|
||||||
|
@ -267,7 +267,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
//System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
|
//System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
int docFreqs[]; // # times this term occurs in the current doc
|
int termFreqs[]; // # times this term occurs in the current doc
|
||||||
int lastDocIDs[]; // Last docID where this term occurred
|
int lastDocIDs[]; // Last docID where this term occurred
|
||||||
int lastDocCodes[]; // Code for prior doc
|
int lastDocCodes[]; // Code for prior doc
|
||||||
int lastPositions[]; // Last position where this term occurred
|
int lastPositions[]; // Last position where this term occurred
|
||||||
|
@ -275,7 +275,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
ParallelPostingsArray newInstance(int size) {
|
ParallelPostingsArray newInstance(int size) {
|
||||||
return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
|
return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -295,9 +295,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
assert to.lastOffsets != null;
|
assert to.lastOffsets != null;
|
||||||
System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
|
System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
|
||||||
}
|
}
|
||||||
if (docFreqs != null) {
|
if (termFreqs != null) {
|
||||||
assert to.docFreqs != null;
|
assert to.termFreqs != null;
|
||||||
System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
|
System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,7 +310,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
if (lastOffsets != null) {
|
if (lastOffsets != null) {
|
||||||
bytes += RamUsageEstimator.NUM_BYTES_INT;
|
bytes += RamUsageEstimator.NUM_BYTES_INT;
|
||||||
}
|
}
|
||||||
if (docFreqs != null) {
|
if (termFreqs != null) {
|
||||||
bytes += RamUsageEstimator.NUM_BYTES_INT;
|
bytes += RamUsageEstimator.NUM_BYTES_INT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -416,21 +416,21 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
// Now termStates has numToMerge FieldMergeStates
|
// Now termStates has numToMerge FieldMergeStates
|
||||||
// which all share the same term. Now we must
|
// which all share the same term. Now we must
|
||||||
// interleave the docID streams.
|
// interleave the docID streams.
|
||||||
int numDocs = 0;
|
int docFreq = 0;
|
||||||
long totTF = 0;
|
long totTF = 0;
|
||||||
int docID = 0;
|
int docID = 0;
|
||||||
|
|
||||||
while(true) {
|
while(true) {
|
||||||
//System.out.println(" cycle");
|
//System.out.println(" cycle");
|
||||||
final int termDocFreq;
|
final int termFreq;
|
||||||
if (freq.eof()) {
|
if (freq.eof()) {
|
||||||
if (postings.lastDocCodes[termID] != -1) {
|
if (postings.lastDocCodes[termID] != -1) {
|
||||||
// Return last doc
|
// Return last doc
|
||||||
docID = postings.lastDocIDs[termID];
|
docID = postings.lastDocIDs[termID];
|
||||||
if (readTermFreq) {
|
if (readTermFreq) {
|
||||||
termDocFreq = postings.docFreqs[termID];
|
termFreq = postings.termFreqs[termID];
|
||||||
} else {
|
} else {
|
||||||
termDocFreq = -1;
|
termFreq = -1;
|
||||||
}
|
}
|
||||||
postings.lastDocCodes[termID] = -1;
|
postings.lastDocCodes[termID] = -1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -441,20 +441,20 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
final int code = freq.readVInt();
|
final int code = freq.readVInt();
|
||||||
if (!readTermFreq) {
|
if (!readTermFreq) {
|
||||||
docID += code;
|
docID += code;
|
||||||
termDocFreq = -1;
|
termFreq = -1;
|
||||||
} else {
|
} else {
|
||||||
docID += code >>> 1;
|
docID += code >>> 1;
|
||||||
if ((code & 1) != 0) {
|
if ((code & 1) != 0) {
|
||||||
termDocFreq = 1;
|
termFreq = 1;
|
||||||
} else {
|
} else {
|
||||||
termDocFreq = freq.readVInt();
|
termFreq = freq.readVInt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert docID != postings.lastDocIDs[termID];
|
assert docID != postings.lastDocIDs[termID];
|
||||||
}
|
}
|
||||||
|
|
||||||
numDocs++;
|
docFreq++;
|
||||||
assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
|
assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
|
||||||
|
|
||||||
// NOTE: we could check here if the docID was
|
// NOTE: we could check here if the docID was
|
||||||
|
@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
// 2nd sweep does the real flush, but I suspect
|
// 2nd sweep does the real flush, but I suspect
|
||||||
// that'd add too much time to flush.
|
// that'd add too much time to flush.
|
||||||
visitedDocs.set(docID);
|
visitedDocs.set(docID);
|
||||||
postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
|
postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
|
||||||
if (docID < delDocLimit) {
|
if (docID < delDocLimit) {
|
||||||
// Mark it deleted. TODO: we could also skip
|
// Mark it deleted. TODO: we could also skip
|
||||||
// writing its postings; this would be
|
// writing its postings; this would be
|
||||||
|
@ -485,7 +485,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
totTF += termDocFreq;
|
totTF += termFreq;
|
||||||
|
|
||||||
// Carefully copy over the prox + payload info,
|
// Carefully copy over the prox + payload info,
|
||||||
// changing the format to match Lucene's segment
|
// changing the format to match Lucene's segment
|
||||||
|
@ -495,7 +495,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
// we did record positions (& maybe payload) and/or offsets
|
// we did record positions (& maybe payload) and/or offsets
|
||||||
int position = 0;
|
int position = 0;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for(int j=0;j<termDocFreq;j++) {
|
for(int j=0;j<termFreq;j++) {
|
||||||
final BytesRef thisPayload;
|
final BytesRef thisPayload;
|
||||||
|
|
||||||
if (readPositions) {
|
if (readPositions) {
|
||||||
|
@ -542,9 +542,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
}
|
}
|
||||||
postingsConsumer.finishDoc();
|
postingsConsumer.finishDoc();
|
||||||
}
|
}
|
||||||
termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
|
termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totTF : -1));
|
||||||
sumTotalTermFreq += totTF;
|
sumTotalTermFreq += totTF;
|
||||||
sumDocFreq += numDocs;
|
sumDocFreq += docFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
|
termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
|
||||||
|
|
|
@ -2312,9 +2312,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
}
|
}
|
||||||
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
||||||
sis.read(dir);
|
sis.read(dir);
|
||||||
final Set<String> dsFilesCopied = new HashSet<String>();
|
|
||||||
final Map<String, String> dsNames = new HashMap<String, String>();
|
|
||||||
final Set<String> copiedFiles = new HashSet<String>();
|
|
||||||
for (SegmentInfoPerCommit info : sis) {
|
for (SegmentInfoPerCommit info : sis) {
|
||||||
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
|
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
|
||||||
|
|
||||||
|
@ -2327,7 +2325,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
|
|
||||||
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
|
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
|
||||||
|
|
||||||
infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles));
|
infos.add(copySegmentAsIs(info, newSegName, context));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2463,24 +2461,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Copies the segment files as-is into the IndexWriter's directory. */
|
/** Copies the segment files as-is into the IndexWriter's directory. */
|
||||||
// TODO: this can be substantially simplified now that 3.x support/shared docstores is removed!
|
private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, IOContext context)
|
||||||
private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName,
|
|
||||||
Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context,
|
|
||||||
Set<String> copiedFiles)
|
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// Determine if the doc store of this segment needs to be copied. It's
|
|
||||||
// only relevant for segments that share doc store with others,
|
|
||||||
// because the DS might have been copied already, in which case we
|
|
||||||
// just want to update the DS name of this SegmentInfo.
|
|
||||||
final String dsName = info.info.name;
|
|
||||||
assert dsName != null;
|
|
||||||
final String newDsName;
|
|
||||||
if (dsNames.containsKey(dsName)) {
|
|
||||||
newDsName = dsNames.get(dsName);
|
|
||||||
} else {
|
|
||||||
dsNames.put(dsName, segName);
|
|
||||||
newDsName = segName;
|
|
||||||
}
|
|
||||||
|
|
||||||
// note: we don't really need this fis (its copied), but we load it up
|
// note: we don't really need this fis (its copied), but we load it up
|
||||||
// so we don't pass a null value to the si writer
|
// so we don't pass a null value to the si writer
|
||||||
|
@ -2496,7 +2478,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
|
//System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
|
||||||
// Same SI as before but we change directory, name and docStoreSegment:
|
// Same SI as before but we change directory and name
|
||||||
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
|
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
|
||||||
info.info.getUseCompoundFile(),
|
info.info.getUseCompoundFile(),
|
||||||
info.info.getCodec(), info.info.getDiagnostics(), attributes);
|
info.info.getCodec(), info.info.getDiagnostics(), attributes);
|
||||||
|
@ -2513,16 +2495,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
}
|
}
|
||||||
newInfo.setFiles(segFiles);
|
newInfo.setFiles(segFiles);
|
||||||
|
|
||||||
// We must rewrite the SI file because it references
|
// We must rewrite the SI file because it references segment name in its list of files, etc
|
||||||
// segment name (its own name, if its 3.x, and doc
|
|
||||||
// store segment name):
|
|
||||||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
|
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
|
||||||
try {
|
|
||||||
newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
|
newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
|
||||||
} catch (UnsupportedOperationException uoe) {
|
|
||||||
// OK: 3x codec cannot write a new SI file;
|
|
||||||
// SegmentInfos will write this on commit
|
|
||||||
}
|
|
||||||
|
|
||||||
final Collection<String> siFiles = trackingDir.getCreatedFiles();
|
final Collection<String> siFiles = trackingDir.getCreatedFiles();
|
||||||
|
|
||||||
|
@ -2537,8 +2513,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
}
|
}
|
||||||
|
|
||||||
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
|
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
|
||||||
assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once";
|
|
||||||
copiedFiles.add(file);
|
|
||||||
info.info.dir.copy(directory, file, newFileName, context);
|
info.info.dir.copy(directory, file, newFileName, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,9 @@ public interface IndexableFieldType {
|
||||||
/** True if term vector positions should be indexed */
|
/** True if term vector positions should be indexed */
|
||||||
public boolean storeTermVectorPositions();
|
public boolean storeTermVectorPositions();
|
||||||
|
|
||||||
|
/** True if term vector payloads should be indexed */
|
||||||
|
public boolean storeTermVectorPayloads();
|
||||||
|
|
||||||
/** True if norms should not be indexed */
|
/** True if norms should not be indexed */
|
||||||
public boolean omitNorms();
|
public boolean omitNorms();
|
||||||
|
|
||||||
|
|
|
@ -199,6 +199,7 @@ public class MergeState {
|
||||||
// and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
|
// and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
|
||||||
public PayloadProcessorProvider payloadProcessorProvider;
|
public PayloadProcessorProvider payloadProcessorProvider;
|
||||||
public ReaderPayloadProcessor[] readerPayloadProcessor;
|
public ReaderPayloadProcessor[] readerPayloadProcessor;
|
||||||
|
public ReaderPayloadProcessor currentReaderPayloadProcessor;
|
||||||
public PayloadProcessor[] currentPayloadProcessor;
|
public PayloadProcessor[] currentPayloadProcessor;
|
||||||
|
|
||||||
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
|
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue