mirror of https://github.com/apache/lucene.git
SOLR-2452: merged with trunk up to r1140790
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1140809 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in: commit 75d6001f0e
@@ -0,0 +1,74 @@
# /
/classes
build
dist
*~
velocity.log
build.properties
.idea
*.iml
*.ipr
*.iws
/.project
/.classpath
/.settings
prj.el
bin
pom.xml
target
lucene-libs

# /lucene/


# /lucene/contrib/db/bdb/
/lucene/contrib/db/bdb/lib
/lucene/contrib/db/bdb/index


# /lucene/contrib/db/bdb-je/
/lucene/contrib/db/bdb-je/lib
/lucene/contrib/db/bdb-je/index


# /lucene/src/java/org/apache/lucene/queryParser/
/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java
/lucene/src/java/org/apache/lucene/queryParser/ParseException.java
/lucene/src/java/org/apache/lucene/queryParser/Token.java
/lucene/src/java/org/apache/lucene/queryParser/TokenManager.java
/lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java

# /lucene/src/java/org/apache/lucene/util/automaton/
/lucene/src/java/org/apache/lucene/util/automaton/moman


# /modules/analysis/common/src/java/org/apache/lucene/analysis/standard/
/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/Token.java
/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java
/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/TokenMgrError.java
/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/CharStream.java
/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java


# /modules/benchmark/
/modules/benchmark/temp
/modules/benchmark/work

# /solr/
/solr/.project
/solr/.classpath
/solr/.settings
/solr/bin
/solr/luke
/solr/package

# /solr/example
/solr/example/webapps
/solr/example/work
/solr/example/solr/data
/solr/example/exampledocs/post.jar

# /solr/client/ruby/flare/
/solr/client/ruby/flare/tmp
@@ -40,8 +40,12 @@
<classpathentry kind="src" path="modules/analysis/stempel/src/test"/>
<classpathentry kind="src" path="modules/benchmark/src/java"/>
<classpathentry kind="src" path="modules/benchmark/src/test"/>
<classpathentry kind="src" path="modules/common/src/java"/>
<classpathentry kind="src" path="modules/common/src/test"/>
<classpathentry kind="src" path="modules/grouping/src/java"/>
<classpathentry kind="src" path="modules/grouping/src/test"/>
<classpathentry kind="src" path="modules/queries/src/java"/>
<classpathentry kind="src" path="modules/queries/src/test"/>
<classpathentry kind="src" path="modules/suggest/src/java"/>
<classpathentry kind="src" path="modules/suggest/src/test"/>
<classpathentry kind="src" path="solr/src/java"/>

@@ -9,18 +9,20 @@
<module filepath="$PROJECT_DIR$/lucene/contrib/instantiated/instantiated.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/memory/memory.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/misc/misc.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries-contrib.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/common/analysis-common.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/icu/icu.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/phonetic/phonetic.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
<module filepath="$PROJECT_DIR$/modules/common/common.iml" />
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
<module filepath="$PROJECT_DIR$/modules/queries/queries.iml" />
<module filepath="$PROJECT_DIR$/modules/suggest/suggest.iml" />
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />

@@ -22,9 +22,16 @@
<option name="VM_PARAMETERS" value="-ea -Dtests.luceneMatchVersion=4.0 -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="common analysis module" type="JUnit" factoryName="JUnit">
<configuration default="false" name="common module" type="JUnit" factoryName="JUnit">
<module name="common" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/common/build" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="common analysis module" type="JUnit" factoryName="JUnit">
<module name="analysis-common" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/analysis/build/common" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>

@@ -107,12 +114,19 @@
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="queries contrib" type="JUnit" factoryName="JUnit">
<module name="queries" />
<module name="queries-contrib" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build/contrib/queries" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="queries module" type="JUnit" factoryName="JUnit">
<module name="queries" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/queries/build" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="queryparser contrib" type="JUnit" factoryName="JUnit">
<module name="queryparser" />
<option name="TEST_OBJECT" value="package" />

@@ -176,32 +190,34 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<list size="25">
<list size="27">
<item index="0" class="java.lang.String" itemvalue="JUnit.analysis-extras contrib" />
<item index="1" class="java.lang.String" itemvalue="JUnit.benchmark module" />
<item index="2" class="java.lang.String" itemvalue="JUnit.clustering contrib" />
<item index="3" class="java.lang.String" itemvalue="JUnit.common analysis module" />
<item index="4" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" />
<item index="5" class="java.lang.String" itemvalue="JUnit.dataimporthandler-extras contrib" />
<item index="6" class="java.lang.String" itemvalue="JUnit.extraction contrib" />
<item index="7" class="java.lang.String" itemvalue="JUnit.grouping module" />
<item index="8" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
<item index="9" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
<item index="10" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
<item index="11" class="java.lang.String" itemvalue="JUnit.lucene" />
<item index="12" class="java.lang.String" itemvalue="JUnit.memory contrib" />
<item index="13" class="java.lang.String" itemvalue="JUnit.misc contrib" />
<item index="14" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
<item index="15" class="java.lang.String" itemvalue="JUnit.queries contrib" />
<item index="16" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
<item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="20" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="21" class="java.lang.String" itemvalue="JUnit.suggest module" />
<item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
<item index="3" class="java.lang.String" itemvalue="JUnit.common module" />
<item index="4" class="java.lang.String" itemvalue="JUnit.common analysis module" />
<item index="5" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" />
<item index="6" class="java.lang.String" itemvalue="JUnit.dataimporthandler-extras contrib" />
<item index="7" class="java.lang.String" itemvalue="JUnit.extraction contrib" />
<item index="8" class="java.lang.String" itemvalue="JUnit.grouping module" />
<item index="9" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
<item index="10" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
<item index="11" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
<item index="12" class="java.lang.String" itemvalue="JUnit.lucene" />
<item index="13" class="java.lang.String" itemvalue="JUnit.memory contrib" />
<item index="14" class="java.lang.String" itemvalue="JUnit.misc contrib" />
<item index="15" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
<item index="16" class="java.lang.String" itemvalue="JUnit.queries contrib" />
<item index="17" class="java.lang.String" itemvalue="JUnit.queries module" />
<item index="18" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
<item index="19" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="20" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="21" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="22" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="23" class="java.lang.String" itemvalue="JUnit.suggest module" />
<item index="24" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="25" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="26" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
</list>
</component>
</project>
@@ -11,7 +11,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -12,7 +12,7 @@
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="memory" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="lucene" />
</component>

@@ -11,7 +11,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="lucene" />
</component>

@@ -11,7 +11,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="lucene" />
</component>

@@ -11,7 +11,6 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -12,7 +12,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="lucene" />
</component>

@@ -11,7 +11,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="lucene" />
</component>

@@ -11,9 +11,8 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -14,9 +14,9 @@
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="library" name="Servlet API 2.4" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -23,7 +23,7 @@
</library>
</orderEntry>
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -21,7 +21,7 @@
</library>
</orderEntry>
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -12,7 +12,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -12,7 +12,7 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -25,10 +25,10 @@
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="highlighter" />
<orderEntry type="module" module-name="icu" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="memory" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java" />
<output-test url="file://$MODULE_DIR$/build/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/work" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java" />
<output-test url="file://$MODULE_DIR$/build/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/work" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="lucene" />
<orderEntry type="module" module-name="common" />
</component>
</module>
@@ -11,7 +11,6 @@
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -27,7 +27,7 @@
<orderEntry type="module" module-name="smartcn" />
<orderEntry type="module" module-name="solr" />
<orderEntry type="module" module-name="stempel" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -28,11 +28,11 @@
<orderEntry type="module" module-name="memory" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="phonetic" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="solr" />
<orderEntry type="module" module-name="spatial" />
<orderEntry type="module" module-name="suggest" />
<orderEntry type="module" module-name="lucene" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
</component>
</module>

@@ -15,7 +15,7 @@
<orderEntry type="library" name="Solr library" level="project" />
<orderEntry type="library" name="Solr DIH library" level="project" />
<orderEntry type="module" module-name="solr" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -25,11 +25,13 @@
<orderEntry type="module" module-name="grouping" />
<orderEntry type="module" module-name="highlighter" />
<orderEntry type="module" module-name="icu" />
<orderEntry type="module" module-name="queries-contrib" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="phonetic" />
<orderEntry type="module" module-name="suggest" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="analysis-common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>
@@ -56,7 +56,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-queries</artifactId>
<artifactId>lucene-queries-contrib</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

@@ -54,11 +54,6 @@
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

@@ -27,9 +27,9 @@
<relativePath>../../pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<artifactId>lucene-queries-contrib</artifactId>
<packaging>jar</packaging>
<name>Lucene Queries</name>
<name>Lucene Queries Contrib</name>
<description>
Queries - various query object exotica not in core
</description>

@@ -49,7 +49,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-queries</artifactId>
<artifactId>lucene-queries-contrib</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

@@ -47,11 +47,6 @@
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

@@ -49,7 +49,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-queries</artifactId>
<artifactId>lucene-queries-contrib</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
@@ -0,0 +1,71 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
<relativePath>../../lucene/pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-common-module</artifactId>
<packaging>jar</packaging>
<name>Lucene Common</name>
<description>Lucene Common Module</description>
<properties>
<module-directory>modules/common</module-directory>
<build-directory>build</build-directory>
</properties>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<directory>${build-directory}</directory>
<outputDirectory>${build-directory}/classes/java</outputDirectory>
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
<sourceDirectory>src/java</sourceDirectory>
<testSourceDirectory>src/test</testSourceDirectory>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>
<excludes>
<exclude>**/*.java</exclude>
</excludes>
</testResource>
</testResources>
</build>
</project>
@@ -33,7 +33,9 @@
<modules>
<module>analysis</module>
<module>benchmark</module>
<module>common</module>
<module>grouping</module>
<module>queries</module>
<module>suggest</module>
</modules>
<build>
@@ -0,0 +1,76 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
<relativePath>../../lucene/pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<packaging>jar</packaging>
<name>Lucene Queries</name>
<description>Lucene Queries Module</description>
<properties>
<module-directory>modules/queries</module-directory>
<build-directory>build</build-directory>
</properties>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-common-module</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<directory>${build-directory}</directory>
<outputDirectory>${build-directory}/classes/java</outputDirectory>
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
<sourceDirectory>src/java</sourceDirectory>
<testSourceDirectory>src/test</testSourceDirectory>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>
<excludes>
<exclude>**/*.java</exclude>
</excludes>
</testResource>
</testResources>
</build>
</project>
@@ -78,7 +78,12 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<artifactId>lucene-queries-contrib</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries-contrib</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

@@ -96,6 +101,16 @@
<artifactId>lucene-grouping</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-common-module</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-commons-csv</artifactId>
@@ -29,6 +29,9 @@ import HTMLParser

# http://s.apache.org/lusolr32rc2

JAVA5_HOME = '/usr/local/src/jdk1.5.0_22'
JAVA6_HOME = '/usr/local/src/jdk1.6.0_21'

# TODO
#   + verify KEYS contains key that signed the release
#   + make sure changes HTML looks ok

@@ -218,7 +221,7 @@ def testChanges(project, version, changesURLString):

def run(command, logFile):
  if os.system('%s > %s 2>&1' % (command, logFile)):
    raise RuntimeError('command "%s" failed; see log file %s' % (command, logFile))
    raise RuntimeError('command "%s" failed; see log file %s/%s' % (command, os.getcwd(), logFile))

def verifyDigests(artifact, urlString, tmpDir):
  print '  verify md5/sha1 digests'

@@ -327,26 +330,31 @@ def verifyUnpacked(project, artifact, unpackPath, version):
  if isSrc:
    if project == 'lucene':
      print '    run tests w/ Java 5...'
      run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; ant test', '%s/test.log' % unpackPath)
      run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; ant jar', '%s/compile.log' % unpackPath)
      testDemo(isSrc)
      run('export JAVA_HOME=%s; ant test' % JAVA5_HOME, '%s/test.log' % unpackPath)
      run('export JAVA_HOME=%s; ant jar' % JAVA5_HOME, '%s/compile.log' % unpackPath)
      testDemo(isSrc, version)
    else:
      print '    run tests w/ Java 6...'
      run('export JAVA_HOME=/usr/local/src/jdk1.6.0_21; ant test', '%s/test.log' % unpackPath)
      run('export JAVA_HOME=%s; ant test' % JAVA6_HOME, '%s/test.log' % unpackPath)
  else:
    if project == 'lucene':
      testDemo(isSrc)
      testDemo(isSrc, version)

def testDemo(isSrc):
def testDemo(isSrc, version):
  print '  test demo...'
  if isSrc:
    cp = 'build/lucene-core-3.2-SNAPSHOT.jar:build/contrib/demo/lucene-demo-3.2-SNAPSHOT.jar'
    # allow lucene dev version to be either 3.3 or 3.3.0:
    if version.endswith('.0'):
      cp = 'build/lucene-core-%s-SNAPSHOT.jar:build/contrib/demo/lucene-demo-%s-SNAPSHOT.jar' % (version, version)
      cp += ':build/lucene-core-%s-SNAPSHOT.jar:build/contrib/demo/lucene-demo-%s-SNAPSHOT.jar' % (version[:-2], version[:-2])
    else:
      cp = 'build/lucene-core-%s-SNAPSHOT.jar:build/contrib/demo/lucene-demo-%s-SNAPSHOT.jar' % (version, version)
    docsDir = 'src'
  else:
    cp = 'lucene-core-3.2.0.jar:contrib/demo/lucene-demo-3.2.0.jar'
    cp = 'lucene-core-%s.jar:contrib/demo/lucene-demo-%s.jar' % (version, version)
    docsDir = 'docs'
  run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; java -cp %s org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir), 'index.log')
  run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; java -cp %s org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp, 'search.log')
  run('export JAVA_HOME=%s; %s/bin/java -cp %s org.apache.lucene.demo.IndexFiles -index index -docs %s' % (JAVA5_HOME, JAVA5_HOME, cp, docsDir), 'index.log')
  run('export JAVA_HOME=%s; %s/bin/java -cp %s org.apache.lucene.demo.SearchFiles -index index -query lucene' % (JAVA5_HOME, JAVA5_HOME, cp), 'search.log')
  reMatchingDocs = re.compile('(\d+) total matching documents')
  m = reMatchingDocs.search(open('search.log', 'rb').read())
  if m is None:
@@ -1,14 +1,14 @@
Lucene Build Instructions

Basic steps:
  0) Install JDK 1.5 (or greater), Ant 1.7.0 (or greater)
  0) Install JDK 1.5 (or greater), Ant 1.7.x (not 1.6.x, not 1.8.x)
  1) Download Lucene from Apache and unpack it
  2) Connect to the top-level of your Lucene installation
  3) Install JavaCC (optional)
  4) Run ant

Step 0) Set up your development environment (JDK 1.5 or greater,
Ant 1.7.0 or greater)
Ant 1.7.x)

We'll assume that you know how to get and set up the JDK - if you
don't, then we suggest starting at http://java.sun.com and learning

@@ -16,7 +16,7 @@ more about Java, before returning to this README. Lucene runs with
JDK 1.5 and later.

Like many Open Source java projects, Lucene uses Apache Ant for build
control. Specifically, you MUST use Ant version 1.7.0 or greater.
control. Specifically, you MUST use Ant version 1.7.x.

Ant is "kind of like make without make's wrinkles". Ant is
implemented in java and uses XML-based configuration files. You can
@@ -149,6 +149,9 @@ Changes in backwards compatibility policy
  files holding stored fields and term vectors) while flushing a
  segment. (Mike McCandless)

* LUCENE-2548: Field names (eg in Term, FieldInfo) are no longer
  interned. (Mike McCandless)

Changes in Runtime Behavior

* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
@@ -281,6 +284,17 @@ API Changes
* LUCENE-2953: In addition to changes in 3.x, PriorityQueue#initialize(int)
  function was moved into the ctor. (Uwe Schindler, Yonik Seeley)

* LUCENE-3219: SortField type properties have been moved to an enum
  SortField.Type. To be consistent, CachedArrayCreator.getSortTypeID() has
  been changed to CachedArrayCreator.getSortType(). (Chris Male)

* LUCENE-3225: Add TermsEnum.seekExact for faster seeking when you
  don't need the ceiling term; renamed existing seek methods to either
  seekCeil or seekExact; changed seekExact(ord) to return no value.
  Fixed MemoryCodec and SimpleTextCodec to optimize the seekExact
  case, and fixed places in Lucene to use seekExact when possible.
  (Mike McCandless)

New features

* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
@@ -454,7 +468,7 @@ Optimizations
  MultiTermQuery now stores TermState per leaf reader during rewrite to re-
  seek the term dictionary in TermQuery / TermWeight.
  (Simon Willnauer, Mike McCandless, Robert Muir)


Bug fixes

* LUCENE-2633: PackedInts Packed32 and Packed64 did not support internal
@@ -470,6 +484,26 @@ Bug fixes

======================= Lucene 3.x (not yet released) ================

Bug fixes

* LUCENE-3251: Directory#copy failed to close target output if opening the
  source stream failed. (Simon Willnauer)

* LUCENE-3254: Fixed minor bug in how deletes were written to disk,
  causing the file to sometimes be larger than it needed to be. (Mike
  McCandless)

Optimizations

* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
  into a Directory implementation. Reading is optimized for MMapDirectory,
  NIOFSDirectory and SimpleFSDirectory to only map requested parts of the
  CFS into an IndexInput. Writing to a CFS now tries to append to the CF
  directly if possible and merges separately written files on the fly instead
  of during close. (Simon Willnauer, Robert Muir)

======================= Lucene 3.3.0 =======================

Changes in backwards compatibility policy

* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass
@@ -485,6 +519,13 @@ Changes in backwards compatibility policy
  overridden one of these methods, cut over to the non-deprecated
  implementation. (Uwe Schindler, Robert Muir, Yonik Seeley)

* LUCENE-3238: Made MultiTermQuery.rewrite() final, to prevent
  problems (such as not properly setting rewrite methods, or
  not working correctly with things like SpanMultiTermQueryWrapper).
  To rewrite to a simpler form, instead return a simpler enum
  from getEnum(IndexReader). For example, to rewrite to a single term,
  return a SingleTermEnum. (ludovic Boutros, Uwe Schindler, Robert Muir)

Changes in runtime behavior

* LUCENE-2834: the hash used to compute the lock file name when the
@@ -533,6 +574,14 @@ Bug fixes
  background optimize when documents are still being deleted
  concurrently with the optimize (Mike McCandless)

* LUCENE-3222: The RAM accounting for buffered delete terms was
  failing to measure the space required to hold the term's field and
  text character data. (Mike McCandless)

* LUCENE-3238: Fixed bug where using WildcardQuery("prefix*") inside
  of a SpanMultiTermQueryWrapper rewrote incorrectly and returned
  an error instead. (ludovic Boutros, Uwe Schindler, Robert Muir)

API Changes

* LUCENE-3208: Renamed protected IndexSearcher.createWeight() to expert
@@ -22,6 +22,7 @@ lucene-core-XX-javadoc.jar

lucene-test-framework-XX.jar
  The compiled Lucene test-framework library.
  Depends on junit 4.7.x (not 4.6.x, not 4.8.x), and Apache Ant 1.7.x (not 1.6.x, not 1.8.x)

lucene-test-framework-XX-javadoc.jar
  The Javadoc jar for the compiled Lucene test-framework library.
@@ -4,6 +4,11 @@ For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions

======================= Trunk (not yet released) =======================

Changes in runtime behavior

* LUCENE-3250: Wordnet's SynExpand requires a non-null Analyzer (it no longer
  treats null as StandardAnalyzer). (Robert Muir)

Build

@@ -55,6 +60,19 @@ Bug Fixes

======================= Lucene 3.x (not yet released) ================

New Features

* LUCENE-3234: provide a limit on phrase analysis in FastVectorHighlighter for
  highlighting speed up. Use FastVectorHighlighter.setPhraseLimit() to set limit
  (e.g. 5000). (Mike Sokolov via Koji Sekiguchi)

API Changes

Bug Fixes


======================= Lucene 3.3.0 =======================

New Features

* LUCENE-152: Add KStem (light stemmer for English).
@@ -116,6 +116,7 @@
<macrodef name="contrib-uptodate">
  <attribute name="name"/>
  <attribute name="property" default="@{name}.uptodate"/>
  <attribute name="contrib-src-name" default="@{name}"/>
  <attribute name="classpath.property" default="@{name}.jar"/>
  <!-- set jarfile only, if the target jar file has no generic name, applies to analyzers with its common and smartcn subdir -->
  <attribute name="jarfile" default="${common.dir}/build/contrib/@{name}/lucene-@{name}-${version}.jar"/>

@@ -123,7 +124,7 @@
  <!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
  <property name="@{classpath.property}" location="@{jarfile}"/>
  <uptodate property="@{property}" targetfile="@{jarfile}">
    <srcfiles dir="${common.dir}/contrib/@{name}/src/java" includes="**/*.java"/>
    <srcfiles dir="${common.dir}/contrib/@{contrib-src-name}/src/java" includes="**/*.java"/>
  </uptodate>
  </sequential>
</macrodef>

@@ -131,13 +132,14 @@
<macrodef name="module-uptodate">
  <attribute name="name"/>
  <attribute name="property" default="@{name}.uptodate"/>
  <attribute name="module-src-name" default="@{name}"/>
  <attribute name="classpath.property" default="@{name}.jar"/>
  <!-- set jarfile only, if the target jar file has no generic name, applies to analyzers with its common and smartcn subdir -->
  <attribute name="jarfile" default="${common.dir}/../modules/@{name}/build/lucene-@{name}-${version}.jar"/>
  <attribute name="jarfile" default="${common.dir}/../modules/@{module-src-name}/build/lucene-@{name}-${version}.jar"/>
  <sequential>
    <property name="@{classpath.property}" location="@{jarfile}"/>
    <uptodate property="@{property}" targetfile="@{jarfile}">
      <srcfiles dir="${common.dir}/../modules/@{name}/src/java" includes="**/*.java"/>
      <srcfiles dir="${common.dir}/../modules/@{module-src-name}/src/java" includes="**/*.java"/>
    </uptodate>
  </sequential>
</macrodef>
@@ -38,7 +38,11 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

/** Index all text files under a directory. See http://lucene.apache.org/java/4_0/demo.html. */
/** Index all text files under a directory.
 * <p>
 * This is a command-line application demonstrating simple Lucene indexing.
 * Run it with no command-line arguments for usage information.
 */
public class IndexFiles {

  private IndexFiles() {}
@@ -47,8 +51,8 @@ public class IndexFiles {
  public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
                 + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
                 // TODO: Change the link with every release (or: fill in some less error-prone alternative here...)
                 + "See http://lucene.apache.org/java/4_0/demo.html for details.";
                 + "This indexes the documents in DOCS_PATH, creating a Lucene index "
                 + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
@@ -26,7 +26,7 @@
<import file="../contrib-build.xml"/>

<contrib-uptodate name="memory" property="memory.uptodate" classpath.property="memory.jar"/>
<contrib-uptodate name="queries" property="queries.uptodate" classpath.property="queries.jar"/>
<contrib-uptodate name="queries-contrib" contrib-src-name="queries" property="queries.uptodate" classpath.property="queries.jar"/>

<path id="classpath">
  <pathelement path="${memory.jar}"/>
@@ -31,7 +31,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.StringHelper;

/**
 * {@link Scorer} implementation which scores text fragments by the number of

@@ -88,7 +87,7 @@ public class QueryScorer implements Scorer {
   * @param defaultField
   */
  public QueryScorer(Query query, IndexReader reader, String field, String defaultField) {
    this.defaultField = StringHelper.intern(defaultField);
    this.defaultField = defaultField;
    init(query, field, reader, true);
  }


@@ -96,7 +95,7 @@ public class QueryScorer implements Scorer {
   * @param defaultField - The default field for queries with the field name unspecified
   */
  public QueryScorer(Query query, String field, String defaultField) {
    this.defaultField = StringHelper.intern(defaultField);
    this.defaultField = defaultField;
    init(query, field, null, true);
  }

|
|
@ -26,7 +26,6 @@ import org.apache.lucene.search.BooleanClause;
|
|||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.FilteredQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* Utility class used to extract the terms used in a query, plus any weights.
|
||||
|
@ -94,10 +93,6 @@ public final class QueryTermExtractor
|
|||
public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
|
||||
{
|
||||
HashSet<WeightedTerm> terms=new HashSet<WeightedTerm>();
|
||||
if(fieldName!=null)
|
||||
{
|
||||
fieldName= StringHelper.intern(fieldName);
|
||||
}
|
||||
getTerms(query,terms,prohibited,fieldName);
|
||||
return terms.toArray(new WeightedTerm[0]);
|
||||
}
|
||||
|
@ -114,7 +109,6 @@ public final class QueryTermExtractor
|
|||
return getTerms(query,prohibited,null);
|
||||
}
|
||||
|
||||
//fieldname MUST be interned prior to this call
|
||||
private static final void getTerms(Query query, HashSet<WeightedTerm> terms,boolean prohibited, String fieldName)
|
||||
{
|
||||
try
|
||||
|
@ -131,7 +125,7 @@ public final class QueryTermExtractor
|
|||
for (Iterator<Term> iter = nonWeightedTerms.iterator(); iter.hasNext();)
|
||||
{
|
||||
Term term = iter.next();
|
||||
if((fieldName==null)||(term.field()==fieldName))
|
||||
if((fieldName==null)||(term.field().equals(fieldName)))
|
||||
{
|
||||
terms.add(new WeightedTerm(query.getBoost(),term.text()));
|
||||
}
|
||||
|
|
|
@@ -41,7 +41,6 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.StringHelper;

/**
 * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether

@@ -63,19 +62,7 @@ public class WeightedSpanTermExtractor {

  public WeightedSpanTermExtractor(String defaultField) {
    if (defaultField != null) {
      this.defaultField = StringHelper.intern(defaultField);
      this.defaultField = defaultField;
    }
  }

@@ -314,8 +313,8 @@ public class WeightedSpanTermExtractor {
   * Necessary to implement matches for queries against <code>defaultField</code>
   */
  private boolean fieldNameComparator(String fieldNameToCheck) {
    boolean rv = fieldName == null || fieldNameToCheck == fieldName
        || fieldNameToCheck == defaultField;
    boolean rv = fieldName == null || fieldName.equals(fieldNameToCheck)
        || (defaultField != null && defaultField.equals(fieldNameToCheck));
    return rv;
  }

@@ -372,7 +371,7 @@ public class WeightedSpanTermExtractor {
  public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream,
      String fieldName) throws IOException {
    if (fieldName != null) {
      this.fieldName = StringHelper.intern(fieldName);
      this.fieldName = fieldName;
    } else {
      this.fieldName = null;
    }

@@ -408,7 +407,7 @@ public class WeightedSpanTermExtractor {
  public Map<String,WeightedSpanTerm> getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
      IndexReader reader) throws IOException {
    if (fieldName != null) {
      this.fieldName = StringHelper.intern(fieldName);
      this.fieldName = fieldName;
    } else {
      this.fieldName = null;
    }
@@ -35,6 +35,7 @@ public class FastVectorHighlighter {
  private final boolean fieldMatch;
  private final FragListBuilder fragListBuilder;
  private final FragmentsBuilder fragmentsBuilder;
  private int phraseLimit = Integer.MAX_VALUE;

  /**
   * the default constructor.

@@ -173,7 +174,7 @@ public class FastVectorHighlighter {
      final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize ) throws IOException {
    FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery );
    FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery );
    FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery, phraseLimit );
    return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize );
  }


@@ -190,4 +191,15 @@ public class FastVectorHighlighter {
   * @return whether fieldMatch or not
   */
  public boolean isFieldMatch(){ return fieldMatch; }

  /**
   * @return the maximum number of phrases to analyze when searching for the highest-scoring phrase.
   */
  public int getPhraseLimit () { return phraseLimit; }

  /**
   * set the maximum number of phrases to analyze when searching for the highest-scoring phrase.
   * The default is unlimited (Integer.MAX_VALUE).
   */
  public void setPhraseLimit (int phraseLimit) { this.phraseLimit = phraseLimit; }
}
@@ -30,21 +30,32 @@ import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
public class FieldPhraseList {

  LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();


  /**
   * create a FieldPhraseList that has no limit on the number of phrases to analyze
   *
   * @param fieldTermStack FieldTermStack object
   * @param fieldQuery FieldQuery object
   */
  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){
    this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
  }

  /**
   * a constructor.
   *
   * @param fieldTermStack FieldTermStack object
   * @param fieldQuery FieldQuery object
   * @param phraseLimit maximum size of phraseList
   */
  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery ){
  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){
    final String field = fieldTermStack.getFieldName();

    LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
    QueryPhraseMap currMap = null;
    QueryPhraseMap nextMap = null;
    while( !fieldTermStack.isEmpty() ){

    while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
    {
      phraseCandidate.clear();

      TermInfo ti = fieldTermStack.pop();
@@ -188,4 +188,34 @@ public class FieldPhraseListTest extends AbstractTestCase {
    assertEquals( 1, fpl.phraseList.size() );
    assertEquals( "sppeeeed(1.0)((88,93))", fpl.phraseList.get( 0 ).toString() );
  }

  /* This test shows a big speedup from limiting the number of analyzed phrases in
   * this bad case for FieldPhraseList */
  /* But it is not reliable as a unit test since it is timing-dependent
  public void testManyRepeatedTerms() throws Exception {
    long t = System.currentTimeMillis();
    testManyTermsWithLimit (-1);
    long t1 = System.currentTimeMillis();
    testManyTermsWithLimit (1);
    long t2 = System.currentTimeMillis();
    assertTrue ((t2-t1) * 1000 < t1-t);
  }
  private void testManyTermsWithLimit (int limit) throws Exception {
    StringBuilder buf = new StringBuilder ();
    for (int i = 0; i < 16000; i++) {
      buf.append("a b c ");
    }
    make1d1fIndex( buf.toString());

    Query query = tq("a");
    FieldQuery fq = new FieldQuery( query, true, true );
    FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
    FieldPhraseList fpl = new FieldPhraseList( stack, fq, limit);
    if (limit < 0 || limit > 16000)
      assertEquals( 16000, fpl.phraseList.size() );
    else
      assertEquals( limit, fpl.phraseList.size() );
    assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );
  }
  */
}
@@ -400,7 +400,7 @@ public class InstantiatedIndexReader extends IndexReader {
if (i < 0) {
i = -i - 1;
}
if (i >= orderedTerms.length || orderedTerms[i].field() != field) {
if (i >= orderedTerms.length || !orderedTerms[i].field().equals(field)) {
// field does not exist
return null;
}

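The != to equals() change matters because field names are no longer interned: reference comparison was only valid while every field name was the one canonical interned String. A tiny illustration of why identity breaks once that guarantee is gone:

    String interned = "title".intern();
    String other = new String("title");         // same contents, different object
    System.out.println(interned == other);      // false
    System.out.println(interned.equals(other)); // true
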
@@ -44,7 +44,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.AttributeImpl;

@@ -475,7 +474,7 @@ public class InstantiatedIndexWriter implements Closeable {
FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
if (fieldSetting == null) {
fieldSetting = new FieldSetting();
fieldSetting.fieldName = StringHelper.intern(field.name());
fieldSetting.fieldName = field.name();
fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting);
fieldNameBuffer.add(fieldSetting.fieldName);
}

@@ -44,7 +44,20 @@ public class InstantiatedTermsEnum extends TermsEnum {
}

@Override
public SeekStatus seek(BytesRef text, boolean useCache) {
public boolean seekExact(BytesRef text, boolean useCache) {
final Term t = new Term(field, text);
int loc = Arrays.binarySearch(terms, t, InstantiatedTerm.termComparator);
if (loc < 0) {
return false;
} else {
upto = loc;
br.copy(text);
return true;
}
}

@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) {
final Term t = new Term(field, text);
int loc = Arrays.binarySearch(terms, t, InstantiatedTerm.termComparator);
if (loc < 0) {

@@ -63,19 +76,10 @@ public class InstantiatedTermsEnum extends TermsEnum {
}

@Override
public SeekStatus seek(long ord) {
public void seekExact(long ord) {
assert (start + (int) ord) < terms.length;
upto = start + (int) ord;
if (upto >= terms.length) {
return SeekStatus.END;
}

if (terms[upto].field() == field) {
return SeekStatus.FOUND;
} else {
// make sure field was interned
assert !terms[upto].field().equals(field);
return SeekStatus.END;
}
assert field.equals(terms[upto].field());
}

@Override

@@ -84,12 +88,10 @@ public class InstantiatedTermsEnum extends TermsEnum {
if (upto >= terms.length) {
return null;
}
if (terms[upto].field() == field) {
if (terms[upto].field().equals(field)) {
br.copy(terms[upto].getTerm().text());
return br;
} else {
// make sure field was interned
assert !terms[upto].field().equals(field);
return null;
}
}

@@ -144,9 +146,9 @@ public class InstantiatedTermsEnum extends TermsEnum {
}

@Override
public void seek(BytesRef term, TermState state) throws IOException {
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
seek(((OrdTermState)state).ord); // just use the ord for simplicity
seekExact(((OrdTermState)state).ord); // just use the ord for simplicity
}
}

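Taken together, these hunks track the trunk-side split of the old SeekStatus seek(...) into three methods: boolean seekExact(BytesRef, boolean) for exact lookups, SeekStatus seekCeil(BytesRef, boolean) for ceiling positioning, and void seekExact(long) for seek-by-ord. A hedged sketch of caller-side usage (te is any TermsEnum):

    if (te.seekExact(new BytesRef("fox"), false)) {
      // exact hit; te.term() is now "fox"
    }
    switch (te.seekCeil(new BytesRef("fox"), false)) {
      case FOUND:     break; // the exact term exists
      case NOT_FOUND: break; // positioned on the smallest term greater than "fox"
      case END:       break; // no term at or after "fox"
    }
    te.seekExact(42L); // by ord; the caller must pass a valid ord
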
@@ -133,9 +133,9 @@ public class TestIndicesEquals extends LuceneTestCase {

Term t = new Term("c", "danny");
TermsEnum aprioriTermEnum = MultiFields.getTerms(aprioriReader, t.field()).iterator();
aprioriTermEnum.seek(new BytesRef(t.text()));
aprioriTermEnum.seekCeil(new BytesRef(t.text()));
TermsEnum testTermEnum = MultiFields.getTerms(testReader, t.field()).iterator();
testTermEnum.seek(new BytesRef(t.text()));
testTermEnum.seekCeil(new BytesRef(t.text()));
assertEquals(aprioriTermEnum.term(), testTermEnum.term());

DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null);

@@ -860,7 +860,18 @@ public class MemoryIndex {
}

@Override
public SeekStatus seek(BytesRef text, boolean useCache) {
public boolean seekExact(BytesRef text, boolean useCache) {
termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
if (termUpto >= 0) {
br.copy(info.sortedTerms[termUpto].getKey());
return true;
} else {
return false;
}
}

@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) {
termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
if (termUpto < 0) { // not found; choose successor
termUpto = -termUpto -1;

@@ -877,13 +888,9 @@ public class MemoryIndex {
}

@Override
public SeekStatus seek(long ord) {
public void seekExact(long ord) {
assert ord < info.sortedTerms.length;
termUpto = (int) ord;
if (ord < info.sortedTerms.length) {
return SeekStatus.FOUND;
} else {
return SeekStatus.END;
}
}

@Override

@@ -939,9 +946,9 @@ public class MemoryIndex {
}

@Override
public void seek(BytesRef term, TermState state) throws IOException {
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null;
this.seek(((OrdTermState)state).ord);
this.seekExact(((OrdTermState)state).ord);
}

@Override

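The "-termUpto -1" adjustment follows the java.util.Arrays.binarySearch contract: a miss returns -(insertionPoint) - 1, so negating and subtracting one recovers the slot of the smallest element greater than the key. For example:

    String[] sorted = { "apple", "fox", "zebra" };
    int i = java.util.Arrays.binarySearch(sorted, "dog"); // -2
    if (i < 0) {
      i = -i - 1; // 1, the slot of "fox", the successor of "dog"
    }
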
@@ -27,22 +27,6 @@

<import file="../contrib-build.xml"/>

<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>

<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>

<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />

<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>

<target name="build-native-unix" >
<mkdir dir="${common.build.dir}/native"/>

@@ -27,7 +27,6 @@ import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;

@@ -111,7 +110,6 @@ public class FieldNormModifier {
* @param field the field whose norms should be reset
*/
public void reSetNorms(String field) throws IOException {
String fieldName = StringHelper.intern(field);
Similarity fieldSim = sim.get(field);
IndexReader reader = null;
try {

@@ -149,7 +147,7 @@ public class FieldNormModifier {
for (int d = 0; d < termCounts.length; d++) {
if (delDocs == null || !delDocs.get(d)) {
invertState.setLength(termCounts[d]);
subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(invertState)));
subReader.setNorm(d, field, fieldSim.encodeNormValue(fieldSim.computeNorm(invertState)));
}
}
}

@@ -21,7 +21,6 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter; // javadoc
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;

@@ -98,7 +97,7 @@ public class MultiPassIndexSplitter {
}
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
Version.LUCENE_CURRENT,
new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
null)
.setOpenMode(OpenMode.CREATE));
System.err.println("Writing part " + (i + 1) + " ...");
w.addIndexes(input);

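Passing null where an analyzer used to be is safe here because the splitter only copies existing segments through addIndexes and never inverts text. The pattern, reduced to a sketch (dir and input are hypothetical):

    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_CURRENT, null)   // no analyzer: nothing is tokenized
        .setOpenMode(OpenMode.CREATE));
    w.addIndexes(input);                // segment-level copy, no analysis runs
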
@@ -15,7 +15,6 @@ package org.apache.lucene.index;
*
*/

import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@@ -54,9 +53,6 @@ public class TermVectorAccessor {
* @throws IOException
*/
public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {

fieldName = StringHelper.intern(fieldName);

decoratedMapper.decorated = mapper;
decoratedMapper.termVectorStored = false;

@@ -186,7 +186,7 @@ public class HighFreqTerms {
}

TermsEnum termsEnum = terms.iterator();
if (termsEnum.seek(termText) != TermsEnum.SeekStatus.FOUND) {
if (termsEnum.seekCeil(termText) != TermsEnum.SeekStatus.FOUND) {
return 0;
}

@@ -16,7 +16,6 @@ package org.apache.lucene.misc;
* limitations under the License.
*/

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;

@@ -40,7 +39,7 @@ public class IndexMergeTool {
FSDirectory mergedIndex = FSDirectory.open(new File(args[0]));

IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(
Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
Version.LUCENE_CURRENT, null)
.setOpenMode(OpenMode.CREATE));

Directory[] indexes = new Directory[args.length - 1];

@@ -233,6 +233,25 @@ public class NRTCachingDirectory extends Directory {
}
}

@Override
public synchronized CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
if (cache.fileExists(name)) {
return cache.openCompoundInput(name, bufferSize);
} else {
return delegate.openCompoundInput(name, bufferSize);
}
}

@Override
public synchronized CompoundFileDirectory createCompoundOutput(String name)
throws IOException {
if (cache.fileExists(name)) {
throw new IOException("File " + name + "already exists");
} else {
return delegate.createCompoundOutput(name);
}
}

@Override
public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
if (cache.fileExists(name)) {

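Both new overrides follow the directory's existing dispatch rule: a file lives in exactly one of the two directories, so reads route to whichever side fileExists reports, while creating a compound file under a still-cached name is refused. The core shape, as a sketch (the diff's exception message lacks a space before "already"; spelled out here):

    Directory source = cache.fileExists(name) ? cache : delegate; // reads
    if (cache.fileExists(name)) {                                 // new outputs
      throw new IOException("File " + name + " already exists");
    }
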
@@ -78,7 +78,7 @@ public class TestIndexSplitter extends LuceneTestCase {
_TestUtil.rmDir(destDir2);
destDir2.mkdirs();
IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName});
assertEquals(3, destDir2.listFiles().length);
assertEquals(4, destDir2.listFiles().length);
Directory fsDirDest2 = newFSDirectory(destDir2);
r = IndexReader.open(fsDirDest2, true);
assertEquals(50, r.maxDoc());

@@ -73,7 +73,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
Document doc = ir.document(0);
assertEquals("0", doc.get("id"));
TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("1")));
assertNotSame("1", te.term().utf8ToString());
ir.close();
ir = IndexReader.open(dirs[1], true);

@@ -81,7 +81,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
doc = ir.document(0);
assertEquals("1", doc.get("id"));
te = MultiFields.getTerms(ir, "id").iterator();
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("0")));

assertNotSame("0", te.term().utf8ToString());
ir.close();

@@ -91,10 +91,10 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
assertEquals("2", doc.get("id"));

te = MultiFields.getTerms(ir, "id").iterator();
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("1")));
assertNotSame("1", te.term());

assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("0")));
assertNotSame("0", te.term().utf8ToString());
ir.close();
for (Directory d : dirs)

@@ -132,7 +132,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
// make sure the deleted doc is not here
TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
Term t = new Term("id", (NUM_DOCS - 1) + "");
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef(t.text())));
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(t.text())));
assertNotSame(t.text(), te.term().utf8ToString());
ir.close();
for (Directory d : dirs)

@@ -526,7 +526,7 @@ public class TestNRTManager extends LuceneTestCase {
//System.out.println("trigger " + trigger);
shift = random.nextInt(trigger);
}
termsEnum.seek(new BytesRef(""));
termsEnum.seekCeil(new BytesRef(""));
continue;
}
seenTermCount++;

@@ -672,7 +672,7 @@ public class TestNRTManager extends LuceneTestCase {

private int runQuery(IndexSearcher s, Query q) throws Exception {
s.search(q, 10);
return s.search(q, null, 10, new Sort(new SortField("title", SortField.STRING))).totalHits;
return s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
}

private void smokeTestSearcher(IndexSearcher s) throws Exception {

@@ -154,14 +154,14 @@ public class TestAppendingCodec extends LuceneTestCase {
Terms terms = fields.terms("f");
assertNotNull(terms);
TermsEnum te = terms.iterator();
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("quick")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("brown")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("fox")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("jumped")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("over")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("lazy")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("dog")));
assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("the")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("quick")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("brown")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("fox")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("jumped")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("over")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("lazy")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("dog")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("the")));
DocsEnum de = te.docs(null, null);
assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
assertEquals(2, de.freq());

@@ -17,7 +17,7 @@
limitations under the License.
-->

<project name="queries" default="default">
<project name="queries-contrib" default="default">

<description>
Queries - various query object exotica not in core

@@ -190,7 +190,6 @@ public class FuzzyLikeThisQuery extends Query
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

int corpusNumDocs=reader.numDocs();
Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
HashSet<String> processedTerms=new HashSet<String>();
ts.reset();
while (ts.incrementToken())

@@ -201,7 +200,7 @@ public class FuzzyLikeThisQuery extends Query
processedTerms.add(term);
ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
float minScore=0;
Term startTerm=internSavingTemplateTerm.createTerm(term);
Term startTerm=new Term(f.fieldName, term);
AttributeSource atts = new AttributeSource();
MaxNonCompetitiveBoostAttribute maxBoostAtt =
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);

@@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRef;
* This class will be removed in Lucene 5.0
*/
@Deprecated
public final class SlowCollatedStringComparator extends FieldComparator<BytesRef> {
public final class SlowCollatedStringComparator extends FieldComparator<String> {

private final String[] values;
private DocTerms currentDocTerms;

@@ -99,13 +99,12 @@ public final class SlowCollatedStringComparator extends FieldComparator<BytesRef
}

@Override
public BytesRef value(int slot) {
final String s = values[slot];
return s == null ? null : new BytesRef(values[slot]);
public String value(int slot) {
return values[slot];
}

@Override
public int compareValues(BytesRef first, BytesRef second) {
public int compareValues(String first, String second) {
if (first == null) {
if (second == null) {
return 0;

@@ -71,7 +71,7 @@ public class TermsFilter
DocsEnum docs = null;
for (Iterator<Term> iter = terms.iterator(); iter.hasNext();) {
Term term = iter.next();
if (term.field() != lastField) {
if (!term.field().equals(lastField)) {
termsC = fields.terms(term.field());
termsEnum = termsC.iterator();
lastField = term.field();

@@ -79,7 +79,7 @@ public class TermsFilter

if (terms != null) {
br.copy(term.bytes());
if (termsEnum.seek(br) == TermsEnum.SeekStatus.FOUND) {
if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
docs = termsEnum.docs(delDocs, docs);
while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
result.set(docs.docID());

@@ -19,6 +19,7 @@ package org.apache.lucene.search.regex;

import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FilteredTermsEnum;
import org.apache.lucene.search.RegexpQuery; // javadoc
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.AttributeSource;

@@ -29,7 +30,10 @@ import java.io.IOException;
/** Implements the regular expression term search query.
* The expressions supported depend on the regular expression implementation
* used by way of the {@link RegexCapabilities} interface.
*
* <p>
* NOTE: You may wish to consider using the regex query support
* in {@link RegexpQuery} instead, as it has better performance.
*
* @see RegexTermsEnum
*/
public class RegexQuery extends MultiTermQuery implements RegexQueryCapable {

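Per the NOTE added above, the automaton-backed alternative in core is a drop-in for simple patterns; field and pattern here are illustrative:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.RegexpQuery;

    Query q = new RegexpQuery(new Term("body", "ho.*se"));
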
@@ -41,6 +41,7 @@ public class TestSlowCollationMethods extends LuceneTestCase {
private static IndexReader reader;
private static Directory dir;
private static int numDocs;
private static String splitDoc;

@BeforeClass
public static void beforeClass() throws Exception {

@@ -59,6 +60,7 @@ public class TestSlowCollationMethods extends LuceneTestCase {
doc.add(field);
iw.addDocument(doc);
}
splitDoc = _TestUtil.randomUnicodeString(random);
reader = iw.getReader();
iw.close();

@@ -76,6 +78,15 @@ public class TestSlowCollationMethods extends LuceneTestCase {
dir = null;
}

private void doCheckSorting(TopDocs docs) throws Exception {
String prev = "";
for (ScoreDoc doc : docs.scoreDocs) {
String value = reader.document(doc.doc).get("field");
assertTrue(collator.compare(value, prev) >= 0);
prev = value;
}
}

public void testSort() throws Exception {
SortField sf = new SortField("field", new FieldComparatorSource() {
@Override

@@ -83,13 +94,16 @@ public class TestSlowCollationMethods extends LuceneTestCase {
return new SlowCollatedStringComparator(numHits, fieldname, collator);
}
});
TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, numDocs, new Sort(sf));
String prev = "";
for (ScoreDoc doc : docs.scoreDocs) {
String value = reader.document(doc.doc).get("field");
assertTrue(collator.compare(value, prev) >= 0);
prev = value;
}
final Sort sort = new Sort(sf);

final TopDocs docs1 = searcher.search(TermRangeQuery.newStringRange("field", null, splitDoc, true, true), null, numDocs/(1+random.nextInt(4)), sort);
doCheckSorting(docs1);

final TopDocs docs2 = searcher.search(TermRangeQuery.newStringRange("field", splitDoc, null, true, true), null, numDocs/(1+random.nextInt(4)), sort);
doCheckSorting(docs2);

final TopDocs docs = TopDocs.merge(sort, numDocs/(1+random.nextInt(4)), new TopDocs[]{docs1, docs2});
doCheckSorting(docs);
}

private void doTestRanges(String startPoint, String endPoint, Query query) throws Exception {

@@ -63,7 +63,7 @@ public class SrndPrefixQuery extends SimpleTerm {
TermsEnum termsEnum = terms.iterator();

boolean skip = false;
TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getPrefix()));
TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
} else if (status == TermsEnum.SeekStatus.NOT_FOUND) {

@@ -53,7 +53,7 @@ public class SrndTermQuery extends SimpleTerm {
if (terms != null) {
TermsEnum termsEnum = terms.iterator();

TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getTermText()));
TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLuceneTerm(fieldName));
}

@@ -95,7 +95,7 @@ public class SrndTruncQuery extends SimpleTerm {
try {
TermsEnum termsEnum = terms.iterator();

TermsEnum.SeekStatus status = termsEnum.seek(prefixRef);
TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
BytesRef text;
if (status == TermsEnum.SeekStatus.FOUND) {
text = prefixRef;

@@ -25,7 +25,7 @@

<import file="../contrib-build.xml"/>

<contrib-uptodate name="queries" property="queries.uptodate" classpath.property="queries.jar"/>
<contrib-uptodate name="queries-contrib" contrib-src-name="queries" property="queries.uptodate" classpath.property="queries.jar" />

<path id="classpath">
<pathelement path="${queries.jar}"/>

@@ -30,22 +30,6 @@

<import file="../contrib-build.xml"/>

<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>

<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>

<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />

<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>

<target name="index" depends="compile" description="Build WordNet index">
<fail if="synindex.exists">
Index already exists - must remove first.
@@ -83,24 +67,4 @@
</java>
</target>

<target name="expand" description="Perform synonym expansion on a query">
<fail unless="synindex.exists">
Index does not exist.
</fail>

<fail unless="query">
Must specify 'query' property.
</fail>

<java classname="org.apache.lucene.wordnet.SynExpand">
<classpath>
<path refid="compile.classpath"/>
<pathelement location="${build.dir}/classes"/>
</classpath>

<arg file="${synindex.dir}"/>
<arg value="${query}"/>
</java>
</target>

</project>

@@ -17,7 +17,6 @@ package org.apache.lucene.wordnet;
* limitations under the License.
*/

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;

@@ -28,7 +27,6 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

@@ -41,8 +39,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**

@@ -53,41 +49,6 @@ import org.apache.lucene.util.Version;
*/
public final class SynExpand {

/**
* Test driver for synonym expansion.
* Uses boost factor of 0.9 for illustrative purposes.
*
* If you pass in the query "big dog" then it prints out:
*
* <code><pre>
* Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9 bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9 large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9 vainglorious^0.9 vauntingly^0.9
* dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9 dogtooth^0.9 firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9 pawl^0.9 tag^0.9 tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
* </pre></code>
*/
public static void main(String[] args) throws IOException
{
if (args.length != 2)
{
System.out.println(
"java org.apache.lucene.wordnet.SynExpand <index path> <query>");
}

FSDirectory directory = FSDirectory.open(new File(args[0]));
IndexSearcher searcher = new IndexSearcher(directory, true);

String query = args[1];
String field = "contents";

Query q = expand( query, searcher, new StandardAnalyzer(Version.LUCENE_CURRENT), field, 0.9f);
System.out.println( "Query: " + q.toString( field));

searcher.close();
directory.close();
}

/**
* Perform synonym expansion on a query.
*

@@ -95,7 +56,7 @@ public final class SynExpand {
*
* @param syns a opened to the Lucene index you previously created with {@link Syns2Index}. The searcher is not closed or otherwise altered.
*
* @param a optional analyzer used to parse the users query else {@link StandardAnalyzer} is used
* @param a analyzer used to parse the users query.
*
* @param f optional field name to search in or null if you want the default of "contents"
*

@@ -113,7 +74,6 @@ public final class SynExpand {
final Set<String> already = new HashSet<String>(); // avoid dups
List<String> top = new LinkedList<String>(); // needs to be separately listed..
final String field = ( f == null) ? "contents" : f;
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));

@@ -41,6 +41,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.FSDirectory;

@@ -48,24 +49,6 @@ import org.apache.lucene.store.FSDirectory;
* Test program to look up synonyms.
*/
public class SynLookup {

final static class CountingCollector extends Collector {
public int numHits = 0;

@Override
public void setScorer(Scorer scorer) throws IOException {}
@Override
public void collect(int doc) throws IOException {
numHits++;
}

@Override
public void setNextReader(AtomicReaderContext context) {}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
}

public static void main(String[] args) throws IOException {
if (args.length != 2) {

@@ -78,16 +61,16 @@ public class SynLookup {

String word = args[1];
Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
CountingCollector countingCollector = new CountingCollector();
TotalHitCountCollector countingCollector = new TotalHitCountCollector();
searcher.search(query, countingCollector);

if (countingCollector.numHits == 0) {
if (countingCollector.getTotalHits() == 0) {
System.out.println("No synonyms found for " + word);
} else {
System.out.println("Synonyms found for \"" + word + "\":");
}

ScoreDoc[] hits = searcher.search(query, countingCollector.numHits).scoreDocs;
ScoreDoc[] hits = searcher.search(query, countingCollector.getTotalHits()).scoreDocs;

for (int i = 0; i < hits.length; i++) {
Document doc = searcher.doc(hits[i].doc);

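The replacement collector is the stock single-purpose class this tree already ships, so the hand-rolled CountingCollector and its count-then-fetch idiom shrink to a few lines; a sketch:

    TotalHitCountCollector counter = new TotalHitCountCollector();
    searcher.search(query, counter);
    if (counter.getTotalHits() > 0) {
      ScoreDoc[] hits = searcher.search(query, counter.getTotalHits()).scoreDocs;
    }
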
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

@@ -31,7 +32,7 @@ import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

@@ -90,9 +91,15 @@ public class Syns2Index
public static final String F_WORD = "word";

/**
*
* we don't actually analyze any text (only a NOT_ANALYZED field),
* but analyzer can't be null, docinverter wants the offset gap!
*/
private static final Analyzer ana = new StandardAnalyzer(Version.LUCENE_CURRENT);
private static final Analyzer ana = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return null;
}
};

/**
* Takes arg of prolog file name and index directory.

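The null-returning tokenStream is tolerable here because every field this tool writes is indexed verbatim, so the analyzer is consulted only for the offset gap. A hedged sketch of the kind of field Syns2Index adds (the exact Store/Index flags in the real writer may differ):

    Document doc = new Document();
    doc.add(new Field(Syns2Index.F_WORD, word,
        Field.Store.YES, Field.Index.NOT_ANALYZED)); // tokenStream() never runs
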
@@ -25,7 +25,7 @@

<import file="../contrib-build.xml"/>

<contrib-uptodate name="queries" property="queries.uptodate" classpath.property="queries.jar"/>
<contrib-uptodate name="queries-contrib" contrib-src-name="queries" property="queries.uptodate" classpath.property="queries.jar"/>
<property name="lucene.jar" location="${common.dir}/build/lucene-core-${version}.jar"/>
<property name="servlet.jar" location="${common.dir}/lib/servlet-api-2.4.jar"/>
<available property="servlet.jar.present" type="file" file="${servlet.jar}"/>

@@ -67,14 +67,7 @@ public class TermsFilterBuilder implements FilterBuilder
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
if (term == null)
{
term = new Term(fieldName, new BytesRef(bytes));
} else
{
// create from previous to save fieldName.intern overhead
term = term.createTerm(new BytesRef(bytes));
}
term = new Term(fieldName, new BytesRef(bytes));
tf.addTerm(term);
}
ts.end();

@@ -64,14 +64,7 @@ public class TermsQueryBuilder {
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
if (term == null)
{
term = new Term(fieldName, new BytesRef(bytes));
} else
{
// create from previous to save fieldName.intern overhead
term = term.createTerm(new BytesRef(bytes));
}
term = new Term(fieldName, new BytesRef(bytes));
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
}
ts.end();

@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.StringHelper; // for javadocs

/**

@@ -59,7 +58,7 @@ public abstract class AbstractField implements Fieldable {
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
this.name = StringHelper.intern(name); // field names are interned
this.name = name;

this.isStored = store.isStored();
this.isIndexed = index.isIndexed();

@@ -109,7 +108,7 @@ public abstract class AbstractField implements Fieldable {
return boost;
}

/** Returns the name of the field as an interned string.
/** Returns the name of the field.
* For example "date", "title", "body", ...
*/
public String name() { return name; }

@@ -21,7 +21,6 @@ import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.StringHelper;

/**
A field is a section of a Document. Each field has two parts, a name and a

@@ -367,28 +366,6 @@ public final class Field extends AbstractField implements Fieldable {
* </ul>
*/
public Field(String name, String value, Store store, Index index, TermVector termVector) {
this(name, true, value, store, index, termVector);
}

/**
* Create a field by specifying its name, value and how it will
* be saved in the index.
*
* @param name The name of the field
* @param internName Whether to .intern() name or not
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException in any of the following situations:
* <ul>
* <li>the field is neither stored nor indexed</li>
* <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
* </ul>
*/
public Field(String name, boolean internName, String value, Store store, Index index, TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
if (value == null)

@@ -402,9 +379,6 @@ public final class Field extends AbstractField implements Fieldable {
throw new IllegalArgumentException("cannot store term vector information "
+ "for a field that is not indexed");

if (internName) // field names are optionally interned
name = StringHelper.intern(name);

this.name = name;

this.fieldsData = value;

@@ -422,7 +396,7 @@ public final class Field extends AbstractField implements Fieldable {

setStoreTermVector(termVector);
}

/**
* Create a tokenized and indexed field that is not stored. Term vectors will
* not be stored. The Reader is read only when the Document is added to the index,

@@ -454,7 +428,7 @@ public final class Field extends AbstractField implements Fieldable {
if (reader == null)
throw new NullPointerException("reader cannot be null");

this.name = StringHelper.intern(name); // field names are interned
this.name = name;
this.fieldsData = reader;

this.isStored = false;

@@ -500,7 +474,7 @@ public final class Field extends AbstractField implements Fieldable {
if (tokenStream == null)
throw new NullPointerException("tokenStream cannot be null");

this.name = StringHelper.intern(name); // field names are interned
this.name = name;
this.fieldsData = null;
this.tokenStream = tokenStream;

@@ -540,7 +514,7 @@ public final class Field extends AbstractField implements Fieldable {
if (value == null)
throw new IllegalArgumentException("value cannot be null");

this.name = StringHelper.intern(name); // field names are interned
this.name = name;
fieldsData = value;

isStored = true;

@@ -71,7 +71,7 @@ public interface Fieldable {
*/
float getBoost();

/** Returns the name of the field as an interned string.
/** Returns the name of the field.
* For example "date", "title", "body", ...
*/
String name();

@@ -98,12 +98,99 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal

/**
* Sets the given <code>long</code> value and sets the field's {@link ValueType} to
* {@link ValueType#INTS} unless already set. If you want to change the
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(long value) {
setInt(value, false);
}

/**
* Sets the given <code>long</code> value as a 64 bit signed integer.
*
* @param value
* the value to set
* @param fixed
* if <code>true</code> {@link ValueType#FIXED_INTS_64} is used
* otherwise {@link ValueType#VAR_INTS}
*/
public void setInt(long value, boolean fixed) {
if (type == null) {
type = ValueType.INTS;
type = fixed ? ValueType.FIXED_INTS_64 : ValueType.VAR_INTS;
}
longValue = value;
}

/**
* Sets the given <code>int</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(int value) {
setInt(value, false);
}

/**
* Sets the given <code>int</code> value as a 32 bit signed integer.
*
* @param value
* the value to set
* @param fixed
* if <code>true</code> {@link ValueType#FIXED_INTS_32} is used
* otherwise {@link ValueType#VAR_INTS}
*/
public void setInt(int value, boolean fixed) {
if (type == null) {
type = fixed ? ValueType.FIXED_INTS_32 : ValueType.VAR_INTS;
}
longValue = value;
}

/**
* Sets the given <code>short</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(short value) {
setInt(value, false);
}

/**
* Sets the given <code>short</code> value as a 16 bit signed integer.
*
* @param value
* the value to set
* @param fixed
* if <code>true</code> {@link ValueType#FIXED_INTS_16} is used
* otherwise {@link ValueType#VAR_INTS}
*/
public void setInt(short value, boolean fixed) {
if (type == null) {
type = fixed ? ValueType.FIXED_INTS_16 : ValueType.VAR_INTS;
}
longValue = value;
}

/**
* Sets the given <code>byte</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(byte value) {
setInt(value, false);
}
/**
* Sets the given <code>byte</code> value as a 8 bit signed integer.
*
* @param value
* the value to set
* @param fixed
* if <code>true</code> {@link ValueType#FIXED_INTS_8} is used
* otherwise {@link ValueType#VAR_INTS}
*/
public void setInt(byte value, boolean fixed) {
if (type == null) {
type = fixed ? ValueType.FIXED_INTS_8 : ValueType.VAR_INTS;
}
longValue = value;
}

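A short usage sketch of the widened API (the field name, and the bare single-argument constructor, are assumptions for illustration):

    IndexDocValuesField price = new IndexDocValuesField("price");
    price.setInt(42L);        // defaults to ValueType.VAR_INTS
    price.setInt(42L, true);  // pins ValueType.FIXED_INTS_64
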
@@ -268,7 +355,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
field.stringValue());
valField.setBytes(ref, type);
break;
case INTS:
case VAR_INTS:
valField.setInt(Long.parseLong(field.stringValue()));
break;
case FLOAT_32:

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.SortField; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs

/**

@@ -69,8 +68,8 @@ import org.apache.lucene.search.FieldCache; // javadocs
* <code>NumericField</code>, use {@link NumericRangeQuery} or {@link
* NumericRangeFilter}. To sort according to a
* <code>NumericField</code>, use the normal numeric sort types, eg
* {@link SortField#INT}. <code>NumericField</code> values
* can also be loaded directly from {@link FieldCache}.</p>
* {@link org.apache.lucene.search.SortField.Type#INT}. <code>NumericField</code>
* values can also be loaded directly from {@link FieldCache}.</p>
*
* <p>By default, a <code>NumericField</code>'s value is not stored but
* is indexed for range filtering and sorting. You can use

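With the sort constants now nested under SortField.Type, sorting on a numeric field reads as follows (field name illustrative; the filter-taking search overload is the one used elsewhere in this change):

    Sort byPrice = new Sort(new SortField("price", SortField.Type.INT));
    TopDocs hits = searcher.search(query, null, 10, byPrice);
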
@@ -47,11 +47,12 @@ class BufferedDeletes {
key, Integer val, int hash, Entry next
(OBJ_HEADER + 3*POINTER + INT). Term is object w/
String field and String text (OBJ_HEADER + 2*POINTER).
We don't count Term's field since it's interned.
Term's field is String (OBJ_HEADER + 4*INT + POINTER +
OBJ_HEADER + string.length*CHAR).
Term's text is String (OBJ_HEADER + 4*INT + POINTER +
OBJ_HEADER + string.length*CHAR). Integer is
OBJ_HEADER + INT. */
final static int BYTES_PER_DEL_TERM = 8*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 5*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 6*RamUsageEstimator.NUM_BYTES_INT;
final static int BYTES_PER_DEL_TERM = 9*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 7*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 10*RamUsageEstimator.NUM_BYTES_INT;

/* Rough logic: del docIDs are List<Integer>. Say list
allocates ~2X size (2*POINTER). Integer is OBJ_HEADER

@@ -189,7 +190,7 @@ class BufferedDeletes {
terms.put(term, Integer.valueOf(docIDUpto));
numTermDeletes.incrementAndGet();
if (current == null) {
bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length);
bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (RamUsageEstimator.NUM_BYTES_CHAR * term.field().length()));
}
}

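The new variable term in the accounting call is easy to sanity-check: once field names stop being interned, each buffered delete also owns its field's characters, at NUM_BYTES_CHAR (2) apiece. Illustrative arithmetic only:

    // for new Term("body", "fox"): 3 UTF-8 term bytes, 4 field chars
    long charged = BYTES_PER_DEL_TERM
        + 3                                      // term.bytes.length
        + RamUsageEstimator.NUM_BYTES_CHAR * 4;  // 8 bytes of field chars
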
@@ -380,7 +380,7 @@ class BufferedDeletesStream {
// Since we visit terms sorted, we gain performance
// by re-using the same TermsEnum and seeking only
// forwards
if (term.field() != currentField) {
if (!term.field().equals(currentField)) {
assert currentField == null || currentField.compareTo(term.field()) < 0;
currentField = term.field();
Terms terms = fields.terms(currentField);

@@ -398,7 +398,7 @@ class BufferedDeletesStream {

// System.out.println(" term=" + term);

if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) {
if (termsEnum.seekExact(term.bytes(), false)) {
DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs);

if (docsEnum != null) {

@@ -32,6 +32,7 @@ import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

import java.text.NumberFormat;
import java.io.PrintStream;

@@ -338,6 +339,27 @@ public class CheckIndex {
return result;
}

// find the oldest and newest segment versions
String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE);
String oldSegs = null;
boolean foundNonNullVersion = false;
Comparator<String> versionComparator = StringHelper.getVersionComparator();
for (SegmentInfo si : sis) {
String version = si.getVersion();
if (version == null) {
// pre-3.1 segment
oldSegs = "pre-3.1";
} else {
foundNonNullVersion = true;
if (versionComparator.compare(version, oldest) < 0) {
oldest = version;
}
if (versionComparator.compare(version, newest) > 0) {
newest = version;
}
}
}

final int numSegments = sis.size();
final String segmentsFileName = sis.getCurrentSegmentFileName();
IndexInput input = null;

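StringHelper.getVersionComparator() orders dotted version strings numerically, component by component, rather than lexicographically, which is what makes the oldest/newest scan above safe across releases. A hedged illustration (version strings invented):

    Comparator<String> cmp = StringHelper.getVersionComparator();
    "3.2".compareTo("3.10");    // > 0: plain lexicographic order, wrong for versions
    cmp.compare("3.2", "3.10"); // < 0: 2 < 10 when compared numerically
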
@@ -372,7 +394,7 @@ public class CheckIndex {
} else if (format == DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]";
} else if (format == DefaultSegmentInfosWriter.FORMAT_3_1) {
sFormat = "FORMAT_3_1 [Lucene 3.1]";
sFormat = "FORMAT_3_1 [Lucene 3.1+]";
} else if (format == DefaultSegmentInfosWriter.FORMAT_4_0) {
sFormat = "FORMAT_4_0 [Lucene 4.0]";
} else if (format == DefaultSegmentInfosWriter.FORMAT_CURRENT) {

@@ -396,7 +418,19 @@ public class CheckIndex {
userDataString = "";
}

msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
String versionString = null;
if (oldSegs != null) {
if (foundNonNullVersion) {
versionString = "versions=[" + oldSegs + " .. " + newest + "]";
} else {
versionString = "version=" + oldSegs;
}
} else {
versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]");
}

msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments
+ " " + versionString + " format=" + sFormat + userDataString);

if (onlySegments != null) {
result.partial = true;

@@ -847,7 +881,7 @@ public class CheckIndex {

// Test seek to last term:
if (lastTerm != null) {
if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) {
if (terms.seekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to last term " + lastTerm + " failed");
}

@@ -874,14 +908,14 @@ public class CheckIndex {
// Seek by ord
for(int i=seekCount-1;i>=0;i--) {
long ord = i*(termCount/seekCount);
terms.seek(ord);
terms.seekExact(ord);
seekTerms[i] = new BytesRef(terms.term());
}

// Seek by term
long totDocCount = 0;
for(int i=seekCount-1;i>=0;i--) {
if (terms.seek(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
if (terms.seekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}

@@ -991,7 +1025,11 @@ public class CheckIndex {
case FLOAT_64:
values.getFloat();
break;
case INTS:
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
values.getInt();
break;
default:

@@ -1,307 +0,0 @@
package org.apache.lucene.index;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;

import java.util.Collection;
import java.util.HashMap;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
* Class for accessing a compound stream.
* This class implements a directory, but is limited to only read operations.
* Directory methods that would normally modify data throw an exception.
* @lucene.experimental
*/
public class CompoundFileReader extends Directory {

private int readBufferSize;

private static final class FileEntry {
long offset;
long length;
}

// Base info
private Directory directory;
private String fileName;

private IndexInput stream;
private HashMap<String,FileEntry> entries = new HashMap<String,FileEntry>();

public CompoundFileReader(Directory dir, String name) throws IOException {
this(dir, name, BufferedIndexInput.BUFFER_SIZE);
}

public CompoundFileReader(Directory dir, String name, int readBufferSize) throws IOException {
assert !(dir instanceof CompoundFileReader) : "compound file inside of compound file: " + name;
directory = dir;
fileName = name;
this.readBufferSize = readBufferSize;

boolean success = false;

try {
stream = dir.openInput(name, readBufferSize);

// read the first VInt. If it is negative, it's the version number
// otherwise it's the count (pre-3.1 indexes)
int firstInt = stream.readVInt();

final int count;
final boolean stripSegmentName;
if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION) {
if (firstInt < CompoundFileWriter.FORMAT_CURRENT) {
throw new CorruptIndexException("Incompatible format version: "
+ firstInt + " expected " + CompoundFileWriter.FORMAT_CURRENT);
}
// It's a post-3.1 index, read the count.
count = stream.readVInt();
stripSegmentName = false;
} else {
count = firstInt;
stripSegmentName = true;
}

// read the directory and init files
FileEntry entry = null;
for (int i=0; i<count; i++) {
long offset = stream.readLong();
String id = stream.readString();

if (stripSegmentName) {
// Fix the id to not include the segment names. This is relevant for
// pre-3.1 indexes.
id = IndexFileNames.stripSegmentName(id);
}

if (entry != null) {
// set length of the previous entry
entry.length = offset - entry.offset;
}

entry = new FileEntry();
entry.offset = offset;
entries.put(id, entry);
}

// set the length of the final entry
if (entry != null) {
entry.length = stream.length() - entry.offset;
}

success = true;

} finally {
if (!success && (stream != null)) {
try {
stream.close();
} catch (IOException e) { }
}
}
}

public Directory getDirectory() {
return directory;
}

public String getName() {
return fileName;
}

@Override
public synchronized void close() throws IOException {
if (stream == null)
throw new IOException("Already closed");

entries.clear();
stream.close();
stream = null;
}

@Override
public synchronized IndexInput openInput(String id) throws IOException {
// Default to readBufferSize passed in when we were opened
return openInput(id, readBufferSize);
}

@Override
public synchronized IndexInput openInput(String id, int readBufferSize) throws IOException {
if (stream == null)
throw new IOException("Stream closed");

id = IndexFileNames.stripSegmentName(id);
final FileEntry entry = entries.get(id);
if (entry == null)
throw new IOException("No sub-file with id " + id + " found (files: " + entries.keySet() + ")");

return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
}

/** Returns an array of strings, one for each file in the directory. */
@Override
public String[] listAll() {
String[] res = entries.keySet().toArray(new String[entries.size()]);
// Add the segment name
String seg = fileName.substring(0, fileName.indexOf('.'));
for (int i = 0; i < res.length; i++) {
res[i] = seg + res[i];
}
return res;
}

/** Returns true iff a file with the given name exists. */
@Override
public boolean fileExists(String name) {
return entries.containsKey(IndexFileNames.stripSegmentName(name));
}

/** Returns the time the compound file was last modified. */
@Override
public long fileModified(String name) throws IOException {
return directory.fileModified(fileName);
}

/** Not implemented
* @throws UnsupportedOperationException */
@Override
public void deleteFile(String name) {
throw new UnsupportedOperationException();
}

/** Not implemented
* @throws UnsupportedOperationException */
public void renameFile(String from, String to) {
throw new UnsupportedOperationException();
}

/** Returns the length of a file in the directory.
* @throws IOException if the file does not exist */
@Override
public long fileLength(String name) throws IOException {
FileEntry e = entries.get(IndexFileNames.stripSegmentName(name));
if (e == null)
throw new FileNotFoundException(name);
return e.length;
}

/** Not implemented
* @throws UnsupportedOperationException */
@Override
public IndexOutput createOutput(String name) {
throw new UnsupportedOperationException();
}

@Override
public void sync(Collection<String> names) throws IOException {
}

/** Not implemented
* @throws UnsupportedOperationException */
@Override
public Lock makeLock(String name) {
throw new UnsupportedOperationException();
}

/** Implementation of an IndexInput that reads from a portion of the
* compound file. The visibility is left as "package" *only* because
* this helps with testing since JUnit test cases in a different class
* can then access package fields of this class.
*/
static final class CSIndexInput extends BufferedIndexInput {
IndexInput base;
long fileOffset;
long length;

CSIndexInput(final IndexInput base, final long fileOffset, final long length) {
this(base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE);
}

CSIndexInput(final IndexInput base, final long fileOffset, final long length, int readBufferSize) {
super(readBufferSize);
this.base = (IndexInput)base.clone();
this.fileOffset = fileOffset;
this.length = length;
}

@Override
public Object clone() {
CSIndexInput clone = (CSIndexInput)super.clone();
clone.base = (IndexInput)base.clone();
clone.fileOffset = fileOffset;
clone.length = length;
return clone;
}

/** Expert: implements buffer refill. Reads bytes from the current
* position in the input.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
*/
@Override
protected void readInternal(byte[] b, int offset, int len) throws IOException {
long start = getFilePointer();
if(start + len > length)
throw new IOException("read past EOF");
base.seek(fileOffset + start);
base.readBytes(b, offset, len, false);
}

/** Expert: implements seek. Sets current position in this file, where
* the next {@link #readInternal(byte[],int,int)} will occur.
* @see #readInternal(byte[],int,int)
*/
@Override
protected void seekInternal(long pos) {}

/** Closes the stream to further operations. */
@Override
public void close() throws IOException {
|
||||
base.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long length() {
|
||||
return length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
|
||||
// Copy first whatever is in the buffer
|
||||
numBytes -= flushBuffer(out, numBytes);
|
||||
|
||||
// If there are more bytes left to copy, delegate the copy task to the
|
||||
// base IndexInput, in case it can do an optimized copy.
|
||||
if (numBytes > 0) {
|
||||
long start = getFilePointer();
|
||||
if (start + numBytes > length) {
|
||||
throw new IOException("read past EOF");
|
||||
}
|
||||
base.seek(fileOffset + start);
|
||||
base.copyBytes(out, numBytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
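The constructor above relies on a small header trick: format versions are written as negative VInts, so a non-negative first value can only be a pre-3.1 file count. A condensed sketch of just that dispatch, as a hypothetical standalone helper (not part of this patch; per CompoundFileWriter below, FORMAT_PRE_VERSION is 0 and FORMAT_CURRENT is -1):

// Hypothetical helper isolating the version-vs-count dispatch shown above.
// Versions are negative, so firstInt >= 0 can only be a pre-3.1 file count.
static int readFileCount(IndexInput stream) throws IOException {
  int firstInt = stream.readVInt();
  if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION) { // negative: versioned header
    if (firstInt < CompoundFileWriter.FORMAT_CURRENT) {   // older than any known format
      throw new CorruptIndexException("Incompatible format version: " + firstInt);
    }
    return stream.readVInt(); // 3.1+: the count follows the version marker
  }
  return firstInt;            // pre-3.1: the first VInt is the count itself
}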
@@ -1,252 +0,0 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;

import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

/**
 * Combines multiple files into a single compound file.
 * The file format:<br>
 * <ul>
 *   <li>VInt fileCount</li>
 *   <li>{Directory}
 *       fileCount entries with the following structure:</li>
 *   <ul>
 *     <li>long dataOffset</li>
 *     <li>String fileName</li>
 *   </ul>
 *   <li>{File Data}
 *       fileCount entries with the raw data of the corresponding file</li>
 * </ul>
 *
 * The fileCount integer indicates how many files are contained in this compound
 * file. The {directory} that follows has that many entries. Each directory entry
 * contains a long pointer to the start of this file's data section, and a String
 * with that file's name.
 *
 * @lucene.internal
 */
public final class CompoundFileWriter {

  private static final class FileEntry {
    /** source file */
    String file;

    /** temporary holder for the start of directory entry for this file */
    long directoryOffset;

    /** temporary holder for the start of this file's data section */
    long dataOffset;

    /** the directory which contains the file. */
    Directory dir;
  }

  // Before versioning started.
  static final int FORMAT_PRE_VERSION = 0;

  // Segment name is not written in the file names.
  static final int FORMAT_NO_SEGMENT_PREFIX = -1;

  // NOTE: if you introduce a new format, make it 1 lower
  // than the current one, and always change this if you
  // switch to a new format!
  static final int FORMAT_CURRENT = FORMAT_NO_SEGMENT_PREFIX;

  private Directory directory;
  private String fileName;
  private HashSet<String> ids;
  private LinkedList<FileEntry> entries;
  private boolean merged = false;
  private MergeState.CheckAbort checkAbort;

  /** Create the compound stream in the specified file. The file name is the
   *  entire name (no extensions are added).
   *  @throws NullPointerException if <code>dir</code> or <code>name</code> is null
   */
  public CompoundFileWriter(Directory dir, String name) {
    this(dir, name, null);
  }

  CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) {
    if (dir == null)
      throw new NullPointerException("directory cannot be null");
    if (name == null)
      throw new NullPointerException("name cannot be null");
    this.checkAbort = checkAbort;
    directory = dir;
    fileName = name;
    ids = new HashSet<String>();
    entries = new LinkedList<FileEntry>();
  }

  /** Returns the directory of the compound file. */
  public Directory getDirectory() {
    return directory;
  }

  /** Returns the name of the compound file. */
  public String getName() {
    return fileName;
  }

  /** Add a source stream. <code>file</code> is the string by which the
   *  sub-stream will be known in the compound stream.
   *
   *  @throws IllegalStateException if this writer is closed
   *  @throws NullPointerException if <code>file</code> is null
   *  @throws IllegalArgumentException if a file with the same name
   *    has been added already
   */
  public void addFile(String file) {
    addFile(file, directory);
  }

  /**
   * Same as {@link #addFile(String)}, only for files that are found in an
   * external {@link Directory}.
   */
  public void addFile(String file, Directory dir) {
    if (merged)
      throw new IllegalStateException(
          "Can't add extensions after merge has been called");

    if (file == null)
      throw new NullPointerException(
          "file cannot be null");

    if (!ids.add(file))
      throw new IllegalArgumentException(
          "File " + file + " already added");

    FileEntry entry = new FileEntry();
    entry.file = file;
    entry.dir = dir;
    entries.add(entry);
  }

  /** Merge files with the extensions added up to now.
   *  All files with these extensions are combined sequentially into the
   *  compound stream.
   *  @throws IllegalStateException if close() had been called before or
   *    if no file has been added to this object
   */
  public void close() throws IOException {
    if (merged)
      throw new IllegalStateException("Merge already performed");

    if (entries.isEmpty())
      throw new IllegalStateException("No entries to merge have been defined");

    merged = true;

    // open the compound stream
    IndexOutput os = directory.createOutput(fileName);
    IOException priorException = null;
    try {
      // Write the Version info - must be a VInt because CFR reads a VInt
      // in older versions!
      os.writeVInt(FORMAT_CURRENT);

      // Write the number of entries
      os.writeVInt(entries.size());

      // Write the directory with all offsets at 0.
      // Remember the positions of directory entries so that we can
      // adjust the offsets later
      long totalSize = 0;
      for (FileEntry fe : entries) {
        fe.directoryOffset = os.getFilePointer();
        os.writeLong(0); // for now
        os.writeString(IndexFileNames.stripSegmentName(fe.file));
        totalSize += fe.dir.fileLength(fe.file);
      }

      // Pre-allocate size of file as optimization --
      // this can potentially help IO performance as
      // we write the file and also later during
      // searching. It also uncovers a disk-full
      // situation earlier and hopefully without
      // actually filling disk to 100%:
      final long finalLength = totalSize + os.getFilePointer();
      os.setLength(finalLength);

      // Open the files and copy their data into the stream.
      // Remember the locations of each file's data section.
      for (FileEntry fe : entries) {
        fe.dataOffset = os.getFilePointer();
        copyFile(fe, os);
      }

      // Write the data offsets into the directory of the compound stream
      for (FileEntry fe : entries) {
        os.seek(fe.directoryOffset);
        os.writeLong(fe.dataOffset);
      }

      assert finalLength == os.length();

      // Close the output stream. Set the os to null before trying to
      // close so that if an exception occurs during the close, the
      // finally clause below will not attempt to close the stream
      // the second time.
      IndexOutput tmp = os;
      os = null;
      tmp.close();
    } catch (IOException e) {
      priorException = e;
    } finally {
      IOUtils.closeSafely(priorException, os);
    }
  }

  /**
   * Copy the contents of the file with specified extension into the provided
   * output stream.
   */
  private void copyFile(FileEntry source, IndexOutput os) throws IOException {
    IndexInput is = source.dir.openInput(source.file);
    try {
      long startPtr = os.getFilePointer();
      long length = is.length();
      os.copyBytes(is, length);

      if (checkAbort != null) {
        checkAbort.work(length);
      }

      // Verify that the output length diff is equal to original file
      long endPtr = os.getFilePointer();
      long diff = endPtr - startPtr;
      if (diff != length)
        throw new IOException("Difference in the output file offsets " + diff
            + " does not match the original file length " + length);

    } finally {
      is.close();
    }
  }
}
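The class javadoc above fixes the on-disk layout; driving the writer itself is a two-step affair: register the sub-files, then let close() perform the merge. A minimal usage sketch, assuming `dir` is an open Directory that already contains the sub-files:

// Minimal usage sketch for the (now removed) CompoundFileWriter above.
// Assumes `dir` is an open Directory already containing _1.fnm and _1.frq.
CompoundFileWriter cfw = new CompoundFileWriter(dir, "_1.cfs");
cfw.addFile("_1.fnm");
cfw.addFile("_1.frq");
// close() performs the actual merge: it writes FORMAT_CURRENT, the entry
// count, a directory of (dataOffset, fileName) pairs whose offsets are
// patched in after the raw file data has been copied, then the data itself.
cfw.close();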
@@ -237,7 +237,7 @@ public class DocTermOrds {
     final TermsEnum te = terms.iterator();
     final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
     //System.out.println("seekStart=" + seekStart.utf8ToString());
-    if (te.seek(seekStart) == TermsEnum.SeekStatus.END) {
+    if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
       // No terms match
       return;
     }

@@ -693,7 +693,7 @@ public class DocTermOrds {
     }

     @Override
-    public SeekStatus seek(BytesRef target, boolean useCache) throws IOException {
+    public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {

       // already here
       if (term != null && term.equals(target)) {

@@ -704,7 +704,7 @@ public class DocTermOrds {

       if (startIdx >= 0) {
         // we hit the term exactly... lucky us!
-        TermsEnum.SeekStatus seekStatus = termsEnum.seek(target);
+        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
         assert seekStatus == TermsEnum.SeekStatus.FOUND;
         ord = startIdx << indexIntervalBits;
         setTerm();

@@ -717,7 +717,7 @@ public class DocTermOrds {

       if (startIdx == 0) {
         // our target occurs *before* the first term
-        TermsEnum.SeekStatus seekStatus = termsEnum.seek(target);
+        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
         assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
         ord = 0;
         setTerm();

@@ -733,7 +733,7 @@ public class DocTermOrds {
       // so we don't need to seek.
       } else {
         // seek to the right block
-        TermsEnum.SeekStatus seekStatus = termsEnum.seek(indexedTermsArray[startIdx]);
+        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
         assert seekStatus == TermsEnum.SeekStatus.FOUND;
         ord = startIdx << indexIntervalBits;
         setTerm();

@@ -754,16 +754,16 @@ public class DocTermOrds {
     }

     @Override
-    public SeekStatus seek(long targetOrd) throws IOException {
+    public void seekExact(long targetOrd) throws IOException {
       int delta = (int) (targetOrd - ordBase - ord);
-      //System.out.println("  seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord);
+      //System.out.println("  seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
       if (delta < 0 || delta > indexInterval) {
         final int idx = (int) (targetOrd >>> indexIntervalBits);
         final BytesRef base = indexedTermsArray[idx];
         //System.out.println("  do seek term=" + base.utf8ToString());
         ord = idx << indexIntervalBits;
         delta = (int) (targetOrd - ord);
-        final TermsEnum.SeekStatus seekStatus = termsEnum.seek(base, true);
+        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base, true);
         assert seekStatus == TermsEnum.SeekStatus.FOUND;
       } else {
         //System.out.println("seek w/in block");

@@ -772,15 +772,14 @@ public class DocTermOrds {
       while (--delta >= 0) {
         BytesRef br = termsEnum.next();
         if (br == null) {
-          term = null;
-          return null;
+          assert false;
+          return;
         }
         ord++;
       }

       setTerm();
-      return term == null ? SeekStatus.END : SeekStatus.FOUND;
+      //System.out.println("  return term=" + term.utf8ToString());
+      assert term != null;
     }

     private BytesRef setTerm() throws IOException {

@@ -794,8 +793,7 @@ public class DocTermOrds {
     }

     public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
-      TermsEnum.SeekStatus status = termsEnum.seek(ord);
-      assert status == TermsEnum.SeekStatus.FOUND;
+      termsEnum.seekExact(ord);
       return termsEnum.term();
     }
   }
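The seekExact(long) rewrite above leans on the term index interval: every 2^indexIntervalBits-th term is cached in indexedTermsArray, so an ordinal seek jumps to the nearest indexed block and scans forward with next(). A worked instance of that arithmetic, with hypothetical values:

// Worked example of the block arithmetic in seekExact(long) above,
// assuming indexIntervalBits = 7 (index interval 128); values are hypothetical.
int indexIntervalBits = 7;
long targetOrd = 1000;
int idx = (int) (targetOrd >>> indexIntervalBits); // 1000 / 128 = 7 -> indexedTermsArray[7]
long ord = idx << indexIntervalBits;               // 7 * 128 = 896, ord of that block's first term
int delta = (int) (targetOrd - ord);               // 104 calls to termsEnum.next() reach ord 1000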
@@ -99,7 +99,7 @@ public final class DocumentsWriterFlushControl {
     final long ram = flushBytes + activeBytes;
     final long ramBufferBytes = (long) (maxConfiguredRamBuffer * 1024 * 1024);
     // take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta
-    final long expected = (long)(2 * (ramBufferBytes)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
+    final long expected = (2 * (ramBufferBytes)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
     if (peakDelta < (ramBufferBytes >> 1)) {
       /*
        * if we are indexing with very low maxRamBuffer like 0.1MB memory can
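To make the worst-case bound concrete, here is a worked instance of the expected formula above, with hypothetical numbers:

// Hypothetical worked instance of the bound above: 16.0 MB configured RAM
// buffer, 3 pending + 1 flushing + 0 blocked DWPTs, 1 MiB peakDelta per
// in-flight document.
long ramBufferBytes = (long) (16.0 * 1024 * 1024);             // 16 MiB
long peakDelta = 1024 * 1024;                                  // 1 MiB
long expected = (2 * ramBufferBytes) + ((3 + 1 + 0) * peakDelta);
// expected = 32 MiB + 4 MiB = 36 MiB: the worst-case RAM use the check allows.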
@@ -36,7 +36,6 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.StringHelper;

 /** Access to the Fieldable Info file that describes document fields and whether or
  *  not they are indexed. Each segment has a separate Fieldable Info file. Objects

@@ -475,12 +474,11 @@ public final class FieldInfos implements Iterable<FieldInfo> {
       boolean storeTermVector, boolean storePositionWithTermVector,
       boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) {
     // don't check modifiable here since we use that to initially build up FIs
-    name = StringHelper.intern(name);
     if (globalFieldNumbers != null) {
       globalFieldNumbers.setIfNotSet(fieldNumber, name);
     }
     final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
-      storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
+        storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
     putInternal(fi);
     return fi;
   }

@@ -611,7 +609,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
         b = 0;
       } else {
         switch(fi.docValues) {
-          case INTS:
+          case VAR_INTS:
             b = 1;
             break;
           case FLOAT_32:

@@ -638,6 +636,19 @@ public final class FieldInfos implements Iterable<FieldInfo> {
           case BYTES_VAR_SORTED:
             b = 9;
             break;
+          case FIXED_INTS_16:
+            b = 10;
+            break;
+          case FIXED_INTS_32:
+            b = 11;
+            break;
+          case FIXED_INTS_64:
+            b = 12;
+            break;
+          case FIXED_INTS_8:
+            b = 13;
+            break;

           default:
             throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
         }

@@ -659,7 +670,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
     final int size = input.readVInt(); //read in the size

     for (int i = 0; i < size; i++) {
-      String name = StringHelper.intern(input.readString());
+      String name = input.readString();
       // if this is a previous format codec 0 will be preflex!
       final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
       final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;

@@ -688,7 +699,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
         docValuesType = null;
         break;
       case 1:
-        docValuesType = ValueType.INTS;
+        docValuesType = ValueType.VAR_INTS;
         break;
       case 2:
         docValuesType = ValueType.FLOAT_32;

@@ -714,6 +725,19 @@ public final class FieldInfos implements Iterable<FieldInfo> {
       case 9:
         docValuesType = ValueType.BYTES_VAR_SORTED;
         break;
+      case 10:
+        docValuesType = ValueType.FIXED_INTS_16;
+        break;
+      case 11:
+        docValuesType = ValueType.FIXED_INTS_32;
+        break;
+      case 12:
+        docValuesType = ValueType.FIXED_INTS_64;
+        break;
+      case 13:
+        docValuesType = ValueType.FIXED_INTS_8;
+        break;

       default:
         throw new IllegalStateException("unhandled indexValues type " + b);
       }
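The write-side and read-side switches above have to stay mirror images: each ValueType is stored as a one-byte tag in the field infos file, and the reader maps the tag back. A sketch of the write-side mapping restricted to the cases visible in this diff (the real table covers every ValueType, so this is deliberately incomplete):

// Partial sketch of the tag scheme above: whatever byte the writer switch
// emits for a ValueType, the reader switch must map back to the same type.
static byte tagOf(ValueType t) {
  switch (t) {
    case VAR_INTS:         return 1;  // was INTS before this change
    case BYTES_VAR_SORTED: return 9;
    case FIXED_INTS_16:    return 10; // new in this change
    case FIXED_INTS_32:    return 11; // new
    case FIXED_INTS_64:    return 12; // new
    case FIXED_INTS_8:     return 13; // new
    default: throw new IllegalStateException("unhandled indexValues type " + t);
  }
}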
@@ -43,10 +43,8 @@ public abstract class FieldsEnum {
     return atts;
   }

-  /** Increments the enumeration to the next field. The
-   *  returned field is always interned, so simple ==
-   *  comparison is allowed. Returns null when there are no
-   *  more fields.*/
+  /** Increments the enumeration to the next field. Returns
+   *  null when there are no more fields.*/
   public abstract String next() throws IOException;

   /** Get {@link TermsEnum} for the current field.  You
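Since the interned-name guarantee is dropped by this javadoc change, identity comparison on the returned field name is no longer safe; callers have to compare by value. A minimal caller-side sketch, assuming `fieldsEnum` is a FieldsEnum positioned before the first field:

// Field names are no longer interned, so compare with equals(), not ==:
String field = fieldsEnum.next();
while (field != null) {
  if ("body".equals(field)) {  // identity (field == "body") was only safe before this change
    // ... consume this field's terms via fieldsEnum.terms() ...
  }
  field = fieldsEnum.next();
}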
@@ -364,7 +364,6 @@ public final class FieldsReader implements Cloneable {
     Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
     Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
     f = new Field(fi.name,     // name
-                  false,
                   fieldsStream.readString(), // read value
                   Field.Store.YES,
                   index,
@@ -132,13 +132,18 @@ public class FilterIndexReader extends IndexReader {
     public FilterTermsEnum(TermsEnum in) { this.in = in; }

     @Override
-    public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
-      return in.seek(text, useCache);
+    public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+      return in.seekExact(text, useCache);
     }

     @Override
-    public SeekStatus seek(long ord) throws IOException {
-      return in.seek(ord);
+    public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+      return in.seekCeil(text, useCache);
+    }
+
+    @Override
+    public void seekExact(long ord) throws IOException {
+      in.seekExact(ord);
     }

     @Override

@@ -182,8 +187,8 @@ public class FilterIndexReader extends IndexReader {
     }

     @Override
-    public void seek(BytesRef term, TermState state) throws IOException {
-      in.seek(term, state);
+    public void seekExact(BytesRef term, TermState state) throws IOException {
+      in.seekExact(term, state);
     }

     @Override
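Together with the DocTermOrds changes earlier, this delegation shows the TermsEnum API split made in this commit: the overloaded seek() methods become seekExact(BytesRef, boolean) returning a plain boolean, seekCeil(BytesRef, boolean) returning a SeekStatus, and seekExact(long) for ordinal positioning. A small caller-side sketch, assuming `te` is a TermsEnum and `target` a BytesRef:

// Exact-match seek: a simple boolean, no SeekStatus to interpret.
if (te.seekExact(target, true)) {
  // exact hit; te.term() now equals target
}
// Ceiling seek: lands on target or the next greater term.
switch (te.seekCeil(target, true)) {
  case FOUND:     /* positioned exactly on target */              break;
  case NOT_FOUND: /* positioned on the next greater term */       break;
  case END:       /* target sorts after every term; enum is done */ break;
}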