LUCENE-3312: Merge up to trunk and fix basic Javadocs merge conflicts. The new classes now need method descriptions, mainly oal.index.StorableField(Type)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3312@1379200 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2012-08-30 22:43:41 +00:00
commit 50d2639308
337 changed files with 6488 additions and 27150 deletions

View File

@ -1,49 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="clover" basedir=".">
<import file="lucene/common-build.xml"/>
<!--
Run after Junit tests.
This target is in a separate file, as it needs to include common-build.xml,
but must run from top-level!
-->
<target name="generate-clover-reports" depends="clover">
<fail unless="run.clover">Clover not enabled!</fail>
<mkdir dir="${clover.report.dir}"/>
<fileset dir="." id="clover.test.result.files">
<include name="*/build/**/test/TEST-*.xml"/>
<exclude name="lucene/build/backwards/**"/>
</fileset>
<clover-report>
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
<format type="html" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
<format type="xml" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
</clover-report>
<echo>You can find the merged Lucene/Solr Clover report in '${clover.report.dir}'.</echo>
</target>
</project>

View File

@ -74,7 +74,7 @@
</pathconvert>
<fail if="validate.patternsFound">The following files contain @author tags or nocommits:${line.separator}${validate.patternsFound}</fail>
</target>
<target name="rat-sources" description="Runs rat across all sources and tests">
<sequential><subant target="rat-sources" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
@ -248,15 +248,21 @@
</sequential>
</target>
<target name="check-svn-working-copy">
<subant target="check-svn-working-copy" inheritall="false" failonerror="true">
<fileset dir="." includes="extra-targets.xml" />
</subant>
</target>
<!-- Calls only generate-clover-reports on Lucene, as Solr's is just a clone with another target; the database itself is fixed -->
<target name="generate-clover-reports">
<subant target="generate-clover-reports" inheritall="false" failonerror="true">
<fileset dir="." includes="build-clover.xml" />
<fileset dir="." includes="extra-targets.xml" />
</subant>
</target>
<!-- Jenkins tasks -->
<target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,-svn-status"/>
<target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,check-svn-working-copy"/>
<target name="jenkins-clover">
<antcall target="-jenkins-clover">
@ -280,31 +286,4 @@
<target name="-jenkins-javadocs-lint" unless="-disable.javadocs-lint">
<antcall target="javadocs-lint"/>
</target>
<!-- define here, as common-build is not included! -->
<property name="svn.exe" value="svn" />
<target name="-svn-status">
<exec executable="${svn.exe}" dir="." failonerror="true">
<arg value="status"/>
<redirector outputproperty="svn.status.output">
<outputfilterchain>
<linecontainsregexp>
<regexp pattern="^\?" />
</linecontainsregexp>
<tokenfilter>
<replaceregex pattern="^........" replace="* " />
<replacestring from="${file.separator}" to="/" />
</tokenfilter>
</outputfilterchain>
</redirector>
</exec>
<fail message="Source checkout is dirty after running tests!!! Offending files:${line.separator}${svn.status.output}">
<condition>
<not>
<equals arg1="${svn.status.output}" arg2=""/>
</not>
</condition>
</fail>
</target>
</project>

View File

@ -95,7 +95,6 @@
<classpathentry kind="src" path="solr/contrib/velocity/src/test"/>
<classpathentry kind="src" path="solr/contrib/velocity/src/test-files"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/ant-1.8.2.jar"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/ant-junit-1.8.2.jar"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/junit-4.10.jar"/>
<classpathentry kind="lib" path="lucene/sandbox/lib/jakarta-regexp-1.4.jar"/>
<classpathentry kind="lib" path="lucene/analysis/icu/lib/icu4j-49.1.jar"/>

View File

@ -2,7 +2,6 @@
<library name="Ant">
<CLASSES>
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/ant-1.8.2.jar!/" />
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/ant-junit-1.8.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

View File

@ -1,9 +1,9 @@
<component name="libraryTable">
<library name="HSQLDB">
<CLASSES>
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-1.8.0.10.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
<component name="libraryTable">
<library name="HSQLDB">
<CLASSES>
<root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-1.8.0.10.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

View File

@ -52,11 +52,6 @@
<artifactId>ant</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant-junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>randomizedtesting-runner</artifactId>

View File

@ -51,10 +51,6 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant-junit</artifactId>
</dependency>
<dependency>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>randomizedtesting-runner</artifactId>

View File

@ -227,11 +227,6 @@
<artifactId>ant</artifactId>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant-junit</artifactId>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>

View File

@ -20,7 +20,225 @@ import re
reHREF = re.compile('<a.*?>(.*?)</a>', re.IGNORECASE)
reMarkup = re.compile('<.*?>')
reDivBlock = re.compile('<div class="block">(.*?)</div>', re.IGNORECASE)
reCaption = re.compile('<caption><span>(.*?)</span>', re.IGNORECASE)
reTDLastNested = re.compile('^<td class="colLast"><code><strong><a href="[^>]*\.([^>]*?)\.html" title="class in[^>]*">', re.IGNORECASE)
reTDLast = re.compile('^<td class="colLast"><code><strong><a href="[^>]*#([^>]*?)">', re.IGNORECASE)
reColOne = re.compile('^<td class="colOne"><code><strong><a href="[^>]*#([^>]*?)">', re.IGNORECASE)
# the Method detail section at the end
reMethodDetail = re.compile('^<h3>Method Detail</h3>$', re.IGNORECASE)
reMethodDetailAnchor = re.compile('^(?:</a>)?<a name="([^>]*?)">$', re.IGNORECASE)
reMethodOverridden = re.compile('^<dt><strong>(Specified by:|Overrides:)</strong></dt>$', re.IGNORECASE)
reTag = re.compile("(?i)<(\/?\w+)((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>")
def verifyHTML(s):
stack = []
upto = 0
while True:
m = reTag.search(s, upto)
if m is None:
break
tag = m.group(1)
upto = m.end(0)
if tag[:1] == '/':
justTag = tag[1:]
else:
justTag = tag
if justTag.lower() in ('br', 'li', 'p', 'col'):
continue
if tag[:1] == '/':
if len(stack) == 0:
raise RuntimeError('saw closing "%s" without opening <%s...>' % (m.group(0), tag[1:]))
elif stack[-1][0] != tag[1:].lower():
raise RuntimeError('closing "%s" does not match opening "%s"' % (m.group(0), stack[-1][1]))
stack.pop()
else:
stack.append((tag.lower(), m.group(0)))
if len(stack) != 0:
raise RuntimeError('"%s" was never closed' % stack[-1][1])
def cleanHTML(s):
s = reMarkup.sub('', s)
s = s.replace('&nbsp;', ' ')
s = s.replace('&lt;', '<')
s = s.replace('&gt;', '>')
s = s.replace('&amp;', '&')
return s.strip()
reH3 = re.compile('^<h3>(.*?)</h3>', re.IGNORECASE | re.MULTILINE)
reH4 = re.compile('^<h4>(.*?)</h4>', re.IGNORECASE | re.MULTILINE)
def checkClassDetails(fullPath):
"""
Checks for invalid HTML in the full javadocs under each field/method.
"""
# TODO: only works with java7 generated javadocs now!
with open(fullPath, encoding='UTF-8') as f:
desc = None
cat = None
item = None
errors = []
for line in f.readlines():
m = reH3.search(line)
if m is not None:
if desc is not None:
# Have to fake <ul> context because we pulled a fragment out "across" two <ul>s:
desc = ''.join(desc)
if True or cat == 'Constructor Detail':
idx = desc.find('</div>')
if idx == -1:
# Ctor missing javadocs ... checkClassSummaries catches it
desc = None
continue
desc = desc[:idx+6]
else:
desc = '<ul>%s</ul>' % ''.join(desc)
#print(' VERIFY %s: %s: %s' % (cat, item, desc))
try:
verifyHTML(desc)
except RuntimeError as re:
#print(' FAILED: %s' % re)
errors.append((cat, item, str(re)))
desc = None
cat = m.group(1)
continue
m = reH4.search(line)
if m is not None:
if desc is not None:
# Have to fake <ul> context because we pulled a fragment out "across" two <ul>s:
desc = '<ul>%s</ul>' % ''.join(desc)
#print(' VERIFY %s: %s: %s' % (cat, item, desc))
try:
verifyHTML(desc)
except RuntimeError as re:
#print(' FAILED: %s' % re)
errors.append((cat, item, str(re)))
item = m.group(1)
desc = []
continue
if desc is not None:
desc.append(line)
if len(errors) != 0:
print()
print(fullPath)
for cat, item, message in errors:
print(' broken details HTML: %s: %s: %s' % (cat, item, message))
return True
else:
return False
def checkClassSummaries(fullPath):
# TODO: only works with java7 generated javadocs now!
f = open(fullPath, encoding='UTF-8')
missing = []
broken = []
inThing = False
lastCaption = None
lastItem = None
desc = None
foundMethodDetail = False
lastMethodAnchor = None
for line in f.readlines():
m = reMethodDetail.search(line)
if m is not None:
foundMethodDetail = True
continue
# prune methods that are just @Overrides of other interface/classes,
# they should be specified elsewhere, if they are e.g. jdk or
# external classes we cannot inherit their docs anyway
if foundMethodDetail:
m = reMethodDetailAnchor.search(line)
if m is not None:
lastMethodAnchor = m.group(1)
continue
m = reMethodOverridden.search(line)
if m is not None and ('Methods', lastMethodAnchor) in missing:
#print('removing @overridden method: %s' % lastMethodAnchor)
missing.remove(('Methods', lastMethodAnchor))
m = reCaption.search(line)
if m is not None:
lastCaption = m.group(1)
#print(' caption %s' % lastCaption)
m = reTDLastNested.search(line)
if m is not None:
# nested classes
lastItem = m.group(1)
#print(' item %s' % lastItem)
else:
m = reTDLast.search(line)
if m is not None:
# methods etc
lastItem = m.group(1)
else:
# ctors etc
m = reColOne.search(line)
if m is not None:
lastItem = m.group(1)
#print(' item %s' % lastItem)
lineLower = line.strip().lower()
if lineLower.find('<tr class="') != -1:
inThing = True
hasDesc = False
continue
if inThing:
if lineLower.find('</tr>') != -1:
if not hasDesc:
missing.append((lastCaption, lastItem))
inThing = False
continue
else:
if line.find('<div class="block">') != -1:
desc = []
if desc is not None:
desc.append(line)
if line.find('</div>') != -1:
desc = ''.join(desc)
try:
verifyHTML(desc)
except RuntimeError as e:
broken.append((lastCaption, lastItem, str(e)))
#print('FAIL: %s: %s: %s: %s' % (lastCaption, lastItem, e, desc))
desc = desc.replace('<div class="block">', '')
desc = desc.replace('</div>', '')
desc = desc.strip()
hasDesc = len(desc) > 0
desc = None
f.close()
if len(missing) > 0 or len(broken) > 0:
print()
print(fullPath)
for (caption, item) in missing:
print(' missing %s: %s' % (caption, item))
for (caption, item, why) in broken:
print(' broken HTML: %s: %s: %s' % (caption, item, why))
return True
else:
return False
def checkSummary(fullPath):
printed = False
f = open(fullPath, encoding='UTF-8')
@ -84,8 +302,8 @@ def checkPackageSummaries(root, level='class'):
True if there are problems.
"""
if level != 'class' and level != 'package':
print('unsupported level: %s, must be "class" or "package"' % level)
if level != 'class' and level != 'package' and level != 'method':
print('unsupported level: %s, must be "class" or "package" or "method"' % level)
sys.exit(1)
#for dirPath, dirNames, fileNames in os.walk('%s/lucene/build/docs/api' % root):
@ -99,8 +317,13 @@ def checkPackageSummaries(root, level='class'):
sys.exit(1)
anyMissing = False
for dirPath, dirNames, fileNames in os.walk(root):
if not os.path.isdir(root):
checkClassSummaries(root)
checkClassDetails(root)
sys.exit(0)
for dirPath, dirNames, fileNames in os.walk(root):
if dirPath.find('/all/') != -1:
# These are dups (this is a bit risky, e.g., root IS this /all/ directory..)
continue
@ -108,6 +331,16 @@ def checkPackageSummaries(root, level='class'):
if 'package-summary.html' in fileNames:
if level != 'package' and checkSummary('%s/package-summary.html' % dirPath):
anyMissing = True
for fileName in fileNames:
fullPath = '%s/%s' % (dirPath, fileName)
if not fileName.startswith('package-') and fileName.endswith('.html') and os.path.isfile(fullPath):
if level == 'method':
if checkClassSummaries(fullPath):
anyMissing = True
# always look for broken html, regardless of level supplied
if checkClassDetails(fullPath):
anyMissing = True
if 'overview-summary.html' in fileNames:
if checkSummary('%s/overview-summary.html' % dirPath):
anyMissing = True
@ -116,7 +349,7 @@ def checkPackageSummaries(root, level='class'):
if __name__ == '__main__':
if len(sys.argv) < 2 or len(sys.argv) > 3:
print('usage: %s <dir> [class|package]' % sys.argv[0])
print('usage: %s <dir> [class|package|method]' % sys.argv[0])
sys.exit(1)
if len(sys.argv) == 2:
level = 'class'

114
extra-targets.xml Normal file
View File

@ -0,0 +1,114 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="extra-targets" basedir=".">
<description>
This file is designed for importing into a main build file, and not intended
for standalone use.
</description>
<import file="lucene/common-build.xml"/>
<!--
Run after Junit tests.
This target is in a separate file, as it needs to include common-build.xml,
but must run from top-level!
-->
<target name="generate-clover-reports" depends="clover">
<fail unless="run.clover">Clover not enabled!</fail>
<mkdir dir="${clover.report.dir}"/>
<fileset dir="." id="clover.test.result.files">
<include name="*/build/**/test/TEST-*.xml"/>
<exclude name="lucene/build/backwards/**"/>
</fileset>
<clover-report>
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
<format type="html" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
<format type="xml" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
</clover-report>
<echo>You can find the merged Lucene/Solr Clover report in '${clover.report.dir}'.</echo>
</target>
<target xmlns:ivy="antlib:org.apache.ivy.ant" name="check-svn-working-copy" depends="ivy-availability-check,ivy-fail,ivy-configure">
<ivy:cachepath organisation="org.tmatesoft.svnkit" module="svnkit" revision="1.7.5-v1"
inline="true" conf="default" type="jar" transitive="true" pathid="svnkit.classpath"/>
<script language="javascript" classpathref="svnkit.classpath" taskname="svn"><![CDATA[
importClass(java.io.File);
importClass(java.util.TreeSet);
importPackage(org.tmatesoft.svn.core);
importPackage(org.tmatesoft.svn.core.wc);
var manager = SVNClientManager.newInstance();
var statusClient = manager.getStatusClient();
var wcClient = manager.getWCClient();
var basedir = new File(project.getProperty("basedir")).getAbsoluteFile();
var baseLen = basedir.toString().length();
var convertRelative = function(file) {
return file.getAbsolutePath().substring(baseLen + 1).replace(File.separatorChar, '/');
}
var missingProps = new TreeSet(), unversioned = new TreeSet();
self.log("Getting all versioned and unversioned files...");
statusClient.doStatus(basedir, SVNRevision.WORKING, SVNDepth.fromRecurse(true), false, true, false, false, new ISVNStatusHandler({
handleStatus: function(status) {
var nodeStatus = status.getNodeStatus();
if (nodeStatus == SVNStatusType.STATUS_UNVERSIONED) {
unversioned.add(convertRelative(status.getFile()));
} else if (status.getKind() == SVNNodeKind.FILE && nodeStatus != SVNStatusType.STATUS_DELETED) {
missingProps.add(convertRelative(status.getFile()));
}
}
}), null);
self.log("Filtering files with existing svn:eol-style...");
wcClient.doGetProperty(basedir, "svn:eol-style", SVNRevision.WORKING, SVNRevision.WORKING, true, new ISVNPropertyHandler({
handleProperty: function(file, prop) {
missingProps.remove(convertRelative(file));
}
}));
self.log("Filtering files with binary svn:mime-type...");
wcClient.doGetProperty(basedir, "svn:mime-type", SVNRevision.WORKING, SVNRevision.WORKING, true, new ISVNPropertyHandler({
handleProperty: function(file, prop) {
prop = SVNPropertyValue.getPropertyAsString(prop.getValue());
if (prop.startsWith("application/") || prop.startsWith("image/")) {
missingProps.remove(convertRelative(file));
}
}
}));
var convertSet2String = function(set) {
return set.isEmpty() ? null : ("* " + set.toArray().join(project.getProperty("line.separator") + "* "))
};
project.setProperty("svn.checkprops.failed", convertSet2String(missingProps));
project.setProperty("svn.unversioned.failed", convertSet2String(unversioned));
]]></script>
<fail if="svn.checkprops.failed"
message="The following files are missing svn:eol-style (or binary svn:mime-type):${line.separator}${svn.checkprops.failed}"/>
<fail if="svn.unversioned.failed"
message="Source checkout is dirty after running tests!!! Offending files:${line.separator}${svn.unversioned.failed}"/>
</target>
</project>

View File

@ -66,35 +66,13 @@ system.
NOTE: the ~ character represents your user account home directory.
Step 3) Install JavaCC
Building the Lucene distribution from the source does not require the JavaCC
parser generator, but if you wish to regenerate any of the pre-generated
parser pieces, you will need to install JavaCC. Version 4.1 is tested to
work correctly.
http://javacc.dev.java.net
Follow the download links and download the zip file to a temporary
location on your file system.
After JavaCC is installed, create a build.properties file
(as in step 2), and add the line
javacc.home=/javacc
where this points to the root directory of your javacc installation
(the directory that contains bin/lib/javacc.jar).
Step 4) Run ant
Step 3) Run ant
Assuming you have ant in your PATH and have set ANT_HOME to the
location of your ant installation, typing "ant" at the shell prompt
and command prompt should run ant. Ant will by default look for the
"build.xml" file in your current directory, and compile Lucene.
To rebuild any of the JavaCC-based parsers, run "ant javacc".
For further information on Lucene, go to:
http://lucene.apache.org/

View File

@ -23,6 +23,11 @@ New Features
* LUCENE-4323: Added support for an absolute maximum CFS segment size
(in MiB) to LogMergePolicy and TieredMergePolicy.
(Alexey Lef via Uwe Schindler)
* LUCENE-4339: Allow deletes against 3.x segments for easier upgrading.
Lucene3x Codec is still otherwise read-only, you should not set it
as the default Codec on IndexWriter, because it cannot write new segments.
(Mike McCandless, Robert Muir)
API Changes
@ -77,6 +82,10 @@ API Changes
fields in a stored document, has been replaced with the simpler
StoredFieldVisitor API. (Mike McCandless)
* LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
not be overridden by subclasses: per-stream initialization should happen
in reset(). (Robert Muir)
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
@ -110,8 +119,18 @@ Bug Fixes
containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir,
Mike McCandless)
* LUCENE-4224: Add in-order scorer to query time joining and the
out-of-order scorer throws an UOE. (Martijn van Groningen, Robert Muir)
* LUCENE-4333: Fixed NPE in TermGroupFacetCollector when faceting on mv fields.
(Jesse MacVicar, Martijn van Groningen)
Optimizations
* LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a
lot because of generated code introduced in LUCENE-4161 and LUCENE-3892.
(Adrien Grand)
* LUCENE-4317: Improve reuse of internal TokenStreams and StringReader
in oal.document.Field. (Uwe Schindler, Chris Male, Robert Muir)
@ -883,7 +902,7 @@ API Changes
* LUCENE-3866: IndexReaderContext.leaves() is now the preferred way to access
atomic sub-readers of any kind of IndexReader (for AtomicReaders it returns
itsself as only leaf with docBase=0). (Uwe Schindler)
itself as only leaf with docBase=0). (Uwe Schindler)
New features
@ -2306,7 +2325,7 @@ Changes in backwards compatibility policy
(Mike McCandless, Shai Erera)
* LUCENE-3084: MergePolicy.OneMerge.segments was changed from
SegmentInfos to a List<SegmentInfo>. SegmentInfos itsself was changed
SegmentInfos to a List<SegmentInfo>. SegmentInfos itself was changed
to no longer extend Vector<SegmentInfo> (to update code that is using
Vector-API, use the new asList() and asSet() methods returning unmodifiable
collections; modifying SegmentInfos is now only possible through
@ -7547,7 +7566,7 @@ Infrastructure
11. Fixed bugs in GermanAnalyzer (gschwarz)
1.2 RC2:
1.2 RC2
- added sources to distribution
- removed broken build scripts and libraries from distribution
- SegmentsReader: fixed potential race condition
@ -7562,7 +7581,8 @@ Infrastructure
- JDK 1.1 compatibility fix: disabled lock files for JDK 1.1,
since they rely on a feature added in JDK 1.2.
1.2 RC1 (first Apache release):
1.2 RC1
- first Apache release
- packages renamed from com.lucene to org.apache.lucene
- license switched from LGPL to Apache
- ant-only build -- no more makefiles
@ -7573,7 +7593,8 @@ Infrastructure
- Analyzers can choose tokenizer based on field name
- misc bug fixes.
1.01b (last Sourceforge release)
1.01b
. last Sourceforge release
. a few bug fixes
. new Query Parser
. new prefix query (search for "foo*" matches "food")

View File

@ -318,9 +318,9 @@ FieldCache, use them with care!
The method IndexReader#getSequentialSubReaders() was moved to CompositeReader
(see LUCENE-2858, LUCENE-3733) and made protected. It is solely used by
CompositeReader itsself to build its reader tree. To get all atomic leaves
CompositeReader itself to build its reader tree. To get all atomic leaves
of a reader, use IndexReader#leaves(), which also provides the doc base
of each leave. Readers that are already atomic return itsself as leaf with
of each leave. Readers that are already atomic return itself as leaf with
doc base 0. To emulate Lucene 3.x getSequentialSubReaders(),
use getContext().children().
@ -626,3 +626,8 @@ you can now do this:
method, StoredFieldVisitor has a needsField method: if that method
returns true then the field will be loaded and the appropriate
type-specific method will be invoked with that field's value.
* LUCENE-4122: Removed the Payload class and replaced with BytesRef.
PayloadAttribute's name is unchanged, it just uses the BytesRef
class to refer to the payload bytes/start offset/end offset
(or null if there is no payload).
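
A minimal migration sketch for the LUCENE-4122 entry above, assuming the 4.x PayloadAttribute API (setPayload/getPayload now take and return BytesRef); the class and byte values are illustrative only:

```java
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

final class PayloadMigrationSketch {
  static void setAndReadPayload(TokenStream stream) {
    PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
    // 3.x code created a Payload object; 4.x wraps the raw bytes in a BytesRef:
    payloadAtt.setPayload(new BytesRef(new byte[] { 42 }));
    BytesRef payload = payloadAtt.getPayload();   // null if the token has no payload
    assert payload != null && payload.length == 1;
  }
}
```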

View File

@ -94,8 +94,7 @@ public final class KeywordTokenizer extends Tokenizer {
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
public void reset() throws IOException {
this.done = false;
}
}
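
The hunk above shows the LUCENE-4343 pattern: Tokenizer.setReader(Reader) is now final, so per-stream state is cleared in reset() instead. A hedged sketch of a custom Tokenizer following the same contract (the class itself is hypothetical, not part of this commit):

```java
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;

// Hypothetical tokenizer: per-stream state is (re)initialized in reset(),
// never by overriding the now-final setReader(Reader).
public final class SingleTokenTokenizerSketch extends Tokenizer {
  private boolean done = false;

  public SingleTokenTokenizerSketch(Reader input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (done) {
      return false;
    }
    done = true;
    clearAttributes();
    return true;   // a real tokenizer would also fill term/offset attributes here
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    done = false;  // per-stream initialization belongs here
  }
}
```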

View File

@ -78,9 +78,6 @@ public final class PatternTokenizer extends Tokenizer {
if (group >= 0 && group > matcher.groupCount()) {
throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
}
fillBuffer(str, input);
matcher.reset(str);
index = 0;
}
@Override
@ -136,8 +133,7 @@ public final class PatternTokenizer extends Tokenizer {
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
public void reset() throws IOException {
fillBuffer(str, input);
matcher.reset(str);
index = 0;

View File

@ -175,8 +175,7 @@ public final class ClassicTokenizer extends Tokenizer {
}
@Override
public void setReader(Reader reader) throws IOException {
super.setReader(reader);
scanner.yyreset(reader);
public void reset() throws IOException {
scanner.yyreset(input);
}
}

View File

@ -183,8 +183,7 @@ public final class StandardTokenizer extends Tokenizer {
}
@Override
public void setReader(Reader reader) throws IOException {
super.setReader(reader);
scanner.yyreset(reader);
public void reset() throws IOException {
scanner.yyreset(input);
}
}

View File

@ -162,8 +162,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
}
@Override
public void setReader(Reader reader) throws IOException {
super.setReader(reader);
scanner.yyreset(reader);
public void reset() throws IOException {
scanner.yyreset(input);
}
}

View File

@ -37,6 +37,15 @@ import java.util.regex.PatternSyntaxException;
/**
* Abstract parent class for analysis factories {@link TokenizerFactory},
* {@link TokenFilterFactory} and {@link CharFilterFactory}.
* <p>
* The typical lifecycle for a factory consumer is:
* <ol>
* <li>Create factory via its no-arg constructor
* <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
* <li>Calls {@link #init(Map)} passing arguments as key-value mappings.
* <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
* <li>Consumer calls create() to obtain instances.
* </ol>
*/
public abstract class AbstractAnalysisFactory {
@ -46,6 +55,9 @@ public abstract class AbstractAnalysisFactory {
/** the luceneVersion arg */
protected Version luceneMatchVersion = null;
/**
* Initialize this factory via a set of key-value pairs.
*/
public void init(Map<String,String> args) {
this.args = args;
}
@ -104,6 +116,9 @@ public abstract class AbstractAnalysisFactory {
return Boolean.parseBoolean(s);
}
/**
* Compiles a pattern for the value of the specified argument key <code>name</code>
*/
protected Pattern getPattern(String name) {
try {
String pat = args.get(name);
@ -118,6 +133,10 @@ public abstract class AbstractAnalysisFactory {
}
}
/**
* Returns a {@link CharArraySet} from wordFiles, which
* can be a comma-separated list of filenames
*/
protected CharArraySet getWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
@ -137,6 +156,9 @@ public abstract class AbstractAnalysisFactory {
return words;
}
/**
* Returns the resource's lines (with content treated as UTF-8)
*/
protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
}
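
The factory lifecycle listed in the new class javadoc above, as a consumer-side sketch. StopFilterFactory, the "ignoreCase" argument key, ClasspathResourceLoader and Version.LUCENE_40 are assumptions used for illustration; they are not introduced by this commit:

```java
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.util.Version;

public class FactoryLifecycleSketch {
  public static void main(String[] args) throws Exception {
    StopFilterFactory factory = new StopFilterFactory();                        // 1) no-arg constructor
    factory.setLuceneMatchVersion(Version.LUCENE_40);                           // 2) version emulation
    Map<String,String> params = new HashMap<String,String>();
    params.put("ignoreCase", "true");                                           // assumed argument key
    factory.init(params);                                                       // 3) key-value arguments
    factory.inform(new ClasspathResourceLoader(FactoryLifecycleSketch.class));  // 4) resources (ResourceLoaderAware)
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_40,
        new StringReader("to be or not to be"));
    TokenStream stream = factory.create(source);                                // 5) obtain instances
  }
}
```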

View File

@ -78,7 +78,8 @@ public abstract class CharTokenizer extends Tokenizer {
charUtils = CharacterUtils.getInstance(matchVersion);
}
private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
// note: bufferIndex is -1 here so consumers that don't call reset() fail fast with an AIOOBE (best effort)
private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
@ -162,8 +163,7 @@ public abstract class CharTokenizer extends Tokenizer {
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
public void reset() throws IOException {
bufferIndex = 0;
offset = 0;
dataLen = 0;

View File

@ -27,5 +27,9 @@ import java.io.IOException;
*/
public interface ResourceLoaderAware {
/**
* Initializes this component with the provided ResourceLoader
* (used for loading classes, files, etc).
*/
void inform(ResourceLoader loader) throws IOException;
}

View File

@ -19,6 +19,9 @@ package org.apache.lucene.analysis.util;
/** Some commonly-used stemming functions */
public class StemmerUtil {
/** no instance */
private StemmerUtil() {}
/**
* Returns true if the character array starts with the suffix.
*

View File

@ -36,7 +36,10 @@ import org.apache.lucene.util.Version;
*/
public class WordlistLoader {
private static final int INITITAL_CAPACITY = 16;
private static final int INITIAL_CAPACITY = 16;
/** no instance */
private WordlistLoader() {}
/**
* Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
@ -74,7 +77,7 @@ public class WordlistLoader {
* @return A {@link CharArraySet} with the reader's words
*/
public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException {
return getWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
/**
@ -89,7 +92,7 @@ public class WordlistLoader {
* @return A CharArraySet with the reader's words
*/
public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException {
return getWordSet(reader, comment, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
/**
@ -171,7 +174,7 @@ public class WordlistLoader {
* @return A {@link CharArraySet} with the reader's words
*/
public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException {
return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}

View File

@ -318,18 +318,12 @@ public final class WikipediaTokenizer extends Tokenizer {
*/
@Override
public void reset() throws IOException {
super.reset();
scanner.yyreset(input);
tokens = null;
scanner.reset();
first = true;
}
@Override
public void setReader(Reader reader) throws IOException {
super.setReader(reader);
scanner.yyreset(input);
}
@Override
public void end() {
// set final offset

View File

@ -39,6 +39,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
cgf.reset();
assertTrue(cgf.incrementToken());
assertEquals("How", term.toString());
assertTrue(cgf.incrementToken());
@ -61,6 +62,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
nsf.reset();
assertTrue(nsf.incrementToken());
assertEquals("How_the", term.toString());
assertTrue(nsf.incrementToken());

View File

@ -235,6 +235,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
tf.reset();
assertTrue(tf.incrementToken());
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
@ -256,6 +257,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
stream.reset();
while (stream.incrementToken()) {
assertTrue("Custom attribute value was lost", retAtt.getRetain());
}

View File

@ -80,6 +80,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
void verifyPayload(TokenStream ts) throws IOException {
PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
ts.reset();
for(byte b=1;;b++) {
boolean hasNext = ts.incrementToken();
if (!hasNext) break;

View File

@ -66,6 +66,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
@ -83,6 +84,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
stream.reset();
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));

View File

@ -111,6 +111,7 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase
// assign bogus values
in.clearAttributes();
termAtt.setEmpty().append("bogusTerm");
in.reset();
while (in.incrementToken()) {
if (out.length() > 0)
out.append(' ');

View File

@ -45,7 +45,8 @@ public final class ICUTokenizer extends Tokenizer {
/** true length of text in the buffer */
private int length = 0;
/** length in buffer that can be evaluated safely, up to a safe end point */
private int usableLength = 0;
// note: usableLength is -1 here so consumers that don't call reset() fail fast with an AIOOBE (best effort)
private int usableLength = -1;
/** accumulated offset of previous buffers for this reader, for offsetAtt */
private int offset = 0;
@ -101,12 +102,6 @@ public final class ICUTokenizer extends Tokenizer {
breaker.setText(buffer, 0, 0);
length = usableLength = offset = 0;
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
reset();
}
@Override
public void end() {

View File

@ -244,15 +244,9 @@ public final class JapaneseTokenizer extends Tokenizer {
this.dotOut = dotOut;
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
buffer.reset(input);
}
@Override
public void reset() throws IOException {
super.reset();
buffer.reset(input);
resetState();
}

View File

@ -112,16 +112,9 @@ public final class SentenceTokenizer extends Tokenizer {
@Override
public void reset() throws IOException {
super.reset();
tokenStart = tokenEnd = 0;
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
reset();
}
@Override
public void end() {
// set final offset

View File

@ -220,7 +220,7 @@ public class Row {
* Character.
*
* @param way the Character associated with the desired Cell
* @return the reference, or -1 if the Cell is <tt>null,/tt>
* @return the reference, or -1 if the Cell is <tt>null</tt>
*/
public int getRef(Character way) {
Cell c = at(way);

View File

@ -80,8 +80,7 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
}
@Override
public void setReader(Reader input) throws IOException {
super.setReader(input);
public void reset() throws IOException {
iterator = null;
}

View File

@ -248,6 +248,10 @@
<!-- spatial: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
<check-missing-javadocs dir="build/docs/test-framework" level="class"/>
<!-- too much to fix for now, but enforce full javadocs for key packages -->
<check-missing-javadocs dir="build/docs/core/org/apache/lucene/analysis" level="method"/>
<check-missing-javadocs dir="build/docs/core/org/apache/lucene/document" level="method"/>
</sequential>
</target>
@ -452,16 +456,6 @@
<sign-artifacts-macro artifacts.dir="${dist.dir}"/>
</target>
<!-- ================================================================== -->
<!-- Build the JavaCC files into the source tree -->
<!-- ================================================================== -->
<target name="javacc" depends="javacc-check">
<subant target="javacc" failonerror="true" inheritall="false">
<fileset dir="${common.dir}/queryparser" includes="build.xml"/>
</subant>
</target>
<target name="build-modules" depends="compile-test"
description="Builds all additional modules and their tests">
<modules-crawl target="build-artifacts-and-tests"/>
@ -480,24 +474,6 @@
<modules-crawl target="test" failonerror="true"/>
</target>
<!--
compile changes.txt into an html file
-->
<macrodef name="build-changes">
<attribute name="changes.src.dir" default="${changes.src.dir}"/>
<attribute name="changes.target.dir" default="${changes.target.dir}"/>
<sequential>
<mkdir dir="@{changes.target.dir}"/>
<exec executable="perl" input="CHANGES.txt" output="@{changes.target.dir}/Changes.html"
failonerror="true" logError="true">
<arg value="@{changes.src.dir}/changes2html.pl"/>
</exec>
<copy todir="@{changes.target.dir}">
<fileset dir="@{changes.src.dir}" includes="*.css"/>
</copy>
</sequential>
</macrodef>
<target name="changes-to-html">
<build-changes changes.src.dir="${changes.src.dir}" changes.target.dir="${changes.target.dir}" />
</target>

View File

@ -173,7 +173,6 @@
<property name="m2.repository.url" value="file://${maven.dist.dir}"/>
<property name="m2.repository.private.key" value="${user.home}/.ssh/id_dsa"/>
<property name="javacc.home" location="${common.dir}"/>
<property name="jflex.home" location="${common.dir}"/>
<path id="jflex.classpath">
@ -185,12 +184,6 @@
</fileset>
</path>
<path id="javacc.classpath">
<fileset dir="${javacc.home}/">
<include name="bin/lib/*.jar"/>
</fileset>
</path>
<property name="backwards.dir" location="backwards"/>
<property name="build.dir.backwards" location="${build.dir}/backwards"/>
@ -261,23 +254,6 @@
<delete file="velocity.log"/>
</target>
<!-- TODO: maybe make JavaCC checking specific to only the projects
that use it (Lucene core and queryparsers)
-->
<target name="javacc-uptodate-check">
<uptodate property="javacc.files.uptodate">
<srcfiles dir="${src.dir}" includes="**/*.jj" />
<mapper type="glob" from="*.jj" to="*.java"/>
</uptodate>
</target>
<target name="javacc-notice" depends="javacc-uptodate-check" unless="javacc.files.uptodate">
<echo>
One or more of the JavaCC .jj files is newer than its corresponding
.java file. Run the "javacc" target to regenerate the artifacts.
</echo>
</target>
<target name="init" depends="resolve">
<!-- currently empty -->
</target>
@ -391,36 +367,6 @@
</echo>
</target>
<target name="javacc-check">
<available property="javacc.present" classname="org.javacc.parser.Main">
<classpath refid="javacc.classpath"/>
</available>
<fail unless="javacc.present">
##################################################################
JavaCC not found.
JavaCC Home: ${javacc.home}
Please download and install JavaCC 4.1 from:
&lt;http://javacc.dev.java.net&gt;
Then, create a build.properties file either in your home
directory, or within the Lucene directory and set the javacc.home
property to the path where JavaCC is installed. For example,
if you installed JavaCC in /usr/local/java/javacc-4.1, then set the
javacc.home property to:
javacc.home=/usr/local/java/javacc-4.1
If you get an error like the one below, then you have not installed
things correctly. Please check all your paths and try again.
java.lang.NoClassDefFoundError: org.javacc.parser.Main
##################################################################
</fail>
</target>
<target name="jflex-check">
<available property="jflex.present" classname="jflex.anttask.JFlexTask">
<classpath refid="jflex.classpath"/>
@ -508,6 +454,9 @@
<attribute name="spec.version"/>
<attribute name="manifest.file" default="${manifest.file}"/>
<sequential>
<!-- If possible, include the svnversion -->
<exec dir="." executable="${svnversion.exe}" outputproperty="svnversion" failifexecutionfails="false"/>
<manifest file="@{manifest.file}">
<!--
http://java.sun.com/j2se/1.5.0/docs/guide/jar/jar.html#JAR%20Manifest
@ -558,12 +507,6 @@
<attribute name="manifest.file" default="${manifest.file}"/>
<element name="nested" optional="true" implicit="true"/>
<sequential>
<!-- If possible, include the svnversion -->
<exec dir="." executable="${svnversion.exe}"
outputproperty="svnversion" failifexecutionfails="false">
<arg value="."/>
</exec>
<build-manifest title="@{title}"
implementation.title="@{implementation.title}"
spec.version="@{spec.version}"
@ -850,7 +793,14 @@
<sysproperty key="tests.multiplier" value="@{tests.multiplier}"/>
<!-- Temporary directory in the cwd. -->
<sysproperty key="tempDir" value="."/>
<sysproperty key="tempDir" value="." />
<sysproperty key="java.io.tmpdir" value="." />
<!-- Restrict access to certain Java features and install security manager: -->
<sysproperty key="tests.sandbox.dir" value="${build.dir}" />
<sysproperty key="clover.db.dir" value="${clover.db.dir}" />
<sysproperty key="java.security.manager" value="java.lang.SecurityManager" />
<sysproperty key="java.security.policy" value="${common.dir}/tools/junit4/tests.policy" />
<sysproperty key="lucene.version" value="${dev.version}"/>
@ -1381,31 +1331,11 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
<!-- <compilerarg line="-Xmaxwarns 10000000"/>
<compilerarg line="-Xmaxerrs 10000000"/> -->
<!-- for generics in Java 1.5: -->
<compilerarg line="${javac.args}"/>
<compilerarg line="${javac.args}"/>
</javac>
</sequential>
</macrodef>
<macrodef name="invoke-javacc">
<attribute name="target"/>
<attribute name="outputDir"/>
<sequential>
<mkdir dir="@{outputDir}"/>
<javacc
target="@{target}"
outputDirectory="@{outputDir}"
debugTokenManager="${javacc.debug.tokenmgr}"
debugParser="${javacc.debug.parser}"
debuglookahead="${javacc.debug.lookahead}"
javacchome="${javacc.home}"
jdkversion="${javac.source}"
/>
<fixcrlf srcdir="@{outputDir}" includes="*.java" encoding="UTF-8">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fixcrlf>
</sequential>
</macrodef>
<property name="failonjavadocwarning" value="true"/>
<macrodef name="invoke-javadoc">
<element name="sources" optional="yes"/>
@ -1547,10 +1477,10 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
description="Populates properties svn.URL and svn.Revision using 'svn info'.">
<attribute name="directory"/>
<sequential>
<exec dir="." executable="${svnversion.exe}" outputproperty="svn.ver"/>
<exec dir="@{directory}" executable="${svnversion.exe}" outputproperty="svn.ver"/>
<fail message="A subversion checkout is required for this target">
<condition>
<equals arg1="${svn.ver}" arg2="exported"/>
<matches pattern="(exported|unversioned.*)" string="${svn.ver}" casesensitive="false"/>
</condition>
</fail>
<exec dir="@{directory}" executable="${svn.exe}" outputproperty="svn.info" failonerror="true">
@ -1697,7 +1627,7 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
<element name="nested" optional="false" implicit="true"/>
<sequential>
<copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8"
preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8" taskname="pegdown"
>
<filterchain>
<tokenfilter>
@ -1757,4 +1687,22 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
</sequential>
</macrodef>
<!--
compile changes.txt into an html file
-->
<macrodef name="build-changes">
<attribute name="changes.src.dir" default="${changes.src.dir}"/>
<attribute name="changes.target.dir" default="${changes.target.dir}"/>
<sequential>
<mkdir dir="@{changes.target.dir}"/>
<exec executable="perl" input="CHANGES.txt" output="@{changes.target.dir}/Changes.html"
failonerror="true" logError="true">
<arg value="@{changes.src.dir}/changes2html.pl"/>
</exec>
<copy todir="@{changes.target.dir}">
<fileset dir="@{changes.src.dir}" includes="*.css"/>
</copy>
</sequential>
</macrodef>
</project>

View File

@ -38,8 +38,6 @@
<pathelement path="${java.class.path}"/>
</path>
<target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
<target name="test-core" depends="common.test"/>
<target name="javadocs-core" depends="javadocs"/>

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
@ -67,14 +68,26 @@ import java.util.Map;
* Analysis integration with Apache UIMA.
* </ul>
*/
public abstract class Analyzer {
public abstract class Analyzer implements Closeable {
private final ReuseStrategy reuseStrategy;
/**
* Create a new Analyzer, reusing the same set of components per-thread
* across calls to {@link #tokenStream(String, Reader)}.
*/
public Analyzer() {
this(new GlobalReuseStrategy());
}
/**
* Expert: create a new Analyzer with a custom {@link ReuseStrategy}.
* <p>
* NOTE: if you just want to reuse on a per-field basis, it's easier to
* use a subclass of {@link AnalyzerWrapper} such as
* <a href="{@docRoot}/../analyzers-common/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.html">
* PerFieldAnalyzerWrapper</a> instead.
*/
public Analyzer(ReuseStrategy reuseStrategy) {
this.reuseStrategy = reuseStrategy;
}
@ -93,20 +106,25 @@ public abstract class Analyzer {
Reader reader);
/**
* Creates a TokenStream that is allowed to be re-use from the previous time
* that the same thread called this method. Callers that do not need to use
* more than one TokenStream at the same time from this analyzer should use
* this method for better performance.
* Returns a TokenStream suitable for <code>fieldName</code>, tokenizing
* the contents of <code>reader</code>.
* <p>
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents}. It returns the sink of the
* components and stores the components internally. Subsequent calls to this
* method will reuse the previously stored components after resetting them
* through {@link TokenStreamComponents#setReader(Reader)}.
* </p>
* <p>
* <b>NOTE:</b> After calling this method, the consumer must follow the
* workflow described in {@link TokenStream} to properly consume its contents.
* See the {@link org.apache.lucene.analysis Analysis package documentation} for
* some examples demonstrating this.
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
* @return TokenStream for iterating the analyzed content of <code>reader</code>
* @throws AlreadyClosedException if the Analyzer is closed.
* @throws IOException if an i/o error occurs.
*/
public final TokenStream tokenStream(final String fieldName,
final Reader reader) throws IOException {
@ -123,6 +141,13 @@ public abstract class Analyzer {
/**
* Override this if you want to add a CharFilter chain.
* <p>
* The default implementation returns <code>reader</code>
* unchanged.
*
* @param fieldName IndexableField name being indexed
* @param reader original Reader
* @return reader, optionally decorated with CharFilter(s)
*/
protected Reader initReader(String fieldName, Reader reader) {
return reader;
@ -139,7 +164,8 @@ public abstract class Analyzer {
* exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
*
* @param fieldName IndexableField name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}.
* This value must be {@code >= 0}.
*/
public int getPositionIncrementGap(String fieldName) {
return 0;
@ -152,7 +178,8 @@ public abstract class Analyzer {
* produced at least one token for indexing.
*
* @param fieldName the field just indexed
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}.
* This value must be {@code >= 0}.
*/
public int getOffsetGap(String fieldName) {
return 1;
@ -171,7 +198,14 @@ public abstract class Analyzer {
* {@link Analyzer#tokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
/**
* Original source of the tokens.
*/
protected final Tokenizer source;
/**
* Sink tokenstream, such as the outer tokenfilter decorating
* the chain. This can be the source if there are no filters.
*/
protected final TokenStream sink;
/**
@ -235,10 +269,13 @@ public abstract class Analyzer {
* Strategy defining how TokenStreamComponents are reused per call to
* {@link Analyzer#tokenStream(String, java.io.Reader)}.
*/
public static abstract class ReuseStrategy {
public static abstract class ReuseStrategy implements Closeable {
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
public ReuseStrategy() {}
/**
* Gets the reusable TokenStreamComponents for the field with the given name
*
@ -262,6 +299,7 @@ public abstract class Analyzer {
* Returns the currently stored value
*
* @return Currently stored value or {@code null} if no value is stored
* @throws AlreadyClosedException if the ReuseStrategy is closed.
*/
protected final Object getStoredValue() {
try {
@ -279,6 +317,7 @@ public abstract class Analyzer {
* Sets the stored value
*
* @param storedValue Value to store
* @throws AlreadyClosedException if the ReuseStrategy is closed.
*/
protected final void setStoredValue(Object storedValue) {
try {
@ -296,8 +335,10 @@ public abstract class Analyzer {
* Closes the ReuseStrategy, freeing any resources
*/
public void close() {
storedValue.close();
storedValue = null;
if (storedValue != null) {
storedValue.close();
storedValue = null;
}
}
}
@ -306,17 +347,16 @@ public abstract class Analyzer {
* every field.
*/
public final static class GlobalReuseStrategy extends ReuseStrategy {
/** Creates a new instance, with empty per-thread values */
public GlobalReuseStrategy() {}
/**
* {@inheritDoc}
*/
@Override
public TokenStreamComponents getReusableComponents(String fieldName) {
return (TokenStreamComponents) getStoredValue();
}
/**
* {@inheritDoc}
*/
@Override
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
setStoredValue(components);
}
@ -328,19 +368,18 @@ public abstract class Analyzer {
*/
public static class PerFieldReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
/** Creates a new instance, with empty per-thread-per-field values */
public PerFieldReuseStrategy() {}
@SuppressWarnings("unchecked")
@Override
public TokenStreamComponents getReusableComponents(String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
@Override
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
if (componentsPerField == null) {
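
Taken together, the new javadocs in this file describe both the expert ReuseStrategy constructor and the consumer workflow for tokenStream(). A minimal sketch, assuming the analyzers-common WhitespaceTokenizer and Version.LUCENE_40 (neither is part of this file):

```java
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class AnalyzerUsageSketch {
  public static void main(String[] args) throws IOException {
    // Expert constructor: reuse components per field instead of globally.
    Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new WhitespaceTokenizer(Version.LUCENE_40, reader));
      }
    };
    // Consumer workflow referenced by the tokenStream() javadocs: reset, iterate, end, close.
    TokenStream ts = analyzer.tokenStream("body", new StringReader("hello token stream"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                     // required before the first incrementToken()
    while (ts.incrementToken()) {
      System.out.println(term.toString());
    }
    ts.end();                       // records final offset state
    ts.close();
    analyzer.close();               // Analyzer is now Closeable
  }
}
```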

View File

@ -61,25 +61,16 @@ public abstract class AnalyzerWrapper extends Analyzer {
*/
protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
/**
* {@inheritDoc}
*/
@Override
protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
}
/**
* {@inheritDoc}
*/
@Override
public final int getPositionIncrementGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
/**
* {@inheritDoc}
*/
@Override
public final int getOffsetGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName);

View File

@ -38,6 +38,11 @@ public final class CachingTokenFilter extends TokenFilter {
private Iterator<AttributeSource.State> iterator = null;
private AttributeSource.State finalState;
/**
* Create a new CachingTokenFilter around <code>input</code>,
* caching its token attributes, which can be replayed again
* after a call to {@link #reset()}.
*/
public CachingTokenFilter(TokenStream input) {
super(input);
}
@ -67,6 +72,13 @@ public final class CachingTokenFilter extends TokenFilter {
}
}
/**
* Rewinds the iterator to the beginning of the cached list.
* <p>
* Note that this does not call reset() on the wrapped tokenstream ever, even
* the first time. You should reset() the inner tokenstream before wrapping
* it with CachingTokenFilter.
*/
@Override
public void reset() {
if(cache != null) {
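
A usage sketch for the note above: the wrapped stream is reset() before wrapping, and the filter's own reset() replays the cached tokens (WhitespaceTokenizer and Version.LUCENE_40 are assumptions for illustration, not part of this file):

```java
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

final class CachingTokenFilterSketch {
  static void consumeTwice(String text) throws IOException {
    TokenStream inner = new WhitespaceTokenizer(Version.LUCENE_40, new StringReader(text));
    inner.reset();                                   // reset the wrapped stream before wrapping it
    CachingTokenFilter cached = new CachingTokenFilter(inner);
    while (cached.incrementToken()) { /* first pass fills the cache */ }
    cached.reset();                                  // rewind to the start of the cached tokens
    while (cached.incrementToken()) { /* second pass replays from the cache */ }
  }
}
```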

View File

@ -33,6 +33,9 @@ import java.io.Reader;
* You can optionally provide more efficient implementations of additional methods
* like {@link #read()}, {@link #read(char[])}, {@link #read(java.nio.CharBuffer)},
* but this is not required.
* <p>
* For examples and integration with {@link Analyzer}, see the
* {@link org.apache.lucene.analysis Analysis package documentation}.
*/
// the way java.io.FilterReader should work!
public abstract class CharFilter extends Reader {
@ -52,6 +55,10 @@ public abstract class CharFilter extends Reader {
/**
* Closes the underlying input stream.
* <p>
* <b>NOTE:</b>
* The default implementation closes the input Reader, so
* be sure to call <code>super.close()</code> when overriding this method.
*/
@Override
public void close() throws IOException {
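
A hypothetical pass-through subclass illustrating the CharFilter contract described above: delegate read(), keep offsets unchanged, and rely on the default close() to close the wrapped Reader. It is not part of this commit:

```java
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.CharFilter;

public final class PassThroughCharFilterSketch extends CharFilter {
  public PassThroughCharFilterSketch(Reader input) {
    super(input);
  }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    return input.read(cbuf, off, len);   // no character transformation
  }

  @Override
  protected int correct(int currentOff) {
    return currentOff;                   // offsets are unchanged for a pass-through filter
  }
}
```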

View File

@ -144,6 +144,12 @@ public final class NumericTokenStream extends TokenStream {
private long value = 0L;
private int valueSize = 0, shift = 0, precisionStep = 0;
private BytesRef bytes = new BytesRef();
/**
* Creates, but does not yet initialize this attribute instance
* @see #init(long, int, int, int)
*/
public NumericTermAttributeImpl() {}
public BytesRef getBytesRef() {
return bytes;

View File

@ -176,8 +176,8 @@ public class Token extends CharTermAttributeImpl
* instead use the char[] termBuffer methods to set the
* term text.
* @param text term text
* @param start start offset
* @param end end offset
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(String text, int start, int end) {
checkOffsets(start, end);
@ -191,8 +191,8 @@ public class Token extends CharTermAttributeImpl
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
* @param start start offset
* @param end end offset
* @param start start offset in the source text
* @param end end offset in the source text
* @param typ token type
*/
public Token(String text, int start, int end, String typ) {
@ -208,9 +208,9 @@ public class Token extends CharTermAttributeImpl
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text
* @param start
* @param end
* @param text term text
* @param start start offset in the source text
* @param end end offset in the source text
* @param flags token type bits
*/
public Token(String text, int start, int end, int flags) {
@ -225,11 +225,11 @@ public class Token extends CharTermAttributeImpl
* Constructs a Token with the given term buffer (offset
* & length), start and end
* offsets
* @param startTermBuffer
* @param termBufferOffset
* @param termBufferLength
* @param start
* @param end
* @param startTermBuffer buffer containing term text
* @param termBufferOffset the index in the buffer of the first character
* @param termBufferLength number of valid characters in the buffer
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
checkOffsets(start, end);
@ -238,31 +238,9 @@ public class Token extends CharTermAttributeImpl
endOffset = end;
}
/** Set the position increment. This determines the position of this token
* relative to the previous Token in a {@link TokenStream}, used in phrase
* searching.
*
* <p>The default value is one.
*
* <p>Some common uses for this are:<ul>
*
* <li>Set it to zero to put multiple terms in the same position. This is
* useful if, e.g., a word has multiple stems. Searches for phrases
* including either stem will match. In this case, all but the first stem's
* increment should be set to zero: the increment of the first instance
* should be one. Repeating a token with an increment of zero can also be
* used to boost the scores of matches on that token.
*
* <li>Set it to values greater than one to inhibit exact phrase matches.
* If, for example, one does not want phrases to match across removed stop
* words, then one could build a stop word filter that removes stop words and
* also sets the increment to the number of stop words removed before each
* non-stop word. Then exact phrase queries will only match when the terms
* occur with no intervening stop words.
*
* </ul>
* @param positionIncrement the distance from the prior term
* @see org.apache.lucene.index.DocsAndPositionsEnum
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
@ -271,93 +249,101 @@ public class Token extends CharTermAttributeImpl
this.positionIncrement = positionIncrement;
}
/** Returns the position increment of this Token.
* @see #setPositionIncrement
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
public int getPositionIncrement() {
return positionIncrement;
}
/** Set the position length.
* @see PositionLengthAttribute */
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public void setPositionLength(int positionLength) {
this.positionLength = positionLength;
}
/** Get the position length.
* @see PositionLengthAttribute */
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public int getPositionLength() {
return positionLength;
}
/** Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to {@link #length}, as the term text may have been altered by a
stemmer or some other filter. */
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
public final int startOffset() {
return startOffset;
}
/** Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). */
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
public final int endOffset() {
return endOffset;
}
/** Set the starting and ending offset.
@see #startOffset() and #endOffset()*/
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
public void setOffset(int startOffset, int endOffset) {
checkOffsets(startOffset, endOffset);
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/** Returns this Token's lexical type. Defaults to "word". */
/**
* {@inheritDoc}
* @see TypeAttribute
*/
public final String type() {
return type;
}
/** Set the lexical type.
@see #type() */
/**
* {@inheritDoc}
* @see TypeAttribute
*/
public final void setType(String type) {
this.type = type;
}
/**
* <p/>
*
* Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
*
*
* @return The bits
* @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
* {@inheritDoc}
* @see FlagsAttribute
*/
public int getFlags() {
return flags;
}
/**
* @see #getFlags()
* {@inheritDoc}
* @see FlagsAttribute
*/
public void setFlags(int flags) {
this.flags = flags;
}
/**
* Returns this Token's payload.
*/
* {@inheritDoc}
* @see PayloadAttribute
*/
public BytesRef getPayload() {
return this.payload;
}
/**
* Sets this Token's payload.
/**
* {@inheritDoc}
* @see PayloadAttribute
*/
public void setPayload(BytesRef payload) {
this.payload = payload;
@ -551,8 +537,8 @@ public class Token extends CharTermAttributeImpl
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype
* @param newTerm
* @param prototype existing Token
* @param newTerm new term text
*/
public void reinit(Token prototype, String newTerm) {
setEmpty().append(newTerm);
@ -566,10 +552,10 @@ public class Token extends CharTermAttributeImpl
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype
* @param newTermBuffer
* @param offset
* @param length
* @param prototype existing Token
* @param newTermBuffer buffer containing new term text
* @param offset the index in the buffer of the first character
* @param length number of valid characters in the buffer
*/
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
copyBuffer(newTermBuffer, offset, length);


@ -34,21 +34,37 @@ public abstract class TokenFilter extends TokenStream {
this.input = input;
}
/** Performs end-of-stream operations, if any, and calls then <code>end()</code> on the
* input TokenStream.<p/>
* <b>NOTE:</b> Be sure to call <code>super.end()</code> first when overriding this method.*/
/**
* {@inheritDoc}
* <p>
* <b>NOTE:</b>
* The default implementation chains the call to the input TokenStream, so
* be sure to call <code>super.end()</code> first when overriding this method.
*/
@Override
public void end() throws IOException {
input.end();
}
/** Close the input TokenStream. */
/**
* {@inheritDoc}
* <p>
* <b>NOTE:</b>
* The default implementation chains the call to the input TokenStream, so
* be sure to call <code>super.close()</code> when overriding this method.
*/
@Override
public void close() throws IOException {
input.close();
}
/** Reset the filter as well as the input TokenStream. */
/**
* {@inheritDoc}
* <p>
* <b>NOTE:</b>
* The default implementation chains the call to the input TokenStream, so
* be sure to call <code>super.reset()</code> when overriding this method.
*/
@Override
public void reset() throws IOException {
input.reset();
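The three overrides above all chain to the wrapped input stream, which is why subclasses must call super. A minimal sketch of a well-behaved stateful filter, assuming the 4.0-era TokenFilter API (the class name CountingTokenFilter is invented for illustration):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

// Hypothetical filter that counts tokens; every override chains to super/input.
public final class CountingTokenFilter extends TokenFilter {
  private int count;

  public CountingTokenFilter(TokenStream in) { super(in); }

  public int getCount() { return count; }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) { count++; return true; }
    return false;
  }

  @Override
  public void reset() throws IOException {
    super.reset();   // chains to input.reset()
    count = 0;       // clear our own state so the filter is reusable
  }

  @Override
  public void end() throws IOException {
    super.end();     // chains to input.end()
  }
}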


@ -170,12 +170,8 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
* This method is called by a consumer before it begins consumption using
* {@link #incrementToken()}.
* <p/>
* Resets this stream to the beginning. As all TokenStreams must be reusable,
* any implementations which have state that needs to be reset between usages
* of the TokenStream, must implement this method. Note that if your TokenStream
* caches tokens and feeds them back again after a reset, it is imperative
* that you clone the tokens when you store them away (on the first pass) as
* well as when you return them (on future passes after {@link #reset()}).
* Resets this stream to a clean state. Stateful implementations must implement
* this method so that they can be reused, just as if they had been created fresh.
*/
public void reset() throws IOException {}
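For context, a consumer drives the stream through exactly this lifecycle: obtain it, add attributes, reset(), loop over incrementToken(), then end() and close(). A hedged sketch, assuming the 4.0-era Analyzer.tokenStream(String, Reader) signature; the field name and text are arbitrary:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class ConsumeTokens {
  // 'analyzer' may be any Analyzer instance.
  static void dump(Analyzer analyzer) throws IOException {
    TokenStream ts = analyzer.tokenStream("body", new StringReader("some text"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    ts.reset();                        // always reset before consuming
    while (ts.incrementToken()) {
      System.out.println(term.toString()
          + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
    }
    ts.end();                          // perform end-of-stream operations
    ts.close();
  }
}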


@ -54,7 +54,13 @@ public abstract class Tokenizer extends TokenStream {
this.input = input;
}
/** By default, closes the input Reader. */
/**
* {@inheritDoc}
* <p>
* <b>NOTE:</b>
* The default implementation closes the input Reader, so
* be sure to call <code>super.close()</code> when overriding this method.
*/
@Override
public void close() throws IOException {
if (input != null) {
@ -76,12 +82,18 @@ public abstract class Tokenizer extends TokenStream {
return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
}
/** Expert: Reset the tokenizer to a new reader. Typically, an
/** Expert: Set a new reader on the Tokenizer. Typically, an
* analyzer (in its tokenStream method) will use
* this to re-use a previously created tokenizer. */
public void setReader(Reader input) throws IOException {
public final void setReader(Reader input) throws IOException {
assert input != null: "input must not be null";
this.input = input;
assert setReaderTestPoint();
}
// only used by assert, for testing
boolean setReaderTestPoint() {
return true;
}
}


@ -817,5 +817,30 @@ As a small hint, this is how the new Attribute class could begin:
...
</pre>
<h4>Adding a CharFilter chain</h4>
Analyzers take Java {@link java.io.Reader}s as input. Of course you can wrap your Readers with {@link java.io.FilterReader}s
to manipulate content, but this would have the big disadvantage that character offsets might be inconsistent with your original
text.
<p>
{@link org.apache.lucene.analysis.CharFilter} is designed to allow you to pre-process input like a FilterReader would, but also
preserve the original offsets associated with those characters. This way mechanisms like highlighting still work correctly.
CharFilters can be chained.
<p>
Example:
<pre class="prettyprint">
public class MyAnalyzer extends Analyzer {
{@literal @Override}
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MyTokenizer(reader));
}
{@literal @Override}
protected Reader initReader(String fieldName, Reader reader) {
// wrap the Reader in a CharFilter chain.
return new SecondCharFilter(new FirstCharFilter(reader));
}
}
</pre>
</body>
</html>
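One possible shape for the FirstCharFilter referenced in the example above, assuming the 4.0-era CharFilter API (subclasses implement read(char[], int, int) and correct(int)); since this filter changes no character lengths, the offset correction is simply the identity:

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.CharFilter;

// Hypothetical CharFilter that upper-cases characters; lengths are unchanged,
// so correct() can return the offset it is given.
public class FirstCharFilter extends CharFilter {
  public FirstCharFilter(Reader input) { super(input); }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    int n = input.read(cbuf, off, len);
    for (int i = off; i < off + n; i++) {
      cbuf[i] = Character.toUpperCase(cbuf[i]);
    }
    return n;
  }

  @Override
  protected int correct(int currentOff) {
    return currentOff; // no characters were added or removed
  }
}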


@ -38,7 +38,11 @@ public interface CharTermAttribute extends Attribute, CharSequence, Appendable {
* #resizeBuffer(int)} to increase it. After
* altering the buffer be sure to call {@link
* #setLength} to record the number of valid
* characters that were placed into the termBuffer. */
* characters that were placed into the termBuffer.
* <p>
* <b>NOTE</b>: The returned buffer may be larger than
* the valid {@link #length()}.
*/
public char[] buffer();
/** Grows the termBuffer to at least size newSize, preserving the


@ -26,14 +26,15 @@ import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
/**
* The term text of a Token.
*/
/** Default implementation of {@link CharTermAttribute}. */
public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
private static int MIN_BUFFER_SIZE = 10;
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
private int termLength = 0;
/** Initialize this attribute with empty term text */
public CharTermAttributeImpl() {}
public final void copyBuffer(char[] buffer, int offset, int length) {
growTermBuffer(length);
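As a small illustration of working against the term buffer directly, a hypothetical filter that truncates long terms by adjusting the valid length (sketch only, assuming the 4.0-era attribute API; TruncateFilter is an invented name):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical filter that truncates every term to at most 5 characters.
public final class TruncateFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public TruncateFilter(TokenStream in) { super(in); }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    if (termAtt.length() > 5) {
      termAtt.setLength(5); // only the first 5 chars in buffer() remain valid
    }
    return true;
  }
}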


@ -22,22 +22,23 @@ import org.apache.lucene.util.Attribute;
/**
* This attribute can be used to pass different flags down the {@link Tokenizer} chain,
* eg from one TokenFilter to another one.
* e.g. from one TokenFilter to another one.
* <p>
* This is completely distinct from {@link TypeAttribute}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other
* {@link org.apache.lucene.analysis.TokenFilter}s.
* @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
*/
public interface FlagsAttribute extends Attribute {
/**
* <p/>
*
* Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
*
*
* Get the bitset for any bits that have been set.
* @return The bits
* @see #getFlags()
*/
public int getFlags();
/**
* Set the flags to a new bitset.
* @see #getFlags()
*/
public void setFlags(int flags);


@ -19,30 +19,17 @@ package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
/**
* This attribute can be used to pass different flags down the tokenizer chain,
* eg from one TokenFilter to another one.
* @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
*/
/** Default implementation of {@link FlagsAttribute}. */
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
private int flags = 0;
/**
* <p/>
*
* Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
*
*
* @return The bits
*/
/** Initialize this attribute with no bits set */
public FlagsAttributeImpl() {}
public int getFlags() {
return flags;
}
/**
* @see #getFlags()
*/
public void setFlags(int flags) {
this.flags = flags;
}


@ -30,20 +30,22 @@ import org.apache.lucene.util.Attribute;
public interface KeywordAttribute extends Attribute {
/**
* Returns <code>true</code> iff the current token is a keyword, otherwise
* <code>false</code>/
* Returns <code>true</code> if the current token is a keyword, otherwise
* <code>false</code>
*
* @return <code>true</code> iff the current token is a keyword, otherwise
* <code>false</code>/
* @return <code>true</code> if the current token is a keyword, otherwise
* <code>false</code>
* @see #setKeyword(boolean)
*/
public boolean isKeyword();
/**
* Marks the current token as keyword iff set to <code>true</code>.
* Marks the current token as keyword if set to <code>true</code>.
*
* @param isKeyword
* <code>true</code> iff the current token is a keyword, otherwise
* <code>true</code> if the current token is a keyword, otherwise
* <code>false</code>.
* @see #isKeyword()
*/
public void setKeyword(boolean isKeyword);
}
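A hypothetical keyword-marking filter, placed before a keyword-aware stemmer, might look like this (sketch only, assuming the 4.0-era API; ProtectedWordsFilter is an invented name):

import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;

// Hypothetical filter: marks terms from a protected set as keywords so that a
// keyword-aware stemmer placed after it leaves them untouched.
public final class ProtectedWordsFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
  private final Set<String> protectedWords;

  public ProtectedWordsFilter(TokenStream in, Set<String> protectedWords) {
    super(in);
    this.protectedWords = protectedWords;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    if (protectedWords.contains(termAtt.toString())) {
      keywordAtt.setKeyword(true);
    }
    return true;
  }
}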


@ -17,19 +17,15 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeImpl;
/**
*This attribute can be used to mark a token as a keyword. Keyword aware
* {@link TokenStream}s can decide to modify a token based on the return value
* of {@link #isKeyword()} if the token is modified. Stemming filters for
* instance can use this attribute to conditionally skip a term if
* {@link #isKeyword()} returns <code>true</code>.
*/
/** Default implementation of {@link KeywordAttribute}. */
public final class KeywordAttributeImpl extends AttributeImpl implements
KeywordAttribute {
private boolean keyword;
/** Initialize this attribute with the keyword value as false. */
public KeywordAttributeImpl() {}
@Override
public void clear() {
@ -57,24 +53,10 @@ public final class KeywordAttributeImpl extends AttributeImpl implements
return keyword == other.keyword;
}
/**
* Returns <code>true</code> iff the current token is a keyword, otherwise
* <code>false</code>/
*
* @return <code>true</code> iff the current token is a keyword, otherwise
* <code>false</code>/
*/
public boolean isKeyword() {
return keyword;
}
/**
* Marks the current token as keyword iff set to <code>true</code>.
*
* @param isKeyword
* <code>true</code> iff the current token is a keyword, otherwise
* <code>false</code>.
*/
public void setKeyword(boolean isKeyword) {
keyword = isKeyword;
}


@ -23,22 +23,34 @@ import org.apache.lucene.util.Attribute;
* The start and end character offset of a Token.
*/
public interface OffsetAttribute extends Attribute {
/** Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to termText.length(), as the term text may have been altered by a
stemmer or some other filter. */
/**
* Returns this Token's starting offset, the position of the first character
* corresponding to this token in the source text.
* <p>
* Note that the difference between {@link #endOffset()} and <code>startOffset()</code>
* may not be equal to termText.length(), as the term text may have been altered by a
* stemmer or some other filter.
* @see #setOffset(int, int)
*/
public int startOffset();
/** Set the starting and ending offset.
@see #startOffset() and #endOffset()*/
/**
* Set the starting and ending offset.
* @throws IllegalArgumentException If <code>startOffset</code> or <code>endOffset</code>
* are negative, or if <code>startOffset</code> is greater than
* <code>endOffset</code>
* @see #startOffset()
* @see #endOffset()
*/
public void setOffset(int startOffset, int endOffset);
/** Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). */
/**
* Returns this Token's ending offset, one greater than the position of the
* last character corresponding to this token in the source text. The length
* of the token in the source text is (<code>endOffset()</code> - {@link #startOffset()}).
* @see #setOffset(int, int)
*/
public int endOffset();
}


@ -19,26 +19,18 @@ package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
/**
* The start and end character offset of a Token.
*/
/** Default implementation of {@link OffsetAttribute}. */
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
private int startOffset;
private int endOffset;
/** Initialize this attribute with startOffset and endOffset of 0. */
public OffsetAttributeImpl() {}
/** Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to termText.length(), as the term text may have been altered by a
stemmer or some other filter. */
public int startOffset() {
return startOffset;
}
/** Set the starting and ending offset.
@see #startOffset() and #endOffset()*/
public void setOffset(int startOffset, int endOffset) {
// TODO: we could assert that this is set-once, ie,
@ -56,10 +48,6 @@ public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribut
this.endOffset = endOffset;
}
/** Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). */
public int endOffset() {
return endOffset;
}


@ -17,20 +17,34 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.BytesRef;
/**
* The payload of a Token.
* The payload of a Token.
* <p>
* The payload is stored in the index at each position, and can
* be used to influence scoring when using Payload-based queries
* in the {@link org.apache.lucene.search.payloads} and
* {@link org.apache.lucene.search.spans} packages.
* <p>
* NOTE: because the payload will be stored at each position, it's usually
* best to use the minimum number of bytes necessary. Some codec implementations
* may optimize payload storage when all payloads have the same length.
*
* @see DocsAndPositionsEnum
*/
public interface PayloadAttribute extends Attribute {
/**
* Returns this Token's payload.
* @see #setPayload(BytesRef)
*/
public BytesRef getPayload();
/**
* Sets this Token's payload.
* @see #getPayload()
*/
public void setPayload(BytesRef payload);
}
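A sketch of a filter that attaches a one-byte payload at each position (TypePayloadFilter is an invented name; this assumes the 4.0-era attribute API and BytesRef payloads as described above):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.BytesRef;

// Hypothetical filter: stores a single-byte payload (1 for tokens whose type is
// not the default "word", 0 otherwise) at every position.
public final class TypePayloadFilter extends TokenFilter {
  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);

  public TypePayloadFilter(TokenStream in) { super(in); }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    byte flag = TypeAttribute.DEFAULT_TYPE.equals(typeAtt.type()) ? (byte) 0 : (byte) 1;
    payloadAtt.setPayload(new BytesRef(new byte[] { flag })); // keep payloads small
    return true;
  }
}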


@ -20,9 +20,7 @@ package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
/**
* The payload of a Token.
*/
/** Default implementation of {@link PayloadAttribute}. */
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
private BytesRef payload;
@ -38,16 +36,10 @@ public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttrib
this.payload = payload;
}
/**
* Returns this Token's payload.
*/
public BytesRef getPayload() {
return this.payload;
}
/**
* Sets this Token's payload.
*/
public void setPayload(BytesRef payload) {
this.payload = payload;
}


@ -49,11 +49,14 @@ public interface PositionIncrementAttribute extends Attribute {
/** Set the position increment. The default value is one.
*
* @param positionIncrement the distance from the prior term
* @throws IllegalArgumentException if <code>positionIncrement</code>
* is negative.
* @see #getPositionIncrement()
*/
public void setPositionIncrement(int positionIncrement);
/** Returns the position increment of this Token.
* @see #setPositionIncrement
* @see #setPositionIncrement(int)
*/
public int getPositionIncrement();
}
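A common use of a zero position increment is stacking a synonym on top of the original token so that phrase queries match either form. A minimal sketch, assuming the 4.0-era API (the filter name and the hard-coded synonym are illustrative only):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Hypothetical filter: stacks the synonym "car" on top of every "automobile" token.
public final class AutomobileSynonymFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
  private boolean pendingSynonym;

  public AutomobileSynonymFilter(TokenStream in) { super(in); }

  @Override
  public boolean incrementToken() throws IOException {
    if (pendingSynonym) {
      termAtt.setEmpty().append("car");
      posIncAtt.setPositionIncrement(0); // same position as "automobile"
      pendingSynonym = false;
      return true;
    }
    if (!input.incrementToken()) return false;
    if (termAtt.toString().equals("automobile")) pendingSynonym = true;
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    pendingSynonym = false;
  }
}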


@ -17,40 +17,15 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeImpl;
/** Determines the position of this token
* relative to the previous Token in a {@link TokenStream}, used in phrase
* searching.
*
* <p>The default value is one.
*
* <p>Some common uses for this are:<ul>
*
* <li>Set it to zero to put multiple terms in the same position. This is
* useful if, e.g., a word has multiple stems. Searches for phrases
* including either stem will match. In this case, all but the first stem's
* increment should be set to zero: the increment of the first instance
* should be one. Repeating a token with an increment of zero can also be
* used to boost the scores of matches on that token.
*
* <li>Set it to values greater than one to inhibit exact phrase matches.
* If, for example, one does not want phrases to match across removed stop
* words, then one could build a stop word filter that removes stop words and
* also sets the increment to the number of stop words removed before each
* non-stop word. Then exact phrase queries will only match when the terms
* occur with no intervening stop words.
*
* </ul>
*/
/** Default implementation of {@link PositionIncrementAttribute}. */
public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable {
private int positionIncrement = 1;
/** Set the position increment. The default value is one.
*
* @param positionIncrement the distance from the prior term
*/
/** Initialize this attribute with position increment of 1 */
public PositionIncrementAttributeImpl() {}
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0) {
throw new IllegalArgumentException
@ -59,9 +34,6 @@ public class PositionIncrementAttributeImpl extends AttributeImpl implements Pos
this.positionIncrement = positionIncrement;
}
/** Returns the position increment of this Token.
* @see #setPositionIncrement
*/
public int getPositionIncrement() {
return positionIncrement;
}


@ -26,11 +26,20 @@ import org.apache.lucene.util.Attribute;
* produced by decompounding, word splitting/joining,
* synonym filtering, etc.
*
* <p>The default value is one. */
* <p>NOTE: this is optional, and most analyzers
* don't change the default value (1). */
public interface PositionLengthAttribute extends Attribute {
/** @param positionLength how many positions this token
* spans. */
/**
* Set the position length of this Token.
* <p>
* The default value is one.
* @param positionLength how many positions this token
* spans.
* @throws IllegalArgumentException if <code>positionLength</code>
* is zero or negative.
* @see #getPositionLength()
*/
public void setPositionLength(int positionLength);
/** Returns the position length of this Token.


@ -19,13 +19,13 @@ package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
/** See {@link PositionLengthAttribute}. */
/** Default implementation of {@link PositionLengthAttribute}. */
public class PositionLengthAttributeImpl extends AttributeImpl implements PositionLengthAttribute, Cloneable {
private int positionLength = 1;
/** @param positionLength how many positions this token
* spans. NOTE: this is optional, and most analyzers
* don't change the default value (1). */
/** Initializes this attribute with position length of 1. */
public PositionLengthAttributeImpl() {}
public void setPositionLength(int positionLength) {
if (positionLength < 1) {
throw new IllegalArgumentException
@ -34,9 +34,6 @@ public class PositionLengthAttributeImpl extends AttributeImpl implements Positi
this.positionLength = positionLength;
}
/** Returns the position length of this Token.
* @see #setPositionLength
*/
public int getPositionLength() {
return positionLength;
}


@ -56,7 +56,7 @@ public interface TermToBytesRefAttribute extends Attribute {
* Updates the bytes {@link #getBytesRef()} to contain this term's
* final encoding, and returns its hashcode.
* @return the hashcode as defined by {@link BytesRef#hashCode}:
* <pre>
* <pre class="prettyprint">
* int hash = 0;
* for (int i = termBytes.offset; i &lt; termBytes.offset+termBytes.length; i++) {
* hash = 31*hash + termBytes.bytes[i];


@ -27,10 +27,15 @@ public interface TypeAttribute extends Attribute {
/** the default type */
public static final String DEFAULT_TYPE = "word";
/** Returns this Token's lexical type. Defaults to "word". */
/**
* Returns this Token's lexical type. Defaults to "word".
* @see #setType(String)
*/
public String type();
/** Set the lexical type.
@see #type() */
/**
* Set the lexical type.
* @see #type()
*/
public void setType(String type);
}


@ -19,27 +19,24 @@ package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
/**
* A Token's lexical type. The Default value is "word".
*/
/** Default implementation of {@link TypeAttribute}. */
public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable {
private String type;
/** Initialize this attribute with {@link TypeAttribute#DEFAULT_TYPE} */
public TypeAttributeImpl() {
this(DEFAULT_TYPE);
}
/** Initialize this attribute with <code>type</code> */
public TypeAttributeImpl(String type) {
this.type = type;
}
/** Returns this Token's lexical type. Defaults to "word". */
public String type() {
return type;
}
/** Set the lexical type.
@see #type() */
public void setType(String type) {
this.type = type;
}


@ -41,6 +41,14 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
private final String name;
/**
* Creates a new codec.
* <p>
* The provided name will be written into the index segment: in order for
* the segment to be read, this class should be registered with Java's
* SPI mechanism (registered in META-INF/ of your jar file, etc).
* @param name must be all ascii alphanumeric, and less than 128 characters in length.
*/
public Codec(String name) {
NamedSPILoader.checkServiceName(name);
this.name = name;
@ -118,6 +126,10 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
defaultCodec = codec;
}
/**
* returns the codec's name. Subclasses can override to provide
* more detail (such as parameters).
*/
@Override
public String toString() {
return name;
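Registration happens through the standard Java service-provider mechanism. A sketch of what this looks like, assuming the 4.0-era Codec API; the package name com.example and the codec class are placeholders, and "Lucene40" is this era's default codec name:

// In the jar that ships the codec, a provider-configuration file:
//   META-INF/services/org.apache.lucene.codecs.Codec
// containing one line with the fully-qualified class name, e.g.:
//   com.example.MyCodec
import org.apache.lucene.codecs.Codec;

public class CodecLookup {
  public static void main(String[] args) {
    // The name stored in the index is resolved back to an instance via SPI.
    Codec codec = Codec.forName("Lucene40");
    System.out.println(codec.getName());
  }
}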


@ -18,14 +18,24 @@ package org.apache.lucene.codecs;
*/
import java.io.IOException;
import java.util.ServiceLoader;
import java.util.Set;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; // javadocs
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.util.NamedSPILoader;
/**
* Encodes/decodes terms, postings, and proximity data.
* <p>
* Note, when extending this class, the name ({@link #getName}) may be
* written into the index in certain configurations. In order for the segment
* to be read, the name must resolve to your implementation via {@link #forName(String)}.
* This method uses Java's
* {@link ServiceLoader Service Provider Interface} to resolve codec names.
* <p>
* @see ServiceLoader
* @lucene.experimental */
public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
@ -38,11 +48,21 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
*/
private final String name;
/**
* Creates a new postings format.
* <p>
* The provided name will be written into the index segment in some configurations
* (such as when using {@link PerFieldPostingsFormat}): in such configurations,
* for the segment to be read this class should be registered with Java's
* SPI mechanism (registered in META-INF/ of your jar file, etc).
* @param name must be all ascii alphanumeric, and less than 128 characters in length.
*/
protected PostingsFormat(String name) {
NamedSPILoader.checkServiceName(name);
this.name = name;
}
/** Returns this posting format's name */
@Override
public final String getName() {
return name;
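Resolution works the same way as for Codec: a name stored by PerFieldPostingsFormat is looked up through SPI. A small sketch, assuming availablePostingsFormats() and forName(String) behave as described above ("Lucene40" is this era's default postings format name):

import java.util.Set;
import org.apache.lucene.codecs.PostingsFormat;

public class PostingsFormatLookup {
  public static void main(String[] args) {
    Set<String> names = PostingsFormat.availablePostingsFormats(); // every SPI-registered format
    System.out.println(names);
    PostingsFormat fmt = PostingsFormat.forName("Lucene40");       // resolve a stored name
    System.out.println(fmt.getName());
  }
}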


@ -1,485 +1,485 @@
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.FuzzySet.ContainsResult;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* <p>
* A {@link PostingsFormat} useful for low doc-frequency fields such as primary
* keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
* for reads in segments known to have no record of the key. A choice of
* delegate PostingsFormat is used to record all other Postings data.
* </p>
* <p>
* A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
* settings on a per-field basis. The default configuration is
* {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
* values using {@link MurmurHash2}. This should be suitable for most purposes.
* </p>
* <p>
* The format of the blm file is as follows:
* </p>
* <ul>
* <li>BloomFilter (.blm) --&gt; Header, DelegatePostingsFormatName,
* NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
* <li>Filter --&gt; FieldNumber, FuzzySet</li>
* <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput)}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DelegatePostingsFormatName --&gt; {@link DataOutput#writeString(String)
* String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
* <li>NumFilteredFields --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt Uint32} The number of the
* field in this segment</li>
* </ul>
* @lucene.experimental
*/
public class BloomFilteringPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "BloomFilter";
public static final int BLOOM_CODEC_VERSION = 1;
/** Extension of Bloom Filters file */
static final String BLOOM_EXTENSION = "blm";
BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
private PostingsFormat delegatePostingsFormat;
/**
* Creates Bloom filters for a selection of fields created in the index. This
* is recorded as a set of Bitsets held as a segment summary in an additional
* "blm" file. This PostingsFormat delegates to a choice of delegate
* PostingsFormat for encoding all other postings data.
*
* @param delegatePostingsFormat
* The PostingsFormat that records all the non-bloom filter data i.e.
* postings info.
* @param bloomFilterFactory
* The {@link BloomFilterFactory} responsible for sizing BloomFilters
* appropriately
*/
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
BloomFilterFactory bloomFilterFactory) {
super(BLOOM_CODEC_NAME);
this.delegatePostingsFormat = delegatePostingsFormat;
this.bloomFilterFactory = bloomFilterFactory;
}
/**
* Creates Bloom filters for a selection of fields created in the index. This
* is recorded as a set of Bitsets held as a segment summary in an additional
* "blm" file. This PostingsFormat delegates to a choice of delegate
* PostingsFormat for encoding all other postings data. This choice of
* constructor defaults to the {@link DefaultBloomFilterFactory} for
* configuring per-field BloomFilters.
*
* @param delegatePostingsFormat
* The PostingsFormat that records all the non-bloom filter data i.e.
* postings info.
*/
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
this(delegatePostingsFormat, new DefaultBloomFilterFactory());
}
// Used only by core Lucene at read-time via Service Provider instantiation -
// do not use at Write-time in application code.
public BloomFilteringPostingsFormat() {
super(BLOOM_CODEC_NAME);
}
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
if (delegatePostingsFormat == null) {
throw new UnsupportedOperationException("Error - " + getClass().getName()
+ " has been constructed without a choice of PostingsFormat");
}
return new BloomFilteredFieldsConsumer(
delegatePostingsFormat.fieldsConsumer(state), state,
delegatePostingsFormat);
}
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new BloomFilteredFieldsProducer(state);
}
public class BloomFilteredFieldsProducer extends FieldsProducer {
private FieldsProducer delegateFieldsProducer;
HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
public BloomFilteredFieldsProducer(SegmentReadState state)
throws IOException {
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexInput bloomIn = null;
try {
bloomIn = state.dir.openInput(bloomFileName, state.context);
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
BLOOM_CODEC_VERSION);
// // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format
PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
.readString());
this.delegateFieldsProducer = delegatePostingsFormat
.fieldsProducer(state);
int numBlooms = bloomIn.readInt();
for (int i = 0; i < numBlooms; i++) {
int fieldNum = bloomIn.readInt();
FuzzySet bloom = FuzzySet.deserialize(bloomIn);
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
bloomsByFieldName.put(fieldInfo.name, bloom);
}
} finally {
IOUtils.close(bloomIn);
}
}
public Iterator<String> iterator() {
return delegateFieldsProducer.iterator();
}
public void close() throws IOException {
delegateFieldsProducer.close();
}
public Terms terms(String field) throws IOException {
FuzzySet filter = bloomsByFieldName.get(field);
if (filter == null) {
return delegateFieldsProducer.terms(field);
} else {
Terms result = delegateFieldsProducer.terms(field);
if (result == null) {
return null;
}
return new BloomFilteredTerms(result, filter);
}
}
public int size() {
return delegateFieldsProducer.size();
}
class BloomFilteredTerms extends Terms {
private Terms delegateTerms;
private FuzzySet filter;
public BloomFilteredTerms(Terms terms, FuzzySet filter) {
this.delegateTerms = terms;
this.filter = filter;
}
@Override
public TermsEnum intersect(CompiledAutomaton compiled,
final BytesRef startTerm) throws IOException {
return delegateTerms.intersect(compiled, startTerm);
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
TermsEnum result;
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
// recycle the existing BloomFilteredTermsEnum by asking the delegate
// to recycle its contained TermsEnum
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
if (bfte.filter == filter) {
bfte.delegateTermsEnum = delegateTerms
.iterator(bfte.delegateTermsEnum);
return bfte;
}
}
// We have been handed something we cannot reuse (either null, wrong
// class or wrong filter) so allocate a new object
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
filter);
return result;
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return delegateTerms.getComparator();
}
@Override
public long size() throws IOException {
return delegateTerms.size();
}
@Override
public long getSumTotalTermFreq() throws IOException {
return delegateTerms.getSumTotalTermFreq();
}
@Override
public long getSumDocFreq() throws IOException {
return delegateTerms.getSumDocFreq();
}
@Override
public int getDocCount() throws IOException {
return delegateTerms.getDocCount();
}
@Override
public boolean hasOffsets() {
return delegateTerms.hasOffsets();
}
@Override
public boolean hasPositions() {
return delegateTerms.hasPositions();
}
@Override
public boolean hasPayloads() {
return delegateTerms.hasPayloads();
}
}
class BloomFilteredTermsEnum extends TermsEnum {
TermsEnum delegateTermsEnum;
private FuzzySet filter;
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
this.delegateTermsEnum = iterator;
this.filter = filter;
}
@Override
public final BytesRef next() throws IOException {
return delegateTermsEnum.next();
}
@Override
public final Comparator<BytesRef> getComparator() {
return delegateTermsEnum.getComparator();
}
@Override
public final boolean seekExact(BytesRef text, boolean useCache)
throws IOException {
// The magical fail-fast speed up that is the entire point of all of
// this code - save a disk seek if there is a match on an in-memory
// structure
// that may occasionally give a false positive but guaranteed no false
// negatives
if (filter.contains(text) == ContainsResult.NO) {
return false;
}
return delegateTermsEnum.seekExact(text, useCache);
}
@Override
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
throws IOException {
return delegateTermsEnum.seekCeil(text, useCache);
}
@Override
public final void seekExact(long ord) throws IOException {
delegateTermsEnum.seekExact(ord);
}
@Override
public final BytesRef term() throws IOException {
return delegateTermsEnum.term();
}
@Override
public final long ord() throws IOException {
return delegateTermsEnum.ord();
}
@Override
public final int docFreq() throws IOException {
return delegateTermsEnum.docFreq();
}
@Override
public final long totalTermFreq() throws IOException {
return delegateTermsEnum.totalTermFreq();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
DocsAndPositionsEnum reuse, int flags) throws IOException {
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
throws IOException {
return delegateTermsEnum.docs(liveDocs, reuse, flags);
}
}
}
class BloomFilteredFieldsConsumer extends FieldsConsumer {
private FieldsConsumer delegateFieldsConsumer;
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
private SegmentWriteState state;
// private PostingsFormat delegatePostingsFormat;
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
this.delegateFieldsConsumer = fieldsConsumer;
// this.delegatePostingsFormat=delegatePostingsFormat;
this.state = state;
}
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
if (bloomFilter != null) {
assert bloomFilters.containsKey(field) == false;
bloomFilters.put(field, bloomFilter);
return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
} else {
// No, use the unfiltered fieldsConsumer - we are not interested in
// recording any term Bitsets.
return delegateFieldsConsumer.addField(field);
}
}
@Override
public void close() throws IOException {
delegateFieldsConsumer.close();
// Now we are done accumulating values for these fields
List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
FuzzySet bloomFilter = entry.getValue();
if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
nonSaturatedBlooms.add(entry);
}
}
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexOutput bloomOutput = null;
try {
bloomOutput = state.directory
.createOutput(bloomFileName, state.context);
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
BLOOM_CODEC_VERSION);
// remember the name of the postings format we will delegate to
bloomOutput.writeString(delegatePostingsFormat.getName());
// First field in the output file is the number of fields+blooms saved
bloomOutput.writeInt(nonSaturatedBlooms.size());
for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
FieldInfo fieldInfo = entry.getKey();
FuzzySet bloomFilter = entry.getValue();
bloomOutput.writeInt(fieldInfo.number);
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
}
} finally {
IOUtils.close(bloomOutput);
}
//We are done with large bitsets so no need to keep them hanging around
bloomFilters.clear();
}
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
bloomFilter);
if (rightSizedSet == null) {
rightSizedSet = bloomFilter;
}
rightSizedSet.serialize(bloomOutput);
}
}
class WrappedTermsConsumer extends TermsConsumer {
private TermsConsumer delegateTermsConsumer;
private FuzzySet bloomFilter;
public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
this.delegateTermsConsumer = termsConsumer;
this.bloomFilter = bloomFilter;
}
public PostingsConsumer startTerm(BytesRef text) throws IOException {
return delegateTermsConsumer.startTerm(text);
}
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// Record this term in our BloomFilter
if (stats.docFreq > 0) {
bloomFilter.addValue(text);
}
delegateTermsConsumer.finishTerm(text, stats);
}
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
throws IOException {
delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
}
public Comparator<BytesRef> getComparator() throws IOException {
return delegateTermsConsumer.getComparator();
}
}
}
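A usage sketch for the class above: wrap a delegate format and select it only for a primary-key field. This assumes the 4.0-era Lucene40Codec with its overridable getPostingsFormatForField and IndexWriterConfig.setCodec; the field name "id" is an example:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

public class BloomSetup {
  static IndexWriterConfig config(Version matchVersion, Analyzer analyzer) {
    // Bloom-filtered postings for the "id" field, default postings everywhere else.
    final PostingsFormat bloom = new BloomFilteringPostingsFormat(new Lucene40PostingsFormat());
    Codec codec = new Lucene40Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return "id".equals(field) ? bloom : super.getPostingsFormatForField(field);
      }
    };
    IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, analyzer);
    iwc.setCodec(codec);
    return iwc;
  }
}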
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.FuzzySet.ContainsResult;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* <p>
* A {@link PostingsFormat} useful for low doc-frequency fields such as primary
* keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
* for reads in segments known to have no record of the key. A choice of
* delegate PostingsFormat is used to record all other Postings data.
* </p>
* <p>
* A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
* settings on a per-field basis. The default configuration is
* {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
* values using {@link MurmurHash2}. This should be suitable for most purposes.
* </p>
* <p>
* The format of the blm file is as follows:
* </p>
* <ul>
* <li>BloomFilter (.blm) --&gt; Header, DelegatePostingsFormatName,
* NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
* <li>Filter --&gt; FieldNumber, FuzzySet</li>
* <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput)}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DelegatePostingsFormatName --&gt; {@link DataOutput#writeString(String)
* String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
* <li>NumFilteredFields --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt Uint32} The number of the
* field in this segment</li>
* </ul>
* @lucene.experimental
*/
public class BloomFilteringPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "BloomFilter";
public static final int BLOOM_CODEC_VERSION = 1;
/** Extension of Bloom Filters file */
static final String BLOOM_EXTENSION = "blm";
BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
private PostingsFormat delegatePostingsFormat;
/**
* Creates Bloom filters for a selection of fields created in the index. This
* is recorded as a set of Bitsets held as a segment summary in an additional
* "blm" file. This PostingsFormat delegates to a choice of delegate
* PostingsFormat for encoding all other postings data.
*
* @param delegatePostingsFormat
* The PostingsFormat that records all the non-bloom filter data i.e.
* postings info.
* @param bloomFilterFactory
* The {@link BloomFilterFactory} responsible for sizing BloomFilters
* appropriately
*/
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
BloomFilterFactory bloomFilterFactory) {
super(BLOOM_CODEC_NAME);
this.delegatePostingsFormat = delegatePostingsFormat;
this.bloomFilterFactory = bloomFilterFactory;
}
/**
* Creates Bloom filters for a selection of fields created in the index. This
* is recorded as a set of Bitsets held as a segment summary in an additional
* "blm" file. This PostingsFormat delegates to a choice of delegate
* PostingsFormat for encoding all other postings data. This choice of
* constructor defaults to the {@link DefaultBloomFilterFactory} for
* configuring per-field BloomFilters.
*
* @param delegatePostingsFormat
* The PostingsFormat that records all the non-bloom filter data i.e.
* postings info.
*/
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
this(delegatePostingsFormat, new DefaultBloomFilterFactory());
}
// Used only by core Lucene at read-time via Service Provider instantiation -
// do not use at Write-time in application code.
public BloomFilteringPostingsFormat() {
super(BLOOM_CODEC_NAME);
}
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
if (delegatePostingsFormat == null) {
throw new UnsupportedOperationException("Error - " + getClass().getName()
+ " has been constructed without a choice of PostingsFormat");
}
return new BloomFilteredFieldsConsumer(
delegatePostingsFormat.fieldsConsumer(state), state,
delegatePostingsFormat);
}
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new BloomFilteredFieldsProducer(state);
}
public class BloomFilteredFieldsProducer extends FieldsProducer {
private FieldsProducer delegateFieldsProducer;
HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
public BloomFilteredFieldsProducer(SegmentReadState state)
throws IOException {
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexInput bloomIn = null;
try {
bloomIn = state.dir.openInput(bloomFileName, state.context);
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
BLOOM_CODEC_VERSION);
// // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format
PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
.readString());
this.delegateFieldsProducer = delegatePostingsFormat
.fieldsProducer(state);
int numBlooms = bloomIn.readInt();
for (int i = 0; i < numBlooms; i++) {
int fieldNum = bloomIn.readInt();
FuzzySet bloom = FuzzySet.deserialize(bloomIn);
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
bloomsByFieldName.put(fieldInfo.name, bloom);
}
} finally {
IOUtils.close(bloomIn);
}
}
public Iterator<String> iterator() {
return delegateFieldsProducer.iterator();
}
public void close() throws IOException {
delegateFieldsProducer.close();
}
public Terms terms(String field) throws IOException {
FuzzySet filter = bloomsByFieldName.get(field);
if (filter == null) {
return delegateFieldsProducer.terms(field);
} else {
Terms result = delegateFieldsProducer.terms(field);
if (result == null) {
return null;
}
return new BloomFilteredTerms(result, filter);
}
}
public int size() {
return delegateFieldsProducer.size();
}
class BloomFilteredTerms extends Terms {
private Terms delegateTerms;
private FuzzySet filter;
public BloomFilteredTerms(Terms terms, FuzzySet filter) {
this.delegateTerms = terms;
this.filter = filter;
}
@Override
public TermsEnum intersect(CompiledAutomaton compiled,
final BytesRef startTerm) throws IOException {
return delegateTerms.intersect(compiled, startTerm);
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
TermsEnum result;
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
// recycle the existing BloomFilteredTermsEnum by asking the delegate
// to recycle its contained TermsEnum
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
if (bfte.filter == filter) {
bfte.delegateTermsEnum = delegateTerms
.iterator(bfte.delegateTermsEnum);
return bfte;
}
}
// We have been handed something we cannot reuse (either null, wrong
// class or wrong filter) so allocate a new object
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
filter);
return result;
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return delegateTerms.getComparator();
}
@Override
public long size() throws IOException {
return delegateTerms.size();
}
@Override
public long getSumTotalTermFreq() throws IOException {
return delegateTerms.getSumTotalTermFreq();
}
@Override
public long getSumDocFreq() throws IOException {
return delegateTerms.getSumDocFreq();
}
@Override
public int getDocCount() throws IOException {
return delegateTerms.getDocCount();
}
@Override
public boolean hasOffsets() {
return delegateTerms.hasOffsets();
}
@Override
public boolean hasPositions() {
return delegateTerms.hasPositions();
}
@Override
public boolean hasPayloads() {
return delegateTerms.hasPayloads();
}
}
class BloomFilteredTermsEnum extends TermsEnum {
TermsEnum delegateTermsEnum;
private FuzzySet filter;
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
this.delegateTermsEnum = iterator;
this.filter = filter;
}
@Override
public final BytesRef next() throws IOException {
return delegateTermsEnum.next();
}
@Override
public final Comparator<BytesRef> getComparator() {
return delegateTermsEnum.getComparator();
}
@Override
public final boolean seekExact(BytesRef text, boolean useCache)
throws IOException {
// The magical fail-fast speed up that is the entire point of all of
// this code - save a disk seek if there is a match on an in-memory
// structure
// that may occasionally give a false positive but guaranteed no false
// negatives
if (filter.contains(text) == ContainsResult.NO) {
return false;
}
return delegateTermsEnum.seekExact(text, useCache);
}
@Override
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
throws IOException {
return delegateTermsEnum.seekCeil(text, useCache);
}
@Override
public final void seekExact(long ord) throws IOException {
delegateTermsEnum.seekExact(ord);
}
@Override
public final BytesRef term() throws IOException {
return delegateTermsEnum.term();
}
@Override
public final long ord() throws IOException {
return delegateTermsEnum.ord();
}
@Override
public final int docFreq() throws IOException {
return delegateTermsEnum.docFreq();
}
@Override
public final long totalTermFreq() throws IOException {
return delegateTermsEnum.totalTermFreq();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
DocsAndPositionsEnum reuse, int flags) throws IOException {
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
throws IOException {
return delegateTermsEnum.docs(liveDocs, reuse, flags);
}
}
}
class BloomFilteredFieldsConsumer extends FieldsConsumer {
private FieldsConsumer delegateFieldsConsumer;
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
private SegmentWriteState state;
// private PostingsFormat delegatePostingsFormat;
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
this.delegateFieldsConsumer = fieldsConsumer;
// this.delegatePostingsFormat=delegatePostingsFormat;
this.state = state;
}
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
if (bloomFilter != null) {
assert bloomFilters.containsKey(field) == false;
bloomFilters.put(field, bloomFilter);
return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
} else {
// No, use the unfiltered fieldsConsumer - we are not interested in
// recording any term Bitsets.
return delegateFieldsConsumer.addField(field);
}
}
@Override
public void close() throws IOException {
delegateFieldsConsumer.close();
// Now we are done accumulating values for these fields
List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
FuzzySet bloomFilter = entry.getValue();
if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
nonSaturatedBlooms.add(entry);
}
}
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexOutput bloomOutput = null;
try {
bloomOutput = state.directory
.createOutput(bloomFileName, state.context);
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
BLOOM_CODEC_VERSION);
// remember the name of the postings format we will delegate to
bloomOutput.writeString(delegatePostingsFormat.getName());
// First field in the output file is the number of fields+blooms saved
bloomOutput.writeInt(nonSaturatedBlooms.size());
for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
FieldInfo fieldInfo = entry.getKey();
FuzzySet bloomFilter = entry.getValue();
bloomOutput.writeInt(fieldInfo.number);
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
}
} finally {
IOUtils.close(bloomOutput);
}
// We are done with large bitsets so no need to keep them hanging around
bloomFilters.clear();
}
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
bloomFilter);
if (rightSizedSet == null) {
rightSizedSet = bloomFilter;
}
rightSizedSet.serialize(bloomOutput);
}
}
class WrappedTermsConsumer extends TermsConsumer {
private TermsConsumer delegateTermsConsumer;
private FuzzySet bloomFilter;
public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
this.delegateTermsConsumer = termsConsumer;
this.bloomFilter = bloomFilter;
}
public PostingsConsumer startTerm(BytesRef text) throws IOException {
return delegateTermsConsumer.startTerm(text);
}
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// Record this term in our BloomFilter
if (stats.docFreq > 0) {
bloomFilter.addValue(text);
}
delegateTermsConsumer.finishTerm(text, stats);
}
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
throws IOException {
delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
}
public Comparator<BytesRef> getComparator() throws IOException {
return delegateTermsConsumer.getComparator();
}
}
}
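To make the fail-fast idea above concrete, here is a rough wiring sketch: a per-field codec that puts the Bloom-filtered postings format in front of a delegate for a primary-key style field. The Lucene40Codec override point, the BloomFilteringPostingsFormat constructor and the package names are assumptions about the surrounding 4.x codec APIs, not something this hunk defines.

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;

// Hypothetical codec: Bloom-filtered postings for the "id" field, plain postings elsewhere.
class PrimaryKeyCodec extends Lucene40Codec {
  private final PostingsFormat idFormat =
      new BloomFilteringPostingsFormat(new Lucene40PostingsFormat());
  private final PostingsFormat defaultFormat = new Lucene40PostingsFormat();

  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    // Primary-key lookups benefit most from the in-memory fail-fast check.
    return "id".equals(field) ? idFormat : defaultFormat;
  }
}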

View File

@ -1,25 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
</body>
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
</body>
</html>

View File

@ -38,12 +38,21 @@ import org.apache.lucene.index.DocValues;
public class ByteDocValuesField extends StoredField {
/**
* Type for 8-bit byte DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.FIXED_INTS_8);
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified 8-bit byte value
* @param name field name
* @param value 8-bit byte value
* @throws IllegalArgumentException if the field name is null.
*/
public ByteDocValuesField(String name, byte value) {
super(name, TYPE);
fieldsData = Byte.valueOf(value);
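As a usage note for the small numeric DocValues fields such as ByteDocValuesField, one field instance per document is all that is needed; the sketch below also adds a stored sibling so the value can be displayed, which is a common but purely illustrative pattern.

import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;

class ByteDocValuesExample {
  static Document build(byte rating) {
    Document doc = new Document();
    // Column-stride value available for sorting/scoring at search time.
    doc.add(new ByteDocValuesField("rating", rating));
    // Optional stored copy so the value can also be shown in results.
    doc.add(new StoredField("ratingStored", (int) rating));
    return doc;
  }
}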

View File

@ -92,10 +92,14 @@ public class CompressionTools {
return compress(result.bytes, 0, result.length, compressionLevel);
}
/** Decompress the byte array previously returned by
* compress (referenced by the provided BytesRef) */
public static byte[] decompress(BytesRef bytes) throws DataFormatException {
return decompress(bytes.bytes, bytes.offset, bytes.length);
}
/** Decompress the byte array previously returned by
* compress */
public static byte[] decompress(byte[] value) throws DataFormatException {
return decompress(value, 0, value.length);
}
@ -130,6 +134,8 @@ public class CompressionTools {
return decompressString(value, 0, value.length);
}
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
final byte[] bytes = decompress(value, offset, length);
CharsRef result = new CharsRef(bytes.length);
@ -137,6 +143,8 @@ public class CompressionTools {
return new String(result.chars, 0, result.length);
}
/** Decompress the byte array (referenced by the provided BytesRef)
* previously returned by compressString back into a String */
public static String decompressString(BytesRef bytes) throws DataFormatException {
return decompressString(bytes.bytes, bytes.offset, bytes.length);
}
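A minimal round trip for the helpers documented above, assuming only the compressString/decompressString pair shown in this file:

import java.util.zip.DataFormatException;
import org.apache.lucene.document.CompressionTools;

public class CompressionRoundTrip {
  public static void main(String[] args) throws DataFormatException {
    // Compress a String into a byte[] (e.g. to keep in a stored-only field)...
    byte[] compressed = CompressionTools.compressString("some stored text");
    // ...and later decompress it back into the original String.
    String restored = CompressionTools.decompressString(compressed);
    System.out.println(restored);
  }
}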

View File

@ -185,7 +185,20 @@ public class DateTools {
/** Specifies the time granularity. */
public static enum Resolution {
YEAR(4), MONTH(6), DAY(8), HOUR(10), MINUTE(12), SECOND(14), MILLISECOND(17);
/** Limit a date's resolution to year granularity. */
YEAR(4),
/** Limit a date's resolution to month granularity. */
MONTH(6),
/** Limit a date's resolution to day granularity. */
DAY(8),
/** Limit a date's resolution to hour granularity. */
HOUR(10),
/** Limit a date's resolution to minute granularity. */
MINUTE(12),
/** Limit a date's resolution to second granularity. */
SECOND(14),
/** Limit a date's resolution to millisecond granularity. */
MILLISECOND(17);
final int formatLen;
final SimpleDateFormat format; // should be cloned before use, since it's not threadsafe
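A small sketch of how these resolutions are typically used when indexing dates as sortable strings; dateToString and stringToDate are the long-standing DateTools entry points and are assumed unchanged by this patch.

import java.text.ParseException;
import java.util.Date;
import org.apache.lucene.document.DateTools;

public class DateResolutionExample {
  public static void main(String[] args) throws ParseException {
    // Truncate to day granularity: yields something like "20120830".
    String indexed = DateTools.dateToString(new Date(), DateTools.Resolution.DAY);
    // Round-trip it back into a Date (the time-of-day information is lost).
    Date parsed = DateTools.stringToDate(indexed);
    System.out.println(indexed + " -> " + parsed);
  }
}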

View File

@ -44,23 +44,49 @@ import org.apache.lucene.util.BytesRef;
public class DerefBytesDocValuesField extends StoredField {
// TODO: ideally indexer figures out var vs fixed on its own!?
/**
* Type for indirect bytes DocValues: all with the same length
*/
public static final FieldType TYPE_FIXED_LEN = new FieldType();
static {
TYPE_FIXED_LEN.setDocValueType(DocValues.Type.BYTES_FIXED_DEREF);
TYPE_FIXED_LEN.freeze();
}
/**
* Type for indirect bytes DocValues: can have variable lengths
*/
public static final FieldType TYPE_VAR_LEN = new FieldType();
static {
TYPE_VAR_LEN.setDocValueType(DocValues.Type.BYTES_VAR_DEREF);
TYPE_VAR_LEN.freeze();
}
/**
* Create a new variable-length indirect DocValues field.
* <p>
* This calls
* {@link DerefBytesDocValuesField#DerefBytesDocValuesField(String, BytesRef, boolean)
* DerefBytesDocValuesField(name, bytes, false)}, meaning by default
* it allows for values of different lengths. If your values are all
* the same length, use that constructor instead.
* @param name field name
* @param bytes binary content
* @throws IllegalArgumentException if the field name is null
*/
public DerefBytesDocValuesField(String name, BytesRef bytes) {
super(name, TYPE_VAR_LEN);
fieldsData = bytes;
}
/**
* Create a new fixed or variable length indirect DocValues field.
* <p>
* @param name field name
* @param bytes binary content
* @param isFixedLength true if all values have the same length.
* @throws IllegalArgumentException if the field name is null
*/
public DerefBytesDocValuesField(String name, BytesRef bytes, boolean isFixedLength) {
super(name, isFixedLength ? TYPE_FIXED_LEN : TYPE_VAR_LEN);
fieldsData = bytes;

View File

@ -97,6 +97,10 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name) ? Status.YES : Status.NO;
}
/**
* Retrieve the visited document.
* @return StoredDocument populated with stored fields.
*/
public StoredDocument getDocument() {
return doc;
}
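To illustrate getDocument(), the sketch below loads only a selected stored field for one document; the varargs constructor, IndexReader.document(int, StoredFieldVisitor) and the StoredDocument package are assumptions based on the 4.x/LUCENE-3312 APIs touched elsewhere in this diff.

import java.io.IOException;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.StoredDocument; // package assumed; this branch moves these classes around

class LoadTitleOnly {
  static StoredDocument loadTitle(IndexReader reader, int docID) throws IOException {
    // Visit only the "title" stored field; every other stored field is skipped.
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("title");
    reader.document(docID, visitor);
    return visitor.getDocument();
  }
}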

View File

@ -38,12 +38,21 @@ import org.apache.lucene.index.DocValues;
public class DoubleDocValuesField extends StoredField {
/**
* Type for 64-bit double DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.FLOAT_64);
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified 64-bit double value
* @param name field name
* @param value 64-bit double value
* @throws IllegalArgumentException if the field name is null
*/
public DoubleDocValuesField(String name, double value) {
super(name, TYPE);
fieldsData = Double.valueOf(value);

View File

@ -114,6 +114,10 @@ import org.apache.lucene.util.NumericUtils;
public final class DoubleField extends Field {
/**
* Type for a DoubleField that is not stored:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_NOT_STORED = new FieldType();
static {
TYPE_NOT_STORED.setIndexed(true);
@ -124,6 +128,10 @@ public final class DoubleField extends Field {
TYPE_NOT_STORED.freeze();
}
/**
* Type for a stored DoubleField:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_STORED.setIndexed(true);
@ -137,14 +145,26 @@ public final class DoubleField extends Field {
/** Creates a stored or un-stored DoubleField with the provided value
* and default <code>precisionStep</code> {@link
* NumericUtils#PRECISION_STEP_DEFAULT} (4). */
* NumericUtils#PRECISION_STEP_DEFAULT} (4).
* @param name field name
* @param value 64-bit double value
* @param stored Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name is null.
*/
public DoubleField(String name, double value, Store stored) {
super(name, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
fieldsData = Double.valueOf(value);
}
/** Expert: allows you to customize the {@link
* FieldType}. */
* FieldType}.
* @param name field name
* @param value 64-bit double value
* @param type customized field type: must have {@link FieldType#numericType()}
* of {@link FieldType.NumericType#DOUBLE}.
* @throws IllegalArgumentException if the field name or type is null, or
* if the field type does not have a DOUBLE numericType()
*/
public DoubleField(String name, double value, FieldType type) {
super(name, type);
if (type.numericType() != FieldType.NumericType.DOUBLE) {

View File

@ -61,23 +61,42 @@ import org.apache.lucene.index.FieldInvertState; // javadocs
*/
public class Field implements IndexableField, StorableField {
/**
* Field's type
*/
protected final FieldType type;
/**
* Field's name
*/
protected final String name;
// Field's value:
/** Field's value */
protected Object fieldsData;
// Pre-analyzed tokenStream for indexed fields; this is
// separate from fieldsData because you are allowed to
// have both; eg maybe field has a String value but you
// customize how it's tokenized:
/** Pre-analyzed tokenStream for indexed fields; this is
* separate from fieldsData because you are allowed to
* have both; eg maybe field has a String value but you
* customize how it's tokenized */
protected TokenStream tokenStream;
private transient TokenStream internalTokenStream;
private transient ReusableStringReader internalReader;
/**
* Field's boost
* @see #boost()
*/
protected float boost = 1.0f;
/**
* Expert: creates a field with no initial value.
* Intended only for custom Field subclasses.
* @param name field name
* @param type field type
* @throws IllegalArgumentException if either the name or type
* is null.
*/
protected Field(String name, FieldType type) {
if (name == null) {
throw new IllegalArgumentException("name cannot be null");
@ -91,6 +110,13 @@ public class Field implements IndexableField, StorableField {
/**
* Create field with Reader value.
* @param name field name
* @param reader reader value
* @param type field type
* @throws IllegalArgumentException if either the name or type
* is null, or if the field's type is stored(), or
* if tokenized() is false.
* @throws NullPointerException if the reader is null
*/
public Field(String name, Reader reader, FieldType type) {
if (name == null) {
@ -116,6 +142,13 @@ public class Field implements IndexableField, StorableField {
/**
* Create field with TokenStream value.
* @param name field name
* @param tokenStream TokenStream value
* @param type field type
* @throws IllegalArgumentException if either the name or type
* is null, or if the field's type is stored(), or
* if tokenized() is false, or if indexed() is false.
* @throws NullPointerException if the tokenStream is null
*/
public Field(String name, TokenStream tokenStream, FieldType type) {
if (name == null) {
@ -139,6 +172,15 @@ public class Field implements IndexableField, StorableField {
/**
* Create field with binary value.
*
* <p>NOTE: the provided byte[] is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param type field type
* @throws IllegalArgumentException if the field name is null,
* or the field's type is indexed()
* @throws NullPointerException if the type is null
*/
public Field(String name, byte[] value, FieldType type) {
this(name, value, 0, value.length, type);
@ -146,6 +188,17 @@ public class Field implements IndexableField, StorableField {
/**
* Create field with binary value.
*
* <p>NOTE: the provided byte[] is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param offset starting position of the byte array
* @param length valid length of the byte array
* @param type field type
* @throws IllegalArgumentException if the field name is null,
* or the field's type is indexed()
* @throws NullPointerException if the type is null
*/
public Field(String name, byte[] value, int offset, int length, FieldType type) {
this(name, new BytesRef(value, offset, length), type);
@ -156,6 +209,12 @@ public class Field implements IndexableField, StorableField {
*
* <p>NOTE: the provided BytesRef is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param bytes BytesRef pointing to binary content (not copied)
* @param type field type
* @throws IllegalArgumentException if the field name is null,
* or the field's type is indexed()
* @throws NullPointerException if the type is null
*/
public Field(String name, BytesRef bytes, FieldType type) {
if (name == null) {
@ -173,6 +232,13 @@ public class Field implements IndexableField, StorableField {
/**
* Create field with String value.
* @param name field name
* @param value string value
* @param type field type
* @throws IllegalArgumentException if either the name or value
* is null, or if the field's type is neither indexed() nor stored(),
* or if indexed() is false but storeTermVectors() is true.
* @throws NullPointerException if the type is null
*/
public Field(String name, String value, FieldType type) {
if (name == null) {
@ -214,7 +280,7 @@ public class Field implements IndexableField, StorableField {
}
/**
* The TokesStream for this field to be used when indexing, or null. If null,
* The TokenStream for this field to be used when indexing, or null. If null,
* the Reader value or String value is analyzed to produce the indexed tokens.
*/
public TokenStream tokenStreamValue() {
@ -280,6 +346,10 @@ public class Field implements IndexableField, StorableField {
fieldsData = value;
}
/**
* Expert: change the value of this field. See
* {@link #setStringValue(String)}.
*/
public void setByteValue(byte value) {
if (!(fieldsData instanceof Byte)) {
throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Byte");
@ -287,6 +357,10 @@ public class Field implements IndexableField, StorableField {
fieldsData = Byte.valueOf(value);
}
/**
* Expert: change the value of this field. See
* {@link #setStringValue(String)}.
*/
public void setShortValue(short value) {
if (!(fieldsData instanceof Short)) {
throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Short");
@ -294,6 +368,10 @@ public class Field implements IndexableField, StorableField {
fieldsData = Short.valueOf(value);
}
/**
* Expert: change the value of this field. See
* {@link #setStringValue(String)}.
*/
public void setIntValue(int value) {
if (!(fieldsData instanceof Integer)) {
throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Integer");
@ -301,6 +379,10 @@ public class Field implements IndexableField, StorableField {
fieldsData = Integer.valueOf(value);
}
/**
* Expert: change the value of this field. See
* {@link #setStringValue(String)}.
*/
public void setLongValue(long value) {
if (!(fieldsData instanceof Long)) {
throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Long");
@ -308,6 +390,10 @@ public class Field implements IndexableField, StorableField {
fieldsData = Long.valueOf(value);
}
/**
* Expert: change the value of this field. See
* {@link #setStringValue(String)}.
*/
public void setFloatValue(float value) {
if (!(fieldsData instanceof Float)) {
throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Float");
@ -315,6 +401,10 @@ public class Field implements IndexableField, StorableField {
fieldsData = Float.valueOf(value);
}
/**
* Expert: change the value of this field. See
* {@link #setStringValue(String)}.
*/
public void setDoubleValue(double value) {
if (!(fieldsData instanceof Double)) {
throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Double");
@ -341,23 +431,21 @@ public class Field implements IndexableField, StorableField {
return name;
}
/**
* {@inheritDoc}
* <p>
* The default value is <code>1.0f</code> (no boost).
* @see #setBoost(float)
*/
public float boost() {
return boost;
}
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is used to compute the norm factor for the field. By
* default, in the {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState, Norm)} method,
* the boost value is multiplied by the length normalization factor and then
* rounded by {@link org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState, Norm)
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
/**
* Sets the boost factor on this field.
* @throws IllegalArgumentException if this field is not indexed,
* or if it omits norms.
* @see #boost()
*/
public void setBoost(float boost) {
if (boost != 1.0f) {
@ -406,9 +494,6 @@ public class Field implements IndexableField, StorableField {
return type;
}
/**
* {@inheritDoc}
*/
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
if (!fieldType().indexed()) {
return null;
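The "Expert: change the value" setters above exist mainly so a single Field instance can be reused across documents during bulk indexing. A minimal sketch of that pattern, assuming the usual IndexWriter loop:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexWriter;

class ReuseFieldExample {
  static void indexCounts(IndexWriter writer, int[] counts) throws IOException {
    Document doc = new Document();
    IntField countField = new IntField("count", 0, Field.Store.YES);
    doc.add(countField);
    for (int c : counts) {
      // Reuse the same Field and Document instances; only the value changes.
      countField.setIntValue(c);
      writer.addDocument(doc);
    }
  }
}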

View File

@ -17,6 +17,7 @@ package org.apache.lucene.document;
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexableFieldType;
@ -31,7 +32,16 @@ public class FieldType implements IndexableFieldType {
/** Data type of the numeric value
* @since 3.2
*/
public static enum NumericType {INT, LONG, FLOAT, DOUBLE}
public static enum NumericType {
/** 32-bit integer numeric type */
INT,
/** 64-bit long numeric type */
LONG,
/** 32-bit float numeric type */
FLOAT,
/** 64-bit double numeric type */
DOUBLE
}
private boolean indexed;
private boolean stored;
@ -47,6 +57,9 @@ public class FieldType implements IndexableFieldType {
private int numericPrecisionStep = NumericUtils.PRECISION_STEP_DEFAULT;
private DocValues.Type docValueType;
/**
* Create a new mutable FieldType with all of the properties from <code>ref</code>
*/
public FieldType(FieldType ref) {
this.indexed = ref.indexed();
this.stored = ref.stored();
@ -62,6 +75,9 @@ public class FieldType implements IndexableFieldType {
// Do not copy frozen!
}
/**
* Create a new FieldType with default properties.
*/
public FieldType() {
}
@ -80,100 +96,241 @@ public class FieldType implements IndexableFieldType {
this.frozen = true;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setIndexed(boolean)
*/
public boolean indexed() {
return this.indexed;
}
/**
* Set to <code>true</code> to index (invert) this field.
* @param value true if this field should be indexed.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #indexed()
*/
public void setIndexed(boolean value) {
checkIfFrozen();
this.indexed = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setStored(boolean)
*/
public boolean stored() {
return this.stored;
}
/**
* Set to <code>true</code> to store this field.
* @param value true if this field should be stored.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #stored()
*/
public void setStored(boolean value) {
checkIfFrozen();
this.stored = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>true</code>.
* @see #setTokenized(boolean)
*/
public boolean tokenized() {
return this.tokenized;
}
/**
* Set to <code>true</code> to tokenize this field's contents via the
* configured {@link Analyzer}.
* @param value true if this field should be tokenized.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #tokenized()
*/
public void setTokenized(boolean value) {
checkIfFrozen();
this.tokenized = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setStoreTermVectors(boolean)
*/
public boolean storeTermVectors() {
return this.storeTermVectors;
}
/**
* Set to <code>true</code> if this field's indexed form should be also stored
* into term vectors.
* @param value true if this field should store term vectors.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #storeTermVectors()
*/
public void setStoreTermVectors(boolean value) {
checkIfFrozen();
this.storeTermVectors = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setStoreTermVectorOffsets(boolean)
*/
public boolean storeTermVectorOffsets() {
return this.storeTermVectorOffsets;
}
/**
* Set to <code>true</code> to also store token character offsets into the term
* vector for this field.
* @param value true if this field should store term vector offsets.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #storeTermVectorOffsets()
*/
public void setStoreTermVectorOffsets(boolean value) {
checkIfFrozen();
this.storeTermVectorOffsets = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setStoreTermVectorPositions(boolean)
*/
public boolean storeTermVectorPositions() {
return this.storeTermVectorPositions;
}
/**
* Set to <code>true</code> to also store token positions into the term
* vector for this field.
* @param value true if this field should store term vector positions.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #storeTermVectorPositions()
*/
public void setStoreTermVectorPositions(boolean value) {
checkIfFrozen();
this.storeTermVectorPositions = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setStoreTermVectorPayloads(boolean)
*/
public boolean storeTermVectorPayloads() {
return this.storeTermVectorPayloads;
}
/**
* Set to <code>true</code> to also store token payloads into the term
* vector for this field.
* @param value true if this field should store term vector payloads.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #storeTermVectorPayloads()
*/
public void setStoreTermVectorPayloads(boolean value) {
checkIfFrozen();
this.storeTermVectorPayloads = value;
}
/**
* {@inheritDoc}
* <p>
* The default is <code>false</code>.
* @see #setOmitNorms(boolean)
*/
public boolean omitNorms() {
return this.omitNorms;
}
/**
* Set to <code>true</code> to omit normalization values for the field.
* @param value true if this field should omit norms.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #omitNorms()
*/
public void setOmitNorms(boolean value) {
checkIfFrozen();
this.omitNorms = value;
}
/**
* {@inheritDoc}
* <p>
* The default is {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS}.
* @see #setIndexOptions(FieldInfo.IndexOptions)
*/
public IndexOptions indexOptions() {
return this.indexOptions;
}
/**
* Sets the indexing options for the field.
* @param value indexing options
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #indexOptions()
*/
public void setIndexOptions(IndexOptions value) {
checkIfFrozen();
this.indexOptions = value;
}
/**
* Specifies the field's numeric type.
* @param type numeric type, or null if the field has no numeric type.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #numericType()
*/
public void setNumericType(NumericType type) {
checkIfFrozen();
numericType = type;
}
/** NumericDataType; if
* non-null then the field's value will be indexed
* numerically so that {@link NumericRangeQuery} can be
* used at search time. */
/**
* NumericType: if non-null then the field's value will be indexed
* numerically so that {@link NumericRangeQuery} can be used at
* search time.
* <p>
* The default is <code>null</code> (no numeric type)
* @see #setNumericType(NumericType)
*/
public NumericType numericType() {
return numericType;
}
/**
* Sets the numeric precision step for the field.
* @param precisionStep numeric precision step for the field
* @throws IllegalArgumentException if precisionStep is less than 1.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #numericPrecisionStep()
*/
public void setNumericPrecisionStep(int precisionStep) {
checkIfFrozen();
if (precisionStep < 1) {
@ -182,7 +339,14 @@ public class FieldType implements IndexableFieldType {
this.numericPrecisionStep = precisionStep;
}
/** Precision step for numeric field. */
/**
* Precision step for numeric field.
* <p>
* This has no effect if {@link #numericType()} returns null.
* <p>
* The default is {@link NumericUtils#PRECISION_STEP_DEFAULT}
* @see #setNumericPrecisionStep(int)
*/
public int numericPrecisionStep() {
return numericPrecisionStep;
}
@ -239,11 +403,24 @@ public class FieldType implements IndexableFieldType {
/* from StorableFieldType */
/**
* {@inheritDoc}
* <p>
* The default is <code>null</code> (no docValues)
* @see #setDocValueType(DocValues.Type)
*/
@Override
public DocValues.Type docValueType() {
return docValueType;
}
/**
* Sets the field's DocValues.Type
* @param type DocValues type, or null if no DocValues should be stored.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #docValueType()
*/
public void setDocValueType(DocValues.Type type) {
checkIfFrozen();
docValueType = type;
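Tying the setters above together, a typical hand-rolled FieldType: indexed, tokenized, stored, with term vectors, and frozen before use so later mutation attempts fail fast. The exact combination of options here is only an example.

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;

class CustomFieldTypeExample {
  static Field titleField(String value) {
    FieldType type = new FieldType();
    type.setIndexed(true);
    type.setTokenized(true);
    type.setStored(true);
    type.setStoreTermVectors(true);
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    type.freeze(); // further set* calls now throw IllegalStateException
    return new Field("title", value, type);
  }
}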

View File

@ -37,12 +37,21 @@ import org.apache.lucene.index.DocValues;
public class FloatDocValuesField extends StoredField {
/**
* Type for 32-bit float DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.FLOAT_32);
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified 32-bit float value
* @param name field name
* @param value 32-bit float value
* @throws IllegalArgumentException if the field name is null
*/
public FloatDocValuesField(String name, float value) {
super(name, TYPE);
fieldsData = Float.valueOf(value);

View File

@ -114,6 +114,10 @@ import org.apache.lucene.util.NumericUtils;
public final class FloatField extends Field {
/**
* Type for a FloatField that is not stored:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_NOT_STORED = new FieldType();
static {
TYPE_NOT_STORED.setIndexed(true);
@ -124,6 +128,10 @@ public final class FloatField extends Field {
TYPE_NOT_STORED.freeze();
}
/**
* Type for a stored FloatField:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_STORED.setIndexed(true);
@ -137,14 +145,26 @@ public final class FloatField extends Field {
/** Creates a stored or un-stored FloatField with the provided value
* and default <code>precisionStep</code> {@link
* NumericUtils#PRECISION_STEP_DEFAULT} (4). */
* NumericUtils#PRECISION_STEP_DEFAULT} (4).
* @param name field name
@param value 32-bit float value
* @param stored Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name is null.
*/
public FloatField(String name, float value, Store stored) {
super(name, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
fieldsData = Float.valueOf(value);
}
/** Expert: allows you to customize the {@link
* FieldType}. */
* FieldType}.
* @param name field name
* @param value 32-bit float value
* @param type customized field type: must have {@link FieldType#numericType()}
* of {@link FieldType.NumericType#FLOAT}.
* @throws IllegalArgumentException if the field name or type is null, or
* if the field type does not have a FLOAT numericType()
*/
public FloatField(String name, float value, FieldType type) {
super(name, type);
if (type.numericType() != FieldType.NumericType.FLOAT) {

View File

@ -37,12 +37,21 @@ import org.apache.lucene.index.DocValues;
public class IntDocValuesField extends StoredField {
/**
* Type for 32-bit integer DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.FIXED_INTS_32);
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified 32-bit integer value
* @param name field name
* @param value 32-bit integer value
* @throws IllegalArgumentException if the field name is null
*/
public IntDocValuesField(String name, int value) {
super(name, TYPE);
fieldsData = Integer.valueOf(value);

View File

@ -114,6 +114,10 @@ import org.apache.lucene.util.NumericUtils;
public final class IntField extends Field {
/**
* Type for an IntField that is not stored:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_NOT_STORED = new FieldType();
static {
TYPE_NOT_STORED.setIndexed(true);
@ -124,6 +128,10 @@ public final class IntField extends Field {
TYPE_NOT_STORED.freeze();
}
/**
* Type for a stored IntField:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_STORED.setIndexed(true);
@ -137,14 +145,26 @@ public final class IntField extends Field {
/** Creates a stored or un-stored IntField with the provided value
* and default <code>precisionStep</code> {@link
* NumericUtils#PRECISION_STEP_DEFAULT} (4). */
* NumericUtils#PRECISION_STEP_DEFAULT} (4).
* @param name field name
* @param value 32-bit integer value
* @param stored Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name is null.
*/
public IntField(String name, int value, Store stored) {
super(name, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
fieldsData = Integer.valueOf(value);
}
/** Expert: allows you to customize the {@link
* FieldType}. */
* FieldType}.
* @param name field name
* @param value 32-bit integer value
* @param type customized field type: must have {@link FieldType#numericType()}
* of {@link FieldType.NumericType#INT}.
* @throws IllegalArgumentException if the field name or type is null, or
* if the field type does not have an INT numericType()
*/
public IntField(String name, int value, FieldType type) {
super(name, type);
if (type.numericType() != FieldType.NumericType.INT) {

View File

@ -37,12 +37,21 @@ import org.apache.lucene.index.DocValues;
public class LongDocValuesField extends StoredField {
/**
* Type for 64-bit long DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.FIXED_INTS_64);
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified 64-bit long value
* @param name field name
* @param value 64-bit long value
* @throws IllegalArgumentException if the field name is null
*/
public LongDocValuesField(String name, long value) {
super(name, TYPE);
fieldsData = Long.valueOf(value);

View File

@ -124,6 +124,10 @@ import org.apache.lucene.util.NumericUtils;
public final class LongField extends Field {
/**
* Type for a LongField that is not stored:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_NOT_STORED = new FieldType();
static {
TYPE_NOT_STORED.setIndexed(true);
@ -134,6 +138,10 @@ public final class LongField extends Field {
TYPE_NOT_STORED.freeze();
}
/**
* Type for a stored LongField:
* normalization factors, frequencies, and positions are omitted.
*/
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_STORED.setIndexed(true);
@ -147,14 +155,26 @@ public final class LongField extends Field {
/** Creates a stored or un-stored LongField with the provided value
* and default <code>precisionStep</code> {@link
* NumericUtils#PRECISION_STEP_DEFAULT} (4). */
* NumericUtils#PRECISION_STEP_DEFAULT} (4).
* @param name field name
* @param value 64-bit long value
* @param stored Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name is null.
*/
public LongField(String name, long value, Store stored) {
super(name, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
fieldsData = Long.valueOf(value);
}
/** Expert: allows you to customize the {@link
* FieldType}. */
* FieldType}.
* @param name field name
* @param value 64-bit long value
* @param type customized field type: must have {@link FieldType#numericType()}
* of {@link FieldType.NumericType#LONG}.
* @throws IllegalArgumentException if the field name or type is null, or
* if the field type does not have a LONG numericType()
*/
public LongField(String name, long value, FieldType type) {
super(name, type);
if (type.numericType() != FieldType.NumericType.LONG) {

View File

@ -41,6 +41,9 @@ import org.apache.lucene.index.AtomicReader; // javadocs
public class PackedLongDocValuesField extends StoredField {
/**
* Type for packed long DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.VAR_INTS);
@ -48,6 +51,12 @@ public class PackedLongDocValuesField extends StoredField {
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified long value
* @param name field name
* @param value 64-bit long value
* @throws IllegalArgumentException if the field name is null
*/
public PackedLongDocValuesField(String name, long value) {
super(name, TYPE);
fieldsData = Long.valueOf(value);

View File

@ -38,12 +38,21 @@ import org.apache.lucene.index.DocValues;
public class ShortDocValuesField extends StoredField {
/**
* Type for 16-bit short DocValues.
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValues.Type.FIXED_INTS_16);
TYPE.freeze();
}
/**
* Creates a new DocValues field with the specified 16-bit short value
* @param name field name
* @param value 16-bit short value
* @throws IllegalArgumentException if the field name is null
*/
public ShortDocValuesField(String name, short value) {
super(name, TYPE);
fieldsData = Short.valueOf(value);

View File

@ -40,22 +40,47 @@ import org.apache.lucene.util.BytesRef;
public class SortedBytesDocValuesField extends StoredField {
// TODO: ideally indexer figures out var vs fixed on its own!?
/**
* Type for sorted bytes DocValues: all with the same length
*/
public static final FieldType TYPE_FIXED_LEN = new FieldType();
static {
TYPE_FIXED_LEN.setDocValueType(DocValues.Type.BYTES_FIXED_SORTED);
TYPE_FIXED_LEN.freeze();
}
/**
* Type for sorted bytes DocValues: can have variable lengths
*/
public static final FieldType TYPE_VAR_LEN = new FieldType();
static {
TYPE_VAR_LEN.setDocValueType(DocValues.Type.BYTES_VAR_SORTED);
TYPE_VAR_LEN.freeze();
}
/**
* Create a new variable-length sorted DocValues field.
* <p>
* This calls
* {@link SortedBytesDocValuesField#SortedBytesDocValuesField(String, BytesRef, boolean)
* SortedBytesDocValuesField(name, bytes, false)}, meaning by default
* it allows for values of different lengths. If your values are all
* the same length, use that constructor instead.
* @param name field name
* @param bytes binary content
* @throws IllegalArgumentException if the field name is null
*/
public SortedBytesDocValuesField(String name, BytesRef bytes) {
this(name, bytes, false);
}
/**
* Create a new fixed or variable length sorted DocValues field.
* @param name field name
* @param bytes binary content
* @param isFixedLength true if all values have the same length.
* @throws IllegalArgumentException if the field name is null
*/
public SortedBytesDocValuesField(String name, BytesRef bytes, boolean isFixedLength) {
super(name, isFixedLength ? TYPE_FIXED_LEN : TYPE_VAR_LEN);
fieldsData = bytes;
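A short sketch contrasting the two constructors above; the fixed-length variant only makes sense when every value for the field has exactly the same length (16-byte hashes here are just an illustrative assumption).

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.util.BytesRef;

class SortedBytesExample {
  static void addFields(Document doc, BytesRef title, BytesRef sixteenByteHash) {
    // Variable-length sorted DocValues (the default).
    doc.add(new SortedBytesDocValuesField("titleDV", title));
    // Fixed-length variant: every value added must have the same length.
    doc.add(new SortedBytesDocValuesField("hashDV", sixteenByteHash, true));
  }
}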

View File

@ -27,6 +27,9 @@ import org.apache.lucene.util.BytesRef;
* return the field and its value. */
public class StoredField extends Field {
/**
* Type for a stored-only field.
*/
public final static FieldType TYPE;
static {
TYPE = new FieldType();
@ -34,10 +37,28 @@ public class StoredField extends Field {
TYPE.freeze();
}
/**
* Create a stored-only field with the given binary value.
* <p>NOTE: the provided byte[] is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @throws IllegalArgumentException if the field name is null.
*/
protected StoredField(String name, FieldType type) {
super(name, type);
}
/**
* Expert: allows you to customize the {@link
* FieldType}.
* <p>NOTE: the provided byte[] is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param type custom {@link FieldType} for this field
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, BytesRef bytes, FieldType type) {
super(name, bytes, type);
}
@ -46,14 +67,38 @@ public class StoredField extends Field {
super(name, value, TYPE);
}
/**
* Create a stored-only field with the given binary value.
* <p>NOTE: the provided byte[] is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param offset starting position of the byte array
* @param length valid length of the byte array
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, byte[] value, int offset, int length) {
super(name, value, offset, length, TYPE);
}
/**
* Create a stored-only field with the given binary value.
* <p>NOTE: the provided BytesRef is not copied so be sure
* not to change it until you're done with this field.
* @param name field name
* @param value BytesRef pointing to binary content (not copied)
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, BytesRef value) {
super(name, value, TYPE);
}
/**
* Create a stored-only field with the given string value.
* @param name field name
* @param value string value
* @throws IllegalArgumentException if the field name or value is null.
*/
public StoredField(String name, String value) {
super(name, value, TYPE);
}
@ -63,21 +108,45 @@ public class StoredField extends Field {
}
// TODO: not great but maybe not a big problem?
/**
* Create a stored-only field with the given integer value.
* @param name field name
* @param value integer value
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, int value) {
super(name, TYPE);
fieldsData = value;
}
/**
* Create a stored-only field with the given float value.
* @param name field name
* @param value float value
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, float value) {
super(name, TYPE);
fieldsData = value;
}
/**
* Create a stored-only field with the given long value.
* @param name field name
* @param value long value
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, long value) {
super(name, TYPE);
fieldsData = value;
}
/**
* Create a stored-only field with the given double value.
* @param name field name
* @param value double value
* @throws IllegalArgumentException if the field name is null.
*/
public StoredField(String name, double value) {
super(name, TYPE);
fieldsData = value;
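Since StoredField covers several value types, here is a compact sketch adding stored-only fields of different kinds; none of these are indexed, so they are retrievable at search time but not searchable.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.util.BytesRef;

class StoredOnlyExample {
  static Document build(byte[] thumbnail) {
    Document doc = new Document();
    doc.add(new StoredField("title", "Lucene in Action"));      // String value
    doc.add(new StoredField("pageCount", 475));                  // int value
    doc.add(new StoredField("price", 39.99));                    // double value
    doc.add(new StoredField("thumb", new BytesRef(thumbnail)));  // binary value (not copied)
    return doc;
  }
}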

View File

@ -43,23 +43,49 @@ import org.apache.lucene.util.BytesRef;
public class StraightBytesDocValuesField extends StoredField {
// TODO: ideally indexer figures out var vs fixed on its own!?
/**
* Type for direct bytes DocValues: all with the same length
*/
public static final FieldType TYPE_FIXED_LEN = new FieldType();
static {
TYPE_FIXED_LEN.setDocValueType(DocValues.Type.BYTES_FIXED_STRAIGHT);
TYPE_FIXED_LEN.freeze();
}
/**
* Type for direct bytes DocValues: can have variable lengths
*/
public static final FieldType TYPE_VAR_LEN = new FieldType();
static {
TYPE_VAR_LEN.setDocValueType(DocValues.Type.BYTES_VAR_STRAIGHT);
TYPE_VAR_LEN.freeze();
}
/**
* Create a new variable-length direct DocValues field.
* <p>
* This calls
* {@link StraightBytesDocValuesField#StraightBytesDocValuesField(String, BytesRef, boolean)
* StraightBytesDocValuesField(name, bytes, false)}, meaning by default
* it allows for values of different lengths. If your values are all
* the same length, use that constructor instead.
* @param name field name
* @param bytes binary content
* @throws IllegalArgumentException if the field name is null
*/
public StraightBytesDocValuesField(String name, BytesRef bytes) {
super(name, TYPE_VAR_LEN);
fieldsData = bytes;
}
/**
* Create a new fixed or variable length direct DocValues field.
* <p>
* @param name field name
* @param bytes binary content
* @param isFixedLength true if all values have the same length.
* @throws IllegalArgumentException if the field name is null
*/
public StraightBytesDocValuesField(String name, BytesRef bytes, boolean isFixedLength) {
super(name, isFixedLength ? TYPE_FIXED_LEN : TYPE_VAR_LEN);
fieldsData = bytes;

View File

@ -50,7 +50,12 @@ public final class StringField extends Field {
TYPE_STORED.freeze();
}
/** Creates a new StringField. */
/** Creates a new StringField.
* @param name field name
* @param value String value
* @param stored Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name or value is null.
*/
public StringField(String name, String value, Store stored) {
super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
}

View File

@ -27,10 +27,10 @@ import org.apache.lucene.analysis.TokenStream;
public final class TextField extends Field {
/* Indexed, tokenized, not stored. */
/** Indexed, tokenized, not stored. */
public static final FieldType TYPE_NOT_STORED = new FieldType();
/* Indexed, tokenized, stored. */
/** Indexed, tokenized, stored. */
public static final FieldType TYPE_STORED = new FieldType();
static {
@ -46,17 +46,32 @@ public final class TextField extends Field {
// TODO: add sugar for term vectors...?
/** Creates a new un-stored TextField with Reader value. */
/** Creates a new un-stored TextField with Reader value.
* @param name field name
* @param reader reader value
* @throws IllegalArgumentException if the field name is null
* @throws NullPointerException if the reader is null
*/
public TextField(String name, Reader reader) {
super(name, reader, TYPE_NOT_STORED);
}
/** Creates a new TextField with String value. */
/** Creates a new TextField with String value.
* @param name field name
* @param value string value
* @param store Store.YES if the content should also be stored
* @throws IllegalArgumentException if the field name or value is null.
*/
public TextField(String name, String value, Store store) {
super(name, value, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
}
/** Creates a new un-stored TextField with TokenStream value. */
/** Creates a new un-stored TextField with TokenStream value.
* @param name field name
* @param stream TokenStream value
* @throws IllegalArgumentException if the field name is null.
* @throws NullPointerException if the tokenStream is null
*/
public TextField(String name, TokenStream stream) {
super(name, stream, TYPE_NOT_STORED);
}
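The practical difference between the two sugar classes above: StringField indexes the whole value as a single token (useful for identifiers and exact matching), while TextField runs the value through the Analyzer. A minimal sketch:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

class StringVsTextExample {
  static Document build(String isbn, String body) {
    Document doc = new Document();
    // Single token, not analyzed: matches only the exact value.
    doc.add(new StringField("isbn", isbn, Field.Store.YES));
    // Analyzed full text: tokenized by the IndexWriter's Analyzer.
    doc.add(new TextField("body", body, Field.Store.NO));
    return doc;
  }
}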

View File

@ -51,6 +51,7 @@ import org.apache.lucene.store.Directory;
public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader> {
public static final int DEFAULT_TERMS_INDEX_DIVISOR = 1;
/** The index directory. */
protected final Directory directory;
/** Returns a IndexReader reading the index in the given

View File

@ -31,7 +31,9 @@ import org.apache.lucene.index.DocValues.Type;
**/
public final class FieldInfo {
/** Field's name */
public final String name;
/** Internal field number */
public final int number;
private boolean indexed;
@ -55,14 +57,29 @@ public final class FieldInfo {
// NOTE: order is important here; FieldInfo uses this
// order to merge two conflicting IndexOptions (always
// "downgrades" by picking the lowest).
/** only documents are indexed: term frequencies and positions are omitted */
/**
* Only documents are indexed: term frequencies and positions are omitted.
* Phrase and other positional queries on the field will throw an exception, and scoring
* will behave as if any term in the document appears only once.
*/
// TODO: maybe rename to just DOCS?
DOCS_ONLY,
/** only documents and term frequencies are indexed: positions are omitted */
/**
* Only documents and term frequencies are indexed: positions are omitted.
* This enables normal scoring, except Phrase and other positional queries
* will throw an exception.
*/
DOCS_AND_FREQS,
/** documents, frequencies and positions */
/**
* Indexes documents, frequencies and positions.
* This is a typical default for full-text search: full scoring is enabled
* and positional queries are supported.
*/
DOCS_AND_FREQS_AND_POSITIONS,
/** documents, frequencies, positions and offsets */
/**
* Indexes documents, frequencies, positions and offsets.
* Character offsets are encoded alongside the positions.
*/
DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
};
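To act on the trade-offs spelled out above, IndexOptions is chosen per field through FieldType; DOCS_ONLY is typical for match-only flag fields, while phrase queries need the positional options. A brief sketch, assuming the FieldType API from this same patch:

import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;

class MatchOnlyFieldType {
  // Indexed, match-only field: no freqs and no positions, so phrase queries will throw.
  static final FieldType MATCH_ONLY = new FieldType();
  static {
    MATCH_ONLY.setIndexed(true);
    MATCH_ONLY.setOmitNorms(true);
    MATCH_ONLY.setIndexOptions(IndexOptions.DOCS_ONLY);
    MATCH_ONLY.freeze();
  }
}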
@ -149,27 +166,27 @@ public final class FieldInfo {
assert checkConsistency();
}
/** @return IndexOptions for the field, or null if the field is not indexed */
/** Returns IndexOptions for the field, or null if the field is not indexed */
public IndexOptions getIndexOptions() {
return indexOptions;
}
/**
* @return true if this field has any docValues.
* Returns true if this field has any docValues.
*/
public boolean hasDocValues() {
return docValueType != null;
}
/**
* @return {@link DocValues.Type} of the docValues. this may be null if the field has no docvalues.
* Returns the {@link DocValues.Type} of the docValues. This may be null if the field has no docValues.
*/
public DocValues.Type getDocValuesType() {
return docValueType;
}
/**
* @return {@link DocValues.Type} of the norm. this may be null if the field has no norms.
* Returns the {@link DocValues.Type} of the norm. This may be null if the field has no norms.
*/
public DocValues.Type getNormType() {
return normType;
@ -193,35 +210,35 @@ public final class FieldInfo {
}
/**
* @return true if norms are explicitly omitted for this field
* Returns true if norms are explicitly omitted for this field
*/
public boolean omitsNorms() {
return omitNorms;
}
/**
* @return true if this field actually has any norms.
* Returns true if this field actually has any norms.
*/
public boolean hasNorms() {
return normType != null;
}
/**
* @return true if this field is indexed.
* Returns true if this field is indexed.
*/
public boolean isIndexed() {
return indexed;
}
/**
* @return true if any payloads exist for this field.
* Returns true if any payloads exist for this field.
*/
public boolean hasPayloads() {
return storePayloads;
}
/**
* @return true if any term vectors exist for this field.
* Returns true if any term vectors exist for this field.
*/
public boolean hasVectors() {
return storeTermVector;
@ -256,7 +273,7 @@ public final class FieldInfo {
}
/**
* @return internal codec attributes map. May be null if no mappings exist.
* Returns internal codec attributes map. May be null if no mappings exist.
*/
public Map<String,String> attributes() {
return attributes;

View File

@ -44,6 +44,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
private final Collection<FieldInfo> values; // for an unmodifiable iterator
/**
* Constructs a new FieldInfos from an array of FieldInfo objects
*/
public FieldInfos(FieldInfo[] infos) {
boolean hasVectors = false;
boolean hasProx = false;
@ -98,30 +101,22 @@ public class FieldInfos implements Iterable<FieldInfo> {
return hasOffsets;
}
/**
* @return true if at least one field has any vectors
*/
/** Returns true if any fields have vectors */
public boolean hasVectors() {
return hasVectors;
}
/**
* @return true if at least one field has any norms
*/
/** Returns true if any fields have norms */
public boolean hasNorms() {
return hasNorms;
}
/**
* @return true if at least one field has doc values
*/
/** Returns true if any fields have DocValues */
public boolean hasDocValues() {
return hasDocValues;
}
/**
* @return number of fields
*/
/** Returns the number of fields */
public int size() {
assert byNumber.size() == byName.size();
return byNumber.size();

View File

@ -40,8 +40,13 @@ public class FilterAtomicReader extends AtomicReader {
/** Base class for filtering {@link Fields}
* implementations. */
public static class FilterFields extends Fields {
/** The underlying Fields instance. */
protected final Fields in;
/**
* Creates a new FilterFields.
* @param in the underlying Fields instance.
*/
public FilterFields(Fields in) {
this.in = in;
}
@ -65,8 +70,13 @@ public class FilterAtomicReader extends AtomicReader {
/** Base class for filtering {@link Terms}
* implementations. */
public static class FilterTerms extends Terms {
/** The underlying Terms instance. */
protected final Terms in;
/**
* Creates a new FilterTerms
* @param in the underlying Terms instance.
*/
public FilterTerms(Terms in) {
this.in = in;
}
@ -124,8 +134,13 @@ public class FilterAtomicReader extends AtomicReader {
/** Base class for filtering {@link TermsEnum} implementations. */
public static class FilterTermsEnum extends TermsEnum {
/** The underlying TermsEnum instance. */
protected final TermsEnum in;
/**
* Creates a new FilterTermsEnum
* @param in the underlying TermsEnum instance.
*/
public FilterTermsEnum(TermsEnum in) { this.in = in; }
@Override
@ -201,8 +216,13 @@ public class FilterAtomicReader extends AtomicReader {
/** Base class for filtering {@link DocsEnum} implementations. */
public static class FilterDocsEnum extends DocsEnum {
/** The underlying DocsEnum instance. */
protected final DocsEnum in;
/**
* Create a new FilterDocsEnum
* @param in the underlying DocsEnum instance.
*/
public FilterDocsEnum(DocsEnum in) {
this.in = in;
}
@ -235,8 +255,13 @@ public class FilterAtomicReader extends AtomicReader {
/** Base class for filtering {@link DocsAndPositionsEnum} implementations. */
public static class FilterDocsAndPositionsEnum extends DocsAndPositionsEnum {
/** The underlying DocsAndPositionsEnum instance. */
protected final DocsAndPositionsEnum in;
/**
* Create a new FilterDocsAndPositionsEnum
* @param in the underlying DocsAndPositionsEnum instance.
*/
public FilterDocsAndPositionsEnum(DocsAndPositionsEnum in) {
this.in = in;
}
@ -287,6 +312,7 @@ public class FilterAtomicReader extends AtomicReader {
}
}
/** The underlying AtomicReader. */
protected final AtomicReader in;
/**

View File

@ -48,7 +48,20 @@ public abstract class FilteredTermsEnum extends TermsEnum {
* the enum should call {@link #nextSeekTerm} and step forward.
* @see #accept(BytesRef)
*/
protected static enum AcceptStatus {YES, YES_AND_SEEK, NO, NO_AND_SEEK, END};
protected static enum AcceptStatus {
/** Accept the term and position the enum at the next term. */
YES,
/** Accept the term and advance ({@link FilteredTermsEnum#nextSeekTerm(BytesRef)})
* to the next term. */
YES_AND_SEEK,
/** Reject the term and position the enum at the next term. */
NO,
/** Reject the term and advance ({@link FilteredTermsEnum#nextSeekTerm(BytesRef)})
* to the next term. */
NO_AND_SEEK,
/** Reject the term and stop enumerating. */
END
};
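As a sketch of how AcceptStatus drives a concrete subclass, the enum below accepts only terms starting with a given prefix and returns END once the enumeration moves past them; setInitialSeekTerm and StringHelper.startsWith are assumed from the existing 4.x utility APIs rather than introduced by this patch.

import java.io.IOException;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

// Accepts only terms that start with the given prefix; stops once past them.
class PrefixOnlyTermsEnum extends FilteredTermsEnum {
  private final BytesRef prefix;

  PrefixOnlyTermsEnum(TermsEnum tenum, BytesRef prefix) {
    super(tenum);
    this.prefix = prefix;
    setInitialSeekTerm(prefix); // start the enumeration at the prefix
  }

  @Override
  protected AcceptStatus accept(BytesRef term) throws IOException {
    // Terms arrive in sorted order, so the first non-matching term ends the enumeration.
    return StringHelper.startsWith(term, prefix) ? AcceptStatus.YES : AcceptStatus.END;
  }
}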
/** Return if term is accepted, not accepted or the iteration should ended
* (and possibly seek).

View File

@ -40,6 +40,9 @@ import org.apache.lucene.codecs.Codec;
*/
public final class IndexFileNames {
/** No instance */
private IndexFileNames() {}
/** Name of the index segment file */
public static final String SEGMENTS = "segments";
@ -184,6 +187,10 @@ public final class IndexFileNames {
return filename;
}
/**
* Removes the extension (anything after the first '.');
* if the filename contains no '.', it is returned unchanged.
*/
public static String stripExtension(String filename) {
int idx = filename.indexOf('.');
if (idx != -1) {

View File

@ -243,7 +243,8 @@ public abstract class IndexReader implements Closeable {
}
/**
* @throws AlreadyClosedException if this IndexReader is closed
* Throws AlreadyClosedException if this IndexReader or any
* of its child readers is closed, otherwise returns.
*/
protected final void ensureOpen() throws AlreadyClosedException {
if (refCount.get() <= 0) {

View File

@ -549,6 +549,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
/**
* Used internally to throw an {@link
* AlreadyClosedException} if this IndexWriter has been
* closed.
* <p>
* Calls {@link #ensureOpen(boolean) ensureOpen(true)}.
* @throws AlreadyClosedException if this IndexWriter is closed
*/
protected final void ensureOpen() throws AlreadyClosedException {
ensureOpen(true);
}
@ -1030,6 +1038,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return count;
}
/**
* Returns true if this index has deletions (including buffered deletions).
*/
public synchronized boolean hasDeletions() {
ensureOpen();
if (bufferedDeletesStream.any()) {

View File

@ -22,6 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.similarities.DefaultSimilarity; // javadocs
import org.apache.lucene.search.similarities.Similarity; // javadocs
import org.apache.lucene.util.BytesRef;
// TODO: how to handle versioning here...?
@ -46,6 +48,25 @@ public interface IndexableField extends GeneralField {
*/
public TokenStream tokenStream(Analyzer analyzer) throws IOException;
/** Field boost (you must pre-multiply in any doc boost). */
/**
* Returns the field's index-time boost.
* <p>
* Only fields can have an index-time boost; if you want to simulate
* a "document boost", then you must pre-multiply it across all the
* relevant fields yourself.
* <p>The boost is used to compute the norm factor for the field. By
* default, in the {@link Similarity#computeNorm(FieldInvertState, Norm)} method,
* the boost value is multiplied by the length normalization factor and then
* rounded by {@link DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
* <p>
* It is illegal to return a boost other than 1.0f for a field that is not
* indexed ({@link IndexableFieldType#indexed()} is false) or omits normalization values
* ({@link IndexableFieldType#omitNorms()} returns true).
*
* @see Similarity#computeNorm(FieldInvertState, Norm)
* @see DefaultSimilarity#encodeNormValue(float)
*/
public float boost();
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.FieldInfo.IndexOptions;
/**
@ -31,29 +32,68 @@ public interface IndexableFieldType {
/** True if the field's value should be stored */
public boolean stored();
/** True if this field's value should be analyzed */
/**
* True if this field's value should be analyzed by the
* {@link Analyzer}.
* <p>
* This has no effect if {@link #indexed()} returns false.
*/
public boolean tokenized();
/** True if term vectors should be indexed */
/**
* True if this field's indexed form should be also stored
* into term vectors.
* <p>
* This builds a miniature inverted-index for this field which
* can be accessed in a document-oriented way from
* {@link IndexReader#getTermVector(int,String)}.
* <p>
* This option is illegal if {@link #indexed()} returns false.
*/
public boolean storeTermVectors();
/** True if term vector offsets should be indexed */
/**
* True if this field's token character offsets should also
* be stored into term vectors.
* <p>
* This option is illegal if term vectors are not enabled for the field
* ({@link #storeTermVectors()} is false)
*/
public boolean storeTermVectorOffsets();
/** True if term vector positions should be indexed */
/**
* True if this field's token positions should also be stored
* into the term vectors.
* <p>
* This option is illegal if term vectors are not enabled for the field
* ({@link #storeTermVectors()} is false).
*/
public boolean storeTermVectorPositions();
/** True if term vector payloads should be indexed */
/**
* True if this field's token payloads should also be stored
* into the term vectors.
* <p>
* This option is illegal if term vector positions are not enabled
* for the field ({@link #storeTermVectorPositions()} is false).
*/
public boolean storeTermVectorPayloads();
/** True if norms should not be indexed */
/**
* True if normalization values should be omitted for the field.
* <p>
* This saves memory, but at the expense of scoring quality (length normalization
* will be disabled), and if you omit norms, you cannot use index-time boosts.
*/
public boolean omitNorms();
/** {@link IndexOptions}, describing what should be
* recorded into the inverted index */
public IndexOptions indexOptions();
/** DocValues type; if non-null then the field's value
* will be indexed into docValues */
/**
* DocValues {@link DocValues.Type}: if non-null then the field's value
* will be indexed into docValues.
*/
public DocValues.Type docValueType();
}
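As the storeTermVectors() documentation above mentions, an enabled field's miniature inverted index can be read back per document; a sketch of that read path, assuming the Terms/TermsEnum APIs that appear elsewhere in this diff.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class TermVectorDump {
  static void dump(IndexReader reader, int docID, String field) throws IOException {
    Terms vector = reader.getTermVector(docID, field); // null if no vectors were stored
    if (vector == null) {
      return;
    }
    TermsEnum termsEnum = vector.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      System.out.println(term.utf8ToString() + " freq=" + termsEnum.totalTermFreq());
    }
  }
}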

Some files were not shown because too many files have changed in this diff.