LUCENE-3982: trunk upgrade

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1369470 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-08-04 21:26:10 +00:00
commit aed1b5d760
139 changed files with 23815 additions and 20921 deletions

View File

@ -176,22 +176,12 @@
</subant>
</target>
<target name="jar-checksums" depends="resolve" description="Recompute SHA1 checksums for all JAR files.">
<delete>
<fileset dir="${basedir}">
<include name="**/*.jar.sha1"/>
</fileset>
</delete>
<checksum algorithm="SHA1" fileext=".sha1">
<fileset dir="${basedir}">
<include name="**/*.jar"/>
</fileset>
</checksum>
<fixcrlf
srcdir="${basedir}"
includes="**/*.jar.sha1"
eol="lf" fixlast="true" encoding="US-ASCII" />
<target name="jar-checksums" description="Recompute SHA1 checksums for all JAR files.">
<sequential>
<subant target="jar-checksums" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant>
</sequential>
</target>
</project>

View File

@ -15,30 +15,30 @@
<classpathentry kind="src" path="lucene/sandbox/src/java"/>
<classpathentry kind="src" path="lucene/sandbox/src/test"/>
<classpathentry kind="src" path="lucene/test-framework/src/java"/>
<classpathentry kind="src" output="bin.tests-framework" path="lucene/test-framework/src/resources"/>
<classpathentry kind="src" output="bin/tests-framework" path="lucene/test-framework/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/common/src/java"/>
<classpathentry kind="src" output="bin.analysis-common" path="lucene/analysis/common/src/resources"/>
<classpathentry kind="src" output="bin/analysis-common" path="lucene/analysis/common/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/common/src/test"/>
<classpathentry kind="src" path="lucene/analysis/icu/src/java"/>
<classpathentry kind="src" output="bin.analysis-icu" path="lucene/analysis/icu/src/resources"/>
<classpathentry kind="src" output="bin/analysis-icu" path="lucene/analysis/icu/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/icu/src/test"/>
<classpathentry kind="src" path="lucene/analysis/kuromoji/src/java"/>
<classpathentry kind="src" output="bin.analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
<classpathentry kind="src" output="bin/analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/kuromoji/src/test"/>
<classpathentry kind="src" path="lucene/analysis/phonetic/src/java"/>
<classpathentry kind="src" output="bin.analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
<classpathentry kind="src" output="bin/analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/phonetic/src/test"/>
<classpathentry kind="src" path="lucene/analysis/smartcn/src/java"/>
<classpathentry kind="src" output="bin.analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
<classpathentry kind="src" output="bin/analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/smartcn/src/test"/>
<classpathentry kind="src" path="lucene/analysis/stempel/src/java"/>
<classpathentry kind="src" output="bin.analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
<classpathentry kind="src" output="bin/analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/stempel/src/test"/>
<classpathentry kind="src" path="lucene/analysis/morfologik/src/java"/>
<classpathentry kind="src" output="bin.analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
<classpathentry kind="src" output="bin/analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/morfologik/src/test"/>
<classpathentry kind="src" path="lucene/analysis/uima/src/java"/>
<classpathentry kind="src" output="bin.analysis-uima" path="lucene/analysis/uima/src/resources"/>
<classpathentry kind="src" output="bin/analysis-uima" path="lucene/analysis/uima/src/resources"/>
<classpathentry kind="src" path="lucene/analysis/uima/src/test"/>
<classpathentry kind="src" path="lucene/benchmark/src/java"/>
<classpathentry kind="src" path="lucene/benchmark/src/test"/>
@ -120,7 +120,7 @@
<classpathentry kind="lib" path="solr/lib/slf4j-api-1.6.4.jar"/>
<classpathentry kind="lib" path="solr/lib/slf4j-jdk14-1.6.4.jar"/>
<classpathentry kind="lib" path="solr/lib/wstx-asl-3.2.7.jar"/>
<classpathentry kind="lib" path="solr/lib/zookeeper-3.3.5.jar"/>
<classpathentry kind="lib" path="solr/lib/zookeeper-3.3.6.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-continuation-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-deploy-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-http-8.1.2.v20120308.jar"/>
@ -175,5 +175,5 @@
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
<classpathentry kind="output" path="bin"/>
<classpathentry kind="output" path="bin/other"/>
</classpath>

View File

@ -298,7 +298,7 @@
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.3.5</version>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.carrot2</groupId>

View File

@ -20,12 +20,12 @@ import subprocess
import signal
import shutil
import hashlib
import httplib
import http.client
import re
import urllib2
import urlparse
import urllib.request, urllib.error, urllib.parse
import urllib.parse
import sys
import HTMLParser
import html.parser
from collections import defaultdict
import xml.etree.ElementTree as ET
import filecmp
@ -38,9 +38,9 @@ import checkJavadocLinks
# tested on Linux and on Cygwin under Windows 7.
def unshortenURL(url):
parsed = urlparse.urlparse(url)
parsed = urllib.parse.urlparse(url)
if parsed[0] in ('http', 'https'):
h = httplib.HTTPConnection(parsed.netloc)
h = http.client.HTTPConnection(parsed.netloc)
h.request('HEAD', parsed.path)
response = h.getresponse()
if response.status/100 == 3 and response.getheader('Location'):
@ -101,8 +101,8 @@ def getHREFs(urlString):
# Deref any redirects
while True:
url = urlparse.urlparse(urlString)
h = httplib.HTTPConnection(url.netloc)
url = urllib.parse.urlparse(urlString)
h = http.client.HTTPConnection(url.netloc)
h.request('GET', url.path)
r = h.getresponse()
newLoc = r.getheader('location')
@ -112,8 +112,8 @@ def getHREFs(urlString):
break
links = []
for subUrl, text in reHREF.findall(urllib2.urlopen(urlString).read()):
fullURL = urlparse.urljoin(urlString, subUrl)
for subUrl, text in reHREF.findall(urllib.request.urlopen(urlString).read().decode('UTF-8')):
fullURL = urllib.parse.urljoin(urlString, subUrl)
links.append((text, fullURL))
return links
@ -121,15 +121,15 @@ def download(name, urlString, tmpDir, quiet=False):
fileName = '%s/%s' % (tmpDir, name)
if DEBUG and os.path.exists(fileName):
if not quiet and fileName.find('.asc') == -1:
print ' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
return
fIn = urllib2.urlopen(urlString)
fIn = urllib.request.urlopen(urlString)
fOut = open(fileName, 'wb')
success = False
try:
while True:
s = fIn.read(65536)
if s == '':
if s == b'':
break
fOut.write(s)
fOut.close()
@ -141,14 +141,14 @@ def download(name, urlString, tmpDir, quiet=False):
if not success:
os.remove(fileName)
if not quiet and fileName.find('.asc') == -1:
print ' %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
def load(urlString):
return urllib2.urlopen(urlString).read()
return urllib.request.urlopen(urlString).read().decode('utf-8')
def checkSigs(project, urlString, version, tmpDir, isSigned):
print ' test basics...'
print(' test basics...')
ents = getDirEntries(urlString)
artifact = None
keysURL = None
@ -210,7 +210,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
if keysURL is None:
raise RuntimeError('%s is missing KEYS' % project)
print ' get KEYS'
print(' get KEYS')
download('%s.KEYS' % project, keysURL, tmpDir)
keysFile = '%s/%s.KEYS' % (tmpDir, project)
@ -219,7 +219,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
os.makedirs(gpgHomeDir, 0700)
os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
@ -232,12 +232,12 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
testChanges(project, version, changesURL)
for artifact, urlString in artifacts:
print ' download %s...' % artifact
print(' download %s...' % artifact)
download(artifact, urlString, tmpDir)
verifyDigests(artifact, urlString, tmpDir)
if isSigned:
print ' verify sig'
print(' verify sig')
# Test sig (this is done with a clean brand-new GPG world)
download(artifact + '.asc', urlString + '.asc', tmpDir)
sigFile = '%s/%s.asc' % (tmpDir, artifact)
@ -246,28 +246,28 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since its a clean world)
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
run('gpg --import %s' % (keysFile),
'%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
print ' verify trust'
print(' verify trust')
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
def testChanges(project, version, changesURLString):
print ' check changes HTML...'
print(' check changes HTML...')
changesURL = None
for text, subURL in getDirEntries(changesURLString):
if text == 'Changes.html':
@ -287,7 +287,7 @@ def testChangesText(dir, version, project):
if 'CHANGES.txt' in files:
fullPath = '%s/CHANGES.txt' % root
#print 'CHECK %s' % fullPath
checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
def checkChangesContent(s, version, name, project, isHTML):
@ -336,7 +336,7 @@ def run(command, logFile):
raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath))
def verifyDigests(artifact, urlString, tmpDir):
print ' verify md5/sha1 digests'
print(' verify md5/sha1 digests')
md5Expected, t = load(urlString + '.md5').strip().split()
if t != '*'+artifact:
raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))
@ -347,10 +347,10 @@ def verifyDigests(artifact, urlString, tmpDir):
m = hashlib.md5()
s = hashlib.sha1()
f = open('%s/%s' % (tmpDir, artifact))
f = open('%s/%s' % (tmpDir, artifact), 'rb')
while True:
x = f.read(65536)
if x == '':
if len(x) == 0:
break
m.update(x)
s.update(x)
@ -388,7 +388,7 @@ def unpack(project, tmpDir, artifact, version):
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
print ' unpack %s...' % artifact
print(' unpack %s...' % artifact)
unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'):
run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
@ -437,12 +437,14 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
if project == 'lucene':
# TODO: clean this up to not be a list of modules that we must maintain
extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework')
extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
if isSrc:
extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
else:
extras = ()
# TODO: if solr, verify lucene/licenses, solr/licenses are present
for e in extras:
if e not in l:
raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
@ -453,81 +455,81 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
if isSrc:
print ' make sure no JARs/WARs in src dist...'
print(' make sure no JARs/WARs in src dist...')
lines = os.popen('find . -name \\*.jar').readlines()
if len(lines) != 0:
print ' FAILED:'
print(' FAILED:')
for line in lines:
print ' %s' % line.strip()
print(' %s' % line.strip())
raise RuntimeError('source release has JARs...')
lines = os.popen('find . -name \\*.war').readlines()
if len(lines) != 0:
print ' FAILED:'
print(' FAILED:')
for line in lines:
print ' %s' % line.strip()
print(' %s' % line.strip())
raise RuntimeError('source release has WARs...')
print ' run "ant validate"'
print(' run "ant validate"')
run('%s; ant validate' % javaExe('1.7'), '%s/validate.log' % unpackPath)
if project == 'lucene':
print ' run tests w/ Java 6...'
print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath)
testDemo(isSrc, version)
# test javadocs
print ' generate javadocs w/ Java 6...'
print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
else:
print ' run tests w/ Java 6...'
print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
# test javadocs
print ' generate javadocs w/ Java 6...'
print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
print ' run tests w/ Java 7...'
print(' run tests w/ Java 7...')
run('%s; ant test' % javaExe('1.7'), '%s/test.log' % unpackPath)
# test javadocs
print ' generate javadocs w/ Java 7...'
print(' generate javadocs w/ Java 7...')
run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
os.chdir('solr')
print ' test solr example w/ Java 6...'
print(' test solr example w/ Java 6...')
run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA6_HOME, True)
print ' test solr example w/ Java 7...'
print(' test solr example w/ Java 7...')
run('%s; ant clean example' % javaExe('1.7'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA7_HOME, True)
os.chdir('..')
print ' check NOTICE'
print(' check NOTICE')
testNotice(unpackPath)
else:
if project == 'lucene':
testDemo(isSrc, version)
else:
print ' test solr example w/ Java 6...'
print(' test solr example w/ Java 6...')
testSolrExample(unpackPath, JAVA6_HOME, False)
print ' test solr example w/ Java 7...'
print(' test solr example w/ Java 7...')
testSolrExample(unpackPath, JAVA7_HOME, False)
testChangesText('.', version, project)
if project == 'lucene' and not isSrc:
print ' check Lucene\'s javadoc JAR'
print(' check Lucene\'s javadoc JAR')
checkJavadocpath('%s/docs' % unpackPath)
def testNotice(unpackPath):
solrNotice = open('%s/NOTICE.txt' % unpackPath).read()
luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath).read()
solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
expected = """
=========================================================================
@ -545,12 +547,12 @@ def readSolrOutput(p, startupEvent, logFile):
try:
while True:
line = p.readline()
if line == '':
if len(line) == 0:
break
f.write(line)
f.flush()
# print 'SOLR: %s' % line.strip()
if line.find('Started SocketConnector@0.0.0.0:8983') != -1:
if line.decode('UTF-8').find('Started SocketConnector@0.0.0.0:8983') != -1:
startupEvent.set()
finally:
f.close()
@ -558,7 +560,7 @@ def readSolrOutput(p, startupEvent, logFile):
def testSolrExample(unpackPath, javaPath, isSrc):
logFile = '%s/solr-example.log' % unpackPath
os.chdir('example')
print ' start Solr instance (log=%s)...' % logFile
print(' start Solr instance (log=%s)...' % logFile)
env = {}
env.update(os.environ)
env['JAVA_HOME'] = javaPath
@ -572,21 +574,21 @@ def testSolrExample(unpackPath, javaPath, isSrc):
# Make sure Solr finishes startup:
startupEvent.wait()
print ' startup done'
print(' startup done')
try:
print ' test utf8...'
print(' test utf8...')
run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
print ' index example docs...'
print(' index example docs...')
run('sh ./exampledocs/post.sh ./exampledocs/*.xml', 'post-example-docs.log')
print ' run query...'
s = urllib2.urlopen('http://localhost:8983/solr/select/?q=video').read()
print(' run query...')
s = urllib.request.urlopen('http://localhost:8983/solr/select/?q=video').read().decode('UTF-8')
if s.find('<result name="response" numFound="3" start="0">') == -1:
print 'FAILED: response is:\n%s' % s
print('FAILED: response is:\n%s' % s)
raise RuntimeError('query on solr example instance failed')
finally:
# Stop server:
print ' stop server (SIGINT)...'
print(' stop server (SIGINT)...')
os.kill(server.pid, signal.SIGINT)
# Give it 10 seconds to gracefully shut down
@ -594,14 +596,14 @@ def testSolrExample(unpackPath, javaPath, isSrc):
if serverThread.isAlive():
# Kill server:
print '***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...'
print('***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...')
os.kill(server.pid, signal.SIGKILL)
serverThread.join(10.0)
if serverThread.isAlive():
# Shouldn't happen unless something is seriously wrong...
print '***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...'
print('***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...')
os.chdir('..')
@ -615,13 +617,13 @@ def checkJavadocpath(path):
if checkJavaDocs.checkPackageSummaries(path):
# disabled: RM cannot fix all this, see LUCENE-3887
# raise RuntimeError('javadoc problems')
print '\n***WARNING***: javadocs want to fail!\n'
print('\n***WARNING***: javadocs want to fail!\n')
if checkJavadocLinks.checkAll(path):
raise RuntimeError('broken javadocs links found!')
def testDemo(isSrc, version):
print ' test demo...'
print(' test demo...')
sep = ';' if cygwin else ':'
if isSrc:
cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep)
@ -632,14 +634,14 @@ def testDemo(isSrc, version):
run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log')
run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log')
reMatchingDocs = re.compile('(\d+) total matching documents')
m = reMatchingDocs.search(open('search.log', 'rb').read())
m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
if m is None:
raise RuntimeError('lucene demo\'s SearchFiles found no results')
else:
numHits = int(m.group(1))
if numHits < 100:
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
print ' got %d hits for query "lucene"' % numHits
print(' got %d hits for query "lucene"' % numHits)
def checkMaven(baseURL, tmpDir, version, isSigned):
# Locate the release branch in subversion
@ -652,11 +654,11 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
if text == releaseBranchText:
releaseBranchSvnURL = subURL
print ' get POM templates',
print(' get POM templates', end=' ')
POMtemplates = defaultdict()
getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
print
print ' download artifacts',
print()
print(' download artifacts', end=' ')
artifacts = {'lucene': [], 'solr': []}
for project in ('lucene', 'solr'):
artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
@ -664,30 +666,30 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
if not os.path.exists(targetDir):
os.makedirs(targetDir)
crawl(artifacts[project], artifactsURL, targetDir)
print
print ' verify that each binary artifact has a deployed POM...'
print()
print(' verify that each binary artifact has a deployed POM...')
verifyPOMperBinaryArtifact(artifacts, version)
print ' verify that there is an artifact for each POM template...'
print(' verify that there is an artifact for each POM template...')
verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
print " verify Maven artifacts' md5/sha1 digests..."
print(" verify Maven artifacts' md5/sha1 digests...")
verifyMavenDigests(artifacts)
print ' verify that all non-Mavenized deps are deployed...'
print(' verify that all non-Mavenized deps are deployed...')
nonMavenizedDeps = dict()
checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
version, releaseBranchSvnURL)
print ' check for javadoc and sources artifacts...'
print(' check for javadoc and sources artifacts...')
checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
print " verify deployed POMs' coordinates..."
print(" verify deployed POMs' coordinates...")
verifyDeployedPOMsCoordinates(artifacts, version)
if isSigned:
print ' verify maven artifact sigs',
print(' verify maven artifact sigs', end=' ')
verifyMavenSigs(baseURL, tmpDir, artifacts)
distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
print ' verify that non-Mavenized deps are same as in the binary distribution...'
print(' verify that non-Mavenized deps are same as in the binary distribution...')
checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
print ' verify that Maven artifacts are same as in the binary distribution...'
print(' verify that Maven artifacts are same as in the binary distribution...')
checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
def getDistributionsForMavenChecks(tmpDir, version, baseURL):
@ -697,19 +699,19 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
print ' download %s...' % distribution,
print(' download %s...' % distribution, end=' ')
download(distribution, distURL, tmpDir)
destDir = '%s/unpack-%s-maven' % (tmpDir, project)
if os.path.exists(destDir):
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
print ' unpack %s...' % distribution
print(' unpack %s...' % distribution)
unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
if project == 'solr': # unpack the Solr war
unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
print ' unpack Solr war...'
print(' unpack Solr war...')
run('jar xvf */dist/*.war', unpackLogFile)
distributionFiles[project] = []
for root, dirs, files in os.walk(destDir):
@ -719,7 +721,7 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
for project in ('lucene', 'solr'):
for artifact in artifacts[project]:
if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
javadocJar = artifact[:-4] + '-javadoc.jar'
if javadocJar not in artifacts[project]:
raise RuntimeError('missing: %s' % javadocJar)
@ -732,7 +734,7 @@ def checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps):
distFilenames = dict()
for file in distributionFiles[project]:
distFilenames[os.path.basename(file)] = file
for dep in nonMavenizedDeps.keys():
for dep in list(nonMavenizedDeps.keys()):
if ('/%s/' % project) in dep:
depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
if not depOrigFilename in distFilenames:
@ -753,9 +755,9 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
if artifact not in nonMavenizedDeps.keys():
if artifact not in list(nonMavenizedDeps.keys()):
artifactFilename = os.path.basename(artifact)
if artifactFilename not in distFilenames.keys():
if artifactFilename not in list(distFilenames.keys()):
raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
% (artifact, project))
# TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
@ -772,16 +774,17 @@ def verifyMavenDigests(artifacts):
raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
if artifactFile + '.sha1' not in artifacts[project]:
raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
with open(artifactFile + '.md5', 'r') as md5File:
with open(artifactFile + '.md5', encoding='UTF-8') as md5File:
md5Expected = md5File.read().strip()
with open(artifactFile + '.sha1', 'r') as sha1File:
with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File:
sha1Expected = sha1File.read().strip()
md5 = hashlib.md5()
sha1 = hashlib.sha1()
inputFile = open(artifactFile)
inputFile = open(artifactFile, 'rb')
while True:
bytes = inputFile.read(65536)
if bytes == '': break
if len(bytes) == 0:
break
md5.update(bytes)
sha1.update(bytes)
inputFile.close()
@ -846,7 +849,7 @@ def checkNonMavenizedDeps(nonMavenizedDependencies, POMtemplates, artifacts,
if releaseBranchSvnURL is None:
pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
if os.path.exists(pomPath):
doc2 = ET.XML(open(pomPath).read())
doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
break
else:
entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
@ -891,7 +894,7 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
os.makedirs(gpgHomeDir, 0700)
os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log' % (tmpDir, project))
@ -904,12 +907,12 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since its a clean world)
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
@ -918,16 +921,16 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
f = open(logFile, 'rb')
f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
print ' GPG: %s' % line.strip()
print(' GPG: %s' % line.strip())
f.close()
sys.stdout.write('.')
print
print()
def verifyPOMperBinaryArtifact(artifacts, version):
"""verify that each binary jar and war has a corresponding POM file"""
@ -1024,9 +1027,9 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
def main():
if len(sys.argv) != 4:
print
print 'Usage python -u %s BaseURL version tmpDir' % sys.argv[0]
print
print()
print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
print()
sys.exit(1)
baseURL = sys.argv[1]
@ -1046,11 +1049,11 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
lucenePath = None
solrPath = None
print
print 'Load release URL "%s"...' % baseURL
print()
print('Load release URL "%s"...' % baseURL)
newBaseURL = unshortenURL(baseURL)
if newBaseURL != baseURL:
print ' unshortened: %s' % newBaseURL
print(' unshortened: %s' % newBaseURL)
baseURL = newBaseURL
for text, subURL in getDirEntries(baseURL):
@ -1064,23 +1067,27 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
if solrPath is None:
raise RuntimeError('could not find solr subdir')
print
print 'Test Lucene...'
print()
print('Test Lucene...')
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
unpack('lucene', tmpDir, artifact, version)
unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
print
print 'Test Solr...'
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
unpack('solr', tmpDir, artifact, version)
unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
print 'Test Maven artifacts for Lucene and Solr...'
print('Test Maven artifacts for Lucene and Solr...')
checkMaven(baseURL, tmpDir, version, isSigned)
if __name__ == '__main__':
main()
try:
main()
except:
import traceback
traceback.print_exc()

View File

@ -15,6 +15,9 @@ New features
underlying PayloadFunction's explanation as the explanation
for the payload score. (Scott Smerchek via Robert Muir)
* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
such as primary keys (Mark Harwood, Mike McCandless)
* LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
iteration marks. (Robert Muir, Christian Moen)
@ -40,6 +43,11 @@ New features
implementations to optimize the enum implementation. (Robert Muir,
Mike McCandless)
* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
int docID), to attempt deletion by docID as long as the provided
reader is an NRT reader, and the segment has not yet been merged
away (Mike McCandless).
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@ -87,6 +95,10 @@ API Changes
instead of the previous boolean needsFlags; consistent with the changes
for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
as the Store parameter didn't make sense: if you supplied Store.YES, you would only
receive an exception anyway. (Robert Muir)
Optimizations
@ -99,6 +111,10 @@ Optimizations
* LUCENE-4235: Remove enforcing of Filter rewrite for NRQ queries.
(Uwe Schindler)
* LUCENE-4279: Regenerated snowball Stemmers from snowball r554,
making them substantially more lightweight. Behavior is unchanged.
(Robert Muir)
Bug Fixes
* LUCENE-4109: BooleanQueries are not parsed correctly with the
@ -140,11 +156,21 @@ Bug Fixes
IndexWriter to only delete files matching this pattern from an index
directory, to reduce risk when the wrong index path is accidentally
passed to IndexWriter (Robert Muir, Mike McCandless)
* LUCENE-4277: Fix IndexWriter deadlock during rollback if flushable DWPT
instances are already checked out and queued up but not yet flushed.
(Simon Willnauer)
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
(Karsten Rauch via Robert Muir)
* LUCENE-3616: Field throws exception if you try to set a boost on an
unindexed field or one that omits norms. (Robert Muir)
Build
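
Editor's note: the LUCENE-4203 and LUCENE-3616 entries above describe API changes that are easiest to see in code. The sketch below is illustrative only and is not part of this commit; it is written against the Lucene 4.0 trunk APIs of this period (AtomicReader, RAMDirectory, DirectoryReader.open(IndexWriter, boolean), Version.LUCENE_40), and the boolean return of tryDeleteDocument plus the surrounding setup are assumptions rather than text from the diff. It indexes one document using the reduced two-argument TextField(String, Reader) constructor and then attempts a by-docID delete through an NRT reader.

import java.io.StringReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class TryDeleteDocumentSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));

    Document doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    // LUCENE-3616: Reader-based TextField no longer takes a Store argument,
    // since a Reader-valued field can never be stored anyway.
    doc.add(new TextField("body", new StringReader("lucene trunk upgrade")));
    writer.addDocument(doc);

    // LUCENE-4203: delete by segment-local docID. This only succeeds while the
    // reader is an NRT reader and its segment has not been merged away;
    // otherwise it reports failure instead of throwing.
    DirectoryReader reader = DirectoryReader.open(writer, true);
    AtomicReader leaf = reader.leaves().get(0).reader();
    boolean deleted = writer.tryDeleteDocument(leaf, 0);
    System.out.println("tryDeleteDocument succeeded: " + deleted);

    reader.close();
    writer.close();
    dir.close();
  }
}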

View File

@ -1,423 +1,439 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class DanishStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "hed", -1, 1, "", this),
new Among ( "ethed", 0, 1, "", this),
new Among ( "ered", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "erede", 3, 1, "", this),
new Among ( "ende", 3, 1, "", this),
new Among ( "erende", 5, 1, "", this),
new Among ( "ene", 3, 1, "", this),
new Among ( "erne", 3, 1, "", this),
new Among ( "ere", 3, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heden", 10, 1, "", this),
new Among ( "eren", 10, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heder", 13, 1, "", this),
new Among ( "erer", 13, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "heds", 16, 1, "", this),
new Among ( "es", 16, 1, "", this),
new Among ( "endes", 18, 1, "", this),
new Among ( "erendes", 19, 1, "", this),
new Among ( "enes", 18, 1, "", this),
new Among ( "ernes", 18, 1, "", this),
new Among ( "eres", 18, 1, "", this),
new Among ( "ens", 16, 1, "", this),
new Among ( "hedens", 24, 1, "", this),
new Among ( "erens", 24, 1, "", this),
new Among ( "ers", 16, 1, "", this),
new Among ( "ets", 16, 1, "", this),
new Among ( "erets", 28, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "eret", 30, 1, "", this)
};
private static final long serialVersionUID = 1L;
private Among a_1[] = {
new Among ( "gd", -1, -1, "", this),
new Among ( "dt", -1, -1, "", this),
new Among ( "gt", -1, -1, "", this),
new Among ( "kt", -1, -1, "", this)
};
private final static DanishStemmer methodObject = new DanishStemmer ();
private Among a_2[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lig", 0, 1, "", this),
new Among ( "elig", 1, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "l\u00F8st", -1, 2, "", this)
};
private final static Among a_0[] = {
new Among ( "hed", -1, 1, "", methodObject ),
new Among ( "ethed", 0, 1, "", methodObject ),
new Among ( "ered", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "erede", 3, 1, "", methodObject ),
new Among ( "ende", 3, 1, "", methodObject ),
new Among ( "erende", 5, 1, "", methodObject ),
new Among ( "ene", 3, 1, "", methodObject ),
new Among ( "erne", 3, 1, "", methodObject ),
new Among ( "ere", 3, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heden", 10, 1, "", methodObject ),
new Among ( "eren", 10, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heder", 13, 1, "", methodObject ),
new Among ( "erer", 13, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "heds", 16, 1, "", methodObject ),
new Among ( "es", 16, 1, "", methodObject ),
new Among ( "endes", 18, 1, "", methodObject ),
new Among ( "erendes", 19, 1, "", methodObject ),
new Among ( "enes", 18, 1, "", methodObject ),
new Among ( "ernes", 18, 1, "", methodObject ),
new Among ( "eres", 18, 1, "", methodObject ),
new Among ( "ens", 16, 1, "", methodObject ),
new Among ( "hedens", 24, 1, "", methodObject ),
new Among ( "erens", 24, 1, "", methodObject ),
new Among ( "ers", 16, 1, "", methodObject ),
new Among ( "ets", 16, 1, "", methodObject ),
new Among ( "erets", 28, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "eret", 30, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private final static Among a_1[] = {
new Among ( "gd", -1, -1, "", methodObject ),
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "gt", -1, -1, "", methodObject ),
new Among ( "kt", -1, -1, "", methodObject )
};
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private final static Among a_2[] = {
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "lig", 0, 1, "", methodObject ),
new Among ( "elig", 1, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "l\u00F8st", -1, 2, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private int I_x;
private int I_p1;
private StringBuilder S_ch = new StringBuilder();
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}
private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}
private boolean r_mark_regions() {
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
break lab1;
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
break lab3;
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}
private boolean r_consonant_pair() {
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 56
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 62
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 62
bra = cursor;
// delete, line 62
slice_del();
return true;
}
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 56
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 62
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 62
bra = cursor;
// delete, line 62
slice_del();
return true;
}
private boolean r_other_suffix() {
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 65
// do, line 66
v_1 = limit - cursor;
lab0: do {
// (, line 66
// [, line 66
ket = cursor;
// literal, line 66
if (!(eq_s_b(2, "st")))
{
break lab0;
}
// ], line 66
bra = cursor;
// literal, line 66
if (!(eq_s_b(2, "ig")))
{
break lab0;
}
// delete, line 66
slice_del();
} while (false);
cursor = limit - v_1;
// setlimit, line 67
v_2 = limit - cursor;
// tomark, line 67
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 67
// [, line 67
ket = cursor;
// substring, line 67
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_3;
return false;
}
// ], line 67
bra = cursor;
limit_backward = v_3;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 70
// delete, line 70
slice_del();
// do, line 70
v_4 = limit - cursor;
lab1: do {
// call consonant_pair, line 70
if (!r_consonant_pair())
// (, line 65
// do, line 66
v_1 = limit - cursor;
lab0: do {
// (, line 66
// [, line 66
ket = cursor;
// literal, line 66
if (!(eq_s_b(2, "st")))
{
break lab1;
break lab0;
}
// ], line 66
bra = cursor;
// literal, line 66
if (!(eq_s_b(2, "ig")))
{
break lab0;
}
// delete, line 66
slice_del();
} while (false);
cursor = limit - v_4;
break;
case 2:
// (, line 72
// <-, line 72
slice_from("l\u00F8s");
break;
}
return true;
}
cursor = limit - v_1;
// setlimit, line 67
v_2 = limit - cursor;
// tomark, line 67
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 67
// [, line 67
ket = cursor;
// substring, line 67
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_3;
return false;
}
// ], line 67
bra = cursor;
limit_backward = v_3;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 70
// delete, line 70
slice_del();
// do, line 70
v_4 = limit - cursor;
lab1: do {
// call consonant_pair, line 70
if (!r_consonant_pair())
{
break lab1;
}
} while (false);
cursor = limit - v_4;
break;
case 2:
// (, line 72
// <-, line 72
slice_from("l\u00F8s");
break;
}
return true;
}
private boolean r_undouble() {
private boolean r_undouble() {
int v_1;
int v_2;
// (, line 75
// setlimit, line 76
v_1 = limit - cursor;
// tomark, line 76
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 76
// [, line 76
ket = cursor;
if (!(out_grouping_b(g_v, 97, 248)))
{
limit_backward = v_2;
return false;
}
// ], line 76
bra = cursor;
// -> ch, line 76
S_ch = slice_to(S_ch);
limit_backward = v_2;
// name ch, line 77
if (!(eq_v_b(S_ch)))
{
return false;
}
// delete, line 78
slice_del();
return true;
}
// (, line 75
// setlimit, line 76
v_1 = limit - cursor;
// tomark, line 76
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 76
// [, line 76
ket = cursor;
if (!(out_grouping_b(g_v, 97, 248)))
{
limit_backward = v_2;
return false;
}
// ], line 76
bra = cursor;
// -> ch, line 76
S_ch = slice_to(S_ch);
limit_backward = v_2;
// name ch, line 77
if (!(eq_v_b(S_ch)))
{
return false;
}
// delete, line 78
slice_del();
return true;
}
public boolean stem() {
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
// (, line 82
// do, line 84
v_1 = cursor;
lab0: do {
// call mark_regions, line 84
if (!r_mark_regions())
{
break lab0;
// (, line 82
// do, line 84
v_1 = cursor;
lab0: do {
// call mark_regions, line 84
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 85
limit_backward = cursor; cursor = limit;
// (, line 85
// do, line 86
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 86
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 87
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 87
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 88
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 88
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
// do, line 89
v_5 = limit - cursor;
lab4: do {
// call undouble, line 89
if (!r_undouble())
{
break lab4;
}
} while (false);
cursor = limit - v_5;
cursor = limit_backward; return true;
}
} while (false);
cursor = v_1;
// backwards, line 85
limit_backward = cursor; cursor = limit;
// (, line 85
// do, line 86
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 86
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 87
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 87
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 88
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 88
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
// do, line 89
v_5 = limit - cursor;
lab4: do {
// call undouble, line 89
if (!r_undouble())
{
break lab4;
}
} while (false);
cursor = limit - v_5;
cursor = limit_backward; return true;
public boolean equals( Object o ) {
return o instanceof DanishStemmer;
}
}
public int hashCode() {
return DanishStemmer.class.getName().hashCode();
}
}
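
Editor's note: the regenerated DanishStemmer above now shares its Among tables through a single static methodObject instead of building per-instance arrays that reference this, which is visibly what the LUCENE-4279 note about lighter-weight stemmers refers to; per the CHANGES entry, behavior is unchanged. As a minimal sketch (assuming the setCurrent/stem/getCurrent methods exposed by org.tartarus.snowball.SnowballProgram), the generated class is driven like this:

import org.tartarus.snowball.ext.DanishStemmer;

public class StemOneTerm {
  public static void main(String[] args) {
    // Generated stemmers keep mutable cursor state, so use one instance per thread.
    DanishStemmer stemmer = new DanishStemmer();
    stemmer.setCurrent("hedens");   // load the term into the stemmer's buffer
    stemmer.stem();                 // run the generated algorithm in place
    System.out.println(stemmer.getCurrent());   // print the stemmed form
  }
}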

View File

@ -1,358 +1,375 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class NorwegianStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "a", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "ede", 1, 1, "", this),
new Among ( "ande", 1, 1, "", this),
new Among ( "ende", 1, 1, "", this),
new Among ( "ane", 1, 1, "", this),
new Among ( "ene", 1, 1, "", this),
new Among ( "hetene", 6, 1, "", this),
new Among ( "erte", 1, 3, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heten", 9, 1, "", this),
new Among ( "ar", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heter", 12, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "as", 14, 1, "", this),
new Among ( "es", 14, 1, "", this),
new Among ( "edes", 16, 1, "", this),
new Among ( "endes", 16, 1, "", this),
new Among ( "enes", 16, 1, "", this),
new Among ( "hetenes", 19, 1, "", this),
new Among ( "ens", 14, 1, "", this),
new Among ( "hetens", 21, 1, "", this),
new Among ( "ers", 14, 1, "", this),
new Among ( "ets", 14, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "het", 25, 1, "", this),
new Among ( "ert", -1, 3, "", this),
new Among ( "ast", -1, 1, "", this)
};
private static final long serialVersionUID = 1L;
private Among a_1[] = {
new Among ( "dt", -1, -1, "", this),
new Among ( "vt", -1, -1, "", this)
};
private final static NorwegianStemmer methodObject = new NorwegianStemmer ();
private Among a_2[] = {
new Among ( "leg", -1, 1, "", this),
new Among ( "eleg", 0, 1, "", this),
new Among ( "ig", -1, 1, "", this),
new Among ( "eig", 2, 1, "", this),
new Among ( "lig", 2, 1, "", this),
new Among ( "elig", 4, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "lov", -1, 1, "", this),
new Among ( "elov", 7, 1, "", this),
new Among ( "slov", 7, 1, "", this),
new Among ( "hetslov", 9, 1, "", this)
};
private final static Among a_0[] = {
new Among ( "a", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "ede", 1, 1, "", methodObject ),
new Among ( "ande", 1, 1, "", methodObject ),
new Among ( "ende", 1, 1, "", methodObject ),
new Among ( "ane", 1, 1, "", methodObject ),
new Among ( "ene", 1, 1, "", methodObject ),
new Among ( "hetene", 6, 1, "", methodObject ),
new Among ( "erte", 1, 3, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heten", 9, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heter", 12, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "as", 14, 1, "", methodObject ),
new Among ( "es", 14, 1, "", methodObject ),
new Among ( "edes", 16, 1, "", methodObject ),
new Among ( "endes", 16, 1, "", methodObject ),
new Among ( "enes", 16, 1, "", methodObject ),
new Among ( "hetenes", 19, 1, "", methodObject ),
new Among ( "ens", 14, 1, "", methodObject ),
new Among ( "hetens", 21, 1, "", methodObject ),
new Among ( "ers", 14, 1, "", methodObject ),
new Among ( "ets", 14, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "het", 25, 1, "", methodObject ),
new Among ( "ert", -1, 3, "", methodObject ),
new Among ( "ast", -1, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private final static Among a_1[] = {
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "vt", -1, -1, "", methodObject )
};
private static final char g_s_ending[] = {119, 125, 149, 1 };
private final static Among a_2[] = {
new Among ( "leg", -1, 1, "", methodObject ),
new Among ( "eleg", 0, 1, "", methodObject ),
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "eig", 2, 1, "", methodObject ),
new Among ( "lig", 2, 1, "", methodObject ),
new Among ( "elig", 4, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "lov", -1, 1, "", methodObject ),
new Among ( "elov", 7, 1, "", methodObject ),
new Among ( "slov", 7, 1, "", methodObject ),
new Among ( "hetslov", 9, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {119, 125, 149, 1 };
private int I_x;
private int I_p1;
private void copy_from(NorwegianStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private void copy_from(NorwegianStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_mark_regions() {
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 30
v_1 = cursor;
// (, line 30
// hop, line 30
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 30
I_x = cursor;
cursor = v_1;
// goto, line 31
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
// (, line 26
I_p1 = limit;
// test, line 30
v_1 = cursor;
// (, line 30
// hop, line 30
{
break lab1;
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 31
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
// setmark x, line 30
I_x = cursor;
cursor = v_1;
// goto, line 31
golab0: while(true)
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 31
I_p1 = cursor;
// try, line 32
lab4: do {
// (, line 32
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
// (, line 37
// setlimit, line 38
v_1 = limit - cursor;
// tomark, line 38
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 38
// [, line 38
ket = cursor;
// substring, line 38
among_var = find_among_b(a_0, 29);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 38
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
// or, line 46
lab0: do {
v_3 = limit - cursor;
v_2 = cursor;
lab1: do {
if (!(in_grouping_b(g_s_ending, 98, 122)))
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
break lab0;
cursor = v_2;
break golab0;
} while (false);
cursor = limit - v_3;
// (, line 46
// literal, line 46
if (!(eq_s_b(1, "k")))
cursor = v_2;
if (cursor >= limit)
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
cursor++;
}
// gopast, line 31
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 31
I_p1 = cursor;
// try, line 32
lab4: do {
// (, line 32
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
// delete, line 46
slice_del();
break;
case 3:
// (, line 48
// <-, line 48
slice_from("er");
break;
}
return true;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 52
// test, line 53
v_1 = limit - cursor;
// (, line 53
// setlimit, line 54
v_2 = limit - cursor;
// tomark, line 54
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 54
// [, line 54
ket = cursor;
// substring, line 54
if (find_among_b(a_1, 2) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 54
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 59
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 59
bra = cursor;
// delete, line 59
slice_del();
return true;
}
private boolean r_other_suffix() {
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 62
// setlimit, line 63
v_1 = limit - cursor;
// tomark, line 63
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 63
// [, line 63
ket = cursor;
// substring, line 63
among_var = find_among_b(a_2, 11);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 63
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 67
// delete, line 67
slice_del();
break;
}
return true;
}
int v_3;
// (, line 37
// setlimit, line 38
v_1 = limit - cursor;
// tomark, line 38
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 38
// [, line 38
ket = cursor;
// substring, line 38
among_var = find_among_b(a_0, 29);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 38
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
// or, line 46
lab0: do {
v_3 = limit - cursor;
lab1: do {
if (!(in_grouping_b(g_s_ending, 98, 122)))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_3;
// (, line 46
// literal, line 46
if (!(eq_s_b(1, "k")))
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
{
return false;
}
} while (false);
// delete, line 46
slice_del();
break;
case 3:
// (, line 48
// <-, line 48
slice_from("er");
break;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 52
// test, line 53
v_1 = limit - cursor;
// (, line 53
// setlimit, line 54
v_2 = limit - cursor;
// tomark, line 54
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 54
// [, line 54
ket = cursor;
// substring, line 54
if (find_among_b(a_1, 2) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 54
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 59
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 59
bra = cursor;
// delete, line 59
slice_del();
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 62
// setlimit, line 63
v_1 = limit - cursor;
// tomark, line 63
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 63
// [, line 63
ket = cursor;
// substring, line 63
among_var = find_among_b(a_2, 11);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 63
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 67
// delete, line 67
slice_del();
break;
}
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 72
// do, line 74
v_1 = cursor;
lab0: do {
// call mark_regions, line 74
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 75
limit_backward = cursor; cursor = limit;
// (, line 75
// do, line 76
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 76
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 77
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 77
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 78
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 78
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
public boolean equals( Object o ) {
return o instanceof NorwegianStemmer;
}
public int hashCode() {
return NorwegianStemmer.class.getName().hashCode();
}
}

View File

@ -1,349 +1,366 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class SwedishStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private final static SwedishStemmer methodObject = new SwedishStemmer ();
private final static Among a_0[] = {
new Among ( "a", -1, 1, "", methodObject ),
new Among ( "arna", 0, 1, "", methodObject ),
new Among ( "erna", 0, 1, "", methodObject ),
new Among ( "heterna", 2, 1, "", methodObject ),
new Among ( "orna", 0, 1, "", methodObject ),
new Among ( "ad", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "ade", 6, 1, "", methodObject ),
new Among ( "ande", 6, 1, "", methodObject ),
new Among ( "arne", 6, 1, "", methodObject ),
new Among ( "are", 6, 1, "", methodObject ),
new Among ( "aste", 6, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "anden", 12, 1, "", methodObject ),
new Among ( "aren", 12, 1, "", methodObject ),
new Among ( "heten", 12, 1, "", methodObject ),
new Among ( "ern", -1, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heter", 18, 1, "", methodObject ),
new Among ( "or", -1, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "as", 21, 1, "", methodObject ),
new Among ( "arnas", 22, 1, "", methodObject ),
new Among ( "ernas", 22, 1, "", methodObject ),
new Among ( "ornas", 22, 1, "", methodObject ),
new Among ( "es", 21, 1, "", methodObject ),
new Among ( "ades", 26, 1, "", methodObject ),
new Among ( "andes", 26, 1, "", methodObject ),
new Among ( "ens", 21, 1, "", methodObject ),
new Among ( "arens", 29, 1, "", methodObject ),
new Among ( "hetens", 29, 1, "", methodObject ),
new Among ( "erns", 21, 1, "", methodObject ),
new Among ( "at", -1, 1, "", methodObject ),
new Among ( "andet", -1, 1, "", methodObject ),
new Among ( "het", -1, 1, "", methodObject ),
new Among ( "ast", -1, 1, "", methodObject )
};
private final static Among a_1[] = {
new Among ( "dd", -1, -1, "", methodObject ),
new Among ( "gd", -1, -1, "", methodObject ),
new Among ( "nn", -1, -1, "", methodObject ),
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "gt", -1, -1, "", methodObject ),
new Among ( "kt", -1, -1, "", methodObject ),
new Among ( "tt", -1, -1, "", methodObject )
};
private final static Among a_2[] = {
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "lig", 0, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "fullt", -1, 3, "", methodObject ),
new Among ( "l\u00F6st", -1, 2, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
private static final char g_s_ending[] = {119, 127, 149 };
private int I_x;
private int I_p1;
private void copy_from(SwedishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 29
v_1 = cursor;
// (, line 29
// hop, line 29
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 29
I_x = cursor;
cursor = v_1;
// goto, line 30
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 246)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 30
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 246)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 30
I_p1 = cursor;
// try, line 31
lab4: do {
// (, line 31
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 36
// setlimit, line 37
v_1 = limit - cursor;
// tomark, line 37
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 37
// [, line 37
ket = cursor;
// substring, line 37
among_var = find_among_b(a_0, 37);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 37
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
if (!(in_grouping_b(g_s_ending, 98, 121)))
{
return false;
}
// delete, line 46
slice_del();
break;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// setlimit, line 50
v_1 = limit - cursor;
// tomark, line 50
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 50
// and, line 52
v_3 = limit - cursor;
// among, line 51
if (find_among_b(a_1, 7) == 0)
{
limit_backward = v_2;
return false;
}
cursor = limit - v_3;
// (, line 52
// [, line 52
ket = cursor;
// next, line 52
if (cursor <= limit_backward)
{
limit_backward = v_2;
return false;
}
cursor--;
// ], line 52
bra = cursor;
// delete, line 52
slice_del();
limit_backward = v_2;
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// setlimit, line 55
v_1 = limit - cursor;
// tomark, line 55
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 55
// [, line 56
ket = cursor;
// substring, line 56
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 56
bra = cursor;
switch(among_var) {
case 0:
limit_backward = v_2;
return false;
case 1:
// (, line 57
// delete, line 57
slice_del();
break;
case 2:
// (, line 58
// <-, line 58
slice_from("l\u00F6s");
break;
case 3:
// (, line 59
// <-, line 59
slice_from("full");
break;
}
limit_backward = v_2;
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 64
// do, line 66
v_1 = cursor;
lab0: do {
// call mark_regions, line 66
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 67
limit_backward = cursor; cursor = limit;
// (, line 67
// do, line 68
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 68
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 69
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 69
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 70
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 70
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
public boolean equals( Object o ) {
return o instanceof SwedishStemmer;
}
public int hashCode() {
return SwedishStemmer.class.getName().hashCode();
}
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -116,4 +117,21 @@ public class TestSnowball extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "", "");
}
}
public void testRandomStrings() throws IOException {
for (String lang : SNOWBALL_LANGS) {
checkRandomStrings(lang);
}
}
public void checkRandomStrings(final String snowballLanguage) throws IOException {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new MockTokenizer(reader);
return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
}

View File

@ -331,7 +331,9 @@ public class TaskSequence extends PerfTask {
// Forwards top request to children
if (runningParallelTasks != null) {
for(ParallelTask t : runningParallelTasks) {
t.task.stopNow();
if (t != null) {
t.task.stopNow();
}
}
}
}
@ -355,6 +357,12 @@ public class TaskSequence extends PerfTask {
// run threads
startThreads(t);
if (stopNow) {
for (ParallelTask task : t) {
task.task.stopNow();
}
}
// wait for all threads to complete
int count = 0;
for (int i = 0; i < t.length; i++) {

View File

@ -35,10 +35,9 @@
MIGRATE.txt,JRE_VERSION_MIGRATION.txt,
CHANGES.txt,
**/lib/*.jar,
**/lib/*LICENSE*.txt,
**/lib/*NOTICE*.txt,
licenses/**,
*/docs/,**/README*"
excludes="build/**,site/**"
excludes="build/**,site/**,tools/**"
/>
@ -572,4 +571,8 @@
</sequential>
</target>
<target name="jar-checksums" depends="clean-jars,resolve">
<jar-checksum-macro srcdir="${common.dir}" dstdir="${common.dir}/licenses"/>
</target>
</project>

View File

@ -1567,6 +1567,43 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
</sequential>
</macrodef>
<macrodef name="jar-checksum-macro">
<attribute name="srcdir"/>
<attribute name="dstdir"/>
<sequential>
<delete>
<fileset dir="@{dstdir}">
<include name="**/*.jar.sha1"/>
</fileset>
</delete>
<!-- checksum task does not have a flatten=true -->
<tempfile property="jar-checksum.temp.dir"/>
<mkdir dir="${jar-checksum.temp.dir}"/>
<copy todir="${jar-checksum.temp.dir}" flatten="true">
<fileset dir="@{srcdir}">
<include name="**/*.jar"/>
<!-- todo make this something passed into the macro and not some hardcoded set -->
<exclude name="build/**"/>
<exclude name="dist/**"/>
<exclude name="package/**"/>
<exclude name="example/exampledocs/**"/>
</fileset>
</copy>
<checksum algorithm="SHA1" fileext=".sha1" todir="@{dstdir}">
<fileset dir="${jar-checksum.temp.dir}"/>
</checksum>
<delete dir="${jar-checksum.temp.dir}"/>
<fixcrlf
srcdir="@{dstdir}"
includes="**/*.jar.sha1"
eol="lf" fixlast="true" encoding="US-ASCII" />
</sequential>
</macrodef>
<macrodef name="sign-artifacts-macro">
<attribute name="artifacts.dir"/>
<sequential>

View File

@ -53,7 +53,13 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
/** Reads a segment. NOTE: by the time this call
* returns, it must hold open any files it will need to
* use; else, those files may be deleted.
* Additionally, required files may be deleted during the execution of
* this call before there is a chance to open them. Under these
* circumstances an IOException should be thrown by the implementation.
* IOExceptions are expected and will automatically cause a retry of the
* segment opening logic with the newly revised segments.
* */
public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;
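A minimal sketch of how a format might honor this contract (everything here is hypothetical and not part of this patch: the "pst" extension, the "MyFormat" header and the createProducer helper). The key point is that required files are opened eagerly and any IOException is allowed to propagate rather than being swallowed:

    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
      String fileName = IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, "pst");
      // May throw IOException if the file was already deleted; let it propagate
      // so the caller can retry against the newly revised segments.
      IndexInput in = state.dir.openInput(fileName, state.context);
      boolean success = false;
      try {
        CodecUtil.checkHeader(in, "MyFormat", 1, 1);
        FieldsProducer producer = createProducer(in, state); // hypothetical helper
        success = true;
        return producer;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(in);
        }
      }
    }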
@Override

View File

@ -0,0 +1,63 @@
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
/**
* Class used to create index-time {@link FuzzySet} appropriately configured for
* each field. Also called to right-size bitsets for serialization.
* @lucene.experimental
*/
public abstract class BloomFilterFactory {
/**
*
* @param state The content to be indexed
* @param info
* the field requiring a BloomFilter
* @return An appropriately sized set or null if no BloomFiltering required
*/
public abstract FuzzySet getSetForField(SegmentWriteState state, FieldInfo info);
/**
* Called when downsizing bitsets for serialization
*
* @param fieldInfo
* The field with sparse set bits
* @param initialSet
* The bits accumulated
* @return null or a hopefully more densely packed, smaller bitset
*/
public FuzzySet downsize(FieldInfo fieldInfo, FuzzySet initialSet) {
// Aim for a bitset size that would have 10% of bits set (so 90% of searches
// would fail-fast)
float targetMaxSaturation = 0.1f;
return initialSet.downsize(targetMaxSaturation);
}
/**
* Used to determine if the given filter has reached saturation and should be retired, i.e. not saved any more
* @param bloomFilter The bloomFilter being tested
* @param fieldInfo The field with which this filter is associated
* @return true if the set has reached saturation and should be retired
*/
public abstract boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo);
}
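A hedged sketch of a custom factory (not part of this commit; the class name and the "id" field are assumptions): give a quality-sized set to a known primary-key field, cap memory for every other field, and reuse the same saturation policy as the default factory:

    public class PerFieldBloomFilterFactory extends BloomFilterFactory {
      @Override
      public FuzzySet getSetForField(SegmentWriteState state, FieldInfo info) {
        if ("id".equals(info.name)) { // assumed primary-key field
          return FuzzySet.createSetBasedOnQuality(state.segmentInfo.getDocCount(), 0.10f, new MurmurHash2());
        }
        return FuzzySet.createSetBasedOnMaxMemory(1024 * 1024, new MurmurHash2()); // ~1MB cap elsewhere
      }
      @Override
      public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
        return bloomFilter.getSaturation() > 0.9f; // retire filters that are >90% set
      }
    }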

View File

@ -0,0 +1,514 @@
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.FuzzySet.ContainsResult;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* <p>
* A {@link PostingsFormat} useful for low doc-frequency fields such as primary
* keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
* for reads in segments known to have no record of the key. A choice of
* delegate PostingsFormat is used to record all other Postings data.
* </p>
* <p>
* A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
* settings on a per-field basis. The default configuration is
* {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
* values using {@link MurmurHash2}. This should be suitable for most purposes.
* </p>
* <p>
* The format of the blm file is as follows:
* </p>
* <ul>
* <li>BloomFilter (.blm) --&gt; Header, DelegatePostingsFormatName,
* NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
* <li>Filter --&gt; FieldNumber, FuzzySet</li>
* <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput)}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DelegatePostingsFormatName --&gt; {@link DataOutput#writeString(String)
* String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
* <li>NumFilteredFields --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt Uint32} The number of the
* field in this segment</li>
* </ul>
* @lucene.experimental
*/
public class BloomFilteringPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "BloomFilter";
public static final int BLOOM_CODEC_VERSION = 1;
/** Extension of Bloom Filters file */
static final String BLOOM_EXTENSION = "blm";
BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
private PostingsFormat delegatePostingsFormat;
/**
* Creates Bloom filters for a selection of fields created in the index. This
* is recorded as a set of Bitsets held as a segment summary in an additional
* "blm" file. This PostingsFormat delegates to a choice of delegate
* PostingsFormat for encoding all other postings data.
*
* @param delegatePostingsFormat
* The PostingsFormat that records all the non-bloom filter data i.e.
* postings info.
* @param bloomFilterFactory
* The {@link BloomFilterFactory} responsible for sizing BloomFilters
* appropriately
*/
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
BloomFilterFactory bloomFilterFactory) {
super(BLOOM_CODEC_NAME);
this.delegatePostingsFormat = delegatePostingsFormat;
this.bloomFilterFactory = bloomFilterFactory;
}
/**
* Creates Bloom filters for a selection of fields created in the index. This
* is recorded as a set of Bitsets held as a segment summary in an additional
* "blm" file. This PostingsFormat delegates to a choice of delegate
* PostingsFormat for encoding all other postings data. This choice of
* constructor defaults to the {@link DefaultBloomFilterFactory} for
* configuring per-field BloomFilters.
*
* @param delegatePostingsFormat
* The PostingsFormat that records all the non-bloom filter data i.e.
* postings info.
*/
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
this(delegatePostingsFormat, new DefaultBloomFilterFactory());
}
// Used only by core Lucene at read-time via Service Provider instantiation -
// do not use at Write-time in application code.
public BloomFilteringPostingsFormat() {
super(BLOOM_CODEC_NAME);
}
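A hedged usage sketch (not part of this commit; the "id" field and the analyzer variable are assumptions): wrap the default postings format and route a primary-key field to it through a per-field codec, mirroring the per-field pattern shown elsewhere in this change set:

    final PostingsFormat bloomPostings =
        new BloomFilteringPostingsFormat(new Lucene40PostingsFormat());
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    iwc.setCodec(new Lucene40Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return "id".equals(field) ? bloomPostings : super.getPostingsFormatForField(field);
      }
    });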
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
if (delegatePostingsFormat == null) {
throw new UnsupportedOperationException("Error - " + getClass().getName()
+ " has been constructed without a choice of PostingsFormat");
}
return new BloomFilteredFieldsConsumer(
delegatePostingsFormat.fieldsConsumer(state), state,
delegatePostingsFormat);
}
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new BloomFilteredFieldsProducer(state);
}
public class BloomFilteredFieldsProducer extends FieldsProducer {
private FieldsProducer delegateFieldsProducer;
HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
public BloomFilteredFieldsProducer(SegmentReadState state)
throws IOException {
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexInput bloomIn = null;
try {
bloomIn = state.dir.openInput(bloomFileName, state.context);
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
BLOOM_CODEC_VERSION);
// // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format
PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
.readString());
this.delegateFieldsProducer = delegatePostingsFormat
.fieldsProducer(state);
int numBlooms = bloomIn.readInt();
for (int i = 0; i < numBlooms; i++) {
int fieldNum = bloomIn.readInt();
FuzzySet bloom = FuzzySet.deserialize(bloomIn);
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
bloomsByFieldName.put(fieldInfo.name, bloom);
}
} finally {
IOUtils.close(bloomIn);
}
}
public FieldsEnum iterator() throws IOException {
return new BloomFilteredFieldsEnum(delegateFieldsProducer.iterator(),
bloomsByFieldName);
}
public void close() throws IOException {
delegateFieldsProducer.close();
}
public Terms terms(String field) throws IOException {
FuzzySet filter = bloomsByFieldName.get(field);
if (filter == null) {
return delegateFieldsProducer.terms(field);
} else {
Terms result = delegateFieldsProducer.terms(field);
if (result == null) {
return null;
}
return new BloomFilteredTerms(result, filter);
}
}
public int size() throws IOException {
return delegateFieldsProducer.size();
}
public long getUniqueTermCount() throws IOException {
return delegateFieldsProducer.getUniqueTermCount();
}
// Not all fields in a segment may be subject to a bloom filter. This class
// wraps Terms objects appropriately if a filtering request is present
class BloomFilteredFieldsEnum extends FieldsEnum {
private FieldsEnum delegateFieldsEnum;
private HashMap<String,FuzzySet> bloomsByFieldName;
private String currentFieldName;
public BloomFilteredFieldsEnum(FieldsEnum iterator,
HashMap<String,FuzzySet> bloomsByFieldName) {
this.delegateFieldsEnum = iterator;
this.bloomsByFieldName = bloomsByFieldName;
}
public AttributeSource attributes() {
return delegateFieldsEnum.attributes();
}
public String next() throws IOException {
currentFieldName = delegateFieldsEnum.next();
return currentFieldName;
}
public Terms terms() throws IOException {
FuzzySet filter = bloomsByFieldName.get(currentFieldName);
if (filter == null) {
return delegateFieldsEnum.terms();
} else {
Terms result = delegateFieldsEnum.terms();
if (result == null) {
return null;
}
// wrap the terms object with a bloom filter
return new BloomFilteredTerms(result, filter);
}
}
}
class BloomFilteredTerms extends Terms {
private Terms delegateTerms;
private FuzzySet filter;
public BloomFilteredTerms(Terms terms, FuzzySet filter) {
this.delegateTerms = terms;
this.filter = filter;
}
@Override
public TermsEnum intersect(CompiledAutomaton compiled,
final BytesRef startTerm) throws IOException {
return delegateTerms.intersect(compiled, startTerm);
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
TermsEnum result;
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
// recycle the existing BloomFilteredTermsEnum by asking the delegate
// to recycle its contained TermsEnum
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
if (bfte.filter == filter) {
bfte.delegateTermsEnum = delegateTerms
.iterator(bfte.delegateTermsEnum);
return bfte;
}
}
// We have been handed something we cannot reuse (either null, wrong
// class or wrong filter) so allocate a new object
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
filter);
return result;
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return delegateTerms.getComparator();
}
@Override
public long size() throws IOException {
return delegateTerms.size();
}
@Override
public long getSumTotalTermFreq() throws IOException {
return delegateTerms.getSumTotalTermFreq();
}
@Override
public long getSumDocFreq() throws IOException {
return delegateTerms.getSumDocFreq();
}
@Override
public int getDocCount() throws IOException {
return delegateTerms.getDocCount();
}
}
class BloomFilteredTermsEnum extends TermsEnum {
TermsEnum delegateTermsEnum;
private FuzzySet filter;
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
this.delegateTermsEnum = iterator;
this.filter = filter;
}
@Override
public final BytesRef next() throws IOException {
return delegateTermsEnum.next();
}
@Override
public final Comparator<BytesRef> getComparator() {
return delegateTermsEnum.getComparator();
}
@Override
public final boolean seekExact(BytesRef text, boolean useCache)
throws IOException {
// The magical fail-fast speed up that is the entire point of all of
// this code - save a disk seek if the in-memory structure shows the term
// cannot be present; the structure may occasionally give a false positive
// but is guaranteed to give no false negatives
if (filter.contains(text) == ContainsResult.NO) {
return false;
}
return delegateTermsEnum.seekExact(text, useCache);
}
@Override
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
throws IOException {
return delegateTermsEnum.seekCeil(text, useCache);
}
@Override
public final void seekExact(long ord) throws IOException {
delegateTermsEnum.seekExact(ord);
}
@Override
public final BytesRef term() throws IOException {
return delegateTermsEnum.term();
}
@Override
public final long ord() throws IOException {
return delegateTermsEnum.ord();
}
@Override
public final int docFreq() throws IOException {
return delegateTermsEnum.docFreq();
}
@Override
public final long totalTermFreq() throws IOException {
return delegateTermsEnum.totalTermFreq();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
DocsAndPositionsEnum reuse, int flags) throws IOException {
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
throws IOException {
return delegateTermsEnum.docs(liveDocs, reuse, flags);
}
}
}
class BloomFilteredFieldsConsumer extends FieldsConsumer {
private FieldsConsumer delegateFieldsConsumer;
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
private SegmentWriteState state;
// private PostingsFormat delegatePostingsFormat;
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
this.delegateFieldsConsumer = fieldsConsumer;
// this.delegatePostingsFormat=delegatePostingsFormat;
this.state = state;
}
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
if (bloomFilter != null) {
assert bloomFilters.containsKey(field) == false;
bloomFilters.put(field, bloomFilter);
return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
} else {
// No, use the unfiltered fieldsConsumer - we are not interested in
// recording any term Bitsets.
return delegateFieldsConsumer.addField(field);
}
}
@Override
public void close() throws IOException {
delegateFieldsConsumer.close();
// Now we are done accumulating values for these fields
List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
FuzzySet bloomFilter = entry.getValue();
if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
nonSaturatedBlooms.add(entry);
}
}
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexOutput bloomOutput = null;
try {
bloomOutput = state.directory
.createOutput(bloomFileName, state.context);
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
BLOOM_CODEC_VERSION);
// remember the name of the postings format we will delegate to
bloomOutput.writeString(delegatePostingsFormat.getName());
// First field in the output file is the number of fields+blooms saved
bloomOutput.writeInt(nonSaturatedBlooms.size());
for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
FieldInfo fieldInfo = entry.getKey();
FuzzySet bloomFilter = entry.getValue();
bloomOutput.writeInt(fieldInfo.number);
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
}
} finally {
IOUtils.close(bloomOutput);
}
//We are done with large bitsets so no need to keep them hanging around
bloomFilters.clear();
}
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
bloomFilter);
if (rightSizedSet == null) {
rightSizedSet = bloomFilter;
}
rightSizedSet.serialize(bloomOutput);
}
}
class WrappedTermsConsumer extends TermsConsumer {
private TermsConsumer delegateTermsConsumer;
private FuzzySet bloomFilter;
public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
this.delegateTermsConsumer = termsConsumer;
this.bloomFilter = bloomFilter;
}
public PostingsConsumer startTerm(BytesRef text) throws IOException {
return delegateTermsConsumer.startTerm(text);
}
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// Record this term in our BloomFilter
if (stats.docFreq > 0) {
bloomFilter.addValue(text);
}
delegateTermsConsumer.finishTerm(text, stats);
}
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
throws IOException {
delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
}
public Comparator<BytesRef> getComparator() throws IOException {
return delegateTermsConsumer.getComparator();
}
}
}

View File

@ -0,0 +1,44 @@
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.HashFunction;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* Default policy is to allocate a bitset with 10% saturation given a unique term per document.
* Bits are set via MurmurHash2 hashing function.
* @lucene.experimental
*/
public class DefaultBloomFilterFactory extends BloomFilterFactory {
@Override
public FuzzySet getSetForField(SegmentWriteState state,FieldInfo info) {
//Assume all of the docs have a unique term (e.g. a primary key) and we hope to maintain a set with 10% of bits set
return FuzzySet.createSetBasedOnQuality(state.segmentInfo.getDocCount(), 0.10f, new MurmurHash2());
}
@Override
public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
// Don't bother saving bitsets if >90% of bits are set - we don't want to
// throw any more memory at this problem.
return bloomFilter.getSaturation() > 0.9f;
}
}

View File

@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
</body>
</html>

View File

@ -377,6 +377,11 @@ public class Field implements IndexableField {
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
if (boost != 1.0f) {
if (type.indexed() == false || type.omitNorms()) {
throw new IllegalArgumentException("You cannot set an index-time boost on an unindexed field, or one that omits norms");
}
}
this.boost = boost;
}
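A short illustration of the new check (a hedged sketch, not part of this patch): boosting is only legal on indexed fields that keep norms, so a TextField accepts it while a StringField, which omits norms, does not:

    Field title = new TextField("title", "lucene in action", Field.Store.YES);
    title.setBoost(2.0f);              // allowed: indexed, norms enabled
    Field id = new StringField("id", "42", Field.Store.YES);
    // id.setBoost(2.0f);              // would throw IllegalArgumentException: StringField omits norms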

View File

@ -49,6 +49,7 @@ public final class StoredField extends Field {
super(name, value, TYPE);
}
// TODO: not great but maybe not a big problem?
public StoredField(String name, int value) {
super(name, TYPE);
fieldsData = value;

View File

@ -54,9 +54,4 @@ public final class StringField extends Field {
public StringField(String name, String value, Store stored) {
super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
}
@Override
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
}

View File

@ -46,9 +46,9 @@ public final class TextField extends Field {
// TODO: add sugar for term vectors...?
/** Creates a new TextField with Reader value. */
public TextField(String name, Reader reader, Store store) {
super(name, reader, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
/** Creates a new un-stored TextField with Reader value. */
public TextField(String name, Reader reader) {
super(name, reader, TYPE_NOT_STORED);
}
/** Creates a new TextField with String value. */

View File

@ -1,8 +1,5 @@
package org.apache.lucene.index;
import java.util.Collections;
import java.util.List;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -20,6 +17,9 @@ import java.util.List;
* limitations under the License.
*/
import java.util.Collections;
import java.util.List;
/**
* {@link IndexReaderContext} for {@link AtomicReader} instances
* @lucene.experimental
@ -51,8 +51,9 @@ public final class AtomicReaderContext extends IndexReaderContext {
@Override
public List<AtomicReaderContext> leaves() {
if (!isTopLevel)
if (!isTopLevel) {
throw new UnsupportedOperationException("This is not a top-level context.");
}
assert leaves != null;
return leaves;
}

View File

@ -75,6 +75,9 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely.
* <b>NOTE:</b> divisor settings &gt; 1 do not apply to all PostingsFormat
* implementations, including the default one in this release. It only makes
* sense for terms indexes that can efficiently re-sample terms at load time.
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException {
@ -126,6 +129,9 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely.
* <b>NOTE:</b> divisor settings &gt; 1 do not apply to all PostingsFormat
* implementations, including the default one in this release. It only makes
* sense for terms indexes that can efficiently re-sample terms at load time.
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException {

View File

@ -202,11 +202,9 @@ final class DocumentsWriter {
* discarding any docs added since last flush. */
synchronized void abort() {
boolean success = false;
synchronized (this) {
deleteQueue.clear();
}
try {
deleteQueue.clear();
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "abort");
}
@ -230,6 +228,7 @@ final class DocumentsWriter {
perThread.unlock();
}
}
flushControl.abortPendingFlushes();
flushControl.waitForFlush();
success = true;
} finally {

View File

@ -567,19 +567,34 @@ final class DocumentsWriterFlushControl {
}
synchronized void abortFullFlushes() {
try {
abortPendingFlushes();
} finally {
fullFlush = false;
}
}
synchronized void abortPendingFlushes() {
try {
for (DocumentsWriterPerThread dwpt : flushQueue) {
doAfterFlush(dwpt);
dwpt.abort();
try {
dwpt.abort();
doAfterFlush(dwpt);
} catch (Throwable ex) {
// ignore - keep on aborting the flush queue
}
}
for (BlockedFlush blockedFlush : blockedFlushes) {
flushingWriters
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
doAfterFlush(blockedFlush.dwpt);
blockedFlush.dwpt.abort();
try {
flushingWriters
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
blockedFlush.dwpt.abort();
doAfterFlush(blockedFlush.dwpt);
} catch (Throwable ex) {
// ignore - keep on aborting the blocked queue
}
}
} finally {
fullFlush = false;
flushQueue.clear();
blockedFlushes.clear();
updateStallState();

View File

@ -1,7 +1,5 @@
package org.apache.lucene.index;
import java.util.List;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -19,6 +17,8 @@ import java.util.List;
* limitations under the License.
*/
import java.util.List;
/**
* A struct like class that represents a hierarchical relationship between
* {@link IndexReader} instances.

View File

@ -1241,6 +1241,78 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
/** Expert: attempts to delete by document ID, as long as
* the provided reader is a near-real-time reader (from {@link
* DirectoryReader#open(IndexWriter,boolean)}). If the
* provided reader is an NRT reader obtained from this
* writer, and its segment has not been merged away, then
* the delete succeeds and this method returns true; else, it
* returns false and the caller must then separately delete by
* Term or Query.
*
* <b>NOTE</b>: this method can only delete documents
* visible to the currently open NRT reader. If you need
* to delete documents indexed after opening the NRT
* reader you must use the other deleteDocument methods
* (e.g., {@link #deleteDocuments(Term)}). */
public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
final AtomicReader reader;
if (readerIn instanceof AtomicReader) {
// Reader is already atomic: use the incoming docID:
reader = (AtomicReader) readerIn;
} else {
// Composite reader: lookup sub-reader and re-base docID:
List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
int subIndex = ReaderUtil.subIndex(docID, leaves);
reader = leaves.get(subIndex).reader();
docID -= leaves.get(subIndex).docBase;
assert docID >= 0;
assert docID < reader.maxDoc();
}
if (!(reader instanceof SegmentReader)) {
throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
}
final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
// TODO: this is a slow linear search, but, number of
// segments should be contained unless something is
// seriously wrong w/ the index, so it should be a minor
// cost:
if (segmentInfos.indexOf(info) != -1) {
ReadersAndLiveDocs rld = readerPool.get(info, false);
if (rld != null) {
synchronized(bufferedDeletesStream) {
rld.initWritableLiveDocs();
if (rld.delete(docID)) {
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
if (fullDelCount == rld.info.info.getDocCount()) {
// If a merge has already registered for this
// segment, we leave it in the readerPool; the
// merge will skip merging it and will then drop
// it once it's done:
if (!mergingSegments.contains(rld.info)) {
segmentInfos.remove(rld.info);
readerPool.drop(rld.info);
checkpoint();
}
}
}
//System.out.println(" yes " + info.info.name + " " + docID);
return true;
}
} else {
//System.out.println(" no rld " + info.info.name + " " + docID);
}
} else {
//System.out.println(" no seg " + info.info.name + " " + docID);
}
return false;
}
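A hedged usage sketch (not part of this commit; the "id" field and query are assumptions): look a document up through an NRT reader obtained from the same writer, try the cheap per-docID delete, and fall back to a Term delete if the segment has been merged away:

    DirectoryReader reader = DirectoryReader.open(writer, true); // NRT reader from this writer
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term("id", "42")), 1);
    if (hits.totalHits > 0) {
      int docID = hits.scoreDocs[0].doc;
      if (!writer.tryDeleteDocument(reader, docID)) {
        writer.deleteDocuments(new Term("id", "42")); // segment no longer live in the writer
      }
    }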
/**
* Deletes the document(s) containing any of the
* terms. All given deletes are applied and flushed atomically

View File

@ -392,7 +392,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
* @see #setMaxBufferedDocs(int)
* @see #setRAMBufferSizeMB(double)
*/
public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
this.flushPolicy = flushPolicy;
return this;
}
@ -422,7 +422,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
}
@Override
public FlushPolicy getFlushPolicy() {
FlushPolicy getFlushPolicy() {
return flushPolicy;
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -146,6 +147,29 @@ public class LiveIndexWriterConfig {
* Takes effect immediately, but only applies to newly flushed/merged
* segments.
*
* <p>
* <b>NOTE:</b> This parameter does not apply to all PostingsFormat implementations,
* including the default one in this release. It only makes sense for term indexes
* that are implemented as a fixed gap between terms. For example,
* {@link Lucene40PostingsFormat} implements the term index instead based upon how
* terms share prefixes. To configure its parameters (the minimum and maximum size
* for a block), you would instead use {@link Lucene40PostingsFormat#Lucene40PostingsFormat(int, int)}.
* which can also be configured on a per-field basis:
* <pre class="prettyprint">
* //customize Lucene40PostingsFormat, passing minBlockSize=50, maxBlockSize=100
* final PostingsFormat tweakedPostings = new Lucene40PostingsFormat(50, 100);
* iwc.setCodec(new Lucene40Codec() {
* &#64;Override
* public PostingsFormat getPostingsFormatForField(String field) {
* if (field.equals("fieldWithTonsOfTerms"))
* return tweakedPostings;
* else
* return super.getPostingsFormatForField(field);
* }
* });
* </pre>
* Note that other implementations may have their own parameters, or no parameters at all.
*
* @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL
*/
public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
@ -335,6 +359,10 @@ public class LiveIndexWriterConfig {
* <p>
* Takes effect immediately, but only applies to readers opened after this
* call
* <p>
* <b>NOTE:</b> divisor settings &gt; 1 do not apply to all PostingsFormat
* implementations, including the default one in this release. It only makes
* sense for terms indexes that can efficiently re-sample terms at load time.
*/
public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
if (divisor <= 0 && divisor != -1) {
@ -462,7 +490,7 @@ public class LiveIndexWriterConfig {
/**
* @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
*/
public FlushPolicy getFlushPolicy() {
FlushPolicy getFlushPolicy() {
return flushPolicy;
}
@ -497,7 +525,6 @@ public class LiveIndexWriterConfig {
sb.append("mergePolicy=").append(getMergePolicy()).append("\n");
sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
sb.append("flushPolicy=").append(getFlushPolicy()).append("\n");
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
return sb.toString();
}

View File

@ -122,7 +122,7 @@ public class FuzzyTermsEnum extends TermsEnum {
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
// if minSimilarity >= 1, we treat it as number of edits
if (minSimilarity >= 1f) {
this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
this.minSimilarity = 0; // just driven by number of edits
maxEdits = (int) minSimilarity;
raw = true;
} else {

View File

@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.SegmentInfoPerCommit;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
@ -254,6 +255,14 @@ public class NRTManager extends ReferenceManager<IndexSearcher> {
long getAndIncrementGeneration() {
return indexingGen.getAndIncrement();
}
public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
if (writer.tryDeleteDocument(reader, docID)) {
return indexingGen.get();
} else {
return -1;
}
}
}
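A hedged sketch of how the returned generation might be used (not part of this commit; trackingWriter, nrtManager and the waitForGeneration call are assumed to be provided by the surrounding application):

    long gen = trackingWriter.tryDeleteDocument(reader, docID);
    if (gen != -1) {
      nrtManager.waitForGeneration(gen); // searchers obtained afterwards reflect the delete
    } else {
      // the segment was merged away; fall back to deleteDocuments(Term/Query)
    }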
/**

View File

@ -54,6 +54,11 @@ public final class FixedBitSet extends DocIdSet implements Bits {
bits = new long[bits2words(numBits)];
}
public FixedBitSet(long[]storedBits,int numBits) {
this.numBits = numBits;
this.bits = storedBits;
}
/** Makes full copy. */
public FixedBitSet(FixedBitSet other) {
bits = new long[other.bits.length];

View File

@ -0,0 +1,292 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.hash.HashFunction;
/**
* <p>
* A class used to represent a set of many, potentially large, values (e.g. many
* long strings such as URLs), using a significantly smaller amount of memory.
* </p>
* <p>
* The set is "lossy" in that it cannot definitively state that is does contain
* a value but it <em>can</em> definitively say if a value is <em>not</em> in
* the set. It can therefore be used as a Bloom Filter.
* </p>
* <p>Another application of the set is that it can be used to perform fuzzy counting because
* it can estimate reasonably accurately how many unique values are contained in the set.
* </p>
* <p>This class is NOT threadsafe.</p>
* <p>
* Internally a Bitset is used to record values and once a client has finished recording
* a stream of values the {@link #downsize(float)} method can be used to create a suitably smaller set that
* is sized appropriately for the number of values recorded and desired saturation levels.
*
* </p>
* @lucene.experimental
*/
public class FuzzySet {
public static final int FUZZY_SERIALIZATION_VERSION=1;
public enum ContainsResult {
MAYBE, NO
};
private HashFunction hashFunction;
private FixedBitSet filter;
private int bloomSize;
//The sizes of BitSet used are all numbers that, when expressed in binary form,
//are all ones. This is to enable fast downsizing from one bitset to another
//by simply ANDing each set index in one bitset with the size of the target bitset
// - this provides a fast modulo of the number. Values previously accumulated in
// a large bitset and then mapped to a smaller set can be looked up using a single
// AND operation of the query term's hash rather than needing to perform a 2-step
// translation of the query term that mirrors the stored content's reprojections.
static final int usableBitSetSizes[];
static
{
usableBitSetSizes=new int[30];
int mask=1;
int size=mask;
for (int i = 0; i < usableBitSetSizes.length; i++) {
size=(size<<1)|mask;
usableBitSetSizes[i]=size;
}
}
/**
* Rounds down required maxNumberOfBits to the nearest number that is made up
* of all ones as a binary number.
* Use this method where controlling memory use is paramount.
*/
public static int getNearestSetSize(int maxNumberOfBits)
{
int result=usableBitSetSizes[0];
for (int i = 0; i < usableBitSetSizes.length; i++) {
if(usableBitSetSizes[i]<=maxNumberOfBits)
{
result=usableBitSetSizes[i];
}
}
return result;
}
/**
* Use this method to choose a set size where accuracy (low content saturation) is more important
* than deciding how much memory to throw at the problem.
* @param maxNumberOfValuesExpected The maximum number of unique values expected to be recorded in the set
* @param desiredSaturation A number between 0 and 1 expressing the % of bits set once all values have been recorded
* @return The size of the set nearest to the required size
*/
public static int getNearestSetSize(int maxNumberOfValuesExpected,
float desiredSaturation) {
// Iterate over the available bitset sizes from smallest to largest, looking for the first that
// can hold the expected number of values at the chosen saturation level
for (int i = 0; i < usableBitSetSizes.length; i++) {
int numSetBitsAtDesiredSaturation = (int) (usableBitSetSizes[i] * desiredSaturation);
int estimatedNumUniqueValues = getEstimatedNumberUniqueValuesAllowingForCollisions(
usableBitSetSizes[i], numSetBitsAtDesiredSaturation);
if (estimatedNumUniqueValues > maxNumberOfValuesExpected) {
return usableBitSetSizes[i];
}
}
return -1;
}
public static FuzzySet createSetBasedOnMaxMemory(int maxNumBytes, HashFunction hashFunction)
{
int setSize=getNearestSetSize(maxNumBytes);
return new FuzzySet(new FixedBitSet(setSize+1),setSize,hashFunction);
}
public static FuzzySet createSetBasedOnQuality(int maxNumUniqueValues, float desiredMaxSaturation, HashFunction hashFunction)
{
int setSize=getNearestSetSize(maxNumUniqueValues,desiredMaxSaturation);
return new FuzzySet(new FixedBitSet(setSize+1),setSize,hashFunction);
}
private FuzzySet(FixedBitSet filter, int bloomSize, HashFunction hashFunction) {
super();
this.filter = filter;
this.bloomSize = bloomSize;
this.hashFunction=hashFunction;
}
/**
* The main method required for a Bloom filter which, given a value, determines set membership.
* Unlike a conventional set, the fuzzy set returns NO or MAYBE rather than true or false.
* @param value
* @return NO or MAYBE
*/
public ContainsResult contains(BytesRef value) {
int hash = hashFunction.hash(value);
if (hash < 0) {
hash = hash * -1;
}
return mayContainValue(hash);
}
/**
* Serializes the data set to file using the following format:
* <ul>
* <li>FuzzySet --&gt;FuzzySetVersion,HashFunctionName,BloomSize,
* NumBitSetWords,BitSetWord<sup>NumBitSetWords</sup></li>
* <li>HashFunctionName --&gt; {@link DataOutput#writeString(String) String} The
* name of a ServiceProvider registered {@link HashFunction}</li>
* <li>FuzzySetVersion --&gt; {@link DataOutput#writeInt Uint32} The version number of the {@link FuzzySet} class</li>
* <li>BloomSize --&gt; {@link DataOutput#writeInt Uint32} The modulo value used
* to project hashes into the field's Bitset</li>
* <li>NumBitSetWords --&gt; {@link DataOutput#writeInt Uint32} The number of
* longs (as returned from {@link FixedBitSet#getBits})</li>
* <li>BitSetWord --&gt; {@link DataOutput#writeLong Long} A long from the array
* returned by {@link FixedBitSet#getBits}</li>
* </ul>
* @param out Data output stream
* @throws IOException
*/
public void serialize(DataOutput out) throws IOException
{
out.writeInt(FUZZY_SERIALIZATION_VERSION);
out.writeString(hashFunction.getName());
out.writeInt(bloomSize);
long[] bits = filter.getBits();
out.writeInt(bits.length);
for (int i = 0; i < bits.length; i++) {
// Can't use VLong encoding because it can't cope with the negative numbers
// output by FixedBitSet
out.writeLong(bits[i]);
}
}
public static FuzzySet deserialize(DataInput in) throws IOException
{
int version=in.readInt();
if(version!=FUZZY_SERIALIZATION_VERSION)
{
throw new IOException("Error deserializing: set version is not "+FUZZY_SERIALIZATION_VERSION);
}
HashFunction hashFunction=HashFunction.forName(in.readString());
int bloomSize=in.readInt();
int numLongs=in.readInt();
long[]longs=new long[numLongs];
for (int i = 0; i < numLongs; i++) {
longs[i]=in.readLong();
}
FixedBitSet bits = new FixedBitSet(longs,bloomSize+1);
return new FuzzySet(bits,bloomSize,hashFunction);
}
private ContainsResult mayContainValue(int positiveHash) {
assert positiveHash >= 0;
// Bloom sizes are always an all-ones mask (a power of two minus one) and so can be ANDed for a fast modulo
int pos = positiveHash & bloomSize;
if (filter.get(pos)) {
// This term may be recorded in this index (but could be a collision)
return ContainsResult.MAYBE;
}
// definitely NOT in this segment
return ContainsResult.NO;
}
/**
* Records a value in the set. The referenced bytes are hashed and then modulo n'd where n is the
* chosen size of the internal bitset.
* @param value the key value to be hashed
* @throws IOException
*/
public void addValue(BytesRef value) throws IOException {
int hash = hashFunction.hash(value);
if (hash < 0) {
hash = hash * -1;
}
// Bitmasking using bloomSize is effectively a modulo operation.
int bloomPos = hash & bloomSize;
filter.set(bloomPos);
}
/**
*
* @param targetMaxSaturation A number between 0 and 1 describing the % of bits that would ideally be set in the
* result. Lower values have better accuracy but require more space.
* @return a smaller FuzzySet or null if the current set is already over-saturated
*/
public FuzzySet downsize(float targetMaxSaturation)
{
int numBitsSet = filter.cardinality();
FixedBitSet rightSizedBitSet = filter;
int rightSizedBitSetSize = bloomSize;
// Try to find a smaller bitset into which the accumulated values can be projected while maintaining the desired saturation level
for (int i = 0; i < usableBitSetSizes.length; i++) {
int candidateBitsetSize = usableBitSetSizes[i];
float candidateSaturation = (float) numBitsSet
/ (float) candidateBitsetSize;
if (candidateSaturation <= targetMaxSaturation) {
rightSizedBitSetSize = candidateBitsetSize;
break;
}
}
// Re-project the numbers to a smaller space if necessary
if (rightSizedBitSetSize < bloomSize) {
// Reset the choice of bitset to the smaller version
rightSizedBitSet = new FixedBitSet(rightSizedBitSetSize + 1);
// Map across the bits from the large set to the smaller one
int bitIndex = 0;
do {
bitIndex = filter.nextSetBit(bitIndex);
if (bitIndex >= 0) {
// Project the larger number into a smaller one effectively
// modulo-ing by using the target bitset size as a mask
int downSizedBitIndex = bitIndex & rightSizedBitSetSize;
rightSizedBitSet.set(downSizedBitIndex);
bitIndex++;
}
} while ( (bitIndex >= 0)&&(bitIndex<=bloomSize));
} else {
return null;
}
return new FuzzySet(rightSizedBitSet,rightSizedBitSetSize, hashFunction);
}
public int getEstimatedUniqueValues()
{
return getEstimatedNumberUniqueValuesAllowingForCollisions(bloomSize, filter.cardinality());
}
// Given a set size and the number of set bits, produces an estimate of the number of unique values recorded
public static int getEstimatedNumberUniqueValuesAllowingForCollisions(
int setSize, int numRecordedBits) {
double setSizeAsDouble = setSize;
double numRecordedBitsAsDouble = numRecordedBits;
double saturation = numRecordedBitsAsDouble / setSizeAsDouble;
double logInverseSaturation = Math.log(1 - saturation) * -1;
return (int) (setSizeAsDouble * logInverseSaturation);
}
public float getSaturation() {
int numBitsSet = filter.cardinality();
return (float) numBitsSet / (float) bloomSize;
}
}
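
As context for the FuzzySet API added above, a minimal usage sketch follows. It only uses the public methods shown in this file; the wrapping class, the sample URLs, and the chosen sizes and saturations are illustrative assumptions, not part of the patch.

import java.io.IOException;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.MurmurHash2;

public class FuzzySetUsageSketch {
  public static void main(String[] args) throws IOException {
    // Size the set for roughly 10,000 unique values at no more than 10% saturation.
    FuzzySet set = FuzzySet.createSetBasedOnQuality(10000, 0.10f, new MurmurHash2());

    // Record a stream of values; only their hashed bit positions are retained.
    set.addValue(new BytesRef("http://example.com/a"));
    set.addValue(new BytesRef("http://example.com/b"));

    // Membership answers are NO (definitely absent) or MAYBE (possibly present).
    System.out.println(set.contains(new BytesRef("http://example.com/a")));   // MAYBE
    System.out.println(set.contains(new BytesRef("http://example.com/zzz"))); // almost certainly NO

    // Fuzzy counting and saturation, as described in the class javadoc.
    System.out.println("estimated unique values: " + set.getEstimatedUniqueValues());
    System.out.println("saturation: " + set.getSaturation());

    // Once recording is finished, shrink to a bitset that stays under 10% saturation.
    // Because every usable size is an all-ones mask (2^k - 1), each recorded bit index
    // is re-projected with a single AND, e.g. 600 & 255 == 88.
    FuzzySet smaller = set.downsize(0.10f);
    if (smaller != null) {
      System.out.println("downsized saturation: " + smaller.getSaturation());
    }
  }
}

The same instance can be persisted with serialize(DataOutput) and restored with deserialize(DataInput), using the on-disk layout documented on the serialize method above.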

View File

@ -0,0 +1,70 @@
package org.apache.lucene.util.hash;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NamedSPILoader;
/**
* Base class for hashing functions that can be referred to by name.
* Subclasses are expected to provide threadsafe implementations of the hash function
* on the range of bytes referenced in the provided {@link BytesRef}.
* @lucene.experimental
*/
public abstract class HashFunction implements NamedSPILoader.NamedSPI {
/**
* Hashes the contents of the referenced bytes
* @param bytes the data to be hashed
* @return the hash of the bytes.length bytes starting at bytes.offset
*/
public abstract int hash(BytesRef bytes);
private static final NamedSPILoader<HashFunction> loader =
new NamedSPILoader<HashFunction>(HashFunction.class);
private final String name;
public HashFunction(String name) {
NamedSPILoader.checkServiceName(name);
this.name = name;
}
/** Returns this hash function's name */
@Override
public final String getName() {
return name;
}
/** looks up a hash function by name */
public static HashFunction forName(String name) {
return loader.lookup(name);
}
/** returns the set of all available hash function names */
public static Set<String> availableHashFunctionNames() {
return loader.availableServices();
}
@Override
public String toString() {
return name;
}
}
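
A sketch of how a new hash could plug into this SPI: the subclass below is hypothetical (both its name and its deliberately simple hash body), and it would be registered in a META-INF/services/org.apache.lucene.util.hash.HashFunction resource alongside the MurmurHash2 entry added later in this patch, after which HashFunction.forName("SimpleXor") resolves to it.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.hash.HashFunction;

// Hypothetical example hash function; intentionally trivial and NOT suitable for real use.
public class SimpleXorHash extends HashFunction {

  public SimpleXorHash() {
    super("SimpleXor"); // the SPI name clients pass to HashFunction.forName(...)
  }

  @Override
  public int hash(BytesRef bytes) {
    // Hash only the referenced slice, honouring offset and length as the contract requires.
    int h = 0;
    for (int i = bytes.offset; i < bytes.offset + bytes.length; i++) {
      h = (h * 31) ^ bytes.bytes[i];
    }
    return h;
  }
}

Looking up the bundled implementation works the same way, e.g. HashFunction.forName("MurmurHash2").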

View File

@ -0,0 +1,105 @@
package org.apache.lucene.util.hash;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
/**
* This is a very fast, non-cryptographic hash suitable for general hash-based
* lookup. See http://murmurhash.googlepages.com/ for more details.
* <p>
* The C version of MurmurHash 2.0 found at that site was ported to Java by
* Andrzej Bialecki (ab at getopt org).
* </p>
* <p>
* The code from getopt.org was adapted by Mark Harwood into the form here as one of a pluggable set of
* hashing functions, since the core function had to be adapted to work with BytesRefs with offsets and lengths
* rather than raw byte arrays.
* </p>
* @lucene.experimental
*/
public class MurmurHash2 extends HashFunction {
public static final String HASH_NAME="MurmurHash2";
public MurmurHash2() {
super(HASH_NAME);
}
public static int hash(byte[] data, int seed, int offset, int len) {
int m = 0x5bd1e995;
int r = 24;
int h = seed ^ len;
int len_4 = len >> 2;
for (int i = 0; i < len_4; i++) {
int i_4 = offset + (i << 2);
int k = data[i_4 + 3];
k = k << 8;
k = k | (data[i_4 + 2] & 0xff);
k = k << 8;
k = k | (data[i_4 + 1] & 0xff);
k = k << 8;
k = k | (data[i_4 + 0] & 0xff);
k *= m;
k ^= k >>> r;
k *= m;
h *= m;
h ^= k;
}
int len_m = len_4 << 2;
int left = len - len_m;
if (left != 0) {
if (left >= 3) {
h ^= data[offset + len - 3] << 16;
}
if (left >= 2) {
h ^= data[offset + len - 2] << 8;
}
if (left >= 1) {
h ^= data[offset + len - 1];
}
h *= m;
}
h ^= h >>> 13;
h *= m;
h ^= h >>> 15;
return h;
}
/**
* Generates 32 bit hash from byte array with default seed value.
*
* @param data
* byte array to hash
* @param offset
* the start position in the array to hash
* @param len
* length of the array elements to hash
* @return 32 bit hash of the given array
*/
public static final int hash32(final byte[] data, int offset, int len) {
return MurmurHash2.hash(data, 0x9747b28c, offset, len);
}
@Override
public final int hash(BytesRef br) {
return hash32(br.bytes, br.offset, br.length);
}
}
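
To make the offset/length handling concrete, a small sketch of how a caller (such as FuzzySet above) consumes this hash; the demo class, the sample term, and the 1023 mask are illustrative only.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.hash.MurmurHash2;

public class MurmurHash2Demo {
  public static void main(String[] args) {
    BytesRef term = new BytesRef("weber");

    // The BytesRef override above delegates to hash32(bytes, offset, length).
    int h = new MurmurHash2().hash(term);

    // FuzzySet folds a negative hash to positive and ANDs it with an all-ones
    // bloomSize to pick a bit position; 1023 (2^10 - 1) stands in for a real size.
    if (h < 0) {
      h = h * -1;
    }
    int bloomSize = 1023;
    int bitPos = h & bloomSize;
    System.out.println("hash=" + h + " bit position=" + bitPos);
  }
}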

View File

@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Hashing functions loadable via the SPI service
</body>
</html>

View File

@ -18,6 +18,7 @@ org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.bulkvint.BulkVIntPostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.block.BlockPostingsFormat
org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.util.hash.MurmurHash2

View File

@ -290,33 +290,4 @@ public class TestDocument extends LuceneTestCase {
// expected
}
}
public void testBoost() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new StringField("field1", "sometext", Field.Store.YES));
doc.add(new TextField("field2", "sometext", Field.Store.NO));
doc.add(new StringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc); // add an 'ok' document
try {
doc = new Document();
// try to boost with norms omitted
StringField field = new StringField("foo", "baz", Field.Store.NO);
field.setBoost(5.0f);
doc.add(field);
iw.addDocument(doc);
fail("didn't get any exception, boost silently discarded");
} catch (UnsupportedOperationException expected) {
// expected
}
DirectoryReader ir = DirectoryReader.open(iw, false);
assertEquals(1, ir.numDocs());
assertEquals("sometext", ir.document(0).get("field1"));
ir.close();
iw.close();
dir.close();
}
}

View File

@ -0,0 +1,613 @@
package org.apache.lucene.document;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.StringReader;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
// sanity check some basics of fields
public class TestField extends LuceneTestCase {
public void testByteDocValuesField() throws Exception {
ByteDocValuesField field = new ByteDocValuesField("foo", (byte) 5);
trySetBoost(field);
field.setByteValue((byte) 6); // ok
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6, field.numericValue().byteValue());
}
public void testDerefBytesDocValuesField() throws Exception {
DerefBytesDocValuesField field = new DerefBytesDocValuesField("foo", new BytesRef("bar"));
trySetBoost(field);
trySetByteValue(field);
field.setBytesValue("fubar".getBytes("UTF-8"));
field.setBytesValue(new BytesRef("baz"));
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(new BytesRef("baz"), field.binaryValue());
}
public void testDoubleField() throws Exception {
Field fields[] = new Field[] {
new DoubleField("foo", 5d, Field.Store.NO),
new DoubleField("foo", 5d, Field.Store.YES)
};
for (Field field : fields) {
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
field.setDoubleValue(6d); // ok
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6d, field.numericValue().doubleValue(), 0.0d);
}
}
public void testDoubleDocValuesField() throws Exception {
DoubleDocValuesField field = new DoubleDocValuesField("foo", 5d);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
field.setDoubleValue(6d); // ok
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6d, field.numericValue().doubleValue(), 0.0d);
}
public void testFloatDocValuesField() throws Exception {
FloatDocValuesField field = new FloatDocValuesField("foo", 5f);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
field.setFloatValue(6f); // ok
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6f, field.numericValue().floatValue(), 0.0f);
}
public void testFloatField() throws Exception {
Field fields[] = new Field[] {
new FloatField("foo", 5f, Field.Store.NO),
new FloatField("foo", 5f, Field.Store.YES)
};
for (Field field : fields) {
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
field.setFloatValue(6f); // ok
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6f, field.numericValue().floatValue(), 0.0f);
}
}
public void testIntDocValuesField() throws Exception {
IntDocValuesField field = new IntDocValuesField("foo", 5);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
field.setIntValue(6); // ok
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6, field.numericValue().intValue());
}
public void testIntField() throws Exception {
Field fields[] = new Field[] {
new IntField("foo", 5, Field.Store.NO),
new IntField("foo", 5, Field.Store.YES)
};
for (Field field : fields) {
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
field.setIntValue(6); // ok
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6, field.numericValue().intValue());
}
}
public void testLongDocValuesField() throws Exception {
LongDocValuesField field = new LongDocValuesField("foo", 5L);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
field.setLongValue(6); // ok
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6L, field.numericValue().longValue());
}
public void testLongField() throws Exception {
Field fields[] = new Field[] {
new LongField("foo", 5L, Field.Store.NO),
new LongField("foo", 5L, Field.Store.YES)
};
for (Field field : fields) {
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
field.setLongValue(6); // ok
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6L, field.numericValue().longValue());
}
}
public void testPackedLongDocValuesField() throws Exception {
PackedLongDocValuesField field = new PackedLongDocValuesField("foo", 5L);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
field.setLongValue(6); // ok
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(6L, field.numericValue().longValue());
}
public void testShortDocValuesField() throws Exception {
ShortDocValuesField field = new ShortDocValuesField("foo", (short)5);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
field.setShortValue((short) 6); // ok
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals((short)6, field.numericValue().shortValue());
}
public void testSortedBytesDocValuesField() throws Exception {
SortedBytesDocValuesField field = new SortedBytesDocValuesField("foo", new BytesRef("bar"));
trySetBoost(field);
trySetByteValue(field);
field.setBytesValue("fubar".getBytes("UTF-8"));
field.setBytesValue(new BytesRef("baz"));
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(new BytesRef("baz"), field.binaryValue());
}
public void testStraightBytesDocValuesField() throws Exception {
StraightBytesDocValuesField field = new StraightBytesDocValuesField("foo", new BytesRef("bar"));
trySetBoost(field);
trySetByteValue(field);
field.setBytesValue("fubar".getBytes("UTF-8"));
field.setBytesValue(new BytesRef("baz"));
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(new BytesRef("baz"), field.binaryValue());
}
public void testStringField() throws Exception {
Field fields[] = new Field[] {
new StringField("foo", "bar", Field.Store.NO),
new StringField("foo", "bar", Field.Store.YES)
};
for (Field field : fields) {
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
field.setStringValue("baz");
trySetTokenStreamValue(field);
assertEquals("baz", field.stringValue());
}
}
public void testTextFieldString() throws Exception {
Field fields[] = new Field[] {
new TextField("foo", "bar", Field.Store.NO),
new TextField("foo", "bar", Field.Store.YES)
};
for (Field field : fields) {
field.setBoost(5f);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
field.setStringValue("baz");
field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
assertEquals("baz", field.stringValue());
assertEquals(5f, field.boost(), 0f);
}
}
public void testTextFieldReader() throws Exception {
Field field = new TextField("foo", new StringReader("bar"));
field.setBoost(5f);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
field.setReaderValue(new StringReader("foobar"));
trySetShortValue(field);
trySetStringValue(field);
field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
assertNotNull(field.readerValue());
assertEquals(5f, field.boost(), 0f);
}
/* TODO: this is pretty expert and crazy
* see if we can fix it up later
public void testTextFieldTokenStream() throws Exception {
}
*/
public void testStoredFieldBytes() throws Exception {
Field fields[] = new Field[] {
new StoredField("foo", "bar".getBytes("UTF-8")),
new StoredField("foo", "bar".getBytes("UTF-8"), 0, 3),
new StoredField("foo", new BytesRef("bar")),
};
for (Field field : fields) {
trySetBoost(field);
trySetByteValue(field);
field.setBytesValue("baz".getBytes("UTF-8"));
field.setBytesValue(new BytesRef("baz"));
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(new BytesRef("baz"), field.binaryValue());
}
}
public void testStoredFieldString() throws Exception {
Field field = new StoredField("foo", "bar");
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
field.setStringValue("baz");
trySetTokenStreamValue(field);
assertEquals("baz", field.stringValue());
}
public void testStoredFieldInt() throws Exception {
Field field = new StoredField("foo", 1);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
field.setIntValue(5);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(5, field.numericValue().intValue());
}
public void testStoredFieldDouble() throws Exception {
Field field = new StoredField("foo", 1D);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
field.setDoubleValue(5D);
trySetIntValue(field);
trySetFloatValue(field);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(5D, field.numericValue().doubleValue(), 0.0D);
}
public void testStoredFieldFloat() throws Exception {
Field field = new StoredField("foo", 1F);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
field.setFloatValue(5f);
trySetLongValue(field);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(5f, field.numericValue().floatValue(), 0.0f);
}
public void testStoredFieldLong() throws Exception {
Field field = new StoredField("foo", 1L);
trySetBoost(field);
trySetByteValue(field);
trySetBytesValue(field);
trySetBytesRefValue(field);
trySetDoubleValue(field);
trySetIntValue(field);
trySetFloatValue(field);
field.setLongValue(5);
trySetReaderValue(field);
trySetShortValue(field);
trySetStringValue(field);
trySetTokenStreamValue(field);
assertEquals(5L, field.numericValue().longValue());
}
private void trySetByteValue(Field f) {
try {
f.setByteValue((byte) 10);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetBytesValue(Field f) {
try {
f.setBytesValue(new byte[] { 5, 5 });
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetBytesRefValue(Field f) {
try {
f.setBytesValue(new BytesRef("bogus"));
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetDoubleValue(Field f) {
try {
f.setDoubleValue(Double.MAX_VALUE);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetIntValue(Field f) {
try {
f.setIntValue(Integer.MAX_VALUE);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetLongValue(Field f) {
try {
f.setLongValue(Long.MAX_VALUE);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetFloatValue(Field f) {
try {
f.setFloatValue(Float.MAX_VALUE);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetReaderValue(Field f) {
try {
f.setReaderValue(new StringReader("BOO!"));
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetShortValue(Field f) {
try {
f.setShortValue(Short.MAX_VALUE);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetStringValue(Field f) {
try {
f.setStringValue("BOO!");
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetTokenStreamValue(Field f) {
try {
f.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
private void trySetBoost(Field f) {
try {
f.setBoost(5.0f);
fail();
} catch (IllegalArgumentException expected) {
// expected
}
}
}

View File

@ -40,24 +40,41 @@ public class TestByteSlices extends LuceneTestCase {
counters[stream] = 0;
}
int num = atLeast(10000);
int num = atLeast(3000);
for (int iter = 0; iter < num; iter++) {
int stream = random().nextInt(NUM_STREAM);
if (VERBOSE)
int stream;
if (random().nextBoolean()) {
stream = random().nextInt(3);
} else {
stream = random().nextInt(NUM_STREAM);
}
if (VERBOSE) {
System.out.println("write stream=" + stream);
}
if (starts[stream] == -1) {
final int spot = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
starts[stream] = uptos[stream] = spot + pool.byteOffset;
if (VERBOSE)
if (VERBOSE) {
System.out.println(" init to " + starts[stream]);
}
}
writer.init(uptos[stream]);
int numValue = random().nextInt(20);
int numValue;
if (random().nextInt(10) == 3) {
numValue = random().nextInt(100);
} else if (random().nextInt(5) == 3) {
numValue = random().nextInt(3);
} else {
numValue = random().nextInt(20);
}
for(int j=0;j<numValue;j++) {
if (VERBOSE)
if (VERBOSE) {
System.out.println(" write " + (counters[stream]+j));
}
// write some large (incl. negative) ints:
writer.writeVInt(random().nextInt());
writer.writeVInt(counters[stream]+j);

View File

@ -186,7 +186,7 @@ public class TestDoc extends LuceneTestCase {
File file = new File(workDir, fileName);
Document doc = new Document();
InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8");
doc.add(new TextField("contents", is, Field.Store.NO));
doc.add(new TextField("contents", is));
writer.addDocument(doc);
writer.commit();
is.close();

View File

@ -891,9 +891,12 @@ public class TestIndexWriterDelete extends LuceneTestCase {
}
public void testIndexingThenDeleting() throws Exception {
// TODO: move this test to its own class and just @SuppressCodecs?
// TODO: is it enough to just use newFSDirectory?
final String fieldFormat = _TestUtil.getPostingsFormat("field");
assumeFalse("This test cannot run with Memory codec", fieldFormat.equals("Memory"));
assumeFalse("This test cannot run with SimpleText codec", fieldFormat.equals("SimpleText"));
assumeFalse("This test cannot run with Direct codec", fieldFormat.equals("Direct"));
final Random r = random();
Directory dir = newDirectory();
// note this test explicitly disables payloads

View File

@ -1542,4 +1542,73 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
iw.close();
dir.close();
}
public void testBoostOmitNorms() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new StringField("field1", "sometext", Field.Store.YES));
doc.add(new TextField("field2", "sometext", Field.Store.NO));
doc.add(new StringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc); // add an 'ok' document
try {
doc = new Document();
// try to boost with norms omitted
List<IndexableField> list = new ArrayList<IndexableField>();
list.add(new IndexableField() {
@Override
public String name() {
return "foo";
}
@Override
public IndexableFieldType fieldType() {
return StringField.TYPE_NOT_STORED;
}
@Override
public float boost() {
return 5f;
}
@Override
public BytesRef binaryValue() {
return null;
}
@Override
public String stringValue() {
return "baz";
}
@Override
public Reader readerValue() {
return null;
}
@Override
public Number numericValue() {
return null;
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
return null;
}
});
iw.addDocument(list);
fail("didn't get any exception, boost silently discarded");
} catch (UnsupportedOperationException expected) {
// expected
}
DirectoryReader ir = DirectoryReader.open(iw, false);
assertEquals(1, ir.numDocs());
assertEquals("sometext", ir.document(0).get("field1"));
ir.close();
iw.close();
dir.close();
}
}

View File

@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.junit.Test;
@ -48,10 +51,13 @@ public class TestRollingUpdates extends LuceneTestCase {
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;
IndexSearcher s = null;
final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
if (VERBOSE) {
System.out.println("TEST: numUpdates=" + numUpdates);
}
int updateCount = 0;
// TODO: sometimes update ids not in order...
for(int docIter=0;docIter<numUpdates;docIter++) {
final Document doc = docs.nextDoc();
final String myID = ""+id;
@ -60,16 +66,59 @@ public class TestRollingUpdates extends LuceneTestCase {
} else {
id++;
}
if (VERBOSE) {
System.out.println(" docIter=" + docIter + " id=" + id);
}
((Field) doc.getField("docid")).setStringValue(myID);
w.updateDocument(new Term("docid", myID), doc);
Term idTerm = new Term("docid", myID);
final boolean doUpdate;
if (s != null && updateCount < SIZE) {
TopDocs hits = s.search(new TermQuery(idTerm), 1);
assertEquals(1, hits.totalHits);
doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
if (VERBOSE) {
if (doUpdate) {
System.out.println(" tryDeleteDocument failed");
} else {
System.out.println(" tryDeleteDocument succeeded");
}
}
} else {
doUpdate = true;
if (VERBOSE) {
System.out.println(" no searcher: doUpdate=true");
}
}
updateCount++;
if (doUpdate) {
w.updateDocument(idTerm, doc);
} else {
w.addDocument(doc);
}
if (docIter >= SIZE && random().nextInt(50) == 17) {
if (r != null) {
r.close();
}
final boolean applyDeletions = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
}
r = w.getReader(applyDeletions);
if (applyDeletions) {
s = new IndexSearcher(r);
} else {
s = null;
}
assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
updateCount = 0;
}
}

View File

@ -22,6 +22,7 @@ import java.util.Arrays;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@ -189,6 +190,41 @@ public class TestFuzzyQuery extends LuceneTestCase {
directory.close();
}
public void test2() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
addDoc("LANGE", writer);
addDoc("LUETH", writer);
addDoc("PIRSING", writer);
addDoc("RIEGEL", writer);
addDoc("TRZECZIAK", writer);
addDoc("WALKER", writer);
addDoc("WBR", writer);
addDoc("WE", writer);
addDoc("WEB", writer);
addDoc("WEBE", writer);
addDoc("WEBER", writer);
addDoc("WEBERE", writer);
addDoc("WEBREE", writer);
addDoc("WEBEREI", writer);
addDoc("WBRE", writer);
addDoc("WITTKOPF", writer);
addDoc("WOJNAROWSKI", writer);
addDoc("WRICKE", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
//query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(8, hits.length);
reader.close();
directory.close();
}
/**
* MultiTermQuery provides (via attribute) information about which values
* must be competitive to enter the priority queue.

View File

@ -206,7 +206,7 @@ public class TestPositionIncrement extends LuceneTestCase {
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
Document doc = new Document();
doc.add(new TextField("content", new StringReader(
"a a b c d e a f g h i j a b k k"), Field.Store.NO));
"a a b c d e a f g h i j a b k k")));
writer.addDocument(doc);
final IndexReader readerFromWriter = writer.getReader();

View File

@ -77,8 +77,7 @@ public class TestShardSearching extends ShardSearchingTestBase {
System.out.println("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
}
start(_TestUtil.getTempDir("TestShardSearching").toString(),
numNodes,
start(numNodes,
runTimeSec,
maxSearcherAgeSeconds
);

View File

@ -196,7 +196,7 @@ public class TestSort extends LuceneTestCase {
if (data[i][11] != null) doc.add (new StringField ("parser", data[i][11], Field.Store.NO));
for(IndexableField f : doc.getFields()) {
if (!f.fieldType().omitNorms()) {
if (f.fieldType().indexed() && !f.fieldType().omitNorms()) {
((Field) f).setBoost(2.0f);
}
}
@ -239,7 +239,7 @@ public class TestSort extends LuceneTestCase {
doc.add(new SortedBytesDocValuesField("string2", new BytesRef(num2)));
doc.add (new Field ("tracer2", num2, onlyStored));
for(IndexableField f2 : doc.getFields()) {
if (!f2.fieldType().omitNorms()) {
if (f2.fieldType().indexed() && !f2.fieldType().omitNorms()) {
((Field) f2).setBoost(2.0f);
}
}
@ -255,7 +255,7 @@ public class TestSort extends LuceneTestCase {
doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored));
for(IndexableField f2 : doc.getFields()) {
if (!f2.fieldType().omitNorms()) {
if (f2.fieldType().indexed() && !f2.fieldType().omitNorms()) {
((Field) f2).setBoost(2.0f);
}
}

View File

@ -256,7 +256,7 @@ public class TestPayloadSpans extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));
Document doc = new Document();
doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k"), Field.Store.NO));
doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
@ -293,7 +293,7 @@ public class TestPayloadSpans extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));
Document doc = new Document();
doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k"), Field.Store.NO));
doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k")));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher is = newSearcher(reader);
@ -328,7 +328,7 @@ public class TestPayloadSpans extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));
Document doc = new Document();
doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a"), Field.Store.NO));
doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexSearcher is = newSearcher(reader);

View File

@ -1096,13 +1096,6 @@ public class TestFSTs extends LuceneTestCase {
// file, up until a time limit
public void testRealTerms() throws Exception {
// TODO: is this necessary? we use the annotation...
final String defaultFormat = _TestUtil.getPostingsFormat("abracadabra");
if (defaultFormat.equals("SimpleText") || defaultFormat.equals("Memory")) {
// no
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
}
final LineFileDocs docs = new LineFileDocs(random(), true);
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);

View File

@ -0,0 +1,43 @@
package org.apache.lucene.util.junitcompat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
public class TestFailIfDirectoryNotClosed extends WithNestedTests {
public TestFailIfDirectoryNotClosed() {
super(true);
}
public static class Nested1 extends WithNestedTests.AbstractNestedTest {
public void testDummy() {
Directory dir = newDirectory();
System.out.println(dir.toString());
}
}
@Test
public void testFailIfDirectoryNotClosed() {
Result r = JUnitCore.runClasses(Nested1.class);
Assert.assertEquals(1, r.getFailureCount());
}
}

View File

@ -190,7 +190,7 @@ public class IndexFiles {
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in UTF-8 encoding.
// If that's not the case searching for special characters will fail.
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")), Field.Store.NO));
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old document can be there):

View File

@ -440,21 +440,25 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
assertEquals(1, hits.length);
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
// TODO: cannot really be supported given the legacy scoring
// system which scores negative, if the distance > min term len,
// so such matches were always impossible with lucene 3.x, etc
//
//q = new SlowFuzzyQuery(new Term("field", "t"), 3);
//hits = searcher.search(q, 10).scoreDocs;
//assertEquals(1, hits.length);
//assertEquals("test", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
// q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
// hits = searcher.search(q, 10).scoreDocs;
// assertEquals(1, hits.length);
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(2, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
// q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
// hits = searcher.search(q, 10).scoreDocs;
// assertEquals(2, hits.length);
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
// assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
reader.close();
index.close();

View File

@ -0,0 +1,77 @@
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* A class used for testing {@link BloomFilteringPostingsFormat} with a concrete
* delegate (Lucene40). Creates a Bloom filter on ALL fields and with tiny
* amounts of memory reserved for the filter. DO NOT USE IN A PRODUCTION
* APPLICATION. This is not a realistic application of Bloom Filters, as they
* are ordinarily larger and operate only on primary-key-type fields.
*/
public class TestBloomFilteredLucene40Postings extends PostingsFormat {
private BloomFilteringPostingsFormat delegate;
// Special class used to avoid OOM exceptions where Junit tests create many
// fields.
static class LowMemoryBloomFactory extends BloomFilterFactory {
@Override
public FuzzySet getSetForField(SegmentWriteState state,FieldInfo info) {
return FuzzySet.createSetBasedOnMaxMemory(1024, new MurmurHash2());
}
@Override
public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
// For test purposes always maintain the BloomFilter - even past the point
// of usefulness when all bits are set
return false;
}
}
public TestBloomFilteredLucene40Postings() {
super("TestBloomFilteredLucene40Postings");
delegate = new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(),
new LowMemoryBloomFactory());
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
return delegate.fieldsConsumer(state);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return delegate.fieldsProducer(state);
}
}
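
For contrast with this deliberately tiny test configuration, a hedged sketch of a more production-oriented setup: only a primary-key style field gets the bloom filter, with a larger budget. The codec subclass, the "id" field name, and all sizing numbers are assumptions for illustration; the sketch relies on the Lucene40Codec per-field postings hook and the two-argument BloomFilteringPostingsFormat constructor used above.

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilterFactory;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.MurmurHash2;

// Hypothetical codec: bloom-filter only the "id" field, plain Lucene40 elsewhere.
public class BloomIdFieldCodec extends Lucene40Codec {

  static class IdFieldBloomFactory extends BloomFilterFactory {
    @Override
    public FuzzySet getSetForField(SegmentWriteState state, FieldInfo info) {
      // Budget for ~1M unique ids at 10% target saturation (illustrative numbers).
      return FuzzySet.createSetBasedOnQuality(1000000, 0.10f, new MurmurHash2());
    }

    @Override
    public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
      // Stop trusting the filter once most of its bits are set.
      return bloomFilter.getSaturation() > 0.9f;
    }
  }

  private final PostingsFormat idFormat =
      new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(), new IdFieldBloomFactory());
  private final PostingsFormat defaultFormat = new Lucene40PostingsFormat();

  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    return "id".equals(field) ? idFormat : defaultFormat;
  }
}

Such a codec would be handed to IndexWriterConfig.setCodec(...) for indexing rather than registered via SPI.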

View File

@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Support for generating test indexes using the BloomFilteringPostingsFormat
</body>
</html>

View File

@ -29,6 +29,7 @@ import java.util.Set;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds;
@ -98,6 +99,10 @@ public class RandomCodec extends Lucene40Codec {
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
// add pulsing again with (usually) different parameters
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
//TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene40Postings to be constructed
//with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
//with such "wrapper" classes?
new TestBloomFilteredLucene40Postings(),
new MockSepPostingsFormat(),
new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)),
new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)),

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search;
*/
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
@ -27,15 +26,17 @@ import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util._TestUtil;
// TODO
// - doc blocks? so we can test joins/grouping...
@ -423,11 +424,16 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
private volatile ShardIndexSearcher currentShardSearcher;
public NodeState(Random random, String baseDir, int nodeID, int numNodes) throws IOException {
public NodeState(Random random, int nodeID, int numNodes) throws IOException {
myNodeID = nodeID;
dir = newFSDirectory(new File(baseDir + "." + myNodeID));
dir = newFSDirectory(_TestUtil.getTempDir("ShardSearchingTestBase"));
// TODO: set warmer
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
if (VERBOSE) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
}
writer = new IndexWriter(dir, iwc);
mgr = new SearcherManager(writer, true, null);
searchers = new SearcherLifetimeManager();
@ -556,14 +562,14 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
long endTimeNanos;
private Thread changeIndicesThread;
protected void start(String baseDirName, int numNodes, double runTimeSec, int maxSearcherAgeSeconds) throws IOException {
protected void start(int numNodes, double runTimeSec, int maxSearcherAgeSeconds) throws IOException {
endTimeNanos = System.nanoTime() + (long) (runTimeSec*1000000000);
this.maxSearcherAgeSeconds = maxSearcherAgeSeconds;
nodes = new NodeState[numNodes];
for(int nodeID=0;nodeID<numNodes;nodeID++) {
nodes[nodeID] = new NodeState(random(), baseDirName, nodeID, numNodes);
nodes[nodeID] = new NodeState(random(), nodeID, numNodes);
}
long[] nodeVersions = new long[nodes.length];

View File

@ -31,8 +31,6 @@ import org.apache.lucene.util._TestUtil;
public class BaseDirectoryWrapper extends Directory {
/** our in directory */
protected final Directory delegate;
/** best effort: base on in Directory is volatile */
protected boolean open;
private boolean checkIndexOnClose = true;
private boolean crossCheckTermVectorsOnClose = true;
@ -43,7 +41,7 @@ public class BaseDirectoryWrapper extends Directory {
@Override
public void close() throws IOException {
open = false;
isOpen = false;
if (checkIndexOnClose && indexPossiblyExists()) {
_TestUtil.checkIndex(this, crossCheckTermVectorsOnClose);
}
@ -51,7 +49,7 @@ public class BaseDirectoryWrapper extends Directory {
}
public boolean isOpen() {
return open;
return isOpen;
}
/**

View File

@ -551,7 +551,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
if (noDeleteOpenFile && openLocks.size() > 0) {
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks);
}
open = false;
isOpen = false;
if (getCheckIndexOnClose()) {
if (indexPossiblyExists()) {
if (LuceneTestCase.VERBOSE) {
@ -614,11 +614,6 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
public synchronized void removeIndexInput(IndexInput in, String name) {
removeOpenFile(in, name);
}
@Override
public synchronized boolean isOpen() {
return open;
}
/**
* Objects that represent fail-able conditions. Objects of a derived

View File

@ -20,5 +20,6 @@ org.apache.lucene.codecs.mocksep.MockSepPostingsFormat
org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings
org.apache.lucene.codecs.asserting.AssertingPostingsFormat

View File

@ -32,7 +32,7 @@ Apache Tika 1.1
Carrot2 3.5.0
Velocity 1.6.4 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.3.5
Apache ZooKeeper 3.3.6
Upgrading from Solr 4.0.0-ALPHA
----------------------
@ -134,7 +134,7 @@ Bug Fixes
* SOLR-1781: Replication index directories not always cleaned up.
(Markus Jelsma, Terje Sten Bjerkseth, Mark Miller)
* SOLR-3639: Update ZooKeeper to 3.3.5 for a variety of bug fixes. (Mark Miller)
* SOLR-3639: Update ZooKeeper to 3.3.6 for a variety of bug fixes. (Mark Miller)
* SOLR-3629: Typo in solr.xml persistence when overriding the solrconfig.xml
file name using the "config" attribute prevented the override file from being
@ -173,6 +173,9 @@ Bug Fixes
* SOLR-3677: Fixed misleading error message in web UI to distinguish between
no SolrCores loaded vs. no /admin/ handler available.
(hossman, steffkes)
* SOLR-3428: SolrCmdDistributor flushAdds/flushDeletes can cause repeated
adds/deletes to be sent (Mark Miller, Per Steffensen)
Other Changes
----------------------
@ -207,6 +210,14 @@ Other Changes
* SOLR-3682: Fail to parse schema.xml if uniqueKeyField is multivalued (hossman)
* SOLR-2115: DIH no longer requires the "config" parameter to be specified in solrconfig.xml.
Instead, the configuration is loaded and parsed with every import. This allows the use of
a different configuration with each import, and makes correcting configuration errors simpler.
Also, the configuration itself can be passed using the "dataConfig" parameter rather than
using a file (this previously worked in debug mode only). When configuration errors are
encountered, the error message is returned in XML format. (James Dyer)
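To make the SOLR-2115 entry above concrete, here is a hedged SolrJ sketch of supplying the configuration with the request through the "dataConfig" parameter. The core URL, the "qt" routing to the /dataimport handler, and the inline config body are assumptions made for the example, not details taken from this commit.

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.params.ModifiableSolrParams;

public class InlineDataConfigSketch {
  public static void main(String[] args) throws Exception {
    SolrServer solr = new HttpSolrServer("http://localhost:8983/solr/collection1"); // placeholder URL

    // A trivial inline data-config; a real one would point at real datasources and entities.
    String dataConfig =
        "<dataConfig>"
      + "  <dataSource type=\"JdbcDataSource\" driver=\"org.hsqldb.jdbcDriver\" url=\"jdbc:hsqldb:./example\"/>"
      + "  <document><entity name=\"item\" query=\"select id, name from item\"/></document>"
      + "</dataConfig>";

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("qt", "/dataimport");       // assumes the request is routed to the DIH handler this way
    params.set("command", "full-import");
    params.set("dataConfig", dataConfig);  // configuration passed with the request (SOLR-2115)
    solr.query(params);
  }
}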
================== 4.0.0-ALPHA ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@ -709,6 +720,13 @@ Bug Fixes
* SOLR-3470: contrib/clustering: custom Carrot2 tokenizer and stemmer factories
are respected now (Stanislaw Osinski, Dawid Weiss)
* SOLR-3430: Added a new DIH test against a real SQL database. Fixed problems
revealed by this new test related to the expanded cache support added to
3.6/SOLR-2382 (James Dyer)
* SOLR-1958: When using the MailEntityProcessor, import would fail if
fetchMailsSince was not specified. (Max Lynch via James Dyer)
Other Changes
----------------------
@ -862,7 +880,13 @@ Other Changes
* SOLR-3534: The Dismax and eDismax query parsers will fall back on the 'df' parameter
when 'qf' is absent. And if neither is present nor the schema default search field
then an exception will be thrown now. (dsmiley)
* SOLR-3262: The "threads" feature of DIH is removed (deprecated in Solr 3.6)
(James Dyer)
* SOLR-3422: Refactored DIH internal data classes. All entities in
data-config.xml must have a name (James Dyer)
Documentation
----------------------
@ -898,6 +922,17 @@ Bug Fixes:
* SOLR-3470: contrib/clustering: custom Carrot2 tokenizer and stemmer factories
are respected now (Stanislaw Osinski, Dawid Weiss)
* SOLR-3360: More DIH bug fixes for the deprecated "threads" parameter.
(Mikhail Khludnev, Claudio R, via James Dyer)
* SOLR-3430: Added a new DIH test against a real SQL database. Fixed problems
revealed by this new test related to the expanded cache support added to
3.6/SOLR-2382 (James Dyer)
* SOLR-3336: SolrEntityProcessor substitutes most variables at query time.
(Michael Kroh, Lance Norskog, via Martijn van Groningen)
================== 3.6.0 ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@ -1050,6 +1085,27 @@ New Features
auto detector cannot detect encoding, especially when the text file is too short
to detect encoding. (koji)
* SOLR-1499: Added SolrEntityProcessor that imports data from another Solr core
or instance based on a specified query.
(Lance Norskog, Erik Hatcher, Pulkit Singhal, Ahmet Arslan, Luca Cavanna,
Martijn van Groningen)
* SOLR-3190: Minor improvements to SolrEntityProcessor. Add more consistency
between solr parameters and parameters used in SolrEntityProcessor and
ability to specify a custom HttpClient instance.
(Luca Cavanna via Martijn van Groningen)
* SOLR-2382: Added pluggable cache support to DIH so that any Entity can be
made cache-able by adding the "cacheImpl" parameter. Include
"SortedMapBackedCache" to provide in-memory caching (as previously this was
the only option when using CachedSqlEntityProcessor). Users can provide
their own implementations of DIHCache for other caching strategies.
Deprecate CachedSqlEntityProcessor in favor of specifying "cacheImpl" with
SqlEntityProcessor. Make SolrWriter implement DIHWriter and allow the
possibility of pluggable Writers (DIH writing to something other than Solr).
(James Dyer, Noble Paul)
Optimizations
----------------------
* SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter
@ -1296,6 +1352,10 @@ Other Changes
extracting request handler and are willing to use java 6, just add the jar.
(rmuir)
* SOLR-3142: DIH Imports no longer default optimize to true, instead false.
If you want to force all segments to be merged into one, you can specify
this parameter yourself. NOTE: this can be a very expensive operation and
usually does not make sense for delta-imports. (Robert Muir)
Build
----------------------
@ -1393,6 +1453,9 @@ Bug Fixes
a wrong number of collation results in the response.
(Bastiaan Verhoef, James Dyer via Simon Willnauer)
* SOLR-2875: Fix the incorrect url in DIH example tika-data-config.xml
(Shinichiro Abe via koji)
Other Changes
----------------------
@ -1585,6 +1648,24 @@ Bug Fixes
* SOLR-2692: contrib/clustering: Typo in param name fixed: "carrot.fragzise"
changed to "carrot.fragSize" (Stanislaw Osinski).
* SOLR-2644: When using DIH with threads=2 the default logging is set too high
(Bill Bell via shalin)
* SOLR-2492: DIH does not commit if only deletes are processed
(James Dyer via shalin)
* SOLR-2186: DataImportHandler's multi-threaded option throws NPE
(Lance Norskog, Frank Wesemann, shalin)
* SOLR-2655: DIH multi threaded mode does not resolve attributes correctly
(Frank Wesemann, shalin)
* SOLR-2695: DIH: Documents are collected in unsynchronized list in
multi-threaded debug mode (Michael McCandless, shalin)
* SOLR-2668: DIH multithreaded mode does not rollback on errors from
EntityProcessor (Frank Wesemann, shalin)
Other Changes
----------------------
@ -1697,6 +1778,9 @@ Bug Fixes
* SOLR-2581: UIMAToSolrMapper wrongly instantiates Type with reflection.
(Tommaso Teofili via koji)
* SOLR-2551: Check dataimport.properties for write access (if delta-import is
supported in DIH configuration) before starting an import (C S, shalin)
Other Changes
----------------------
@ -2141,6 +2225,30 @@ New Features
* SOLR-2237: Added StempelPolishStemFilterFactory to contrib/analysis-extras (rmuir)
* SOLR-1525: allow DIH to refer to core properties (noble)
* SOLR-1547: DIH TemplateTransformer copy objects more intelligently when the
template is a single variable (noble)
* SOLR-1627: DIH VariableResolver should be fetched just in time (noble)
* SOLR-1583: DIH Create DataSources that return InputStream (noble)
* SOLR-1358: Integration of Tika and DataImportHandler (Akshay Ukey, noble)
* SOLR-1654: TikaEntityProcessor example added DIHExample
(Akshay Ukey via noble)
* SOLR-1678: Move onError handling to DIH framework (noble)
* SOLR-1352: Multi-threaded implementation of DIH (noble)
* SOLR-1721: Add explicit option to run DataImportHandler in synchronous mode
(Alexey Serba via noble)
* SOLR-1737: Added FieldStreamDataSource (noble)
Optimizations
----------------------
@ -2166,6 +2274,9 @@ Optimizations
SolrIndexSearcher.doc(int, Set<String>) method b/c it can use the document
cache (gsingers)
* SOLR-2200: Improve the performance of DataImportHandler for large
delta-import updates. (Mark Waddle via rmuir)
Bug Fixes
----------------------
* SOLR-1769: Solr 1.4 Replication - Repeater throwing NullPointerException (Jörgen Rydenius via noble)
@ -2428,6 +2539,61 @@ Bug Fixes
does not properly use the same iterator instance.
(Christoph Brill, Mark Miller)
* SOLR-1638: Fixed NullPointerException during DIH import if uniqueKey is not
specified in schema (Akshay Ukey via shalin)
* SOLR-1639: Fixed misleading error message when dataimport.properties is not
writable (shalin)
* SOLR-1598: DIH: Reader used in PlainTextEntityProcessor is not explicitly
closed (Sascha Szott via noble)
* SOLR-1759: DIH: $skipDoc was not working correctly
(Gian Marco Tagliani via noble)
* SOLR-1762: DIH: DateFormatTransformer does not work correctly with
non-default locale dates (tommy chheng via noble)
* SOLR-1757: DIH multithreading sometimes throws NPE (noble)
* SOLR-1766: DIH with threads enabled doesn't respond to the abort command
(Michael Henson via noble)
* SOLR-1767: dataimporter.functions.escapeSql() does not escape backslash
character (Sean Timm via noble)
* SOLR-1811: formatDate should use the current NOW value always
(Sean Timm via noble)
* SOLR-1794: Dataimport of CLOB fields fails when getCharacterStream() is
defined in a superclass. (Gunnar Gauslaa Bergem via rmuir)
* SOLR-2057: DataImportHandler never calls UpdateRequestProcessor.finish()
(Drew Farris via koji)
* SOLR-1973: Empty fields in XML update messages confuse DataImportHandler.
(koji)
* SOLR-2221: Use StrUtils.parseBool() to get values of boolean options in DIH.
true/on/yes (for TRUE) and false/off/no (for FALSE) can be used for
sub-options (debug, verbose, synchronous, commit, clean, optimize) for
full/delta-import commands. (koji) (see the parseBool sketch after this list)
* SOLR-2310: DIH: getTimeElapsedSince() returns incorrect hour value when
the elapse is over 60 hours (tom liu via koji)
* SOLR-2252: DIH: When a child entity in nested entities is rootEntity="true",
delta-import doesn't work. (koji)
* SOLR-2330: solrconfig.xml files in example-DIH are broken. (Matt Parker, koji)
* SOLR-1191: resolve DataImportHandler deltaQuery column against pk when pk
has a prefix (e.g. pk="book.id" deltaQuery="select id from ..."). More
useful error reporting when no match found (previously failed with a
NullPointerException in log and no clear user feedback). (gthb via yonik)
* SOLR-2116: Fix TikaConfig classloader bug in TikaEntityProcessor
(Martijn van Groningen via hossman)
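The SOLR-2221 entry earlier in this list points at StrUtils.parseBool; the two-argument overload also appears further down in this commit, in DataImportHandler.init. A small sketch of how the accepted spellings parse; the sample values are only for illustration.

import org.apache.solr.common.util.StrUtils;

public class ParseBoolSketch {
  public static void main(String[] args) {
    // Accepted spellings for DIH boolean sub-options (debug, verbose, commit, clean, optimize, ...):
    // true/on/yes parse to true, false/off/no parse to false.
    System.out.println(StrUtils.parseBool("on", false));  // true
    System.out.println(StrUtils.parseBool("no", true));   // false
    System.out.println(StrUtils.parseBool(null, true));   // option absent: the supplied default (true) is returned
  }
}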
Other Changes
----------------------
@ -2561,6 +2727,12 @@ Other Changes
* SOLR-1813: Add ICU4j to contrib/extraction libs and add tests for Arabic
extraction (Robert Muir via gsingers)
* SOLR-1821: Fix TimeZone-dependent test failure in TestEvaluatorBag.
(Chris Male via rmuir)
* SOLR-2367: Reduced noise in test output by ensuring the properties file
can be written. (Gunnlaugur Thor Briem via rmuir)
Build
----------------------
@ -2645,6 +2817,33 @@ error. See SOLR-1410 for more information.
* RussianLowerCaseFilterFactory
* RussianLetterTokenizerFactory
DIH: Evaluator API has been changed in a non back-compatible way. Users who
have developed custom Evaluators will need to change their code according to
the new API for it to work. See SOLR-996 for details.
DIH: The formatDate evaluator's syntax has been changed. The new syntax is
formatDate(<variable>, '<format_string>'). For example,
formatDate(x.date, 'yyyy-MM-dd'). In the old syntax, the date string was
written without a single-quotes. The old syntax has been deprecated and will
be removed in 1.5; until then, using the old syntax will log a warning.
DIH: The Context API has been changed in a non back-compatible way. In
particular, the Context.currentProcess() method now returns a String
describing the type of the current import process instead of an int.
Similarly, the public constants in Context viz. FULL_DUMP, DELTA_DUMP and
FIND_DELTA are changed to a String type. See SOLR-969 for details.
DIH: The EntityProcessor API has been simplified by moving logic for applying
transformers and handling multi-row outputs from Transformers into an
EntityProcessorWrapper class. The EntityProcessor#destroy is now called once
per parent-row at the end of row (end of data). A new method
EntityProcessor#close is added which is called at the end of import.
DIH: In Solr 1.3, if the last_index_time was not available (first import) and
a delta-import was requested, a full-import was run instead. This is no longer
the case. In Solr 1.4 delta import is run with last_index_time as the epoch
date (January 1, 1970, 00:00:00 GMT) if last_index_time is not available.
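As a hedged illustration of the Context note above, a custom transformer can now compare Context.currentProcess() against the String constants directly. The class name and the extra column it adds are invented for the example.

import java.util.Map;

import org.apache.solr.handler.dataimport.Context;
import org.apache.solr.handler.dataimport.Transformer;

// Hypothetical transformer that records which kind of import produced a row.
public class ImportModeTransformer extends Transformer {
  @Override
  public Object transformRow(Map<String, Object> row, Context context) {
    // Since SOLR-969, currentProcess() returns a String, so compare against the String constants.
    if (Context.FULL_DUMP.equals(context.currentProcess())) {
      row.put("import_mode", "full");    // hypothetical extra column
    } else if (Context.DELTA_DUMP.equals(context.currentProcess())) {
      row.put("import_mode", "delta");
    }
    return row;
  }
}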
Versions of Major Components
----------------------------
Apache Lucene 2.9.1 (r832363 on 2.9 branch)
@ -2936,6 +3135,141 @@ New Features
86. SOLR-1274: Added text serialization output for extractOnly
(Peter Wolanin, gsingers)
87. SOLR-768: DIH: Set last_index_time variable in full-import command.
(Wojtek Piaseczny, Noble Paul via shalin)
88. SOLR-811: Allow a "deltaImportQuery" attribute in SqlEntityProcessor
which is used for delta imports instead of DataImportHandler manipulating
the SQL itself. (Noble Paul via shalin)
89. SOLR-842: Better error handling in DataImportHandler with options to
abort, skip and continue imports. (Noble Paul, shalin)
90. SOLR-833: DIH: A DataSource to read data from a field as a reader. This
can be used, for example, to read XMLs residing as CLOBs or BLOBs in
databases. (Noble Paul via shalin)
91. SOLR-887: A DIH Transformer to strip HTML tags. (Ahmed Hammad via shalin)
92. SOLR-886: DataImportHandler should rollback when an import fails or it is
aborted (shalin)
93. SOLR-891: A DIH Transformer to read strings from Clob type.
(Noble Paul via shalin)
94. SOLR-812: Configurable JDBC settings in JdbcDataSource including optimized
defaults for read only mode. (David Smiley, Glen Newton, shalin)
95. SOLR-910: Add a few utility commands to the DIH admin page such as full
import, delta import, status, reload config. (Ahmed Hammad via shalin)
96. SOLR-938: Add event listener API for DIH import start and end.
(Kay Kay, Noble Paul via shalin)
97. SOLR-801: DIH: Add support for configurable pre-import and post-import
delete query per root-entity. (Noble Paul via shalin)
98. SOLR-988: Add a new scope for session data stored in Context to store
objects across imports. (Noble Paul via shalin)
99. SOLR-980: A PlainTextEntityProcessor which can read from any
DataSource<Reader> and output a String.
(Nathan Adams, Noble Paul via shalin)
100.SOLR-1003: XPathEntityprocessor must allow slurping all text from a given
xml node and its children. (Noble Paul via shalin)
101.SOLR-1001: Allow variables in various attributes of RegexTransformer,
HTMLStripTransformer and NumberFormatTransformer.
(Fergus McMenemie, Noble Paul, shalin)
102.SOLR-989: DIH: Expose running statistics from the Context API.
(Noble Paul, shalin)
103.SOLR-996: DIH: Expose Context to Evaluators. (Noble Paul, shalin)
104.SOLR-783: DIH: Enhance delta-imports by maintaining separate
last_index_time for each entity. (Jon Baer, Noble Paul via shalin)
105.SOLR-1033: Current entity's namespace is made available to all DIH
Transformers. This allows one to use an output field of TemplateTransformer
in other transformers, among other things.
(Fergus McMenemie, Noble Paul via shalin)
106.SOLR-1066: New methods in DIH Context to expose Script details.
ScriptTransformer changed to read scripts through the new API methods.
(Noble Paul via shalin)
107.SOLR-1062: A DIH LogTransformer which can log data in a given template
format. (Jon Baer, Noble Paul via shalin)
108.SOLR-1065: A DIH ContentStreamDataSource which can accept HTTP POST data
in a content stream. This can be used to push data to Solr instead of
just pulling it from DB/Files/URLs. (Noble Paul via shalin)
109.SOLR-1061: Improve DIH RegexTransformer to create multiple columns from
regex groups. (Noble Paul via shalin)
110.SOLR-1059: Special DIH flags introduced for deleting documents by query or
id, skipping rows and stopping further transforms. Use $deleteDocById,
$deleteDocByQuery for deleting by id and query respectively. Use $skipRow
to skip the current row but continue with the document. Use $stopTransform
to stop further transformers. New methods are introduced in Context for
deleting by id and query. (Noble Paul, Fergus McMenemie, shalin) (see the transformer sketch after this list)
111.SOLR-1076: JdbcDataSource should resolve DIH variables in all its
configuration parameters. (shalin)
112.SOLR-1055: Make DIH JdbcDataSource easily extensible by making the
createConnectionFactory method protected and return a
Callable<Connection> object. (Noble Paul, shalin)
113.SOLR-1058: DIH: JdbcDataSource can lookup javax.sql.DataSource using JNDI.
Use a jndiName attribute to specify the location of the data source.
(Jason Shepherd, Noble Paul via shalin)
114.SOLR-1083: A DIH Evaluator for escaping query characters.
(Noble Paul, shalin)
115.SOLR-934: A MailEntityProcessor to enable indexing mails from
POP/IMAP sources into a solr index. (Preetam Rao, shalin)
116.SOLR-1060: A DIH LineEntityProcessor which can stream lines of text from a
given file to be indexed directly or for processing with transformers and
child entities.
(Fergus McMenemie, Noble Paul, shalin)
117.SOLR-1127: Add support for DIH field name to be templatized.
(Noble Paul, shalin)
118.SOLR-1092: Added a new DIH command named 'import' which does not
automatically clean the index. This is useful and more appropriate when one
needs to import only some of the entities.
(Noble Paul via shalin)
119.SOLR-1153: DIH 'deltaImportQuery' is honored on child entities as well
(noble)
120.SOLR-1230: Enhanced dataimport.jsp to work with all DataImportHandler
request handler configurations, rather than just a hardcoded /dataimport
handler. (ehatcher)
121.SOLR-1235: disallow period (.) in DIH entity names (noble)
122.SOLR-1234: Multiple DIH does not work because all of them write to
dataimport.properties. Use the handler name as the properties file name
(noble)
123.SOLR-1348: Support binary field type in convertType logic in DIH
JdbcDataSource (shalin)
124.SOLR-1406: DIH: Make FileDataSource and FileListEntityProcessor to be more
extensible (Luke Forehand, shalin)
125.SOLR-1437: DIH: XPathEntityProcessor can deal with xpath syntaxes such as
//tagname , /root//tagname (Fergus McMenemie via noble)
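A hedged sketch for the SOLR-1059 flags referenced earlier in this list: a transformer that sets $skipRow (and, for deleted source rows, $deleteDocById) in the row map. The "status" and "id" columns and the condition are invented for the example.

import java.util.Map;

import org.apache.solr.handler.dataimport.Context;
import org.apache.solr.handler.dataimport.Transformer;

// Hypothetical transformer: drop obsolete rows and ask DIH to delete their documents.
public class ObsoleteRowTransformer extends Transformer {
  @Override
  public Object transformRow(Map<String, Object> row, Context context) {
    if ("deleted".equals(row.get("status"))) {       // "status" is an assumed source column
      row.put("$deleteDocById", row.get("id"));      // delete the previously indexed document
      row.put("$skipRow", "true");                   // skip this row but continue with the document
    }
    return row;
  }
}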
Optimizations
----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
@ -2993,6 +3327,21 @@ Optimizations
17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory. Enables
setting termIndexInterval to IndexWriter via SolrIndexConfig. (Jason Rutherglen, hossman, gsingers)
18. SOLR-846: DIH: Reduce memory consumption during delta import by removing
keys when used (Ricky Leung, Noble Paul via shalin)
19. SOLR-974: DataImportHandler skips commit if no data has been updated.
(Wojtek Piaseczny, shalin)
20. SOLR-1004: DIH: Check for abort more frequently during delta-imports.
(Marc Sturlese, shalin)
21. SOLR-1098: DIH DateFormatTransformer can cache the format objects.
(Noble Paul via shalin)
22. SOLR-1465: Replaced string concatenations with StringBuilder append
calls in DIH XPathRecordReader. (Mark Miller, shalin)
Bug Fixes
----------------------
1. SOLR-774: Fixed logging level display (Sean Timm via Otis Gospodnetic)
@ -3210,6 +3559,103 @@ Bug Fixes
caused an error to be returned, although the deletes were
still executed. (asmodean via yonik)
76. SOLR-800: Deep copy collections to avoid ConcurrentModificationException
in XPathEntityprocessor while streaming
(Kyle Morrison, Noble Paul via shalin)
77. SOLR-823: Request parameter variables ${dataimporter.request.xxx} are not
resolved in DIH (Mck SembWever, Noble Paul, shalin)
78. SOLR-728: Add synchronization to avoid race condition of multiple DIH
imports working concurrently (Walter Ferrara, shalin)
79. SOLR-742: Add ability to create dynamic fields with custom
DataImportHandler transformers (Wojtek Piaseczny, Noble Paul, shalin)
80. SOLR-832: Rows parameter is not honored in DIH non-debug mode and can
abort a running import in debug mode. (Akshay Ukey, shalin)
81. SOLR-838: The DIH VariableResolver obtained from a DataSource's context
does not have current data. (Noble Paul via shalin)
82. SOLR-864: DataImportHandler does not catch and log Errors (shalin)
83. SOLR-873: Fix case-sensitive field names and columns (Jon Baer, shalin)
84. SOLR-893: Unable to delete documents via SQL and deletedPkQuery with
deltaimport (Dan Rosher via shalin)
85. SOLR-888: DIH DateFormatTransformer cannot convert non-string type
(Amit Nithian via shalin)
86. SOLR-841: DataImportHandler should throw exception if a field does not
have column attribute (Michael Henson, shalin)
87. SOLR-884: CachedSqlEntityProcessor should check if the cache key is
present in the query results (Noble Paul via shalin)
88. SOLR-985: Fix thread-safety issue with DIH TemplateString for concurrent
imports with multiple cores. (Ryuuichi Kumai via shalin)
89. SOLR-999: DIH XPathRecordReader fails on XMLs with nodes mixed with
CDATA content. (Fergus McMenemie, Noble Paul via shalin)
90. SOLR-1000: DIH FileListEntityProcessor should not apply fileName filter to
directory names. (Fergus McMenemie via shalin)
91. SOLR-1009: Repeated column names result in duplicate values.
(Fergus McMenemie, Noble Paul via shalin)
92. SOLR-1017: Fix DIH thread-safety issue with last_index_time for concurrent
imports in multiple cores due to unsafe usage of SimpleDateFormat by
multiple threads. (Ryuuichi Kumai via shalin)
93. SOLR-1024: Calling abort on DataImportHandler import commits data instead
of calling rollback. (shalin)
94. SOLR-1037: DIH should not add null values in a row returned by
EntityProcessor to documents. (shalin)
95. SOLR-1040: DIH XPathEntityProcessor fails with an xpath like
/feed/entry/link[@type='text/html']/@href (Noble Paul via shalin)
96. SOLR-1042: Fix memory leak in DIH by making TemplateString non-static
member in VariableResolverImpl (Ryuuichi Kumai via shalin)
97. SOLR-1053: IndexOutOfBoundsException in DIH SolrWriter.getResourceAsString
when size of data-config.xml is a multiple of 1024 bytes.
(Herb Jiang via shalin)
98. SOLR-1077: IndexOutOfBoundsException with useSolrAddSchema in DIH
XPathEntityProcessor. (Sam Keen, Noble Paul via shalin)
99. SOLR-1080: DIH RegexTransformer should not replace if regex is not matched.
(Noble Paul, Fergus McMenemie via shalin)
100.SOLR-1090: DataImportHandler should load the data-config.xml using UTF-8
encoding. (Rui Pereira, shalin)
101.SOLR-1146: ConcurrentModificationException in DataImporter.getStatusMessages
(Walter Ferrara, Noble Paul via shalin)
102.SOLR-1229: Fixes for DIH deletedPkQuery, particularly when using
transformed Solr unique id's
(Lance Norskog, Noble Paul via ehatcher)
103.SOLR-1286: Fix the IH commit parameter always defaulting to "true" even
if "false" is explicitly passed in. (Jay Hill, Noble Paul via ehatcher)
104.SOLR-1323: Reset XPathEntityProcessor's $hasMore/$nextUrl when fetching
next URL (noble, ehatcher)
105.SOLR-1450: DIH: Jdbc connection properties such as batchSize are not
applied if the driver jar is placed in solr_home/lib.
(Steve Sun via shalin)
106.SOLR-1474: DIH Delta-import should run even if last_index_time is not set.
(shalin)
Other Changes
----------------------
1. Upgraded to Lucene 2.4.0 (yonik)
@ -3357,6 +3803,55 @@ Other Changes
for discussion on language detection.
See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. (gsingers)
53. SOLR-782: DIH: Refactored SolrWriter to make it a concrete class and
removed wrappers over SolrInputDocument. Refactored to load Evaluators
lazily. Removed multiple document nodes in the configuration xml. Removed
support for 'default' variables, they are automatically available as
request parameters. (Noble Paul via shalin)
54. SOLR-964: DIH: XPathEntityProcessor now ignores DTD validations
(Fergus McMenemie, Noble Paul via shalin)
55. SOLR-1029: DIH: Standardize Evaluator parameter parsing and added helper
functions for parsing all evaluator parameters in a standard way.
(Noble Paul, shalin)
56. SOLR-1081: Change DIH EventListener to be an interface so that components
such as an EntityProcessor or a Transformer can act as an event listener.
(Noble Paul, shalin)
57. SOLR-1027: DIH: Alias the 'dataimporter' namespace to a shorter name 'dih'.
(Noble Paul via shalin)
58. SOLR-1084: Better error reporting when DIH entity name is a reserved word
and data-config.xml root node is not <dataConfig>.
(Noble Paul via shalin)
59. SOLR-1087: Deprecate 'where' attribute in CachedSqlEntityProcessor in
favor of cacheKey and cacheLookup. (Noble Paul via shalin)
60. SOLR-969: Change the FULL_DUMP, DELTA_DUMP, FIND_DELTA constants in DIH
Context to String. Change Context.currentProcess() to return a string
instead of an integer. (Kay Kay, Noble Paul, shalin)
61. SOLR-1120: Simplified DIH EntityProcessor API by moving logic for applying
transformers and handling multi-row outputs from Transformers into an
EntityProcessorWrapper class. The behavior of the method
EntityProcessor#destroy has been modified to be called once per parent-row
at the end of row. A new method EntityProcessor#close is added which is
called at the end of import. A new method
Context#getResolvedEntityAttribute is added which returns the resolved
value of an entity's attribute. Introduced a DocWrapper which takes care
of maintaining document level session variables.
(Noble Paul, shalin)
62. SOLR-1265: Add DIH variable resolving for URLDataSource properties like
baseUrl. (Chris Eldredge via ehatcher)
63. SOLR-1269: Better error messages from DIH JdbcDataSource when JDBC Driver
name or SQL is incorrect. (ehatcher, shalin)
Build
----------------------
1. SOLR-776: Added in ability to sign artifacts via Ant for releases (gsingers)
@ -3382,6 +3877,10 @@ Documentation
3. SOLR-1409: Added Solr Powered By Logos
4. SOLR-1369: Add HSQLDB Jar to example-DIH, unzip database and update
instructions.
================== Release 1.3.0 ==================
Upgrading from Solr 1.2
@ -3727,7 +4226,10 @@ New Features
71. SOLR-1129 : Support binding dynamic fields to beans in SolrJ (Avlesh Singh , noble)
72. SOLR-920 : Cache and reuse IndexSchema . A new attribute added in solr.xml called 'shareSchema' (noble)
73. SOLR-700: DIH: Allow configurable locales through a locale attribute in
fields for NumberFormatTransformer. (Stefan Oestreicher, shalin)
Changes in runtime behavior
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
removes the maxBufferedDeletes parameter added by SOLR-310 as Lucene
@ -3942,6 +4444,18 @@ Bug Fixes
50. SOLR-749: Allow QParser and ValueSourceParsers to be extended with same name (hossman, gsingers)
51. SOLR-704: DIH NumberFormatTransformer can silently ignore part of the
string while parsing. Now it tries to use the complete string for parsing.
Failure to do so will result in an exception.
(Stefan Oestreicher via shalin)
52. SOLR-729: DIH Context.getDataSource(String) gives current entity's
DataSource instance regardless of argument. (Noble Paul, shalin)
53. SOLR-726: DIH: Jdbc Drivers and DataSources fail to load if placed in
multicore sharedLib or core's lib directory.
(Walter Ferrara, Noble Paul, shalin)
Other Changes
1. SOLR-135: Moved common classes to org.apache.solr.common and altered the
build scripts to make two jars: apache-solr-1.3.jar and

View File

@ -402,11 +402,11 @@
prefix="${fullnamever}"
includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/**
client/README.txt client/ruby/solr-ruby/** contrib/**/lib/**
contrib/**/README.txt contrib/**/CHANGES.txt"
contrib/**/README.txt licenses/**"
excludes="lib/README.committers.txt **/data/ **/logs/*
**/classes/ **/*.sh **/ivy.xml **/build.xml
**/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml
**/*pom.xml.template **/*.sha1" />
**/*pom.xml.template" />
<tarfileset dir="${dest}/contrib-lucene-libs-to-package"
prefix="${fullnamever}"
includes="**" />
@ -763,4 +763,8 @@
</delete>
</target>
<target name="jar-checksums" depends="clean-jars,resolve">
<jar-checksum-macro srcdir="${common-solr.dir}" dstdir="${common-solr.dir}/licenses"/>
</target>
</project>

View File

@ -7,6 +7,7 @@ rm -r -f example2
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/collection1/data
rm -f example/example.log
ant example dist

View File

@ -9,6 +9,7 @@ rm -r -f example4
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/collection1/data
rm -f example/example.log
ant example dist

View File

@ -9,6 +9,7 @@ rm -r -f example4
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/collection1/data
rm -f example/example.log
ant example dist

View File

@ -13,7 +13,7 @@ rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -r -f example/solr/collection1/data
rm -f example/example.log
ant example dist

View File

@ -13,7 +13,7 @@ rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -r -f example/solr/collection1/data
rm -f example/example.log
ant example dist

View File

@ -2,9 +2,6 @@
cd ..
rm -r -f dist
rm -r -f build
cd example
java -DzkRun -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar 1>example.log 2>&1 &

View File

@ -11,7 +11,7 @@ rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -r -f example/solr/collection1/data
rm -f example/example.log
ant example dist

View File

@ -1,547 +0,0 @@
Apache Solr - DataImportHandler
Release Notes
Introduction
------------
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
HTTP data sources quick and easy.
$Id$
================== 5.0.0 ==============
(No changes)
================== 4.0.0-ALPHA ==============
Bug Fixes
----------------------
* SOLR-3430: Added a new test against a real SQL database. Fixed problems revealed by this new test
related to the expanded cache support added to 3.6/SOLR-2382 (James Dyer)
* SOLR-1958: When using the MailEntityProcessor, import would fail if fetchMailsSince was not specified.
(Max Lynch via James Dyer)
Other Changes
----------------------
* SOLR-3262: The "threads" feature is removed (deprecated in Solr 3.6) (James Dyer)
* SOLR-3422: Refactored internal data classes.
All entities in data-config.xml must have a name (James Dyer)
================== 3.6.1 ==================
Bug Fixes
----------------------
* SOLR-3360: More bug fixes for the deprecated "threads" parameter. (Mikhail Khludnev, Claudio R, via James Dyer)
* SOLR-3430: Added a new test against a real SQL database. Fixed problems revealed by this new test
related to the expanded cache support added to 3.6/SOLR-2382 (James Dyer)
* SOLR-3336: SolrEntityProcessor substitutes most variables at query time.
(Michael Kroh, Lance Norskog, via Martijn van Groningen)
================== 3.6.0 ==================
New Features
----------------------
* SOLR-1499: Added SolrEntityProcessor that imports data from another Solr core or instance based on a specified query.
(Lance Norskog, Erik Hatcher, Pulkit Singhal, Ahmet Arslan, Luca Cavanna, Martijn van Groningen)
Additional Work:
SOLR-3190: Minor improvements to SolrEntityProcessor. Add more consistency between solr parameters
and parameters used in SolrEntityProcessor and ability to specify a custom HttpClient instance.
(Luca Cavanna via Martijn van Groningen)
* SOLR-2382: Added pluggable cache support so that any Entity can be made cache-able by adding the "cacheImpl" parameter.
Include "SortedMapBackedCache" to provide in-memory caching (as previously this was the only option when
using CachedSqlEntityProcessor). Users can provide their own implementations of DIHCache for other
caching strategies. Deprecate CachedSqlEntityProcessor in favor of specifying "cacheImpl" with
SqlEntityProcessor. Make SolrWriter implement DIHWriter and allow the possibility of pluggable Writers
(DIH writing to something other than Solr). (James Dyer, Noble Paul)
Changes in Runtime Behavior
----------------------
* SOLR-3142: Imports no longer default optimize to true, instead false. If you want to force all segments to be merged
into one, you can specify this parameter yourself. NOTE: this can be a very expensive operation and usually
does not make sense for delta-imports. (Robert Muir)
================== 3.5.0 ==================
Bug Fixes
----------------------
* SOLR-2875: Fix the incorrect url in tika-data-config.xml (Shinichiro Abe via koji)
================== 3.4.0 ==================
Bug Fixes
----------------------
* SOLR-2644: When using threads=2 the default logging is set too high (Bill Bell via shalin)
* SOLR-2492: DIH does not commit if only deletes are processed (James Dyer via shalin)
* SOLR-2186: DataImportHandler's multi-threaded option throws NPE (Lance Norskog, Frank Wesemann, shalin)
* SOLR-2655: DIH multi threaded mode does not resolve attributes correctly (Frank Wesemann, shalin)
* SOLR-2695: Documents are collected in unsynchronized list in multi-threaded debug mode (Michael McCandless, shalin)
* SOLR-2668: DIH multithreaded mode does not rollback on errors from EntityProcessor (Frank Wesemann, shalin)
================== 3.3.0 ==================
* SOLR-2551: Check dataimport.properties for write access (if delta-import is supported
in DIH configuration) before starting an import (C S, shalin)
================== 3.2.0 ==================
(No Changes)
================== 3.1.0 ==================
Upgrading from Solr 1.4
----------------------
Versions of Major Components
---------------------
Detailed Change List
----------------------
New Features
----------------------
* SOLR-1525 : allow DIH to refer to core properties (noble)
* SOLR-1547 : TemplateTransformer copy objects more intelligently when the template is a single variable (noble)
* SOLR-1627 : VariableResolver should be fetched just in time (noble)
* SOLR-1583 : Create DataSources that return InputStream (noble)
* SOLR-1358 : Integration of Tika and DataImportHandler ( Akshay Ukey, noble)
* SOLR-1654 : TikaEntityProcessor example added DIHExample (Akshay Ukey via noble)
* SOLR-1678 : Move onError handling to DIH framework (noble)
* SOLR-1352 : Multi-threaded implementation of DIH (noble)
* SOLR-1721 : Add explicit option to run DataImportHandler in synchronous mode (Alexey Serba via noble)
* SOLR-1737 : Added FieldStreamDataSource (noble)
Optimizations
----------------------
* SOLR-2200: Improve the performance of DataImportHandler for large delta-import
updates. (Mark Waddle via rmuir)
Bug Fixes
----------------------
* SOLR-1638: Fixed NullPointerException during import if uniqueKey is not specified
in schema (Akshay Ukey via shalin)
* SOLR-1639: Fixed misleading error message when dataimport.properties is not writable (shalin)
* SOLR-1598: Reader used in PlainTextEntityProcessor is not explicitly closed (Sascha Szott via noble)
* SOLR-1759: $skipDoc was not working correctly (Gian Marco Tagliani via noble)
* SOLR-1762: DateFormatTransformer does not work correctly with non-default locale dates (tommy chheng via noble)
* SOLR-1757: DIH multithreading sometimes throws NPE (noble)
* SOLR-1766: DIH with threads enabled doesn't respond to the abort command (Michael Henson via noble)
* SOLR-1767: dataimporter.functions.escapeSql() does not escape backslash character (Sean Timm via noble)
* SOLR-1811: formatDate should use the current NOW value always (Sean Timm via noble)
* SOLR-1794: Dataimport of CLOB fields fails when getCharacterStream() is
defined in a superclass. (Gunnar Gauslaa Bergem via rmuir)
* SOLR-2057: DataImportHandler never calls UpdateRequestProcessor.finish()
(Drew Farris via koji)
* SOLR-1973: Empty fields in XML update messages confuse DataImportHandler. (koji)
* SOLR-2221: Use StrUtils.parseBool() to get values of boolean options in DIH.
true/on/yes (for TRUE) and false/off/no (for FALSE) can be used for sub-options
(debug, verbose, synchronous, commit, clean, optimize) for full/delta-import commands. (koji)
* SOLR-2310: getTimeElapsedSince() returns incorrect hour value when the elapse is over 60 hours
(tom liu via koji)
* SOLR-2252: When a child entity in nested entities is rootEntity="true", delta-import doesn't work.
(koji)
* SOLR-2330: solrconfig.xml files in example-DIH are broken. (Matt Parker, koji)
* SOLR-1191: resolve DataImportHandler deltaQuery column against pk when pk
has a prefix (e.g. pk="book.id" deltaQuery="select id from ..."). More
useful error reporting when no match found (previously failed with a
NullPointerException in log and no clear user feedback). (gthb via yonik)
* SOLR-2116: Fix TikaConfig classloader bug in TikaEntityProcessor
(Martijn van Groningen via hossman)
Other Changes
----------------------
* SOLR-1821: Fix TimeZone-dependent test failure in TestEvaluatorBag.
(Chris Male via rmuir)
* SOLR-2367: Reduced noise in test output by ensuring the properties file can be written.
(Gunnlaugur Thor Briem via rmuir)
Build
----------------------
Documentation
----------------------
================== Release 1.4.0 ==================
Upgrading from Solr 1.3
-----------------------
Evaluator API has been changed in a non back-compatible way. Users who have developed custom Evaluators will need
to change their code according to the new API for it to work. See SOLR-996 for details.
The formatDate evaluator's syntax has been changed. The new syntax is formatDate(<variable>, '<format_string>').
For example, formatDate(x.date, 'yyyy-MM-dd'). In the old syntax, the date string was written without a single-quotes.
The old syntax has been deprecated and will be removed in 1.5; until then, using the old syntax will log a warning.
The Context API has been changed in a non back-compatible way. In particular, the Context.currentProcess() method
now returns a String describing the type of the current import process instead of an int. Similarly, the public
constants in Context viz. FULL_DUMP, DELTA_DUMP and FIND_DELTA are changed to a String type. See SOLR-969 for details.
The EntityProcessor API has been simplified by moving logic for applying transformers and handling multi-row outputs
from Transformers into an EntityProcessorWrapper class. The EntityProcessor#destroy is now called once per
parent-row at the end of row (end of data). A new method EntityProcessor#close is added which is called at the end
of import.
In Solr 1.3, if the last_index_time was not available (first import) and a delta-import was requested, a full-import
was run instead. This is no longer the case. In Solr 1.4 delta import is run with last_index_time as the epoch
date (January 1, 1970, 00:00:00 GMT) if last_index_time is not available.
Detailed Change List
----------------------
New Features
----------------------
1. SOLR-768: Set last_index_time variable in full-import command.
(Wojtek Piaseczny, Noble Paul via shalin)
2. SOLR-811: Allow a "deltaImportQuery" attribute in SqlEntityProcessor which is used for delta imports
instead of DataImportHandler manipulating the SQL itself.
(Noble Paul via shalin)
3. SOLR-842: Better error handling in DataImportHandler with options to abort, skip and continue imports.
(Noble Paul, shalin)
4. SOLR-833: A DataSource to read data from a field as a reader. This can be used, for example, to read XMLs
residing as CLOBs or BLOBs in databases.
(Noble Paul via shalin)
5. SOLR-887: A Transformer to strip HTML tags.
(Ahmed Hammad via shalin)
6. SOLR-886: DataImportHandler should rollback when an import fails or it is aborted
(shalin)
7. SOLR-891: A Transformer to read strings from Clob type.
(Noble Paul via shalin)
8. SOLR-812: Configurable JDBC settings in JdbcDataSource including optimized defaults for read only mode.
(David Smiley, Glen Newton, shalin)
9. SOLR-910: Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config.
(Ahmed Hammad via shalin)
10.SOLR-938: Add event listener API for import start and end.
(Kay Kay, Noble Paul via shalin)
11.SOLR-801: Add support for configurable pre-import and post-import delete query per root-entity.
(Noble Paul via shalin)
12.SOLR-988: Add a new scope for session data stored in Context to store objects across imports.
(Noble Paul via shalin)
13.SOLR-980: A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String.
(Nathan Adams, Noble Paul via shalin)
14.SOLR-1003: XPathEntityprocessor must allow slurping all text from a given xml node and its children.
(Noble Paul via shalin)
15.SOLR-1001: Allow variables in various attributes of RegexTransformer, HTMLStripTransformer
and NumberFormatTransformer.
(Fergus McMenemie, Noble Paul, shalin)
16.SOLR-989: Expose running statistics from the Context API.
(Noble Paul, shalin)
17.SOLR-996: Expose Context to Evaluators.
(Noble Paul, shalin)
18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
(Jon Baer, Noble Paul via shalin)
19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
of TemplateTransformer in other transformers, among other things.
(Fergus McMenemie, Noble Paul via shalin)
20.SOLR-1066: New methods in Context to expose Script details. ScriptTransformer changed to read scripts
through the new API methods.
(Noble Paul via shalin)
21.SOLR-1062: A LogTransformer which can log data in a given template format.
(Jon Baer, Noble Paul via shalin)
22.SOLR-1065: A ContentStreamDataSource which can accept HTTP POST data in a content stream. This can be used to
push data to Solr instead of just pulling it from DB/Files/URLs.
(Noble Paul via shalin)
23.SOLR-1061: Improve RegexTransformer to create multiple columns from regex groups.
(Noble Paul via shalin)
24.SOLR-1059: Special flags introduced for deleting documents by query or id, skipping rows and stopping further
transforms. Use $deleteDocById, $deleteDocByQuery for deleting by id and query respectively.
Use $skipRow to skip the current row but continue with the document. Use $stopTransform to stop
further transformers. New methods are introduced in Context for deleting by id and query.
(Noble Paul, Fergus McMenemie, shalin)
25.SOLR-1076: JdbcDataSource should resolve variables in all its configuration parameters.
(shalin)
26.SOLR-1055: Make DIH JdbcDataSource easily extensible by making the createConnectionFactory method protected and
return a Callable<Connection> object.
(Noble Paul, shalin)
27.SOLR-1058: JdbcDataSource can lookup javax.sql.DataSource using JNDI. Use a jndiName attribute to specify the
location of the data source.
(Jason Shepherd, Noble Paul via shalin)
28.SOLR-1083: An Evaluator for escaping query characters.
(Noble Paul, shalin)
29.SOLR-934: A MailEntityProcessor to enable indexing mails from POP/IMAP sources into a solr index.
(Preetam Rao, shalin)
30.SOLR-1060: A LineEntityProcessor which can stream lines of text from a given file to be indexed directly or
for processing with transformers and child entities.
(Fergus McMenemie, Noble Paul, shalin)
31.SOLR-1127: Add support for field name to be templatized.
(Noble Paul, shalin)
32.SOLR-1092: Added a new command named 'import' which does not automatically clean the index. This is useful and
more appropriate when one needs to import only some of the entities.
(Noble Paul via shalin)
33.SOLR-1153: 'deltaImportQuery' is honored on child entities as well (noble)
34.SOLR-1230: Enhanced dataimport.jsp to work with all DataImportHandler request handler configurations,
rather than just a hardcoded /dataimport handler. (ehatcher)
35.SOLR-1235: disallow period (.) in entity names (noble)
36.SOLR-1234: Multiple DIH does not work because all of them write to dataimport.properties.
Use the handler name as the properties file name (noble)
37.SOLR-1348: Support binary field type in convertType logic in JdbcDataSource (shalin)
38.SOLR-1406: Make FileDataSource and FileListEntityProcessor to be more extensible (Luke Forehand, shalin)
39.SOLR-1437 : XPathEntityProcessor can deal with xpath syntaxes such as //tagname , /root//tagname (Fergus McMenemie via noble)
Optimizations
----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
(Ricky Leung, Noble Paul via shalin)
2. SOLR-974: DataImportHandler skips commit if no data has been updated.
(Wojtek Piaseczny, shalin)
3. SOLR-1004: Check for abort more frequently during delta-imports.
(Marc Sturlese, shalin)
4. SOLR-1098: DateFormatTransformer can cache the format objects.
(Noble Paul via shalin)
5. SOLR-1465: Replaced string concatenations with StringBuilder append calls in XPathRecordReader.
(Mark Miller, shalin)
Bug Fixes
----------------------
1. SOLR-800: Deep copy collections to avoid ConcurrentModificationException in XPathEntityprocessor while streaming
(Kyle Morrison, Noble Paul via shalin)
2. SOLR-823: Request parameter variables ${dataimporter.request.xxx} are not resolved
(Mck SembWever, Noble Paul, shalin)
3. SOLR-728: Add synchronization to avoid race condition of multiple imports working concurrently
(Walter Ferrara, shalin)
4. SOLR-742: Add ability to create dynamic fields with custom DataImportHandler transformers
(Wojtek Piaseczny, Noble Paul, shalin)
5. SOLR-832: Rows parameter is not honored in non-debug mode and can abort a running import in debug mode.
(Akshay Ukey, shalin)
6. SOLR-838: The VariableResolver obtained from a DataSource's context does not have current data.
(Noble Paul via shalin)
7. SOLR-864: DataImportHandler does not catch and log Errors (shalin)
8. SOLR-873: Fix case-sensitive field names and columns (Jon Baer, shalin)
9. SOLR-893: Unable to delete documents via SQL and deletedPkQuery with deltaimport
(Dan Rosher via shalin)
10. SOLR-888: DateFormatTransformer cannot convert non-string type
(Amit Nithian via shalin)
11. SOLR-841: DataImportHandler should throw exception if a field does not have column attribute
(Michael Henson, shalin)
12. SOLR-884: CachedSqlEntityProcessor should check if the cache key is present in the query results
(Noble Paul via shalin)
13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores.
(Ryuuichi Kumai via shalin)
14. SOLR-999: XPathRecordReader fails on XMLs with nodes mixed with CDATA content.
(Fergus McMenemie, Noble Paul via shalin)
15.SOLR-1000: FileListEntityProcessor should not apply fileName filter to directory names.
(Fergus McMenemie via shalin)
16.SOLR-1009: Repeated column names result in duplicate values.
(Fergus McMenemie, Noble Paul via shalin)
17.SOLR-1017: Fix thread-safety issue with last_index_time for concurrent imports in multiple cores due to unsafe usage
of SimpleDateFormat by multiple threads.
(Ryuuichi Kumai via shalin)
18.SOLR-1024: Calling abort on DataImportHandler import commits data instead of calling rollback.
(shalin)
19.SOLR-1037: DIH should not add null values in a row returned by EntityProcessor to documents.
(shalin)
20.SOLR-1040: XPathEntityProcessor fails with an xpath like /feed/entry/link[@type='text/html']/@href
(Noble Paul via shalin)
21.SOLR-1042: Fix memory leak in DIH by making TemplateString non-static member in VariableResolverImpl
(Ryuuichi Kumai via shalin)
22.SOLR-1053: IndexOutOfBoundsException in SolrWriter.getResourceAsString when size of data-config.xml is a
multiple of 1024 bytes.
(Herb Jiang via shalin)
23.SOLR-1077: IndexOutOfBoundsException with useSolrAddSchema in XPathEntityProcessor.
(Sam Keen, Noble Paul via shalin)
24.SOLR-1080: RegexTransformer should not replace if regex is not matched.
(Noble Paul, Fergus McMenemie via shalin)
25.SOLR-1090: DataImportHandler should load the data-config.xml using UTF-8 encoding.
(Rui Pereira, shalin)
26.SOLR-1146: ConcurrentModificationException in DataImporter.getStatusMessages
(Walter Ferrara, Noble Paul via shalin)
27.SOLR-1229: Fixes for deletedPkQuery, particularly when using transformed Solr unique id's
(Lance Norskog, Noble Paul via ehatcher)
28.SOLR-1286: Fix the commit parameter always defaulting to "true" even if "false" is explicitly passed in.
(Jay Hill, Noble Paul via ehatcher)
29.SOLR-1323: Reset XPathEntityProcessor's $hasMore/$nextUrl when fetching next URL (noble, ehatcher)
30.SOLR-1450: Jdbc connection properties such as batchSize are not applied if the driver jar is placed
in solr_home/lib.
(Steve Sun via shalin)
31.SOLR-1474: Delta-import should run even if last_index_time is not set.
(shalin)
Documentation
----------------------
1. SOLR-1369: Add HSQLDB Jar to example-DIH, unzip database and update instructions.
Other
----------------------
1. SOLR-782: Refactored SolrWriter to make it a concrete class and removed wrappers over SolrInputDocument.
Refactored to load Evaluators lazily. Removed multiple document nodes in the configuration xml.
Removed support for 'default' variables, they are automatically available as request parameters.
(Noble Paul via shalin)
2. SOLR-964: XPathEntityProcessor now ignores DTD validations
(Fergus McMenemie, Noble Paul via shalin)
3. SOLR-1029: Standardize Evaluator parameter parsing and added helper functions for parsing all evaluator
parameters in a standard way.
(Noble Paul, shalin)
4. SOLR-1081: Change EventListener to be an interface so that components such as an EntityProcessor or a Transformer
can act as an event listener.
(Noble Paul, shalin)
5. SOLR-1027: Alias the 'dataimporter' namespace to a shorter name 'dih'.
(Noble Paul via shalin)
6. SOLR-1084: Better error reporting when entity name is a reserved word and data-config.xml root node
is not <dataConfig>.
(Noble Paul via shalin)
7. SOLR-1087: Deprecate 'where' attribute in CachedSqlEntityProcessor in favor of cacheKey and cacheLookup.
(Noble Paul via shalin)
8. SOLR-969: Change the FULL_DUMP, DELTA_DUMP, FIND_DELTA constants in Context to String.
Change Context.currentProcess() to return a string instead of an integer.
(Kay Kay, Noble Paul, shalin)
9. SOLR-1120: Simplified EntityProcessor API by moving logic for applying transformers and handling multi-row outputs
from Transformers into an EntityProcessorWrapper class. The behavior of the method
EntityProcessor#destroy has been modified to be called once per parent-row at the end of row. A new
method EntityProcessor#close is added which is called at the end of import. A new method
Context#getResolvedEntityAttribute is added which returns the resolved value of an entity's attribute.
Introduced a DocWrapper which takes care of maintaining document level session variables.
(Noble Paul, shalin)
10.SOLR-1265: Add variable resolving for URLDataSource properties like baseUrl. (Chris Eldredge via ehatcher)
11.SOLR-1269: Better error messages from JdbcDataSource when JDBC Driver name or SQL is incorrect.
(ehatcher, shalin)
================== Release 1.3.0 ==================
Status
------
This is the first release since DataImportHandler was added to the contrib solr distribution.
The following changes list changes since the code was introduced, not since
the first official release.
Detailed Change List
--------------------
New Features
1. SOLR-700: Allow configurable locales through a locale attribute in fields for NumberFormatTransformer.
(Stefan Oestreicher, shalin)
Changes in runtime behavior
Bug Fixes
1. SOLR-704: NumberFormatTransformer can silently ignore part of the string while parsing. Now it tries to
use the complete string for parsing. Failure to do so will result in an exception.
(Stefan Oestreicher via shalin)
2. SOLR-729: Context.getDataSource(String) gives current entity's DataSource instance regardless of argument.
(Noble Paul, shalin)
3. SOLR-726: Jdbc Drivers and DataSources fail to load if placed in multicore sharedLib or core's lib directory.
(Walter Ferrara, Noble Paul, shalin)
Other Changes

View File

@ -1,3 +1,12 @@
Apache Solr - DataImportHandler
Introduction
------------
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
HTTP data sources quick and easy.
Important Note
--------------
Although Solr strives to be agnostic of the Locale where the server is
running, some code paths in DataImportHandler are known to depend on the
System default Locale, Timezone, or Charset. It is recommended that when

View File

@ -152,7 +152,7 @@ public class ContextImpl extends Context {
}
} else if (SCOPE_SOLR_CORE.equals(scope)){
if(dataImporter != null) {
dataImporter.getCoreScopeSession().put(name, val);
dataImporter.putToCoreScopeSession(name, val);
}
}
}
@ -171,7 +171,7 @@ public class ContextImpl extends Context {
DocBuilder.DocWrapper doc = getDocument();
return doc == null ? null: doc.getSessionAttribute(name);
} else if (SCOPE_SOLR_CORE.equals(scope)){
return dataImporter == null ? null : dataImporter.getCoreScopeSession().get(name);
return dataImporter == null ? null : dataImporter.getFromCoreScopeSession(name);
}
return null;
}

View File

@ -71,14 +71,10 @@ public class DataImportHandler extends RequestHandlerBase implements
private DataImporter importer;
private Map<String, Properties> dataSources = new HashMap<String, Properties>();
private boolean debugEnabled = true;
private String myName = "dataimport";
private Map<String , Object> coreScopeSession = new HashMap<String, Object>();
@Override
@SuppressWarnings("unchecked")
public void init(NamedList args) {
@ -102,21 +98,10 @@ public class DataImportHandler extends RequestHandlerBase implements
}
}
debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true);
NamedList defaults = (NamedList) initArgs.get("defaults");
if (defaults != null) {
String configLoc = (String) defaults.get("config");
if (configLoc != null && configLoc.length() != 0) {
processConfiguration(defaults);
final InputSource is = new InputSource(core.getResourceLoader().openResource(configLoc));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(configLoc));
importer = new DataImporter(is, core,
dataSources, coreScopeSession, myName);
}
}
importer = new DataImporter(core, myName);
} catch (Throwable e) {
LOG.error( DataImporter.MSG.LOAD_EXP, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
DataImporter.MSG.INVALID_CONFIG, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e);
}
}
@ -136,48 +121,35 @@ public class DataImportHandler extends RequestHandlerBase implements
}
}
SolrParams params = req.getParams();
NamedList defaultParams = (NamedList) initArgs.get("defaults");
RequestInfo requestParams = new RequestInfo(getParamsMap(params), contentStream);
String command = requestParams.getCommand();
if (DataImporter.SHOW_CONF_CMD.equals(command)) {
// Modify incoming request params to add wt=raw
ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
rawParams.set(CommonParams.WT, "raw");
req.setParams(rawParams);
String dataConfigFile = defaults.get("config");
ContentStreamBase content = new ContentStreamBase.StringStream(SolrWriter
.getResourceAsString(req.getCore().getResourceLoader().openResource(
dataConfigFile)));
rsp.add(RawResponseWriter.CONTENT, content);
if (DataImporter.SHOW_CONF_CMD.equals(command)) {
String dataConfigFile = params.get("config");
String dataConfig = params.get("dataConfig");
if(dataConfigFile != null) {
dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile));
}
if(dataConfig==null) {
rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND);
} else {
// Modify incoming request params to add wt=raw
ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
rawParams.set(CommonParams.WT, "raw");
req.setParams(rawParams);
ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig);
rsp.add(RawResponseWriter.CONTENT, content);
}
return;
}
rsp.add("initArgs", initArgs);
String message = "";
if (command != null)
if (command != null) {
rsp.add("command", command);
if (requestParams.isDebug() && (importer == null || !importer.isBusy())) {
// Reload the data-config.xml
importer = null;
if (requestParams.getDataConfig() != null) {
try {
processConfiguration((NamedList) initArgs.get("defaults"));
importer = new DataImporter(new InputSource(new StringReader(requestParams.getDataConfig())), req.getCore()
, dataSources, coreScopeSession, myName);
} catch (RuntimeException e) {
rsp.add("exception", DebugLogger.getStacktraceString(e));
importer = null;
return;
}
} else {
inform(req.getCore());
}
message = DataImporter.MSG.CONFIG_RELOADED;
}
// If importer is still null
if (importer == null) {
rsp.add("status", DataImporter.MSG.NO_INIT);
@ -192,7 +164,7 @@ public class DataImportHandler extends RequestHandlerBase implements
if (DataImporter.FULL_IMPORT_CMD.equals(command)
|| DataImporter.DELTA_IMPORT_CMD.equals(command) ||
IMPORT_CMD.equals(command)) {
importer.maybeReloadConfiguration(requestParams, defaultParams);
UpdateRequestProcessorChain processorChain =
req.getCore().getUpdateProcessingChain(params.get(UpdateParams.UPDATE_CHAIN));
UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
@ -219,10 +191,12 @@ public class DataImportHandler extends RequestHandlerBase implements
importer.runCmd(requestParams, sw);
}
}
} else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
importer = null;
inform(req.getCore());
message = DataImporter.MSG.CONFIG_RELOADED;
} else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
if(importer.maybeReloadConfiguration(requestParams, defaultParams)) {
message = DataImporter.MSG.CONFIG_RELOADED;
} else {
message = DataImporter.MSG.CONFIG_NOT_RELOADED;
}
}
}
rsp.add("status", importer.isBusy() ? "busy" : "idle");
@ -248,36 +222,6 @@ public class DataImportHandler extends RequestHandlerBase implements
return result;
}
@SuppressWarnings("unchecked")
private void processConfiguration(NamedList defaults) {
if (defaults == null) {
LOG.info("No configuration specified in solrconfig.xml for DataImportHandler");
return;
}
LOG.info("Processing configuration from solrconfig.xml: " + defaults);
dataSources = new HashMap<String, Properties>();
int position = 0;
while (position < defaults.size()) {
if (defaults.getName(position) == null)
break;
String name = defaults.getName(position);
if (name.equals("datasource")) {
NamedList dsConfig = (NamedList) defaults.getVal(position);
Properties props = new Properties();
for (int i = 0; i < dsConfig.size(); i++)
props.put(dsConfig.getName(i), dsConfig.getVal(i));
LOG.info("Adding properties to datasource: " + props);
dataSources.put((String) dsConfig.get("name"), props);
}
position++;
}
}
private SolrWriter getSolrWriter(final UpdateRequestProcessor processor,
final SolrResourceLoader loader, final RequestInfo requestParams, SolrQueryRequest req) {

View File

@ -22,6 +22,8 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.SystemIdResolver;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
@ -41,9 +43,12 @@ import org.apache.commons.io.IOUtils;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.IOException;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
@ -67,14 +72,14 @@ public class DataImporter {
private DIHConfiguration config;
private Date indexStartTime;
private Properties store = new Properties();
private Map<String, Properties> dataSourceProps = new HashMap<String, Properties>();
private Map<String, Map<String,String>> requestLevelDataSourceProps = new HashMap<String, Map<String,String>>();
private IndexSchema schema;
public DocBuilder docBuilder;
public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
private SolrCore core;
private Map<String, Object> coreScopeSession = new ConcurrentHashMap<String,Object>();
private DIHPropertiesWriter propWriter;
private ReentrantLock importLock = new ReentrantLock();
private final Map<String , Object> coreScopeSession;
private boolean isDeltaImportSupported = false;
private final String handlerName;
private Map<String, SchemaField> lowerNameVsSchemaField = new HashMap<String, SchemaField>();
@ -83,12 +88,19 @@ public class DataImporter {
* Only for testing purposes
*/
DataImporter() {
coreScopeSession = new HashMap<String, Object>();
createPropertyWriter();
propWriter.init(this);
this.handlerName = "dataimport" ;
}
DataImporter(SolrCore core, String handlerName) {
this.handlerName = handlerName;
this.core = core;
this.schema = core.getSchema();
loadSchemaFieldMap();
createPropertyWriter();
}
private void createPropertyWriter() {
if (this.core == null
|| !this.core.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
@ -99,27 +111,58 @@ public class DataImporter {
propWriter.init(this);
}
DataImporter(InputSource dataConfig, SolrCore core, Map<String, Properties> ds, Map<String, Object> session, String handlerName) {
this.handlerName = handlerName;
if (dataConfig == null) {
throw new DataImportHandlerException(SEVERE, "Configuration not found");
}
this.core = core;
this.schema = core.getSchema();
loadSchemaFieldMap();
createPropertyWriter();
dataSourceProps = ds;
if (session == null)
session = new HashMap<String, Object>();
coreScopeSession = session;
loadDataConfig(dataConfig);
for (Entity e : config.getEntities()) {
if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
isDeltaImportSupported = true;
break;
boolean maybeReloadConfiguration(RequestInfo params,
NamedList<?> defaultParams) throws IOException {
if (importLock.tryLock()) {
boolean success = false;
try {
String dataConfigText = params.getDataConfig();
String dataconfigFile = (String) params.getConfigFile();
InputSource is = null;
if(dataConfigText!=null && dataConfigText.length()>0) {
is = new InputSource(new StringReader(dataConfigText));
} else if(dataconfigFile!=null) {
is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
LOG.info("Loading DIH Configuration: " + dataconfigFile);
}
if(is!=null) {
loadDataConfig(is);
success = true;
}
Map<String,Map<String,String>> dsProps = new HashMap<String,Map<String,String>>();
if(defaultParams!=null) {
int position = 0;
while (position < defaultParams.size()) {
if (defaultParams.getName(position) == null) {
break;
}
String name = defaultParams.getName(position);
if (name.equals("datasource")) {
success = true;
NamedList dsConfig = (NamedList) defaultParams.getVal(position);
LOG.info("Getting configuration for Global Datasource...");
Map<String,String> props = new HashMap<String,String>();
for (int i = 0; i < dsConfig.size(); i++) {
props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
}
LOG.info("Adding properties to datasource: " + props);
dsProps.put((String) dsConfig.get("name"), props);
}
position++;
}
}
requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
} catch(IOException ioe) {
throw ioe;
} finally {
importLock.unlock();
}
return success;
} else {
return false;
}
}
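
With this change, constructing a DataImporter is cheap (new DataImporter(core, name)) and the configuration is (re)loaded lazily through maybeReloadConfiguration. A hedged caller sketch follows; the wrapper class is hypothetical, the RequestInfo constructor arguments follow the call site earlier in this diff, and both DataImporter members used here are package-private, which is why the sketch sits in the dataimport package:

package org.apache.solr.handler.dataimport;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;

// Hypothetical caller for the new lazy-reload entry point.
class ReloadSketch {
  static boolean reload(SolrCore core, String dataConfigXml, NamedList<?> defaults)
      throws IOException {
    DataImporter importer = new DataImporter(core, "dataimport");
    Map<String,Object> params = new HashMap<String,Object>();
    params.put("dataConfig", dataConfigXml);           // inline body; "config" would name a resource instead
    RequestInfo info = new RequestInfo(params, null);  // no content stream
    // Returns false when the importer is busy (the lock is held) or nothing was
    // loaded; the handler then reports DataImporter.MSG.CONFIG_NOT_RELOADED.
    return importer.maybeReloadConfiguration(info, defaults);
  }
}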
@ -188,7 +231,13 @@ public class DataImporter {
LOG.info("Data Configuration loaded successfully");
} catch (Exception e) {
throw new DataImportHandlerException(SEVERE,
"Exception occurred while initializing context", e);
"Data Config problem: " + e.getMessage(), e);
}
for (Entity e : config.getEntities()) {
if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
isDeltaImportSupported = true;
break;
}
}
}
@ -196,7 +245,7 @@ public class DataImporter {
DIHConfiguration config;
List<Map<String, String >> functions = new ArrayList<Map<String ,String>>();
Script script = null;
Map<String, Properties> dataSources = new HashMap<String, Properties>();
Map<String, Map<String,String>> dataSources = new HashMap<String, Map<String,String>>();
NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
if(dataConfigTags == null || dataConfigTags.getLength() == 0) {
@ -232,16 +281,16 @@ public class DataImporter {
List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, DATA_SRC);
if (!dataSourceTags.isEmpty()) {
for (Element element : dataSourceTags) {
Properties p = new Properties();
Map<String,String> p = new HashMap<String,String>();
HashMap<String, String> attrs = ConfigParseUtil.getAllAttributes(element);
for (Map.Entry<String, String> entry : attrs.entrySet()) {
p.setProperty(entry.getKey(), entry.getValue());
p.put(entry.getKey(), entry.getValue());
}
dataSources.put(p.getProperty("name"), p);
dataSources.put(p.get("name"), p);
}
}
if(dataSources.get(null) == null){
for (Properties properties : dataSources.values()) {
for (Map<String,String> properties : dataSources.values()) {
dataSources.put(null,properties);
break;
}
@ -270,17 +319,17 @@ public class DataImporter {
}
DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
Properties p = dataSourceProps.get(name);
Map<String,String> p = requestLevelDataSourceProps.get(name);
if (p == null)
p = config.getDataSources().get(name);
if (p == null)
p = dataSourceProps.get(null);// for default data source
p = requestLevelDataSourceProps.get(null);// for default data source
if (p == null)
p = config.getDataSources().get(null);
if (p == null)
throw new DataImportHandlerException(SEVERE,
"No dataSource :" + name + " available for entity :" + key.getName());
String type = p.getProperty(TYPE);
String type = p.get(TYPE);
DataSource dataSrc = null;
if (type == null) {
dataSrc = new JdbcDataSource();
@ -458,6 +507,8 @@ public class DataImporter {
public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";
public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully";
public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";
public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
@ -476,13 +527,16 @@ public class DataImporter {
return schema;
}
Map<String, Object> getCoreScopeSession() {
return coreScopeSession;
}
SolrCore getCore() {
return core;
}
void putToCoreScopeSession(String key, Object val) {
coreScopeSession.put(key, val);
}
Object getFromCoreScopeSession(String key) {
return coreScopeSession.get(key);
}
public static final String COLUMN = "column";

View File

@ -36,6 +36,7 @@ public class RequestInfo {
private final boolean clean;
private final List<String> entitiesToRun;
private final Map<String,Object> rawParams;
private final String configFile;
private final String dataConfig;
//TODO: find a different home for these two...
@ -98,7 +99,8 @@ public class RequestInfo {
} else {
entitiesToRun = null;
}
String configFileParam = (String) requestParams.get("config");
configFile = configFileParam;
String dataConfigParam = (String) requestParams.get("dataConfig");
if (dataConfigParam != null && dataConfigParam.trim().length() == 0) {
// Empty data-config param is not valid, change it to null
@ -161,4 +163,8 @@ public class RequestInfo {
public DebugInfo getDebugInfo() {
return debugInfo;
}
public String getConfigFile() {
return configFile;
}
}

View File

@ -4,7 +4,6 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.solr.handler.dataimport.DataImporter;
import org.w3c.dom.Element;
@ -49,8 +48,8 @@ public class DIHConfiguration {
private final String onImportEnd;
private final List<Map<String, String>> functions;
private final Script script;
private final Map<String, Properties> dataSources;
public DIHConfiguration(Element element, DataImporter di, List<Map<String, String>> functions, Script script, Map<String, Properties> dataSources) {
private final Map<String, Map<String,String>> dataSources;
public DIHConfiguration(Element element, DataImporter di, List<Map<String, String>> functions, Script script, Map<String, Map<String,String>> dataSources) {
this.deleteQuery = ConfigParseUtil.getStringAttribute(element, "deleteQuery", null);
this.onImportStart = ConfigParseUtil.getStringAttribute(element, "onImportStart", null);
this.onImportEnd = ConfigParseUtil.getStringAttribute(element, "onImportEnd", null);
@ -90,7 +89,7 @@ public class DIHConfiguration {
public List<Map<String,String>> getFunctions() {
return functions;
}
public Map<String,Properties> getDataSources() {
public Map<String,Map<String,String>> getDataSources() {
return dataSources;
}
public Script getScript() {

View File

@ -31,11 +31,7 @@
<str name="echoParams">explicit</str>
</lst>
</requestHandler>
<requestHandler name="/dataimport-end-to-end" class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">data-config-end-to-end.xml</str>
</lst>
</requestHandler>
<requestHandler name="/dataimport-end-to-end" class="org.apache.solr.handler.dataimport.DataImportHandler" />
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>

View File

@ -31,7 +31,8 @@ public class TestDIHEndToEnd extends AbstractDIHJdbcTestCase {
}
@Test
public void testEndToEnd() throws Exception {
LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import",
LocalSolrQueryRequest request = lrf.makeRequest(
"command", "full-import", "config", "data-config-end-to-end.xml",
"clean", "true", "commit", "true", "synchronous", "true", "indent", "true");
h.query("/dataimport-end-to-end", request);
assertQ(req("*:*"), "//*[@numFound='20']");
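
Outside the test harness, the same per-request configuration can be sent over SolrJ. The sketch below mirrors the makeRequest(...) parameters above; the base URL, core location, and handler path are assumptions:

import java.io.IOException;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.params.ModifiableSolrParams;

// Hypothetical SolrJ client issuing a full-import with a per-request config file.
public class FullImportSketch {
  public static void main(String[] args) throws SolrServerException, IOException {
    SolrServer server = new HttpSolrServer("http://localhost:8983/solr");
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("command", "full-import");
    params.set("config", "data-config-end-to-end.xml"); // no solrconfig defaults needed any more
    params.set("clean", true);
    params.set("commit", true);
    params.set("synchronous", true);
    QueryRequest req = new QueryRequest(params);
    req.setPath("/dataimport-end-to-end");
    System.out.println(server.request(req));
  }
}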

View File

@ -250,10 +250,10 @@ sb.append("(group_name=").append(tg.getName()).append(")");
/*** Isn't core specific... prob better logged from zkController
if (info != null) {
CloudState cloudState = zkController.getCloudState();
if (info.cloudState != cloudState) {
ClusterState clusterState = zkController.getClusterState();
if (info.clusterState != clusterState) {
// something has changed in the matrix...
sb.append(zkController.getBaseUrl() + " sees new CloudState:");
sb.append(zkController.getBaseUrl() + " sees new ClusterState:");
}
}
***/
@ -263,7 +263,7 @@ sb.append("(group_name=").append(tg.getName()).append(")");
private Map<String,String> getCoreProps(ZkController zkController, SolrCore core) {
final String collection = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
ZkNodeProps props = zkController.getCloudState().getShardProps(collection, ZkStateReader.getCoreNodeName(zkController.getNodeName(), core.getName()));
ZkNodeProps props = zkController.getClusterState().getShardProps(collection, ZkStateReader.getCoreNodeName(zkController.getNodeName(), core.getName()));
if(props!=null) {
return props.getProperties();
}

View File

@ -24,7 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
public class AssignShard {
@ -36,7 +36,7 @@ public class AssignShard {
* @param state
* @return the assigned shard id
*/
public static String assignShard(String collection, CloudState state, Integer numShards) {
public static String assignShard(String collection, ClusterState state, Integer numShards) {
if (numShards == null) {
numShards = 1;
}

View File

@ -5,7 +5,7 @@ import java.util.Map;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
@ -13,7 +13,6 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
@ -195,8 +194,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}
private boolean shouldIBeLeader(ZkNodeProps leaderProps) {
CloudState cloudState = zkController.getZkStateReader().getCloudState();
Map<String,Slice> slices = cloudState.getSlices(this.collection);
ClusterState clusterState = zkController.getZkStateReader().getClusterState();
Map<String,Slice> slices = clusterState.getSlices(this.collection);
Slice slice = slices.get(shardId);
Map<String,ZkNodeProps> shards = slice.getShards();
boolean foundSomeoneElseActive = false;
@ -206,7 +205,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
if (new ZkCoreNodeProps(shard.getValue()).getCoreUrl().equals(
new ZkCoreNodeProps(leaderProps).getCoreUrl())) {
if (state.equals(ZkStateReader.ACTIVE)
&& cloudState.liveNodesContain(shard.getValue().get(
&& clusterState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))) {
// we are alive
return true;
@ -214,7 +213,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}
if ((state.equals(ZkStateReader.ACTIVE))
&& cloudState.liveNodesContain(shard.getValue().get(
&& clusterState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))
&& !new ZkCoreNodeProps(shard.getValue()).getCoreUrl().equals(
new ZkCoreNodeProps(leaderProps).getCoreUrl())) {
@ -226,8 +225,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}
private boolean anyoneElseActive() {
CloudState cloudState = zkController.getZkStateReader().getCloudState();
Map<String,Slice> slices = cloudState.getSlices(this.collection);
ClusterState clusterState = zkController.getZkStateReader().getClusterState();
Map<String,Slice> slices = clusterState.getSlices(this.collection);
Slice slice = slices.get(shardId);
Map<String,ZkNodeProps> shards = slice.getShards();
@ -236,7 +235,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
if ((state.equals(ZkStateReader.ACTIVE))
&& cloudState.liveNodesContain(shard.getValue().get(
&& clusterState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))) {
return true;
}
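
The rename is mechanical, but the reading pattern used throughout these files is the same: fetch a ClusterState snapshot from the ZkStateReader, walk the slices of a collection, and check shard liveness against the live-nodes set. A small sketch of that pattern with the renamed API (the wrapper class and collection argument are illustrative only):

import java.util.Map;

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;

// Hypothetical read-only walk over the renamed cluster-state API.
class ClusterStateSketch {
  static void printLiveShards(ZkStateReader reader, String collection) {
    ClusterState clusterState = reader.getClusterState();     // was getCloudState()
    Map<String,Slice> slices = clusterState.getSlices(collection);
    for (Slice slice : slices.values()) {
      for (Map.Entry<String,ZkNodeProps> shard : slice.getShards().entrySet()) {
        boolean live = clusterState.liveNodesContain(
            shard.getValue().get(ZkStateReader.NODE_NAME_PROP));
        System.out.println(slice.getName() + "/" + shard.getKey() + " live=" + live);
      }
    }
  }
}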
@ -250,16 +249,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
final class OverseerElectionContext extends ElectionContext {
private final SolrZkClient zkClient;
private final ZkStateReader stateReader;
private ShardHandler shardHandler;
private String adminPath;
private Overseer overseer;
public OverseerElectionContext(ShardHandler shardHandler, String adminPath, final String zkNodeName, ZkStateReader stateReader) {
super(zkNodeName, "/overseer_elect", "/overseer_elect/leader", null, stateReader.getZkClient());
this.stateReader = stateReader;
this.shardHandler = shardHandler;
this.adminPath = adminPath;
this.zkClient = stateReader.getZkClient();
public OverseerElectionContext(SolrZkClient zkClient, Overseer overseer, final String zkNodeName) {
super(zkNodeName, "/overseer_elect", "/overseer_elect/leader", null, zkClient);
this.overseer = overseer;
this.zkClient = zkClient;
}
@Override
@ -281,7 +277,7 @@ final class OverseerElectionContext extends ElectionContext {
CreateMode.EPHEMERAL, true);
}
new Overseer(shardHandler, adminPath, stateReader, id);
overseer.start(id);
}
}

View File

@ -24,7 +24,7 @@ import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
@ -47,7 +47,7 @@ public class Overseer {
private static Logger log = LoggerFactory.getLogger(Overseer.class);
private static class CloudStateUpdater implements Runnable {
private class ClusterStateUpdater implements Runnable {
private static final String DELETECORE = "deletecore";
private final ZkStateReader reader;
@ -59,7 +59,7 @@ public class Overseer {
//If Overseer dies while extracting the main queue a new overseer will start from this queue
private final DistributedQueue workQueue;
public CloudStateUpdater(final ZkStateReader reader, final String myId) {
public ClusterStateUpdater(final ZkStateReader reader, final String myId) {
this.zkClient = reader.getZkClient();
this.stateUpdateQueue = getInQueue(zkClient);
this.workQueue = getInternalQueue(zkClient);
@ -70,7 +70,7 @@ public class Overseer {
@Override
public void run() {
if(amILeader()) {
if(amILeader() && !Overseer.this.isClosed) {
// see if there's something left from the previous Overseer and re
// process all events that were not persisted into cloud state
synchronized (reader.getUpdateLock()) { //XXX this only protects against edits inside single node
@ -78,17 +78,17 @@ public class Overseer {
byte[] head = workQueue.peek();
if (head != null) {
reader.updateCloudState(true);
CloudState cloudState = reader.getCloudState();
reader.updateClusterState(true);
ClusterState clusterState = reader.getClusterState();
log.info("Replaying operations from work queue.");
while (head != null && amILeader()) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message
.get(QUEUE_OPERATION);
cloudState = processMessage(cloudState, message, operation);
clusterState = processMessage(clusterState, message, operation);
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(cloudState), true);
ZkStateReader.toJSON(clusterState), true);
workQueue.remove();
head = workQueue.peek();
}
@ -110,26 +110,26 @@ public class Overseer {
}
log.info("Starting to work on the main queue");
while (amILeader()) {
while (amILeader() && !isClosed) {
synchronized (reader.getUpdateLock()) {
try {
byte[] head = stateUpdateQueue.peek();
if (head != null) {
reader.updateCloudState(true);
CloudState cloudState = reader.getCloudState();
reader.updateClusterState(true);
ClusterState clusterState = reader.getClusterState();
while (head != null) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message.get(QUEUE_OPERATION);
cloudState = processMessage(cloudState, message, operation);
clusterState = processMessage(clusterState, message, operation);
byte[] processed = stateUpdateQueue.remove();
workQueue.offer(processed);
head = stateUpdateQueue.peek();
}
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(cloudState), true);
ZkStateReader.toJSON(clusterState), true);
}
// clean work queue
while (workQueue.poll() != null);
@ -157,12 +157,12 @@ public class Overseer {
}
}
private CloudState processMessage(CloudState cloudState,
private ClusterState processMessage(ClusterState clusterState,
final ZkNodeProps message, final String operation) {
if ("state".equals(operation)) {
cloudState = updateState(cloudState, message);
clusterState = updateState(clusterState, message);
} else if (DELETECORE.equals(operation)) {
cloudState = removeCore(cloudState, message);
clusterState = removeCore(clusterState, message);
} else if (ZkStateReader.LEADER_PROP.equals(operation)) {
StringBuilder sb = new StringBuilder();
String baseUrl = message.get(ZkStateReader.BASE_URL_PROP);
@ -172,14 +172,14 @@ public class Overseer {
sb.append(coreName == null ? "" : coreName);
if (!(sb.substring(sb.length() - 1).equals("/"))) sb
.append("/");
cloudState = setShardLeader(cloudState,
clusterState = setShardLeader(clusterState,
message.get(ZkStateReader.COLLECTION_PROP),
message.get(ZkStateReader.SHARD_ID_PROP), sb.toString());
} else {
throw new RuntimeException("unknown operation:" + operation
+ " contents:" + message.getProperties());
}
return cloudState;
return clusterState;
}
private boolean amILeader() {
@ -199,7 +199,7 @@ public class Overseer {
/**
* Try to assign core to the cluster.
*/
private CloudState updateState(CloudState state, final ZkNodeProps message) {
private ClusterState updateState(ClusterState state, final ZkNodeProps message) {
final String collection = message.get(ZkStateReader.COLLECTION_PROP);
final String zkCoreNodeName = message.get(ZkStateReader.NODE_NAME_PROP) + "_" + message.get(ZkStateReader.CORE_NAME_PROP);
final Integer numShards = message.get(ZkStateReader.NUM_SHARDS_PROP)!=null?Integer.parseInt(message.get(ZkStateReader.NUM_SHARDS_PROP)):null;
@ -214,7 +214,7 @@ public class Overseer {
String shardId = message.get(ZkStateReader.SHARD_ID_PROP);
if (shardId == null) {
String nodeName = message.get(ZkStateReader.NODE_NAME_PROP);
//get shardId from CloudState
//get shardId from ClusterState
shardId = getAssignedId(state, nodeName, message);
}
if(shardId == null) {
@ -242,11 +242,11 @@ public class Overseer {
shardProps.put(zkCoreNodeName, zkProps);
slice = new Slice(shardId, shardProps);
CloudState newCloudState = updateSlice(state, collection, slice);
return newCloudState;
ClusterState newClusterState = updateSlice(state, collection, slice);
return newClusterState;
}
private CloudState createCollection(CloudState state, String collectionName, int numShards) {
private ClusterState createCollection(ClusterState state, String collectionName, int numShards) {
Map<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String, Slice>>();
Map<String, Slice> newSlices = new LinkedHashMap<String,Slice>();
newStates.putAll(state.getCollectionStates());
@ -255,14 +255,14 @@ public class Overseer {
newSlices.put(sliceName, new Slice(sliceName, Collections.EMPTY_MAP));
}
newStates.put(collectionName, newSlices);
CloudState newCloudState = new CloudState(state.getLiveNodes(), newStates);
return newCloudState;
ClusterState newClusterState = new ClusterState(state.getLiveNodes(), newStates);
return newClusterState;
}
/*
* Return an already assigned id or null if not assigned
*/
private String getAssignedId(final CloudState state, final String nodeName,
private String getAssignedId(final ClusterState state, final String nodeName,
final ZkNodeProps coreState) {
final String key = coreState.get(ZkStateReader.NODE_NAME_PROP) + "_" + coreState.get(ZkStateReader.CORE_NAME_PROP);
Map<String, Slice> slices = state.getSlices(coreState.get(ZkStateReader.COLLECTION_PROP));
@ -276,7 +276,7 @@ public class Overseer {
return null;
}
private CloudState updateSlice(CloudState state, String collection, Slice slice) {
private ClusterState updateSlice(ClusterState state, String collection, Slice slice) {
final Map<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String,Slice>>();
newStates.putAll(state.getCollectionStates());
@ -306,10 +306,10 @@ public class Overseer {
final Slice updatedSlice = new Slice(slice.getName(), shards);
slices.put(slice.getName(), updatedSlice);
}
return new CloudState(state.getLiveNodes(), newStates);
return new ClusterState(state.getLiveNodes(), newStates);
}
private CloudState setShardLeader(CloudState state, String collection, String sliceName, String leaderUrl) {
private ClusterState setShardLeader(ClusterState state, String collection, String sliceName, String leaderUrl) {
final Map<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String,Slice>>();
newStates.putAll(state.getCollectionStates());
@ -341,21 +341,21 @@ public class Overseer {
Slice slice = new Slice(sliceName, newShards);
slices.put(sliceName, slice);
}
return new CloudState(state.getLiveNodes(), newStates);
return new ClusterState(state.getLiveNodes(), newStates);
}
/*
* Remove core from cloudstate
*/
private CloudState removeCore(final CloudState cloudState, ZkNodeProps message) {
private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) {
final String coreNodeName = message.get(ZkStateReader.NODE_NAME_PROP) + "_" + message.get(ZkStateReader.CORE_NAME_PROP);
final String collection = message.get(ZkStateReader.COLLECTION_PROP);
final LinkedHashMap<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String,Slice>>();
for(String collectionName: cloudState.getCollections()) {
for(String collectionName: clusterState.getCollections()) {
if(collection.equals(collectionName)) {
Map<String, Slice> slices = cloudState.getSlices(collection);
Map<String, Slice> slices = clusterState.getSlices(collection);
LinkedHashMap<String, Slice> newSlices = new LinkedHashMap<String, Slice>();
for(Slice slice: slices.values()) {
if(slice.getShards().containsKey(coreNodeName)) {
@ -393,29 +393,53 @@ public class Overseer {
}
}
} else {
newStates.put(collectionName, cloudState.getSlices(collectionName));
newStates.put(collectionName, clusterState.getSlices(collectionName));
}
}
CloudState newState = new CloudState(cloudState.getLiveNodes(), newStates);
ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newStates);
return newState;
}
}
private Thread ccThread;
private Thread updaterThread;
private volatile boolean isClosed;
private ZkStateReader reader;
private ShardHandler shardHandler;
private String adminPath;
public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader, final String id) throws KeeperException, InterruptedException {
public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader) throws KeeperException, InterruptedException {
this.reader = reader;
this.shardHandler = shardHandler;
this.adminPath = adminPath;
}
public void start(String id) {
log.info("Overseer (id=" + id + ") starting");
createOverseerNode(reader.getZkClient());
//launch cluster state updater thread
ThreadGroup tg = new ThreadGroup("Overseer state updater.");
Thread updaterThread = new Thread(tg, new CloudStateUpdater(reader, id));
updaterThread = new Thread(tg, new ClusterStateUpdater(reader, id));
updaterThread.setDaemon(true);
updaterThread.start();
ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
Thread ccThread = new Thread(ccTg, new OverseerCollectionProcessor(reader, id, shardHandler, adminPath));
ccThread = new Thread(ccTg, new OverseerCollectionProcessor(reader, id, shardHandler, adminPath),
"Overseer-" + id);
ccThread.setDaemon(true);
updaterThread.start();
ccThread.start();
}
public void close() {
isClosed = true;
}
/**
* Get queue that can be used to send messages to Overseer.

View File

@ -25,7 +25,7 @@ import java.util.Set;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
@ -64,6 +64,8 @@ public class OverseerCollectionProcessor implements Runnable {
private String adminPath;
private ZkStateReader zkStateReader;
private boolean isClosed;
public OverseerCollectionProcessor(ZkStateReader zkStateReader, String myId, ShardHandler shardHandler, String adminPath) {
this.zkStateReader = zkStateReader;
@ -76,7 +78,7 @@ public class OverseerCollectionProcessor implements Runnable {
@Override
public void run() {
log.info("Process current queue of collection creations");
while (amILeader()) {
while (amILeader() && !isClosed) {
try {
byte[] head = workQueue.peek(true);
@ -108,6 +110,10 @@ public class OverseerCollectionProcessor implements Runnable {
}
}
public void close() {
isClosed = true;
}
private boolean amILeader() {
try {
ZkNodeProps props = ZkNodeProps.load(zkStateReader.getZkClient().getData(
@ -126,22 +132,22 @@ public class OverseerCollectionProcessor implements Runnable {
private boolean processMessage(ZkNodeProps message, String operation) {
if (CREATECOLLECTION.equals(operation)) {
return createCollection(zkStateReader.getCloudState(), message);
return createCollection(zkStateReader.getClusterState(), message);
} else if (DELETECOLLECTION.equals(operation)) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString());
params.set(CoreAdminParams.DELETE_INSTANCE_DIR, true);
return collectionCmd(zkStateReader.getCloudState(), message, params);
return collectionCmd(zkStateReader.getClusterState(), message, params);
} else if (RELOADCOLLECTION.equals(operation)) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.RELOAD.toString());
return collectionCmd(zkStateReader.getCloudState(), message, params);
return collectionCmd(zkStateReader.getClusterState(), message, params);
}
// unknown command, toss it from our queue
return true;
}
private boolean createCollection(CloudState cloudState, ZkNodeProps message) {
private boolean createCollection(ClusterState clusterState, ZkNodeProps message) {
// look at the replication factor and see if it matches reality
// if it does not, find best nodes to create more cores
@ -176,7 +182,7 @@ public class OverseerCollectionProcessor implements Runnable {
// TODO: add smarter options that look at the current number of cores per node?
// for now we just go random
Set<String> nodes = cloudState.getLiveNodes();
Set<String> nodes = clusterState.getLiveNodes();
List<String> nodeList = new ArrayList<String>(nodes.size());
nodeList.addAll(nodes);
Collections.shuffle(nodeList);
@ -229,11 +235,11 @@ public class OverseerCollectionProcessor implements Runnable {
return true;
}
private boolean collectionCmd(CloudState cloudState, ZkNodeProps message, ModifiableSolrParams params) {
private boolean collectionCmd(ClusterState clusterState, ZkNodeProps message, ModifiableSolrParams params) {
log.info("Executing Collection Cmd : " + params);
String name = message.get("name");
Map<String,Slice> slices = cloudState.getCollectionStates().get(name);
Map<String,Slice> slices = clusterState.getCollectionStates().get(name);
if (slices == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Could not find collection:" + name);
@ -245,7 +251,7 @@ public class OverseerCollectionProcessor implements Runnable {
Set<Map.Entry<String,ZkNodeProps>> shardEntries = shards.entrySet();
for (Map.Entry<String,ZkNodeProps> shardEntry : shardEntries) {
final ZkNodeProps node = shardEntry.getValue();
if (cloudState.liveNodesContain(node.get(ZkStateReader.NODE_NAME_PROP))) {
if (clusterState.liveNodesContain(node.get(ZkStateReader.NODE_NAME_PROP))) {
params.set(CoreAdminParams.CORE, node.get(ZkStateReader.CORE_NAME_PROP));
String replica = node.get(ZkStateReader.BASE_URL_PROP);

Some files were not shown because too many files have changed in this diff.