Mirror of https://github.com/apache/lucene.git

commit aed1b5d760
LUCENE-3982: trunk upgrade

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1369470 13f79535-47bb-0310-9956-ffa450edef68

build.xml: 24 lines changed
@@ -176,22 +176,12 @@
     </subant>
   </target>

-  <target name="jar-checksums" depends="resolve" description="Recompute SHA1 checksums for all JAR files.">
-    <delete>
-      <fileset dir="${basedir}">
-        <include name="**/*.jar.sha1"/>
-      </fileset>
-    </delete>
-
-    <checksum algorithm="SHA1" fileext=".sha1">
-      <fileset dir="${basedir}">
-        <include name="**/*.jar"/>
-      </fileset>
-    </checksum>
-
-    <fixcrlf
-      srcdir="${basedir}"
-      includes="**/*.jar.sha1"
-      eol="lf" fixlast="true" encoding="US-ASCII" />
+  <target name="jar-checksums" description="Recompute SHA1 checksums for all JAR files.">
+    <sequential>
+      <subant target="jar-checksums" inheritall="false" failonerror="true">
+        <fileset dir="lucene" includes="build.xml" />
+        <fileset dir="solr" includes="build.xml" />
+      </subant>
+    </sequential>
   </target>
 </project>
@@ -15,30 +15,30 @@
   <classpathentry kind="src" path="lucene/sandbox/src/java"/>
   <classpathentry kind="src" path="lucene/sandbox/src/test"/>
   <classpathentry kind="src" path="lucene/test-framework/src/java"/>
-  <classpathentry kind="src" output="bin.tests-framework" path="lucene/test-framework/src/resources"/>
+  <classpathentry kind="src" output="bin/tests-framework" path="lucene/test-framework/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/common/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-common" path="lucene/analysis/common/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-common" path="lucene/analysis/common/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/common/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/icu/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-icu" path="lucene/analysis/icu/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-icu" path="lucene/analysis/icu/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/icu/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/kuromoji/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-kuromoji" path="lucene/analysis/kuromoji/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/kuromoji/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/phonetic/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-phonetic" path="lucene/analysis/phonetic/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/phonetic/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/smartcn/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-smartcn" path="lucene/analysis/smartcn/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/smartcn/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/stempel/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-stempel" path="lucene/analysis/stempel/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/stempel/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/morfologik/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-morfologik" path="lucene/analysis/morfologik/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/morfologik/src/test"/>
   <classpathentry kind="src" path="lucene/analysis/uima/src/java"/>
-  <classpathentry kind="src" output="bin.analysis-uima" path="lucene/analysis/uima/src/resources"/>
+  <classpathentry kind="src" output="bin/analysis-uima" path="lucene/analysis/uima/src/resources"/>
   <classpathentry kind="src" path="lucene/analysis/uima/src/test"/>
   <classpathentry kind="src" path="lucene/benchmark/src/java"/>
   <classpathentry kind="src" path="lucene/benchmark/src/test"/>
@@ -120,7 +120,7 @@
   <classpathentry kind="lib" path="solr/lib/slf4j-api-1.6.4.jar"/>
   <classpathentry kind="lib" path="solr/lib/slf4j-jdk14-1.6.4.jar"/>
   <classpathentry kind="lib" path="solr/lib/wstx-asl-3.2.7.jar"/>
-  <classpathentry kind="lib" path="solr/lib/zookeeper-3.3.5.jar"/>
+  <classpathentry kind="lib" path="solr/lib/zookeeper-3.3.6.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jetty-continuation-8.1.2.v20120308.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jetty-deploy-8.1.2.v20120308.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jetty-http-8.1.2.v20120308.jar"/>
@@ -175,5 +175,5 @@
   <classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
   <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
   <classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
-  <classpathentry kind="output" path="bin"/>
+  <classpathentry kind="output" path="bin/other"/>
 </classpath>
@@ -298,7 +298,7 @@
     <dependency>
       <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
-      <version>3.3.5</version>
+      <version>3.3.6</version>
     </dependency>
     <dependency>
       <groupId>org.carrot2</groupId>
@@ -20,12 +20,12 @@ import subprocess
 import signal
 import shutil
 import hashlib
-import httplib
+import http.client
 import re
-import urllib2
-import urlparse
+import urllib.request, urllib.error, urllib.parse
+import urllib.parse
 import sys
-import HTMLParser
+import html.parser
 from collections import defaultdict
 import xml.etree.ElementTree as ET
 import filecmp
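The hunk above is part of porting the release smoke-test script from Python 2 to Python 3: httplib, urllib2, urlparse and HTMLParser give way to http.client, urllib.request, urllib.parse and html.parser. As a hedged illustration of the rename pattern (only the module names come from the hunk; the aliases and the try/except shim are not part of the commit):

    # Illustrative compatibility sketch, not code from the commit.
    try:
        import http.client as http_client      # Python 3
        import urllib.request as url_request
        import urllib.parse as url_parse
        import html.parser as html_parser
    except ImportError:
        import httplib as http_client           # Python 2 fallback
        import urllib2 as url_request
        import urlparse as url_parse
        import HTMLParser as html_parser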
@@ -38,9 +38,9 @@ import checkJavadocLinks
 # tested on Linux and on Cygwin under Windows 7.

 def unshortenURL(url):
-  parsed = urlparse.urlparse(url)
+  parsed = urllib.parse.urlparse(url)
   if parsed[0] in ('http', 'https'):
-    h = httplib.HTTPConnection(parsed.netloc)
+    h = http.client.HTTPConnection(parsed.netloc)
     h.request('HEAD', parsed.path)
     response = h.getresponse()
     if response.status/100 == 3 and response.getheader('Location'):
@@ -101,8 +101,8 @@ def getHREFs(urlString):

   # Deref any redirects
   while True:
-    url = urlparse.urlparse(urlString)
-    h = httplib.HTTPConnection(url.netloc)
+    url = urllib.parse.urlparse(urlString)
+    h = http.client.HTTPConnection(url.netloc)
     h.request('GET', url.path)
     r = h.getresponse()
     newLoc = r.getheader('location')
@@ -112,8 +112,8 @@ def getHREFs(urlString):
       break

   links = []
-  for subUrl, text in reHREF.findall(urllib2.urlopen(urlString).read()):
-    fullURL = urlparse.urljoin(urlString, subUrl)
+  for subUrl, text in reHREF.findall(urllib.request.urlopen(urlString).read().decode('UTF-8')):
+    fullURL = urllib.parse.urljoin(urlString, subUrl)
     links.append((text, fullURL))
   return links

@ -121,15 +121,15 @@ def download(name, urlString, tmpDir, quiet=False):
|
|||
fileName = '%s/%s' % (tmpDir, name)
|
||||
if DEBUG and os.path.exists(fileName):
|
||||
if not quiet and fileName.find('.asc') == -1:
|
||||
print ' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
|
||||
print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
|
||||
return
|
||||
fIn = urllib2.urlopen(urlString)
|
||||
fIn = urllib.request.urlopen(urlString)
|
||||
fOut = open(fileName, 'wb')
|
||||
success = False
|
||||
try:
|
||||
while True:
|
||||
s = fIn.read(65536)
|
||||
if s == '':
|
||||
if s == b'':
|
||||
break
|
||||
fOut.write(s)
|
||||
fOut.close()
|
||||
|
@ -141,14 +141,14 @@ def download(name, urlString, tmpDir, quiet=False):
|
|||
if not success:
|
||||
os.remove(fileName)
|
||||
if not quiet and fileName.find('.asc') == -1:
|
||||
print ' %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
|
||||
print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
|
||||
|
||||
def load(urlString):
|
||||
return urllib2.urlopen(urlString).read()
|
||||
return urllib.request.urlopen(urlString).read().decode('utf-8')
|
||||
|
||||
def checkSigs(project, urlString, version, tmpDir, isSigned):
|
||||
|
||||
print ' test basics...'
|
||||
print(' test basics...')
|
||||
ents = getDirEntries(urlString)
|
||||
artifact = None
|
||||
keysURL = None
|
||||
|
@ -210,7 +210,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
|
|||
if keysURL is None:
|
||||
raise RuntimeError('%s is missing KEYS' % project)
|
||||
|
||||
print ' get KEYS'
|
||||
print(' get KEYS')
|
||||
download('%s.KEYS' % project, keysURL, tmpDir)
|
||||
|
||||
keysFile = '%s/%s.KEYS' % (tmpDir, project)
|
||||
|
@ -219,7 +219,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
|
|||
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
|
||||
if os.path.exists(gpgHomeDir):
|
||||
shutil.rmtree(gpgHomeDir)
|
||||
os.makedirs(gpgHomeDir, 0700)
|
||||
os.makedirs(gpgHomeDir, 0o700)
|
||||
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
|
||||
'%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
|
||||
|
||||
|
@ -232,12 +232,12 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
|
|||
testChanges(project, version, changesURL)
|
||||
|
||||
for artifact, urlString in artifacts:
|
||||
print ' download %s...' % artifact
|
||||
print(' download %s...' % artifact)
|
||||
download(artifact, urlString, tmpDir)
|
||||
verifyDigests(artifact, urlString, tmpDir)
|
||||
|
||||
if isSigned:
|
||||
print ' verify sig'
|
||||
print(' verify sig')
|
||||
# Test sig (this is done with a clean brand-new GPG world)
|
||||
download(artifact + '.asc', urlString + '.asc', tmpDir)
|
||||
sigFile = '%s/%s.asc' % (tmpDir, artifact)
|
||||
|
@ -246,28 +246,28 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
|
|||
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
|
||||
logFile)
|
||||
# Forward any GPG warnings, except the expected one (since its a clean world)
|
||||
f = open(logFile, 'rb')
|
||||
f = open(logFile, encoding='UTF-8')
|
||||
for line in f.readlines():
|
||||
if line.lower().find('warning') != -1 \
|
||||
and line.find('WARNING: This key is not certified with a trusted signature') == -1:
|
||||
print ' GPG: %s' % line.strip()
|
||||
print(' GPG: %s' % line.strip())
|
||||
f.close()
|
||||
|
||||
# Test trust (this is done with the real users config)
|
||||
run('gpg --import %s' % (keysFile),
|
||||
'%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
|
||||
print ' verify trust'
|
||||
print(' verify trust')
|
||||
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
|
||||
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
|
||||
# Forward any GPG warnings:
|
||||
f = open(logFile, 'rb')
|
||||
f = open(logFile, encoding='UTF-8')
|
||||
for line in f.readlines():
|
||||
if line.lower().find('warning') != -1:
|
||||
print ' GPG: %s' % line.strip()
|
||||
print(' GPG: %s' % line.strip())
|
||||
f.close()
|
||||
|
||||
def testChanges(project, version, changesURLString):
|
||||
print ' check changes HTML...'
|
||||
print(' check changes HTML...')
|
||||
changesURL = None
|
||||
for text, subURL in getDirEntries(changesURLString):
|
||||
if text == 'Changes.html':
|
||||
|
@ -287,7 +287,7 @@ def testChangesText(dir, version, project):
|
|||
if 'CHANGES.txt' in files:
|
||||
fullPath = '%s/CHANGES.txt' % root
|
||||
#print 'CHECK %s' % fullPath
|
||||
checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
|
||||
checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
|
||||
|
||||
def checkChangesContent(s, version, name, project, isHTML):
|
||||
|
||||
|
@ -336,7 +336,7 @@ def run(command, logFile):
|
|||
raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath))
|
||||
|
||||
def verifyDigests(artifact, urlString, tmpDir):
|
||||
print ' verify md5/sha1 digests'
|
||||
print(' verify md5/sha1 digests')
|
||||
md5Expected, t = load(urlString + '.md5').strip().split()
|
||||
if t != '*'+artifact:
|
||||
raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))
|
||||
@@ -347,10 +347,10 @@ def verifyDigests(artifact, urlString, tmpDir):

   m = hashlib.md5()
   s = hashlib.sha1()
-  f = open('%s/%s' % (tmpDir, artifact))
+  f = open('%s/%s' % (tmpDir, artifact), 'rb')
   while True:
     x = f.read(65536)
-    if x == '':
+    if len(x) == 0:
       break
     m.update(x)
     s.update(x)
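The hunk above makes the digest check read the artifact in binary mode, which Python 3 requires because hashlib only accepts bytes, and it replaces the x == '' end-of-file test with a length check. A minimal, self-contained sketch of the same chunked-hashing pattern (the function name and signature below are illustrative, not from the script):

    import hashlib

    def file_digests(path, chunk_size=65536):
        # One pass over the file in binary chunks, feeding both digests,
        # mirroring the loop in the hunk above.
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        with open(path, 'rb') as f:
            while True:
                chunk = f.read(chunk_size)
                if len(chunk) == 0:  # b'' signals end of file
                    break
                md5.update(chunk)
                sha1.update(chunk)
        return md5.hexdigest(), sha1.hexdigest()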
@ -388,7 +388,7 @@ def unpack(project, tmpDir, artifact, version):
|
|||
shutil.rmtree(destDir)
|
||||
os.makedirs(destDir)
|
||||
os.chdir(destDir)
|
||||
print ' unpack %s...' % artifact
|
||||
print(' unpack %s...' % artifact)
|
||||
unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
|
||||
if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'):
|
||||
run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
|
||||
|
@ -437,12 +437,14 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
|
|||
|
||||
if project == 'lucene':
|
||||
# TODO: clean this up to not be a list of modules that we must maintain
|
||||
extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework')
|
||||
extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
|
||||
if isSrc:
|
||||
extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
|
||||
else:
|
||||
extras = ()
|
||||
|
||||
# TODO: if solr, verify lucene/licenses, solr/licenses are present
|
||||
|
||||
for e in extras:
|
||||
if e not in l:
|
||||
raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
|
||||
|
@ -453,81 +455,81 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
|
|||
raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
|
||||
|
||||
if isSrc:
|
||||
print ' make sure no JARs/WARs in src dist...'
|
||||
print(' make sure no JARs/WARs in src dist...')
|
||||
lines = os.popen('find . -name \\*.jar').readlines()
|
||||
if len(lines) != 0:
|
||||
print ' FAILED:'
|
||||
print(' FAILED:')
|
||||
for line in lines:
|
||||
print ' %s' % line.strip()
|
||||
print(' %s' % line.strip())
|
||||
raise RuntimeError('source release has JARs...')
|
||||
lines = os.popen('find . -name \\*.war').readlines()
|
||||
if len(lines) != 0:
|
||||
print ' FAILED:'
|
||||
print(' FAILED:')
|
||||
for line in lines:
|
||||
print ' %s' % line.strip()
|
||||
print(' %s' % line.strip())
|
||||
raise RuntimeError('source release has WARs...')
|
||||
|
||||
print ' run "ant validate"'
|
||||
print(' run "ant validate"')
|
||||
run('%s; ant validate' % javaExe('1.7'), '%s/validate.log' % unpackPath)
|
||||
|
||||
if project == 'lucene':
|
||||
print ' run tests w/ Java 6...'
|
||||
print(' run tests w/ Java 6...')
|
||||
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
|
||||
run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath)
|
||||
testDemo(isSrc, version)
|
||||
# test javadocs
|
||||
print ' generate javadocs w/ Java 6...'
|
||||
print(' generate javadocs w/ Java 6...')
|
||||
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
|
||||
checkJavadocpath('%s/build/docs' % unpackPath)
|
||||
else:
|
||||
print ' run tests w/ Java 6...'
|
||||
print(' run tests w/ Java 6...')
|
||||
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
|
||||
|
||||
# test javadocs
|
||||
print ' generate javadocs w/ Java 6...'
|
||||
print(' generate javadocs w/ Java 6...')
|
||||
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
|
||||
checkJavadocpath('%s/build/docs' % unpackPath)
|
||||
|
||||
print ' run tests w/ Java 7...'
|
||||
print(' run tests w/ Java 7...')
|
||||
run('%s; ant test' % javaExe('1.7'), '%s/test.log' % unpackPath)
|
||||
|
||||
# test javadocs
|
||||
print ' generate javadocs w/ Java 7...'
|
||||
print(' generate javadocs w/ Java 7...')
|
||||
run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath)
|
||||
checkJavadocpath('%s/build/docs' % unpackPath)
|
||||
|
||||
os.chdir('solr')
|
||||
print ' test solr example w/ Java 6...'
|
||||
print(' test solr example w/ Java 6...')
|
||||
run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath)
|
||||
testSolrExample(unpackPath, JAVA6_HOME, True)
|
||||
|
||||
print ' test solr example w/ Java 7...'
|
||||
print(' test solr example w/ Java 7...')
|
||||
run('%s; ant clean example' % javaExe('1.7'), '%s/antexample.log' % unpackPath)
|
||||
testSolrExample(unpackPath, JAVA7_HOME, True)
|
||||
os.chdir('..')
|
||||
|
||||
print ' check NOTICE'
|
||||
print(' check NOTICE')
|
||||
testNotice(unpackPath)
|
||||
|
||||
else:
|
||||
if project == 'lucene':
|
||||
testDemo(isSrc, version)
|
||||
else:
|
||||
print ' test solr example w/ Java 6...'
|
||||
print(' test solr example w/ Java 6...')
|
||||
testSolrExample(unpackPath, JAVA6_HOME, False)
|
||||
|
||||
print ' test solr example w/ Java 7...'
|
||||
print(' test solr example w/ Java 7...')
|
||||
testSolrExample(unpackPath, JAVA7_HOME, False)
|
||||
|
||||
testChangesText('.', version, project)
|
||||
|
||||
if project == 'lucene' and not isSrc:
|
||||
print ' check Lucene\'s javadoc JAR'
|
||||
print(' check Lucene\'s javadoc JAR')
|
||||
checkJavadocpath('%s/docs' % unpackPath)
|
||||
|
||||
def testNotice(unpackPath):
|
||||
solrNotice = open('%s/NOTICE.txt' % unpackPath).read()
|
||||
luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath).read()
|
||||
solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
|
||||
luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
|
||||
|
||||
expected = """
|
||||
=========================================================================
|
||||
|
@ -545,12 +547,12 @@ def readSolrOutput(p, startupEvent, logFile):
|
|||
try:
|
||||
while True:
|
||||
line = p.readline()
|
||||
if line == '':
|
||||
if len(line) == 0:
|
||||
break
|
||||
f.write(line)
|
||||
f.flush()
|
||||
# print 'SOLR: %s' % line.strip()
|
||||
if line.find('Started SocketConnector@0.0.0.0:8983') != -1:
|
||||
if line.decode('UTF-8').find('Started SocketConnector@0.0.0.0:8983') != -1:
|
||||
startupEvent.set()
|
||||
finally:
|
||||
f.close()
|
||||
|
@ -558,7 +560,7 @@ def readSolrOutput(p, startupEvent, logFile):
|
|||
def testSolrExample(unpackPath, javaPath, isSrc):
|
||||
logFile = '%s/solr-example.log' % unpackPath
|
||||
os.chdir('example')
|
||||
print ' start Solr instance (log=%s)...' % logFile
|
||||
print(' start Solr instance (log=%s)...' % logFile)
|
||||
env = {}
|
||||
env.update(os.environ)
|
||||
env['JAVA_HOME'] = javaPath
|
||||
|
@ -572,21 +574,21 @@ def testSolrExample(unpackPath, javaPath, isSrc):
|
|||
|
||||
# Make sure Solr finishes startup:
|
||||
startupEvent.wait()
|
||||
print ' startup done'
|
||||
print(' startup done')
|
||||
|
||||
try:
|
||||
print ' test utf8...'
|
||||
print(' test utf8...')
|
||||
run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
|
||||
print ' index example docs...'
|
||||
print(' index example docs...')
|
||||
run('sh ./exampledocs/post.sh ./exampledocs/*.xml', 'post-example-docs.log')
|
||||
print ' run query...'
|
||||
s = urllib2.urlopen('http://localhost:8983/solr/select/?q=video').read()
|
||||
print(' run query...')
|
||||
s = urllib.request.urlopen('http://localhost:8983/solr/select/?q=video').read().decode('UTF-8')
|
||||
if s.find('<result name="response" numFound="3" start="0">') == -1:
|
||||
print 'FAILED: response is:\n%s' % s
|
||||
print('FAILED: response is:\n%s' % s)
|
||||
raise RuntimeError('query on solr example instance failed')
|
||||
finally:
|
||||
# Stop server:
|
||||
print ' stop server (SIGINT)...'
|
||||
print(' stop server (SIGINT)...')
|
||||
os.kill(server.pid, signal.SIGINT)
|
||||
|
||||
# Give it 10 seconds to gracefully shut down
|
||||
|
@ -594,14 +596,14 @@ def testSolrExample(unpackPath, javaPath, isSrc):
|
|||
|
||||
if serverThread.isAlive():
|
||||
# Kill server:
|
||||
print '***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...'
|
||||
print('***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...')
|
||||
os.kill(server.pid, signal.SIGKILL)
|
||||
|
||||
serverThread.join(10.0)
|
||||
|
||||
if serverThread.isAlive():
|
||||
# Shouldn't happen unless something is seriously wrong...
|
||||
print '***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...'
|
||||
print('***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...')
|
||||
|
||||
os.chdir('..')
|
||||
|
||||
|
@ -615,13 +617,13 @@ def checkJavadocpath(path):
|
|||
if checkJavaDocs.checkPackageSummaries(path):
|
||||
# disabled: RM cannot fix all this, see LUCENE-3887
|
||||
# raise RuntimeError('javadoc problems')
|
||||
print '\n***WARNING***: javadocs want to fail!\n'
|
||||
print('\n***WARNING***: javadocs want to fail!\n')
|
||||
|
||||
if checkJavadocLinks.checkAll(path):
|
||||
raise RuntimeError('broken javadocs links found!')
|
||||
|
||||
def testDemo(isSrc, version):
|
||||
print ' test demo...'
|
||||
print(' test demo...')
|
||||
sep = ';' if cygwin else ':'
|
||||
if isSrc:
|
||||
cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep)
|
||||
|
@ -632,14 +634,14 @@ def testDemo(isSrc, version):
|
|||
run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log')
|
||||
run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log')
|
||||
reMatchingDocs = re.compile('(\d+) total matching documents')
|
||||
m = reMatchingDocs.search(open('search.log', 'rb').read())
|
||||
m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
|
||||
if m is None:
|
||||
raise RuntimeError('lucene demo\'s SearchFiles found no results')
|
||||
else:
|
||||
numHits = int(m.group(1))
|
||||
if numHits < 100:
|
||||
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
|
||||
print ' got %d hits for query "lucene"' % numHits
|
||||
print(' got %d hits for query "lucene"' % numHits)
|
||||
|
||||
def checkMaven(baseURL, tmpDir, version, isSigned):
|
||||
# Locate the release branch in subversion
|
||||
|
@ -652,11 +654,11 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
|
|||
if text == releaseBranchText:
|
||||
releaseBranchSvnURL = subURL
|
||||
|
||||
print ' get POM templates',
|
||||
print(' get POM templates', end=' ')
|
||||
POMtemplates = defaultdict()
|
||||
getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
|
||||
print
|
||||
print ' download artifacts',
|
||||
print()
|
||||
print(' download artifacts', end=' ')
|
||||
artifacts = {'lucene': [], 'solr': []}
|
||||
for project in ('lucene', 'solr'):
|
||||
artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
|
||||
|
@ -664,30 +666,30 @@ def checkMaven(baseURL, tmpDir, version, isSigned):
|
|||
if not os.path.exists(targetDir):
|
||||
os.makedirs(targetDir)
|
||||
crawl(artifacts[project], artifactsURL, targetDir)
|
||||
print
|
||||
print ' verify that each binary artifact has a deployed POM...'
|
||||
print()
|
||||
print(' verify that each binary artifact has a deployed POM...')
|
||||
verifyPOMperBinaryArtifact(artifacts, version)
|
||||
print ' verify that there is an artifact for each POM template...'
|
||||
print(' verify that there is an artifact for each POM template...')
|
||||
verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
|
||||
print " verify Maven artifacts' md5/sha1 digests..."
|
||||
print(" verify Maven artifacts' md5/sha1 digests...")
|
||||
verifyMavenDigests(artifacts)
|
||||
print ' verify that all non-Mavenized deps are deployed...'
|
||||
print(' verify that all non-Mavenized deps are deployed...')
|
||||
nonMavenizedDeps = dict()
|
||||
checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
|
||||
version, releaseBranchSvnURL)
|
||||
print ' check for javadoc and sources artifacts...'
|
||||
print(' check for javadoc and sources artifacts...')
|
||||
checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
|
||||
print " verify deployed POMs' coordinates..."
|
||||
print(" verify deployed POMs' coordinates...")
|
||||
verifyDeployedPOMsCoordinates(artifacts, version)
|
||||
if isSigned:
|
||||
print ' verify maven artifact sigs',
|
||||
print(' verify maven artifact sigs', end=' ')
|
||||
verifyMavenSigs(baseURL, tmpDir, artifacts)
|
||||
|
||||
distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
|
||||
|
||||
print ' verify that non-Mavenized deps are same as in the binary distribution...'
|
||||
print(' verify that non-Mavenized deps are same as in the binary distribution...')
|
||||
checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
|
||||
print ' verify that Maven artifacts are same as in the binary distribution...'
|
||||
print(' verify that Maven artifacts are same as in the binary distribution...')
|
||||
checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
|
||||
|
||||
def getDistributionsForMavenChecks(tmpDir, version, baseURL):
|
||||
|
@ -697,19 +699,19 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
|
|||
if project == 'solr': distribution = 'apache-' + distribution
|
||||
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
|
||||
distURL = '%s/%s/%s' % (baseURL, project, distribution)
|
||||
print ' download %s...' % distribution,
|
||||
print(' download %s...' % distribution, end=' ')
|
||||
download(distribution, distURL, tmpDir)
|
||||
destDir = '%s/unpack-%s-maven' % (tmpDir, project)
|
||||
if os.path.exists(destDir):
|
||||
shutil.rmtree(destDir)
|
||||
os.makedirs(destDir)
|
||||
os.chdir(destDir)
|
||||
print ' unpack %s...' % distribution
|
||||
print(' unpack %s...' % distribution)
|
||||
unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
|
||||
run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
|
||||
if project == 'solr': # unpack the Solr war
|
||||
unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
|
||||
print ' unpack Solr war...'
|
||||
print(' unpack Solr war...')
|
||||
run('jar xvf */dist/*.war', unpackLogFile)
|
||||
distributionFiles[project] = []
|
||||
for root, dirs, files in os.walk(destDir):
|
||||
|
@ -719,7 +721,7 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
|
|||
def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
|
||||
for project in ('lucene', 'solr'):
|
||||
for artifact in artifacts[project]:
|
||||
if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
|
||||
if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
|
||||
javadocJar = artifact[:-4] + '-javadoc.jar'
|
||||
if javadocJar not in artifacts[project]:
|
||||
raise RuntimeError('missing: %s' % javadocJar)
|
||||
|
@ -732,7 +734,7 @@ def checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps):
|
|||
distFilenames = dict()
|
||||
for file in distributionFiles[project]:
|
||||
distFilenames[os.path.basename(file)] = file
|
||||
for dep in nonMavenizedDeps.keys():
|
||||
for dep in list(nonMavenizedDeps.keys()):
|
||||
if ('/%s/' % project) in dep:
|
||||
depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
|
||||
if not depOrigFilename in distFilenames:
|
||||
|
@ -753,9 +755,9 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
|
|||
distFilenames[baseName] = file
|
||||
for artifact in artifacts[project]:
|
||||
if reJarWar.search(artifact):
|
||||
if artifact not in nonMavenizedDeps.keys():
|
||||
if artifact not in list(nonMavenizedDeps.keys()):
|
||||
artifactFilename = os.path.basename(artifact)
|
||||
if artifactFilename not in distFilenames.keys():
|
||||
if artifactFilename not in list(distFilenames.keys()):
|
||||
raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
|
||||
% (artifact, project))
|
||||
# TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
|
||||
|
@ -772,16 +774,17 @@ def verifyMavenDigests(artifacts):
|
|||
raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
|
||||
if artifactFile + '.sha1' not in artifacts[project]:
|
||||
raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
|
||||
with open(artifactFile + '.md5', 'r') as md5File:
|
||||
with open(artifactFile + '.md5', encoding='UTF-8') as md5File:
|
||||
md5Expected = md5File.read().strip()
|
||||
with open(artifactFile + '.sha1', 'r') as sha1File:
|
||||
with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File:
|
||||
sha1Expected = sha1File.read().strip()
|
||||
md5 = hashlib.md5()
|
||||
sha1 = hashlib.sha1()
|
||||
inputFile = open(artifactFile)
|
||||
inputFile = open(artifactFile, 'rb')
|
||||
while True:
|
||||
bytes = inputFile.read(65536)
|
||||
if bytes == '': break
|
||||
if len(bytes) == 0:
|
||||
break
|
||||
md5.update(bytes)
|
||||
sha1.update(bytes)
|
||||
inputFile.close()
|
||||
|
@ -846,7 +849,7 @@ def checkNonMavenizedDeps(nonMavenizedDependencies, POMtemplates, artifacts,
|
|||
if releaseBranchSvnURL is None:
|
||||
pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
|
||||
if os.path.exists(pomPath):
|
||||
doc2 = ET.XML(open(pomPath).read())
|
||||
doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
|
||||
break
|
||||
else:
|
||||
entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
|
||||
|
@ -891,7 +894,7 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
|
|||
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
|
||||
if os.path.exists(gpgHomeDir):
|
||||
shutil.rmtree(gpgHomeDir)
|
||||
os.makedirs(gpgHomeDir, 0700)
|
||||
os.makedirs(gpgHomeDir, 0o700)
|
||||
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
|
||||
'%s/%s.gpg.import.log' % (tmpDir, project))
|
||||
|
||||
|
@ -904,12 +907,12 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
|
|||
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
|
||||
logFile)
|
||||
# Forward any GPG warnings, except the expected one (since its a clean world)
|
||||
f = open(logFile, 'rb')
|
||||
f = open(logFile, encoding='UTF-8')
|
||||
for line in f.readlines():
|
||||
if line.lower().find('warning') != -1 \
|
||||
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
|
||||
and line.find('WARNING: using insecure memory') == -1:
|
||||
print ' GPG: %s' % line.strip()
|
||||
print(' GPG: %s' % line.strip())
|
||||
f.close()
|
||||
|
||||
# Test trust (this is done with the real users config)
|
||||
|
@ -918,16 +921,16 @@ def verifyMavenSigs(baseURL, tmpDir, artifacts):
|
|||
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
|
||||
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
|
||||
# Forward any GPG warnings:
|
||||
f = open(logFile, 'rb')
|
||||
f = open(logFile, encoding='UTF-8')
|
||||
for line in f.readlines():
|
||||
if line.lower().find('warning') != -1 \
|
||||
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
|
||||
and line.find('WARNING: using insecure memory') == -1:
|
||||
print ' GPG: %s' % line.strip()
|
||||
print(' GPG: %s' % line.strip())
|
||||
f.close()
|
||||
|
||||
sys.stdout.write('.')
|
||||
print
|
||||
print()
|
||||
|
||||
def verifyPOMperBinaryArtifact(artifacts, version):
|
||||
"""verify that each binary jar and war has a corresponding POM file"""
|
||||
|
@ -1024,9 +1027,9 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
|
|||
def main():
|
||||
|
||||
if len(sys.argv) != 4:
|
||||
print
|
||||
print 'Usage python -u %s BaseURL version tmpDir' % sys.argv[0]
|
||||
print
|
||||
print()
|
||||
print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
|
||||
print()
|
||||
sys.exit(1)
|
||||
|
||||
baseURL = sys.argv[1]
|
||||
|
@ -1046,11 +1049,11 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
|
|||
|
||||
lucenePath = None
|
||||
solrPath = None
|
||||
print
|
||||
print 'Load release URL "%s"...' % baseURL
|
||||
print()
|
||||
print('Load release URL "%s"...' % baseURL)
|
||||
newBaseURL = unshortenURL(baseURL)
|
||||
if newBaseURL != baseURL:
|
||||
print ' unshortened: %s' % newBaseURL
|
||||
print(' unshortened: %s' % newBaseURL)
|
||||
baseURL = newBaseURL
|
||||
|
||||
for text, subURL in getDirEntries(baseURL):
|
||||
|
@ -1064,23 +1067,27 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
|
|||
if solrPath is None:
|
||||
raise RuntimeError('could not find solr subdir')
|
||||
|
||||
print
|
||||
print 'Test Lucene...'
|
||||
print()
|
||||
print('Test Lucene...')
|
||||
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
|
||||
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
|
||||
unpack('lucene', tmpDir, artifact, version)
|
||||
unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
|
||||
|
||||
print
|
||||
print 'Test Solr...'
|
||||
print()
|
||||
print('Test Solr...')
|
||||
checkSigs('solr', solrPath, version, tmpDir, isSigned)
|
||||
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
|
||||
unpack('solr', tmpDir, artifact, version)
|
||||
unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
|
||||
|
||||
print 'Test Maven artifacts for Lucene and Solr...'
|
||||
print('Test Maven artifacts for Lucene and Solr...')
|
||||
checkMaven(baseURL, tmpDir, version, isSigned)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
try:
|
||||
main()
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
@@ -15,6 +15,9 @@ New features
   underlying PayloadFunction's explanation as the explanation
   for the payload score. (Scott Smerchek via Robert Muir)

+* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
+  such as primary keys (Mark Harwood, Mike McCandless)
+
 * LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
   iteration marks. (Robert Muir, Christian Moen)

@@ -40,6 +43,11 @@ New features
   implementations to optimize the enum implementation. (Robert Muir,
   Mike McCandless)

+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
+  int docID), to attempt deletion by docID as long as the provided
+  reader is an NRT reader, and the segment has not yet been merged
+  away (Mike McCandless).
+
 API Changes

 * LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@@ -87,6 +95,10 @@ API Changes
   instead of the previous boolean needsFlags; consistent with the changes
   for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
   is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)

+* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
+  as the Store parameter didn't make sense: if you supplied Store.YES, you would only
+  receive an exception anyway. (Robert Muir)
+
 Optimizations

@@ -99,6 +111,10 @@ Optimizations
 * LUCENE-4235: Remove enforcing of Filter rewrite for NRQ queries.
   (Uwe Schindler)

+* LUCENE-4279: Regenerated snowball Stemmers from snowball r554,
+  making them substantially more lightweight. Behavior is unchanged.
+  (Robert Muir)
+
 Bug Fixes

 * LUCENE-4109: BooleanQueries are not parsed correctly with the
@@ -140,11 +156,21 @@ Bug Fixes
   IndexWriter to only delete files matching this pattern from an index
   directory, to reduce risk when the wrong index path is accidentally
   passed to IndexWriter (Robert Muir, Mike McCandless)

+* LUCENE-4277: Fix IndexWriter deadlock during rollback if flushable DWPT
+  instances are already checked out and queued up but not yet flushed.
+  (Simon Willnauer)
+
+* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
+  (Johannes Christen, Uwe Schindler, Robert Muir)
+
 Changes in Runtime Behavior

 * LUCENE-4109: Enable position increments in the flexible queryparser by default.
   (Karsten Rauch via Robert Muir)

+* LUCENE-3616: Field throws exception if you try to set a boost on an
+  unindexed field or one that omits norms. (Robert Muir)
+
 Build

@ -1,423 +1,439 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.Among;
|
||||
|
||||
/**
|
||||
* Generated class implementing code defined by a snowball script.
|
||||
*/
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
|
||||
public class DanishStemmer extends SnowballProgram {
|
||||
|
||||
private Among a_0[] = {
|
||||
new Among ( "hed", -1, 1, "", this),
|
||||
new Among ( "ethed", 0, 1, "", this),
|
||||
new Among ( "ered", -1, 1, "", this),
|
||||
new Among ( "e", -1, 1, "", this),
|
||||
new Among ( "erede", 3, 1, "", this),
|
||||
new Among ( "ende", 3, 1, "", this),
|
||||
new Among ( "erende", 5, 1, "", this),
|
||||
new Among ( "ene", 3, 1, "", this),
|
||||
new Among ( "erne", 3, 1, "", this),
|
||||
new Among ( "ere", 3, 1, "", this),
|
||||
new Among ( "en", -1, 1, "", this),
|
||||
new Among ( "heden", 10, 1, "", this),
|
||||
new Among ( "eren", 10, 1, "", this),
|
||||
new Among ( "er", -1, 1, "", this),
|
||||
new Among ( "heder", 13, 1, "", this),
|
||||
new Among ( "erer", 13, 1, "", this),
|
||||
new Among ( "s", -1, 2, "", this),
|
||||
new Among ( "heds", 16, 1, "", this),
|
||||
new Among ( "es", 16, 1, "", this),
|
||||
new Among ( "endes", 18, 1, "", this),
|
||||
new Among ( "erendes", 19, 1, "", this),
|
||||
new Among ( "enes", 18, 1, "", this),
|
||||
new Among ( "ernes", 18, 1, "", this),
|
||||
new Among ( "eres", 18, 1, "", this),
|
||||
new Among ( "ens", 16, 1, "", this),
|
||||
new Among ( "hedens", 24, 1, "", this),
|
||||
new Among ( "erens", 24, 1, "", this),
|
||||
new Among ( "ers", 16, 1, "", this),
|
||||
new Among ( "ets", 16, 1, "", this),
|
||||
new Among ( "erets", 28, 1, "", this),
|
||||
new Among ( "et", -1, 1, "", this),
|
||||
new Among ( "eret", 30, 1, "", this)
|
||||
};
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private Among a_1[] = {
|
||||
new Among ( "gd", -1, -1, "", this),
|
||||
new Among ( "dt", -1, -1, "", this),
|
||||
new Among ( "gt", -1, -1, "", this),
|
||||
new Among ( "kt", -1, -1, "", this)
|
||||
};
|
||||
private final static DanishStemmer methodObject = new DanishStemmer ();
|
||||
|
||||
private Among a_2[] = {
|
||||
new Among ( "ig", -1, 1, "", this),
|
||||
new Among ( "lig", 0, 1, "", this),
|
||||
new Among ( "elig", 1, 1, "", this),
|
||||
new Among ( "els", -1, 1, "", this),
|
||||
new Among ( "l\u00F8st", -1, 2, "", this)
|
||||
};
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "hed", -1, 1, "", methodObject ),
|
||||
new Among ( "ethed", 0, 1, "", methodObject ),
|
||||
new Among ( "ered", -1, 1, "", methodObject ),
|
||||
new Among ( "e", -1, 1, "", methodObject ),
|
||||
new Among ( "erede", 3, 1, "", methodObject ),
|
||||
new Among ( "ende", 3, 1, "", methodObject ),
|
||||
new Among ( "erende", 5, 1, "", methodObject ),
|
||||
new Among ( "ene", 3, 1, "", methodObject ),
|
||||
new Among ( "erne", 3, 1, "", methodObject ),
|
||||
new Among ( "ere", 3, 1, "", methodObject ),
|
||||
new Among ( "en", -1, 1, "", methodObject ),
|
||||
new Among ( "heden", 10, 1, "", methodObject ),
|
||||
new Among ( "eren", 10, 1, "", methodObject ),
|
||||
new Among ( "er", -1, 1, "", methodObject ),
|
||||
new Among ( "heder", 13, 1, "", methodObject ),
|
||||
new Among ( "erer", 13, 1, "", methodObject ),
|
||||
new Among ( "s", -1, 2, "", methodObject ),
|
||||
new Among ( "heds", 16, 1, "", methodObject ),
|
||||
new Among ( "es", 16, 1, "", methodObject ),
|
||||
new Among ( "endes", 18, 1, "", methodObject ),
|
||||
new Among ( "erendes", 19, 1, "", methodObject ),
|
||||
new Among ( "enes", 18, 1, "", methodObject ),
|
||||
new Among ( "ernes", 18, 1, "", methodObject ),
|
||||
new Among ( "eres", 18, 1, "", methodObject ),
|
||||
new Among ( "ens", 16, 1, "", methodObject ),
|
||||
new Among ( "hedens", 24, 1, "", methodObject ),
|
||||
new Among ( "erens", 24, 1, "", methodObject ),
|
||||
new Among ( "ers", 16, 1, "", methodObject ),
|
||||
new Among ( "ets", 16, 1, "", methodObject ),
|
||||
new Among ( "erets", 28, 1, "", methodObject ),
|
||||
new Among ( "et", -1, 1, "", methodObject ),
|
||||
new Among ( "eret", 30, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "gd", -1, -1, "", methodObject ),
|
||||
new Among ( "dt", -1, -1, "", methodObject ),
|
||||
new Among ( "gt", -1, -1, "", methodObject ),
|
||||
new Among ( "kt", -1, -1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "ig", -1, 1, "", methodObject ),
|
||||
new Among ( "lig", 0, 1, "", methodObject ),
|
||||
new Among ( "elig", 1, 1, "", methodObject ),
|
||||
new Among ( "els", -1, 1, "", methodObject ),
|
||||
new Among ( "l\u00F8st", -1, 2, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
|
||||
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
||||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
private StringBuilder S_ch = new StringBuilder();
|
||||
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
|
||||
|
||||
private void copy_from(DanishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
S_ch = other.S_ch;
|
||||
super.copy_from(other);
|
||||
}
|
||||
private void copy_from(DanishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
S_ch = other.S_ch;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 29
|
||||
I_p1 = limit;
|
||||
// test, line 33
|
||||
v_1 = cursor;
|
||||
// (, line 33
|
||||
// hop, line 33
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
// setmark x, line 33
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 34
|
||||
golab0: while(true)
|
||||
{
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
// (, line 29
|
||||
I_p1 = limit;
|
||||
// test, line 33
|
||||
v_1 = cursor;
|
||||
// (, line 33
|
||||
// hop, line 33
|
||||
{
|
||||
break lab1;
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 34
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
// setmark x, line 33
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 34
|
||||
golab0: while(true)
|
||||
{
|
||||
break lab3;
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
// gopast, line 34
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 34
|
||||
I_p1 = cursor;
|
||||
// try, line 35
|
||||
lab4: do {
|
||||
// (, line 35
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 34
|
||||
I_p1 = cursor;
|
||||
// try, line 35
|
||||
lab4: do {
|
||||
// (, line 35
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_main_suffix() {
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 40
|
||||
// setlimit, line 41
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 41
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 41
|
||||
// [, line 41
|
||||
ket = cursor;
|
||||
// substring, line 41
|
||||
among_var = find_among_b(a_0, 32);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 41
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 48
|
||||
// delete, line 48
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 50
|
||||
if (!(in_grouping_b(g_s_ending, 97, 229)))
|
||||
// (, line 40
|
||||
// setlimit, line 41
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 41
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 50
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 41
|
||||
// [, line 41
|
||||
ket = cursor;
|
||||
// substring, line 41
|
||||
among_var = find_among_b(a_0, 32);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 41
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 48
|
||||
// delete, line 48
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 50
|
||||
if (!(in_grouping_b(g_s_ending, 97, 229)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 50
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 54
|
||||
// test, line 55
|
||||
v_1 = limit - cursor;
|
||||
// (, line 55
|
||||
// setlimit, line 56
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 56
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 56
|
||||
// [, line 56
|
||||
ket = cursor;
|
||||
// substring, line 56
|
||||
if (find_among_b(a_1, 4) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 62
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 62
|
||||
bra = cursor;
|
||||
// delete, line 62
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
// (, line 54
|
||||
// test, line 55
|
||||
v_1 = limit - cursor;
|
||||
// (, line 55
|
||||
// setlimit, line 56
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 56
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 56
|
||||
// [, line 56
|
||||
ket = cursor;
|
||||
// substring, line 56
|
||||
if (find_among_b(a_1, 4) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 62
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 62
|
||||
bra = cursor;
|
||||
// delete, line 62
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 65
|
||||
// do, line 66
|
||||
v_1 = limit - cursor;
|
||||
lab0: do {
|
||||
// (, line 66
|
||||
// [, line 66
|
||||
ket = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "st")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// ], line 66
|
||||
bra = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "ig")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// delete, line 66
|
||||
slice_del();
|
||||
} while (false);
|
||||
cursor = limit - v_1;
|
||||
// setlimit, line 67
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 67
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 67
|
||||
// [, line 67
|
||||
ket = cursor;
|
||||
// substring, line 67
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 67
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 70
|
||||
// delete, line 70
|
||||
slice_del();
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab1: do {
|
||||
// call consonant_pair, line 70
|
||||
if (!r_consonant_pair())
|
||||
// (, line 65
|
||||
// do, line 66
|
||||
v_1 = limit - cursor;
|
||||
lab0: do {
|
||||
// (, line 66
|
||||
// [, line 66
|
||||
ket = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "st")))
|
||||
{
|
||||
break lab1;
|
||||
break lab0;
|
||||
}
|
||||
// ], line 66
|
||||
bra = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "ig")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// delete, line 66
|
||||
slice_del();
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
break;
|
||||
case 2:
|
||||
// (, line 72
|
||||
// <-, line 72
|
||||
slice_from("l\u00F8s");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
// setlimit, line 67
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 67
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 67
|
||||
// [, line 67
|
||||
ket = cursor;
|
||||
// substring, line 67
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 67
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 70
|
||||
// delete, line 70
|
||||
slice_del();
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab1: do {
|
||||
// call consonant_pair, line 70
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
break;
|
||||
case 2:
|
||||
// (, line 72
|
||||
// <-, line 72
|
||||
slice_from("l\u00F8s");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_undouble() {
|
||||
private boolean r_undouble() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 75
|
||||
// setlimit, line 76
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 76
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 76
|
||||
// [, line 76
|
||||
ket = cursor;
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 76
|
||||
bra = cursor;
|
||||
// -> ch, line 76
|
||||
S_ch = slice_to(S_ch);
|
||||
limit_backward = v_2;
|
||||
// name ch, line 77
|
||||
if (!(eq_v_b(S_ch)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 78
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
// (, line 75
|
||||
// setlimit, line 76
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 76
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 76
|
||||
// [, line 76
|
||||
ket = cursor;
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 76
|
||||
bra = cursor;
|
||||
// -> ch, line 76
|
||||
S_ch = slice_to(S_ch);
|
||||
limit_backward = v_2;
|
||||
// name ch, line 77
|
||||
if (!(eq_v_b(S_ch)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 78
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
int v_5;
|
||||
// (, line 82
|
||||
// do, line 84
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 84
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
// (, line 82
|
||||
// do, line 84
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 84
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 85
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 85
|
||||
// do, line 86
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 86
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 87
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 87
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 88
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 88
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 89
|
||||
v_5 = limit - cursor;
|
||||
lab4: do {
|
||||
// call undouble, line 89
|
||||
if (!r_undouble())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 85
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 85
|
||||
// do, line 86
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 86
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 87
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 87
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 88
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 88
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 89
|
||||
v_5 = limit - cursor;
|
||||
lab4: do {
|
||||
// call undouble, line 89
|
||||
if (!r_undouble())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward; return true;
|
||||
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof DanishStemmer;
|
||||
}
|
||||
|
||||
}
|
||||
public int hashCode() {
|
||||
return DanishStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
10 file diffs suppressed because they are too large
|
@@ -1,358 +1,375 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.Among;
|
||||
|
||||
/**
|
||||
* Generated class implementing code defined by a snowball script.
|
||||
*/
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
|
||||
public class NorwegianStemmer extends SnowballProgram {
|
||||
|
||||
private Among a_0[] = {
|
||||
new Among ( "a", -1, 1, "", this),
|
||||
new Among ( "e", -1, 1, "", this),
|
||||
new Among ( "ede", 1, 1, "", this),
|
||||
new Among ( "ande", 1, 1, "", this),
|
||||
new Among ( "ende", 1, 1, "", this),
|
||||
new Among ( "ane", 1, 1, "", this),
|
||||
new Among ( "ene", 1, 1, "", this),
|
||||
new Among ( "hetene", 6, 1, "", this),
|
||||
new Among ( "erte", 1, 3, "", this),
|
||||
new Among ( "en", -1, 1, "", this),
|
||||
new Among ( "heten", 9, 1, "", this),
|
||||
new Among ( "ar", -1, 1, "", this),
|
||||
new Among ( "er", -1, 1, "", this),
|
||||
new Among ( "heter", 12, 1, "", this),
|
||||
new Among ( "s", -1, 2, "", this),
|
||||
new Among ( "as", 14, 1, "", this),
|
||||
new Among ( "es", 14, 1, "", this),
|
||||
new Among ( "edes", 16, 1, "", this),
|
||||
new Among ( "endes", 16, 1, "", this),
|
||||
new Among ( "enes", 16, 1, "", this),
|
||||
new Among ( "hetenes", 19, 1, "", this),
|
||||
new Among ( "ens", 14, 1, "", this),
|
||||
new Among ( "hetens", 21, 1, "", this),
|
||||
new Among ( "ers", 14, 1, "", this),
|
||||
new Among ( "ets", 14, 1, "", this),
|
||||
new Among ( "et", -1, 1, "", this),
|
||||
new Among ( "het", 25, 1, "", this),
|
||||
new Among ( "ert", -1, 3, "", this),
|
||||
new Among ( "ast", -1, 1, "", this)
|
||||
};
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private Among a_1[] = {
|
||||
new Among ( "dt", -1, -1, "", this),
|
||||
new Among ( "vt", -1, -1, "", this)
|
||||
};
|
||||
private final static NorwegianStemmer methodObject = new NorwegianStemmer ();
|
||||
|
||||
private Among a_2[] = {
|
||||
new Among ( "leg", -1, 1, "", this),
|
||||
new Among ( "eleg", 0, 1, "", this),
|
||||
new Among ( "ig", -1, 1, "", this),
|
||||
new Among ( "eig", 2, 1, "", this),
|
||||
new Among ( "lig", 2, 1, "", this),
|
||||
new Among ( "elig", 4, 1, "", this),
|
||||
new Among ( "els", -1, 1, "", this),
|
||||
new Among ( "lov", -1, 1, "", this),
|
||||
new Among ( "elov", 7, 1, "", this),
|
||||
new Among ( "slov", 7, 1, "", this),
|
||||
new Among ( "hetslov", 9, 1, "", this)
|
||||
};
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "a", -1, 1, "", methodObject ),
|
||||
new Among ( "e", -1, 1, "", methodObject ),
|
||||
new Among ( "ede", 1, 1, "", methodObject ),
|
||||
new Among ( "ande", 1, 1, "", methodObject ),
|
||||
new Among ( "ende", 1, 1, "", methodObject ),
|
||||
new Among ( "ane", 1, 1, "", methodObject ),
|
||||
new Among ( "ene", 1, 1, "", methodObject ),
|
||||
new Among ( "hetene", 6, 1, "", methodObject ),
|
||||
new Among ( "erte", 1, 3, "", methodObject ),
|
||||
new Among ( "en", -1, 1, "", methodObject ),
|
||||
new Among ( "heten", 9, 1, "", methodObject ),
|
||||
new Among ( "ar", -1, 1, "", methodObject ),
|
||||
new Among ( "er", -1, 1, "", methodObject ),
|
||||
new Among ( "heter", 12, 1, "", methodObject ),
|
||||
new Among ( "s", -1, 2, "", methodObject ),
|
||||
new Among ( "as", 14, 1, "", methodObject ),
|
||||
new Among ( "es", 14, 1, "", methodObject ),
|
||||
new Among ( "edes", 16, 1, "", methodObject ),
|
||||
new Among ( "endes", 16, 1, "", methodObject ),
|
||||
new Among ( "enes", 16, 1, "", methodObject ),
|
||||
new Among ( "hetenes", 19, 1, "", methodObject ),
|
||||
new Among ( "ens", 14, 1, "", methodObject ),
|
||||
new Among ( "hetens", 21, 1, "", methodObject ),
|
||||
new Among ( "ers", 14, 1, "", methodObject ),
|
||||
new Among ( "ets", 14, 1, "", methodObject ),
|
||||
new Among ( "et", -1, 1, "", methodObject ),
|
||||
new Among ( "het", 25, 1, "", methodObject ),
|
||||
new Among ( "ert", -1, 3, "", methodObject ),
|
||||
new Among ( "ast", -1, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "dt", -1, -1, "", methodObject ),
|
||||
new Among ( "vt", -1, -1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_s_ending[] = {119, 125, 149, 1 };
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "leg", -1, 1, "", methodObject ),
|
||||
new Among ( "eleg", 0, 1, "", methodObject ),
|
||||
new Among ( "ig", -1, 1, "", methodObject ),
|
||||
new Among ( "eig", 2, 1, "", methodObject ),
|
||||
new Among ( "lig", 2, 1, "", methodObject ),
|
||||
new Among ( "elig", 4, 1, "", methodObject ),
|
||||
new Among ( "els", -1, 1, "", methodObject ),
|
||||
new Among ( "lov", -1, 1, "", methodObject ),
|
||||
new Among ( "elov", 7, 1, "", methodObject ),
|
||||
new Among ( "slov", 7, 1, "", methodObject ),
|
||||
new Among ( "hetslov", 9, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
|
||||
private static final char g_s_ending[] = {119, 125, 149, 1 };
|
||||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
|
||||
private void copy_from(NorwegianStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
super.copy_from(other);
|
||||
}
|
||||
private void copy_from(NorwegianStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 26
|
||||
I_p1 = limit;
|
||||
// test, line 30
|
||||
v_1 = cursor;
|
||||
// (, line 30
|
||||
// hop, line 30
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
// setmark x, line 30
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 31
|
||||
golab0: while(true)
|
||||
{
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
// (, line 26
|
||||
I_p1 = limit;
|
||||
// test, line 30
|
||||
v_1 = cursor;
|
||||
// (, line 30
|
||||
// hop, line 30
|
||||
{
|
||||
break lab1;
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 31
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
// setmark x, line 30
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 31
|
||||
golab0: while(true)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 31
|
||||
I_p1 = cursor;
|
||||
// try, line 32
|
||||
lab4: do {
|
||||
// (, line 32
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 37
|
||||
// setlimit, line 38
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 38
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 38
|
||||
// [, line 38
|
||||
ket = cursor;
|
||||
// substring, line 38
|
||||
among_var = find_among_b(a_0, 29);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 38
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 44
|
||||
// delete, line 44
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 46
|
||||
// or, line 46
|
||||
lab0: do {
|
||||
v_3 = limit - cursor;
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping_b(g_s_ending, 98, 122)))
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
break lab0;
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// (, line 46
|
||||
// literal, line 46
|
||||
if (!(eq_s_b(1, "k")))
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 31
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 31
|
||||
I_p1 = cursor;
|
||||
// try, line 32
|
||||
lab4: do {
|
||||
// (, line 32
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
// (, line 48
|
||||
// <-, line 48
|
||||
slice_from("er");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 52
|
||||
// test, line 53
|
||||
v_1 = limit - cursor;
|
||||
// (, line 53
|
||||
// setlimit, line 54
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 54
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 54
|
||||
// [, line 54
|
||||
ket = cursor;
|
||||
// substring, line 54
|
||||
if (find_among_b(a_1, 2) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 54
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 59
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 59
|
||||
bra = cursor;
|
||||
// delete, line 59
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 62
|
||||
// setlimit, line 63
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 63
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 63
|
||||
// [, line 63
|
||||
ket = cursor;
|
||||
// substring, line 63
|
||||
among_var = find_among_b(a_2, 11);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 63
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 67
|
||||
// delete, line 67
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
int v_3;
|
||||
// (, line 37
|
||||
// setlimit, line 38
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 38
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 38
|
||||
// [, line 38
|
||||
ket = cursor;
|
||||
// substring, line 38
|
||||
among_var = find_among_b(a_0, 29);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 38
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 44
|
||||
// delete, line 44
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 46
|
||||
// or, line 46
|
||||
lab0: do {
|
||||
v_3 = limit - cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping_b(g_s_ending, 98, 122)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
break lab0;
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// (, line 46
|
||||
// literal, line 46
|
||||
if (!(eq_s_b(1, "k")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} while (false);
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
// (, line 48
|
||||
// <-, line 48
|
||||
slice_from("er");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 52
|
||||
// test, line 53
|
||||
v_1 = limit - cursor;
|
||||
// (, line 53
|
||||
// setlimit, line 54
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 54
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 54
|
||||
// [, line 54
|
||||
ket = cursor;
|
||||
// substring, line 54
|
||||
if (find_among_b(a_1, 2) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 54
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 59
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 59
|
||||
bra = cursor;
|
||||
// delete, line 59
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 62
|
||||
// setlimit, line 63
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 63
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 63
|
||||
// [, line 63
|
||||
ket = cursor;
|
||||
// substring, line 63
|
||||
among_var = find_among_b(a_2, 11);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 63
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 67
|
||||
// delete, line 67
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 72
|
||||
// do, line 74
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 74
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
// (, line 72
|
||||
// do, line 74
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 74
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 75
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 75
|
||||
// do, line 76
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 76
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 77
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 77
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 78
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 78
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 75
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 75
|
||||
// do, line 76
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 76
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 77
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 77
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 78
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 78
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof NorwegianStemmer;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return NorwegianStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
5 file diffs suppressed because they are too large
|
@@ -1,349 +1,366 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.Among;
|
||||
|
||||
/**
|
||||
* Generated class implementing code defined by a snowball script.
|
||||
*/
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
|
||||
public class SwedishStemmer extends SnowballProgram {
|
||||
|
||||
private Among a_0[] = {
|
||||
new Among ( "a", -1, 1, "", this),
|
||||
new Among ( "arna", 0, 1, "", this),
|
||||
new Among ( "erna", 0, 1, "", this),
|
||||
new Among ( "heterna", 2, 1, "", this),
|
||||
new Among ( "orna", 0, 1, "", this),
|
||||
new Among ( "ad", -1, 1, "", this),
|
||||
new Among ( "e", -1, 1, "", this),
|
||||
new Among ( "ade", 6, 1, "", this),
|
||||
new Among ( "ande", 6, 1, "", this),
|
||||
new Among ( "arne", 6, 1, "", this),
|
||||
new Among ( "are", 6, 1, "", this),
|
||||
new Among ( "aste", 6, 1, "", this),
|
||||
new Among ( "en", -1, 1, "", this),
|
||||
new Among ( "anden", 12, 1, "", this),
|
||||
new Among ( "aren", 12, 1, "", this),
|
||||
new Among ( "heten", 12, 1, "", this),
|
||||
new Among ( "ern", -1, 1, "", this),
|
||||
new Among ( "ar", -1, 1, "", this),
|
||||
new Among ( "er", -1, 1, "", this),
|
||||
new Among ( "heter", 18, 1, "", this),
|
||||
new Among ( "or", -1, 1, "", this),
|
||||
new Among ( "s", -1, 2, "", this),
|
||||
new Among ( "as", 21, 1, "", this),
|
||||
new Among ( "arnas", 22, 1, "", this),
|
||||
new Among ( "ernas", 22, 1, "", this),
|
||||
new Among ( "ornas", 22, 1, "", this),
|
||||
new Among ( "es", 21, 1, "", this),
|
||||
new Among ( "ades", 26, 1, "", this),
|
||||
new Among ( "andes", 26, 1, "", this),
|
||||
new Among ( "ens", 21, 1, "", this),
|
||||
new Among ( "arens", 29, 1, "", this),
|
||||
new Among ( "hetens", 29, 1, "", this),
|
||||
new Among ( "erns", 21, 1, "", this),
|
||||
new Among ( "at", -1, 1, "", this),
|
||||
new Among ( "andet", -1, 1, "", this),
|
||||
new Among ( "het", -1, 1, "", this),
|
||||
new Among ( "ast", -1, 1, "", this)
|
||||
};
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private Among a_1[] = {
|
||||
new Among ( "dd", -1, -1, "", this),
|
||||
new Among ( "gd", -1, -1, "", this),
|
||||
new Among ( "nn", -1, -1, "", this),
|
||||
new Among ( "dt", -1, -1, "", this),
|
||||
new Among ( "gt", -1, -1, "", this),
|
||||
new Among ( "kt", -1, -1, "", this),
|
||||
new Among ( "tt", -1, -1, "", this)
|
||||
};
|
||||
private final static SwedishStemmer methodObject = new SwedishStemmer ();
|
||||
|
||||
private Among a_2[] = {
|
||||
new Among ( "ig", -1, 1, "", this),
|
||||
new Among ( "lig", 0, 1, "", this),
|
||||
new Among ( "els", -1, 1, "", this),
|
||||
new Among ( "fullt", -1, 3, "", this),
|
||||
new Among ( "l\u00F6st", -1, 2, "", this)
|
||||
};
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "a", -1, 1, "", methodObject ),
|
||||
new Among ( "arna", 0, 1, "", methodObject ),
|
||||
new Among ( "erna", 0, 1, "", methodObject ),
|
||||
new Among ( "heterna", 2, 1, "", methodObject ),
|
||||
new Among ( "orna", 0, 1, "", methodObject ),
|
||||
new Among ( "ad", -1, 1, "", methodObject ),
|
||||
new Among ( "e", -1, 1, "", methodObject ),
|
||||
new Among ( "ade", 6, 1, "", methodObject ),
|
||||
new Among ( "ande", 6, 1, "", methodObject ),
|
||||
new Among ( "arne", 6, 1, "", methodObject ),
|
||||
new Among ( "are", 6, 1, "", methodObject ),
|
||||
new Among ( "aste", 6, 1, "", methodObject ),
|
||||
new Among ( "en", -1, 1, "", methodObject ),
|
||||
new Among ( "anden", 12, 1, "", methodObject ),
|
||||
new Among ( "aren", 12, 1, "", methodObject ),
|
||||
new Among ( "heten", 12, 1, "", methodObject ),
|
||||
new Among ( "ern", -1, 1, "", methodObject ),
|
||||
new Among ( "ar", -1, 1, "", methodObject ),
|
||||
new Among ( "er", -1, 1, "", methodObject ),
|
||||
new Among ( "heter", 18, 1, "", methodObject ),
|
||||
new Among ( "or", -1, 1, "", methodObject ),
|
||||
new Among ( "s", -1, 2, "", methodObject ),
|
||||
new Among ( "as", 21, 1, "", methodObject ),
|
||||
new Among ( "arnas", 22, 1, "", methodObject ),
|
||||
new Among ( "ernas", 22, 1, "", methodObject ),
|
||||
new Among ( "ornas", 22, 1, "", methodObject ),
|
||||
new Among ( "es", 21, 1, "", methodObject ),
|
||||
new Among ( "ades", 26, 1, "", methodObject ),
|
||||
new Among ( "andes", 26, 1, "", methodObject ),
|
||||
new Among ( "ens", 21, 1, "", methodObject ),
|
||||
new Among ( "arens", 29, 1, "", methodObject ),
|
||||
new Among ( "hetens", 29, 1, "", methodObject ),
|
||||
new Among ( "erns", 21, 1, "", methodObject ),
|
||||
new Among ( "at", -1, 1, "", methodObject ),
|
||||
new Among ( "andet", -1, 1, "", methodObject ),
|
||||
new Among ( "het", -1, 1, "", methodObject ),
|
||||
new Among ( "ast", -1, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "dd", -1, -1, "", methodObject ),
|
||||
new Among ( "gd", -1, -1, "", methodObject ),
|
||||
new Among ( "nn", -1, -1, "", methodObject ),
|
||||
new Among ( "dt", -1, -1, "", methodObject ),
|
||||
new Among ( "gt", -1, -1, "", methodObject ),
|
||||
new Among ( "kt", -1, -1, "", methodObject ),
|
||||
new Among ( "tt", -1, -1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_s_ending[] = {119, 127, 149 };
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "ig", -1, 1, "", methodObject ),
|
||||
new Among ( "lig", 0, 1, "", methodObject ),
|
||||
new Among ( "els", -1, 1, "", methodObject ),
|
||||
new Among ( "fullt", -1, 3, "", methodObject ),
|
||||
new Among ( "l\u00F6st", -1, 2, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
|
||||
|
||||
private static final char g_s_ending[] = {119, 127, 149 };
|
||||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
|
||||
private void copy_from(SwedishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
super.copy_from(other);
|
||||
}
|
||||
private void copy_from(SwedishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 26
|
||||
I_p1 = limit;
|
||||
// test, line 29
|
||||
v_1 = cursor;
|
||||
// (, line 29
|
||||
// hop, line 29
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
// setmark x, line 29
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 30
|
||||
golab0: while(true)
|
||||
{
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 246)))
|
||||
// (, line 26
|
||||
I_p1 = limit;
|
||||
// test, line 29
|
||||
v_1 = cursor;
|
||||
// (, line 29
|
||||
// hop, line 29
|
||||
{
|
||||
break lab1;
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 30
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 246)))
|
||||
// setmark x, line 29
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 30
|
||||
golab0: while(true)
|
||||
{
|
||||
break lab3;
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 246)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
// gopast, line 30
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 246)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 30
|
||||
I_p1 = cursor;
|
||||
// try, line 31
|
||||
lab4: do {
|
||||
// (, line 31
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 30
|
||||
I_p1 = cursor;
|
||||
// try, line 31
|
||||
lab4: do {
|
||||
// (, line 31
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_main_suffix() {
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 36
|
||||
// setlimit, line 37
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 37
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 37
|
||||
// [, line 37
|
||||
ket = cursor;
|
||||
// substring, line 37
|
||||
among_var = find_among_b(a_0, 37);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 37
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 44
|
||||
// delete, line 44
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 46
|
||||
if (!(in_grouping_b(g_s_ending, 98, 121)))
|
||||
// (, line 36
|
||||
// setlimit, line 37
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 37
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 37
|
||||
// [, line 37
|
||||
ket = cursor;
|
||||
// substring, line 37
|
||||
among_var = find_among_b(a_0, 37);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 37
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 44
|
||||
// delete, line 44
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 46
|
||||
if (!(in_grouping_b(g_s_ending, 98, 121)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// setlimit, line 50
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 50
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 50
|
||||
// and, line 52
|
||||
v_3 = limit - cursor;
|
||||
// among, line 51
|
||||
if (find_among_b(a_1, 7) == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor = limit - v_3;
|
||||
// (, line 52
|
||||
// [, line 52
|
||||
ket = cursor;
|
||||
// next, line 52
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 52
|
||||
bra = cursor;
|
||||
// delete, line 52
|
||||
slice_del();
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
// setlimit, line 50
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 50
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 50
|
||||
// and, line 52
|
||||
v_3 = limit - cursor;
|
||||
// among, line 51
|
||||
if (find_among_b(a_1, 7) == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor = limit - v_3;
|
||||
// (, line 52
|
||||
// [, line 52
|
||||
ket = cursor;
|
||||
// next, line 52
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 52
|
||||
bra = cursor;
|
||||
// delete, line 52
|
||||
slice_del();
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// setlimit, line 55
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 55
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 55
|
||||
// [, line 56
|
||||
ket = cursor;
|
||||
// substring, line 56
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
// setlimit, line 55
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 55
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 55
|
||||
// [, line 56
|
||||
ket = cursor;
|
||||
// substring, line 56
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 57
|
||||
// delete, line 57
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 58
|
||||
// <-, line 58
|
||||
slice_from("l\u00F6s");
|
||||
break;
|
||||
case 3:
|
||||
// (, line 59
|
||||
// <-, line 59
|
||||
slice_from("full");
|
||||
break;
|
||||
}
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 57
|
||||
// delete, line 57
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 58
|
||||
// <-, line 58
|
||||
slice_from("l\u00F6s");
|
||||
break;
|
||||
case 3:
|
||||
// (, line 59
|
||||
// <-, line 59
|
||||
slice_from("full");
|
||||
break;
|
||||
}
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 64
|
||||
// do, line 66
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 66
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
// (, line 64
|
||||
// do, line 66
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 66
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 67
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 67
|
||||
// do, line 68
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 68
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 69
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 69
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 70
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 67
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 67
|
||||
// do, line 68
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 68
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 69
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 69
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 70
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof SwedishStemmer;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return SwedishStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -116,4 +117,21 @@ public class TestSnowball extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "", "");
}
}

public void testRandomStrings() throws IOException {
for (String lang : SNOWBALL_LANGS) {
checkRandomStrings(lang);
}
}

public void checkRandomStrings(final String snowballLanguage) throws IOException {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new MockTokenizer(reader);
return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
}
@@ -331,7 +331,9 @@ public class TaskSequence extends PerfTask {
// Forwards top request to children
if (runningParallelTasks != null) {
for(ParallelTask t : runningParallelTasks) {
t.task.stopNow();
if (t != null) {
t.task.stopNow();
}
}
}
}
@@ -355,6 +357,12 @@ public class TaskSequence extends PerfTask {
// run threads
startThreads(t);

if (stopNow) {
for (ParallelTask task : t) {
task.task.stopNow();
}
}

// wait for all threads to complete
int count = 0;
for (int i = 0; i < t.length; i++) {
@@ -35,10 +35,9 @@
MIGRATE.txt,JRE_VERSION_MIGRATION.txt,
CHANGES.txt,
**/lib/*.jar,
**/lib/*LICENSE*.txt,
**/lib/*NOTICE*.txt,
licenses/**,
*/docs/,**/README*"
excludes="build/**,site/**"
excludes="build/**,site/**,tools/**"
/>
@@ -572,4 +571,8 @@
</sequential>
</target>

<target name="jar-checksums" depends="clean-jars,resolve">
  <jar-checksum-macro srcdir="${common.dir}" dstdir="${common.dir}/licenses"/>
</target>

</project>
@@ -1567,6 +1567,43 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
</sequential>
</macrodef>

<macrodef name="jar-checksum-macro">
  <attribute name="srcdir"/>
  <attribute name="dstdir"/>
  <sequential>
    <delete>
      <fileset dir="@{dstdir}">
        <include name="**/*.jar.sha1"/>
      </fileset>
    </delete>

    <!-- checksum task does not have a flatten=true -->
    <tempfile property="jar-checksum.temp.dir"/>
    <mkdir dir="${jar-checksum.temp.dir}"/>
    <copy todir="${jar-checksum.temp.dir}" flatten="true">
      <fileset dir="@{srcdir}">
        <include name="**/*.jar"/>
        <!-- todo make this something passed into the macro and not some hardcoded set -->
        <exclude name="build/**"/>
        <exclude name="dist/**"/>
        <exclude name="package/**"/>
        <exclude name="example/exampledocs/**"/>
      </fileset>
    </copy>

    <checksum algorithm="SHA1" fileext=".sha1" todir="@{dstdir}">
      <fileset dir="${jar-checksum.temp.dir}"/>
    </checksum>

    <delete dir="${jar-checksum.temp.dir}"/>

    <fixcrlf
      srcdir="@{dstdir}"
      includes="**/*.jar.sha1"
      eol="lf" fixlast="true" encoding="US-ASCII" />
  </sequential>
</macrodef>

<macrodef name="sign-artifacts-macro">
  <attribute name="artifacts.dir"/>
  <sequential>
@@ -53,7 +53,13 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
/** Reads a segment. NOTE: by the time this call
* returns, it must hold open any files it will need to
* use; else, those files may be deleted. */
* use; else, those files may be deleted.
* Additionally, required files may be deleted during the execution of
* this call before there is a chance to open them. Under these
* circumstances an IOException should be thrown by the implementation.
* IOExceptions are expected and will automatically cause a retry of the
* segment opening logic with the newly revised segments.
* */
public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;

@Override
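Not part of this commit: a minimal sketch of a fieldsProducer() implementation that follows the contract described in the javadoc above, opening the file it needs up front and letting any IOException propagate so the caller can retry with the revised segments. It reuses only calls that appear elsewhere in this commit (IndexFileNames.segmentFileName, state.dir.openInput, PostingsFormat.forName, IOUtils.close); the "aux" file extension and the idea of storing a delegate's name in that file are assumptions made for the example.

// Sketch only: eagerly open an auxiliary per-segment file and resolve a delegate from it.
// If the file has already been deleted, openInput throws an IOException which propagates
// to the caller, triggering a retry of the segment opening logic as described above.
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  String auxFileName = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, "aux"); // "aux" is a made-up extension
  IndexInput auxIn = state.dir.openInput(auxFileName, state.context);
  try {
    // The delegate producer opens and holds its own files for its lifetime.
    PostingsFormat delegate = PostingsFormat.forName(auxIn.readString());
    return delegate.fieldsProducer(state);
  } finally {
    IOUtils.close(auxIn); // the small auxiliary header is not needed after startup
  }
}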
@@ -0,0 +1,63 @@
package org.apache.lucene.codecs.bloom;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;


/**
 * Class used to create index-time {@link FuzzySet} appropriately configured for
 * each field. Also called to right-size bitsets for serialization.
 * @lucene.experimental
 */
public abstract class BloomFilterFactory {

  /**
   *
   * @param state The content to be indexed
   * @param info
   *          the field requiring a BloomFilter
   * @return An appropriately sized set or null if no BloomFiltering required
   */
  public abstract FuzzySet getSetForField(SegmentWriteState state, FieldInfo info);

  /**
   * Called when downsizing bitsets for serialization
   *
   * @param fieldInfo
   *          The field with sparse set bits
   * @param initialSet
   *          The bits accumulated
   * @return null or a hopefully more densely packed, smaller bitset
   */
  public FuzzySet downsize(FieldInfo fieldInfo, FuzzySet initialSet) {
    // Aim for a bitset size that would have 10% of bits set (so 90% of searches
    // would fail-fast)
    float targetMaxSaturation = 0.1f;
    return initialSet.downsize(targetMaxSaturation);
  }

  /**
   * Used to determine if the given filter has reached saturation and should be retired i.e. not saved any more
   * @param bloomFilter The bloomFilter being tested
   * @param fieldInfo The field with which this filter is associated
   * @return true if the set has reached saturation and should be retired
   */
  public abstract boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo);

}
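For illustration only (not part of this commit): a minimal BloomFilterFactory subclass that bloom-filters a single, assumed primary-key field named "id" and reuses the DefaultBloomFilterFactory shipped with this format for sizing; the field name and the never-retire policy in isSaturated are assumptions for the sketch.

package org.apache.lucene.codecs.bloom;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;

// Sketch of a per-field policy: only the assumed "id" field gets a bloom filter;
// returning null for every other field disables bloom filtering for it.
public class IdOnlyBloomFilterFactory extends BloomFilterFactory {
  private final BloomFilterFactory defaults = new DefaultBloomFilterFactory();

  @Override
  public FuzzySet getSetForField(SegmentWriteState state, FieldInfo info) {
    return "id".equals(info.name) ? defaults.getSetForField(state, info) : null;
  }

  @Override
  public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
    return false; // assumption: never retire the filter, always save it
  }
}

Such a factory could then be handed to the two-argument BloomFilteringPostingsFormat constructor introduced in the next file.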
@@ -0,0 +1,514 @@
|
|||
package org.apache.lucene.codecs.bloom;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsConsumer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.TermStats;
|
||||
import org.apache.lucene.codecs.TermsConsumer;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FuzzySet;
|
||||
import org.apache.lucene.util.FuzzySet.ContainsResult;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.hash.MurmurHash2;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link PostingsFormat} useful for low doc-frequency fields such as primary
|
||||
* keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
|
||||
* for reads in segments known to have no record of the key. A choice of
|
||||
* delegate PostingsFormat is used to record all other Postings data.
|
||||
* </p>
|
||||
* <p>
|
||||
* A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
|
||||
* settings on a per-field basis. The default configuration is
|
||||
* {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
|
||||
* values using {@link MurmurHash2}. This should be suitable for most purposes.
|
||||
* </p>
|
||||
* <p>
|
||||
* The format of the blm file is as follows:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
|
||||
* NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
|
||||
* <li>Filter --> FieldNumber, FuzzySet</li>
|
||||
* <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li>
|
||||
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||
* <li>DelegatePostingsFormatName --> {@link DataOutput#writeString(String)
|
||||
* String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
|
||||
* <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li>
|
||||
* <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
|
||||
* field in this segment</li>
|
||||
* </ul>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class BloomFilteringPostingsFormat extends PostingsFormat {
|
||||
|
||||
public static final String BLOOM_CODEC_NAME = "BloomFilter";
|
||||
public static final int BLOOM_CODEC_VERSION = 1;
|
||||
|
||||
/** Extension of Bloom Filters file */
|
||||
static final String BLOOM_EXTENSION = "blm";
|
||||
|
||||
BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
|
||||
private PostingsFormat delegatePostingsFormat;
|
||||
|
||||
/**
|
||||
* Creates Bloom filters for a selection of fields created in the index. This
|
||||
* is recorded as a set of Bitsets held as a segment summary in an additional
|
||||
* "blm" file. This PostingsFormat delegates to a choice of delegate
|
||||
* PostingsFormat for encoding all other postings data.
|
||||
*
|
||||
* @param delegatePostingsFormat
|
||||
* The PostingsFormat that records all the non-bloom filter data i.e.
|
||||
* postings info.
|
||||
* @param bloomFilterFactory
|
||||
* The {@link BloomFilterFactory} responsible for sizing BloomFilters
|
||||
* appropriately
|
||||
*/
|
||||
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
|
||||
BloomFilterFactory bloomFilterFactory) {
|
||||
super(BLOOM_CODEC_NAME);
|
||||
this.delegatePostingsFormat = delegatePostingsFormat;
|
||||
this.bloomFilterFactory = bloomFilterFactory;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates Bloom filters for a selection of fields created in the index. This
|
||||
* is recorded as a set of Bitsets held as a segment summary in an additional
|
||||
* "blm" file. This PostingsFormat delegates to a choice of delegate
|
||||
* PostingsFormat for encoding all other postings data. This choice of
|
||||
* constructor defaults to the {@link DefaultBloomFilterFactory} for
|
||||
* configuring per-field BloomFilters.
|
||||
*
|
||||
* @param delegatePostingsFormat
|
||||
* The PostingsFormat that records all the non-bloom filter data i.e.
|
||||
* postings info.
|
||||
*/
|
||||
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
|
||||
this(delegatePostingsFormat, new DefaultBloomFilterFactory());
|
||||
}
|
||||
|
||||
// Used only by core Lucene at read-time via Service Provider instantiation -
|
||||
// do not use at Write-time in application code.
|
||||
public BloomFilteringPostingsFormat() {
|
||||
super(BLOOM_CODEC_NAME);
|
||||
}
|
||||
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
|
||||
throws IOException {
|
||||
if (delegatePostingsFormat == null) {
|
||||
throw new UnsupportedOperationException("Error - " + getClass().getName()
|
||||
+ " has been constructed without a choice of PostingsFormat");
|
||||
}
|
||||
return new BloomFilteredFieldsConsumer(
|
||||
delegatePostingsFormat.fieldsConsumer(state), state,
|
||||
delegatePostingsFormat);
|
||||
}
|
||||
|
||||
public FieldsProducer fieldsProducer(SegmentReadState state)
|
||||
throws IOException {
|
||||
return new BloomFilteredFieldsProducer(state);
|
||||
}
|
||||
|
||||
public class BloomFilteredFieldsProducer extends FieldsProducer {
|
||||
private FieldsProducer delegateFieldsProducer;
|
||||
HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
|
||||
|
||||
public BloomFilteredFieldsProducer(SegmentReadState state)
|
||||
throws IOException {
|
||||
|
||||
String bloomFileName = IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
|
||||
IndexInput bloomIn = null;
|
||||
try {
|
||||
bloomIn = state.dir.openInput(bloomFileName, state.context);
|
||||
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
|
||||
BLOOM_CODEC_VERSION);
|
||||
// // Load the hash function used in the BloomFilter
|
||||
// hashFunction = HashFunction.forName(bloomIn.readString());
|
||||
// Load the delegate postings format
|
||||
PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
|
||||
.readString());
|
||||
|
||||
this.delegateFieldsProducer = delegatePostingsFormat
|
||||
.fieldsProducer(state);
|
||||
int numBlooms = bloomIn.readInt();
|
||||
for (int i = 0; i < numBlooms; i++) {
|
||||
int fieldNum = bloomIn.readInt();
|
||||
FuzzySet bloom = FuzzySet.deserialize(bloomIn);
|
||||
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
|
||||
bloomsByFieldName.put(fieldInfo.name, bloom);
|
||||
}
|
||||
} finally {
|
||||
IOUtils.close(bloomIn);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public FieldsEnum iterator() throws IOException {
|
||||
return new BloomFilteredFieldsEnum(delegateFieldsProducer.iterator(),
|
||||
bloomsByFieldName);
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
delegateFieldsProducer.close();
|
||||
}
|
||||
|
||||
public Terms terms(String field) throws IOException {
|
||||
FuzzySet filter = bloomsByFieldName.get(field);
|
||||
if (filter == null) {
|
||||
return delegateFieldsProducer.terms(field);
|
||||
} else {
|
||||
Terms result = delegateFieldsProducer.terms(field);
|
||||
if (result == null) {
|
||||
return null;
|
||||
}
|
||||
return new BloomFilteredTerms(result, filter);
|
||||
}
|
||||
}
|
||||
|
||||
public int size() throws IOException {
|
||||
return delegateFieldsProducer.size();
|
||||
}
|
||||
|
||||
public long getUniqueTermCount() throws IOException {
|
||||
return delegateFieldsProducer.getUniqueTermCount();
|
||||
}
|
||||
|
||||
// Not all fields in a segment may be subject to a bloom filter. This class
|
||||
// wraps Terms objects appropriately if a filtering request is present
|
||||
class BloomFilteredFieldsEnum extends FieldsEnum {
|
||||
private FieldsEnum delegateFieldsEnum;
|
||||
private HashMap<String,FuzzySet> bloomsByFieldName;
|
||||
private String currentFieldName;
|
||||
|
||||
public BloomFilteredFieldsEnum(FieldsEnum iterator,
|
||||
HashMap<String,FuzzySet> bloomsByFieldName) {
|
||||
this.delegateFieldsEnum = iterator;
|
||||
this.bloomsByFieldName = bloomsByFieldName;
|
||||
}
|
||||
|
||||
public AttributeSource attributes() {
|
||||
return delegateFieldsEnum.attributes();
|
||||
}
|
||||
|
||||
public String next() throws IOException {
|
||||
currentFieldName = delegateFieldsEnum.next();
|
||||
return currentFieldName;
|
||||
}
|
||||
|
||||
public Terms terms() throws IOException {
|
||||
FuzzySet filter = bloomsByFieldName.get(currentFieldName);
|
||||
if (filter == null) {
|
||||
return delegateFieldsEnum.terms();
|
||||
} else {
|
||||
Terms result = delegateFieldsEnum.terms();
|
||||
if (result == null) {
|
||||
return null;
|
||||
}
|
||||
// wrap the terms object with a bloom filter
|
||||
return new BloomFilteredTerms(result, filter);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class BloomFilteredTerms extends Terms {
|
||||
private Terms delegateTerms;
|
||||
private FuzzySet filter;
|
||||
|
||||
public BloomFilteredTerms(Terms terms, FuzzySet filter) {
|
||||
this.delegateTerms = terms;
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum intersect(CompiledAutomaton compiled,
|
||||
final BytesRef startTerm) throws IOException {
|
||||
return delegateTerms.intersect(compiled, startTerm);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||
TermsEnum result;
|
||||
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
|
||||
// recycle the existing BloomFilteredTermsEnum by asking the delegate
|
||||
// to recycle its contained TermsEnum
|
||||
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
|
||||
if (bfte.filter == filter) {
|
||||
bfte.delegateTermsEnum = delegateTerms
|
||||
.iterator(bfte.delegateTermsEnum);
|
||||
return bfte;
|
||||
}
|
||||
}
|
||||
// We have been handed something we cannot reuse (either null, wrong
|
||||
// class or wrong filter) so allocate a new object
|
||||
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
|
||||
filter);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
return delegateTerms.getComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() throws IOException {
|
||||
return delegateTerms.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() throws IOException {
|
||||
return delegateTerms.getSumTotalTermFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumDocFreq() throws IOException {
|
||||
return delegateTerms.getSumDocFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount() throws IOException {
|
||||
return delegateTerms.getDocCount();
|
||||
}
|
||||
}
|
||||
|
||||
class BloomFilteredTermsEnum extends TermsEnum {
|
||||
|
||||
TermsEnum delegateTermsEnum;
|
||||
private FuzzySet filter;
|
||||
|
||||
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
|
||||
this.delegateTermsEnum = iterator;
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final BytesRef next() throws IOException {
|
||||
return delegateTermsEnum.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Comparator<BytesRef> getComparator() {
|
||||
return delegateTermsEnum.getComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean seekExact(BytesRef text, boolean useCache)
|
||||
throws IOException {
|
||||
// The magical fail-fast speed up that is the entire point of all of
// this code: save a disk seek when an in-memory structure says the term
// is definitely absent. The structure may occasionally give a false
// positive but is guaranteed never to give a false negative.
|
||||
if (filter.contains(text) == ContainsResult.NO) {
|
||||
return false;
|
||||
}
|
||||
return delegateTermsEnum.seekExact(text, useCache);
|
||||
}
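// Illustrative note (not part of this patch): a term that was never added to this
// field usually hashes to an unset bit, so the filter answers ContainsResult.NO and
// seekExact returns false without a delegate (on-disk) seek; only MAYBE answers -
// real hits plus occasional collisions - fall through to delegateTermsEnum.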
|
||||
|
||||
@Override
|
||||
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
|
||||
throws IOException {
|
||||
return delegateTermsEnum.seekCeil(text, useCache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void seekExact(long ord) throws IOException {
|
||||
delegateTermsEnum.seekExact(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final BytesRef term() throws IOException {
|
||||
return delegateTermsEnum.term();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final long ord() throws IOException {
|
||||
return delegateTermsEnum.ord();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int docFreq() throws IOException {
|
||||
return delegateTermsEnum.docFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final long totalTermFreq() throws IOException {
|
||||
return delegateTermsEnum.totalTermFreq();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
|
||||
DocsAndPositionsEnum reuse, int flags) throws IOException {
|
||||
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
|
||||
throws IOException {
|
||||
return delegateTermsEnum.docs(liveDocs, reuse, flags);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class BloomFilteredFieldsConsumer extends FieldsConsumer {
|
||||
private FieldsConsumer delegateFieldsConsumer;
|
||||
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
|
||||
private SegmentWriteState state;
|
||||
|
||||
// private PostingsFormat delegatePostingsFormat;
|
||||
|
||||
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
|
||||
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
|
||||
this.delegateFieldsConsumer = fieldsConsumer;
|
||||
// this.delegatePostingsFormat=delegatePostingsFormat;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsConsumer addField(FieldInfo field) throws IOException {
|
||||
FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
|
||||
if (bloomFilter != null) {
|
||||
assert bloomFilters.containsKey(field) == false;
|
||||
bloomFilters.put(field, bloomFilter);
|
||||
return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
|
||||
} else {
|
||||
// No, use the unfiltered fieldsConsumer - we are not interested in
|
||||
// recording any term Bitsets.
|
||||
return delegateFieldsConsumer.addField(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
delegateFieldsConsumer.close();
|
||||
// Now we are done accumulating values for these fields
|
||||
List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
|
||||
|
||||
for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
|
||||
FuzzySet bloomFilter = entry.getValue();
|
||||
if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
|
||||
nonSaturatedBlooms.add(entry);
|
||||
}
|
||||
}
|
||||
String bloomFileName = IndexFileNames.segmentFileName(
|
||||
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
|
||||
IndexOutput bloomOutput = null;
|
||||
try {
|
||||
bloomOutput = state.directory
|
||||
.createOutput(bloomFileName, state.context);
|
||||
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
|
||||
BLOOM_CODEC_VERSION);
|
||||
// remember the name of the postings format we will delegate to
|
||||
bloomOutput.writeString(delegatePostingsFormat.getName());
|
||||
|
||||
// First field in the output file is the number of fields+blooms saved
|
||||
bloomOutput.writeInt(nonSaturatedBlooms.size());
|
||||
for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
|
||||
FieldInfo fieldInfo = entry.getKey();
|
||||
FuzzySet bloomFilter = entry.getValue();
|
||||
bloomOutput.writeInt(fieldInfo.number);
|
||||
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
|
||||
}
|
||||
} finally {
|
||||
IOUtils.close(bloomOutput);
|
||||
}
|
||||
//We are done with large bitsets so no need to keep them hanging around
|
||||
bloomFilters.clear();
|
||||
}
|
||||
|
||||
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
|
||||
FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
|
||||
|
||||
FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
|
||||
bloomFilter);
|
||||
if (rightSizedSet == null) {
|
||||
rightSizedSet = bloomFilter;
|
||||
}
|
||||
rightSizedSet.serialize(bloomOutput);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class WrappedTermsConsumer extends TermsConsumer {
|
||||
private TermsConsumer delegateTermsConsumer;
|
||||
private FuzzySet bloomFilter;
|
||||
|
||||
public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
|
||||
this.delegateTermsConsumer = termsConsumer;
|
||||
this.bloomFilter = bloomFilter;
|
||||
}
|
||||
|
||||
public PostingsConsumer startTerm(BytesRef text) throws IOException {
|
||||
return delegateTermsConsumer.startTerm(text);
|
||||
}
|
||||
|
||||
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
|
||||
|
||||
// Record this term in our BloomFilter
|
||||
if (stats.docFreq > 0) {
|
||||
bloomFilter.addValue(text);
|
||||
}
|
||||
delegateTermsConsumer.finishTerm(text, stats);
|
||||
}
|
||||
|
||||
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
|
||||
throws IOException {
|
||||
delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
|
||||
}
|
||||
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
return delegateTermsConsumer.getComparator();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
@ -0,0 +1,44 @@
|
|||
package org.apache.lucene.codecs.bloom;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.FuzzySet;
|
||||
import org.apache.lucene.util.hash.HashFunction;
|
||||
import org.apache.lucene.util.hash.MurmurHash2;
|
||||
|
||||
/**
|
||||
* The default policy is to allocate a bitset sized for 10% saturation, assuming one unique term per document.
|
||||
* Bits are set via MurmurHash2 hashing function.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DefaultBloomFilterFactory extends BloomFilterFactory {
|
||||
|
||||
@Override
|
||||
public FuzzySet getSetForField(SegmentWriteState state,FieldInfo info) {
|
||||
//Assume all of the docs have a unique term (e.g. a primary key) and we hope to maintain a set with 10% of bits set
|
||||
return FuzzySet.createSetBasedOnQuality(state.segmentInfo.getDocCount(), 0.10f, new MurmurHash2());
|
||||
}
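// Rough sizing illustration (an estimate, not part of this patch): at the 10% target
// saturation above, a segment with ~1M docs (one unique term each) needs
// size * -ln(0.9) > 1,000,000, i.e. size > ~9.5M bits, so FuzzySet rounds up to the
// next "all ones" size, 2^24-1 bits (~2MB of heap), for that field's filter.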
|
||||
|
||||
@Override
|
||||
public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
|
||||
// Don't bother saving bitsets if >90% of bits are set - we don't want to
|
||||
// throw any more memory at this problem.
|
||||
return bloomFilter.getSaturation() > 0.9f;
|
||||
}
|
||||
|
||||
}
@ -0,0 +1,25 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head>
|
||||
<body>
|
||||
Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
|
||||
</body>
|
||||
</html>
|
|
@ -377,6 +377,11 @@ public class Field implements IndexableField {
|
|||
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
|
||||
*/
|
||||
public void setBoost(float boost) {
|
||||
if (boost != 1.0f) {
|
||||
if (type.indexed() == false || type.omitNorms()) {
|
||||
throw new IllegalArgumentException("You cannot set an index-time boost on an unindexed field, or one that omits norms");
|
||||
}
|
||||
}
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ public final class StoredField extends Field {
|
|||
super(name, value, TYPE);
|
||||
}
|
||||
|
||||
// TODO: not great but maybe not a big problem?
|
||||
public StoredField(String name, int value) {
|
||||
super(name, TYPE);
|
||||
fieldsData = value;
|
||||
|
|
|
@ -54,9 +54,4 @@ public final class StringField extends Field {
|
|||
public StringField(String name, String value, Store stored) {
|
||||
super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String stringValue() {
|
||||
return (fieldsData == null) ? null : fieldsData.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,9 +46,9 @@ public final class TextField extends Field {
|
|||
|
||||
// TODO: add sugar for term vectors...?
|
||||
|
||||
/** Creates a new TextField with Reader value. */
|
||||
public TextField(String name, Reader reader, Store store) {
|
||||
super(name, reader, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
|
||||
/** Creates a new un-stored TextField with Reader value. */
|
||||
public TextField(String name, Reader reader) {
|
||||
super(name, reader, TYPE_NOT_STORED);
|
||||
}
|
||||
|
||||
/** Creates a new TextField with String value. */
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -20,6 +17,9 @@ import java.util.List;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* {@link IndexReaderContext} for {@link AtomicReader} instances
|
||||
* @lucene.experimental
|
||||
|
@ -51,8 +51,9 @@ public final class AtomicReaderContext extends IndexReaderContext {
|
|||
|
||||
@Override
|
||||
public List<AtomicReaderContext> leaves() {
|
||||
if (!isTopLevel)
|
||||
if (!isTopLevel) {
|
||||
throw new UnsupportedOperationException("This is not a top-level context.");
|
||||
}
|
||||
assert leaves != null;
|
||||
return leaves;
|
||||
}
|
||||
|
|
|
@ -75,6 +75,9 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
|
|||
* memory usage, at the expense of higher latency when
|
||||
* loading a TermInfo. The default value is 1. Set this
|
||||
* to -1 to skip loading the terms index entirely.
|
||||
* <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
|
||||
* implementations, including the default one in this release. It only makes
|
||||
* sense for terms indexes that can efficiently re-sample terms at load time.
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException {
|
||||
|
@ -126,6 +129,9 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
|
|||
* memory usage, at the expense of higher latency when
|
||||
* loading a TermInfo. The default value is 1. Set this
|
||||
* to -1 to skip loading the terms index entirely.
|
||||
* <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
|
||||
* implementations, including the default one in this release. It only makes
|
||||
* sense for terms indexes that can efficiently re-sample terms at load time.
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException {
|
||||
|
|
|
@ -202,11 +202,9 @@ final class DocumentsWriter {
|
|||
* discarding any docs added since last flush. */
|
||||
synchronized void abort() {
|
||||
boolean success = false;
|
||||
synchronized (this) {
|
||||
deleteQueue.clear();
|
||||
}
|
||||
|
||||
try {
|
||||
deleteQueue.clear();
|
||||
if (infoStream.isEnabled("DW")) {
|
||||
infoStream.message("DW", "abort");
|
||||
}
|
||||
|
@ -230,6 +228,7 @@ final class DocumentsWriter {
|
|||
perThread.unlock();
|
||||
}
|
||||
}
|
||||
flushControl.abortPendingFlushes();
|
||||
flushControl.waitForFlush();
|
||||
success = true;
|
||||
} finally {
|
||||
|
|
|
@ -567,19 +567,34 @@ final class DocumentsWriterFlushControl {
|
|||
}
|
||||
|
||||
synchronized void abortFullFlushes() {
|
||||
try {
|
||||
abortPendingFlushes();
|
||||
} finally {
|
||||
fullFlush = false;
|
||||
}
|
||||
}
|
||||
|
||||
synchronized void abortPendingFlushes() {
|
||||
try {
|
||||
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
||||
doAfterFlush(dwpt);
|
||||
dwpt.abort();
|
||||
try {
|
||||
dwpt.abort();
|
||||
doAfterFlush(dwpt);
|
||||
} catch (Throwable ex) {
|
||||
// ignore - keep on aborting the flush queue
|
||||
}
|
||||
}
|
||||
for (BlockedFlush blockedFlush : blockedFlushes) {
|
||||
flushingWriters
|
||||
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||
doAfterFlush(blockedFlush.dwpt);
|
||||
blockedFlush.dwpt.abort();
|
||||
try {
|
||||
flushingWriters
|
||||
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||
blockedFlush.dwpt.abort();
|
||||
doAfterFlush(blockedFlush.dwpt);
|
||||
} catch (Throwable ex) {
|
||||
// ignore - keep on aborting the blocked queue
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fullFlush = false;
|
||||
flushQueue.clear();
|
||||
blockedFlushes.clear();
|
||||
updateStallState();
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -19,6 +17,8 @@ import java.util.List;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A struct like class that represents a hierarchical relationship between
|
||||
* {@link IndexReader} instances.
|
||||
|
|
|
@ -1241,6 +1241,78 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
}
|
||||
}
|
||||
|
||||
/** Expert: attempts to delete by document ID, as long as
|
||||
* the provided reader is a near-real-time reader (from {@link
|
||||
* DirectoryReader#open(IndexWriter,boolean)}). If the
|
||||
* provided reader is an NRT reader obtained from this
|
||||
* writer, and its segment has not been merged away, then
|
||||
* the delete succeeds and this method returns true; else, it
|
||||
* returns false; the caller must then separately delete by
|
||||
* Term or Query.
|
||||
*
|
||||
* <b>NOTE</b>: this method can only delete documents
|
||||
* visible to the currently open NRT reader. If you need
|
||||
* to delete documents indexed after opening the NRT
|
||||
* reader you must use the other deleteDocument methods
|
||||
* (e.g., {@link #deleteDocuments(Term)}). */
|
||||
public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
|
||||
|
||||
final AtomicReader reader;
|
||||
if (readerIn instanceof AtomicReader) {
|
||||
// Reader is already atomic: use the incoming docID:
|
||||
reader = (AtomicReader) readerIn;
|
||||
} else {
|
||||
// Composite reader: lookup sub-reader and re-base docID:
|
||||
List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
|
||||
int subIndex = ReaderUtil.subIndex(docID, leaves);
|
||||
reader = leaves.get(subIndex).reader();
|
||||
docID -= leaves.get(subIndex).docBase;
|
||||
assert docID >= 0;
|
||||
assert docID < reader.maxDoc();
|
||||
}
|
||||
|
||||
if (!(reader instanceof SegmentReader)) {
|
||||
throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
|
||||
}
|
||||
|
||||
final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
|
||||
|
||||
// TODO: this is a slow linear search, but, number of
|
||||
// segments should be contained unless something is
|
||||
// seriously wrong w/ the index, so it should be a minor
|
||||
// cost:
|
||||
|
||||
if (segmentInfos.indexOf(info) != -1) {
|
||||
ReadersAndLiveDocs rld = readerPool.get(info, false);
|
||||
if (rld != null) {
|
||||
synchronized(bufferedDeletesStream) {
|
||||
rld.initWritableLiveDocs();
|
||||
if (rld.delete(docID)) {
|
||||
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
|
||||
if (fullDelCount == rld.info.info.getDocCount()) {
|
||||
// If a merge has already registered for this
|
||||
// segment, we leave it in the readerPool; the
|
||||
// merge will skip merging it and will then drop
|
||||
// it once it's done:
|
||||
if (!mergingSegments.contains(rld.info)) {
|
||||
segmentInfos.remove(rld.info);
|
||||
readerPool.drop(rld.info);
|
||||
checkpoint();
|
||||
}
|
||||
}
|
||||
}
|
||||
//System.out.println(" yes " + info.info.name + " " + docID);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
//System.out.println(" no rld " + info.info.name + " " + docID);
|
||||
}
|
||||
} else {
|
||||
//System.out.println(" no seg " + info.info.name + " " + docID);
|
||||
}
|
||||
return false;
|
||||
}
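// Illustrative usage sketch (not part of this patch; "writer", "nrtReader", "docID" and
// the fallback Term are placeholders):
//
//   if (!writer.tryDeleteDocument(nrtReader, docID)) {
//     writer.deleteDocuments(new Term("id", idValue)); // segment merged away: delete by Term
//   }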
|
||||
|
||||
/**
|
||||
* Deletes the document(s) containing any of the
|
||||
* terms. All given deletes are applied and flushed atomically
|
||||
|
|
|
@ -392,7 +392,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
|
|||
* @see #setMaxBufferedDocs(int)
|
||||
* @see #setRAMBufferSizeMB(double)
|
||||
*/
|
||||
public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
|
||||
IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
|
||||
this.flushPolicy = flushPolicy;
|
||||
return this;
|
||||
}
|
||||
|
@ -422,7 +422,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
|
|||
}
|
||||
|
||||
@Override
|
||||
public FlushPolicy getFlushPolicy() {
|
||||
FlushPolicy getFlushPolicy() {
|
||||
return flushPolicy;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs
|
||||
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
|
||||
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
|
@ -146,6 +147,29 @@ public class LiveIndexWriterConfig {
|
|||
* Takes effect immediately, but only applies to newly flushed/merged
|
||||
* segments.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> This parameter does not apply to all PostingsFormat implementations,
|
||||
* including the default one in this release. It only makes sense for term indexes
|
||||
* that are implemented as a fixed gap between terms. For example,
|
||||
* {@link Lucene40PostingsFormat} implements the term index instead based upon how
|
||||
* terms share prefixes. To configure its parameters (the minimum and maximum size
|
||||
* for a block), you would instead use {@link Lucene40PostingsFormat#Lucene40PostingsFormat(int, int)},
|
||||
* which can also be configured on a per-field basis:
|
||||
* <pre class="prettyprint">
|
||||
* //customize Lucene40PostingsFormat, passing minBlockSize=50, maxBlockSize=100
|
||||
* final PostingsFormat tweakedPostings = new Lucene40PostingsFormat(50, 100);
|
||||
* iwc.setCodec(new Lucene40Codec() {
|
||||
* @Override
|
||||
* public PostingsFormat getPostingsFormatForField(String field) {
|
||||
* if (field.equals("fieldWithTonsOfTerms"))
|
||||
* return tweakedPostings;
|
||||
* else
|
||||
* return super.getPostingsFormatForField(field);
|
||||
* }
|
||||
* });
|
||||
* </pre>
|
||||
* Note that other implementations may have their own parameters, or no parameters at all.
|
||||
*
|
||||
* @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL
|
||||
*/
|
||||
public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
|
||||
|
@ -335,6 +359,10 @@ public class LiveIndexWriterConfig {
|
|||
* <p>
|
||||
* Takes effect immediately, but only applies to readers opened after this
|
||||
* call
|
||||
* <p>
|
||||
* <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
|
||||
* implementations, including the default one in this release. It only makes
|
||||
* sense for terms indexes that can efficiently re-sample terms at load time.
|
||||
*/
|
||||
public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
|
||||
if (divisor <= 0 && divisor != -1) {
|
||||
|
@ -462,7 +490,7 @@ public class LiveIndexWriterConfig {
|
|||
/**
|
||||
* @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
|
||||
*/
|
||||
public FlushPolicy getFlushPolicy() {
|
||||
FlushPolicy getFlushPolicy() {
|
||||
return flushPolicy;
|
||||
}
|
||||
|
||||
|
@ -497,7 +525,6 @@ public class LiveIndexWriterConfig {
|
|||
sb.append("mergePolicy=").append(getMergePolicy()).append("\n");
|
||||
sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
|
||||
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
|
||||
sb.append("flushPolicy=").append(getFlushPolicy()).append("\n");
|
||||
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
@ -122,7 +122,7 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
|
||||
// if minSimilarity >= 1, we treat it as number of edits
|
||||
if (minSimilarity >= 1f) {
|
||||
this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
|
||||
this.minSimilarity = 0; // just driven by number of edits
|
||||
maxEdits = (int) minSimilarity;
|
||||
raw = true;
|
||||
} else {
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.SegmentInfoPerCommit;
|
||||
import org.apache.lucene.index.IndexReader; // javadocs
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
@ -254,6 +255,14 @@ public class NRTManager extends ReferenceManager<IndexSearcher> {
|
|||
long getAndIncrementGeneration() {
|
||||
return indexingGen.getAndIncrement();
|
||||
}
|
||||
|
||||
public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
|
||||
if (writer.tryDeleteDocument(reader, docID)) {
|
||||
return indexingGen.get();
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -54,6 +54,11 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
|||
bits = new long[bits2words(numBits)];
|
||||
}
|
||||
|
||||
public FixedBitSet(long[]storedBits,int numBits) {
|
||||
this.numBits = numBits;
|
||||
this.bits = storedBits;
|
||||
}
|
||||
|
||||
/** Makes full copy. */
|
||||
public FixedBitSet(FixedBitSet other) {
|
||||
bits = new long[other.bits.length];
|
||||
|
|
|
@ -0,0 +1,292 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.hash.HashFunction;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A class used to represent a set of many, potentially large, values (e.g. many
|
||||
* long strings such as URLs), using a significantly smaller amount of memory.
|
||||
* </p>
|
||||
* <p>
|
||||
* The set is "lossy" in that it cannot definitively state that is does contain
|
||||
* a value but it <em>can</em> definitively say if a value is <em>not</em> in
|
||||
* the set. It can therefore be used as a Bloom Filter.
|
||||
* </p>
|
||||
* <p>
* Another application of the set is that it can be used to perform fuzzy counting because
|
||||
* it can estimate reasonably accurately how many unique values are contained in the set.
|
||||
* </p>
|
||||
* <p>This class is NOT threadsafe.</p>
|
||||
* <p>
|
||||
* Internally a Bitset is used to record values and, once a client has finished recording
* a stream of values, the {@link #downsize(float)} method can be used to create a suitably
* smaller set that is sized appropriately for the number of values recorded and the desired
* saturation level.
|
||||
*
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FuzzySet {
|
||||
|
||||
public static final int FUZZY_SERIALIZATION_VERSION=1;
|
||||
|
||||
public enum ContainsResult {
|
||||
MAYBE, NO
|
||||
};
|
||||
private HashFunction hashFunction;
|
||||
private FixedBitSet filter;
|
||||
private int bloomSize;
|
||||
|
||||
//The sizes of BitSet used are all numbers that, when expressed in binary form,
|
||||
//are all ones. This is to enable fast downsizing from one bitset to another
|
||||
//by simply ANDing each set index in one bitset with the size of the target bitset
|
||||
// - this provides a fast modulo of the number. Values previously accumulated in
|
||||
// a large bitset and then mapped to a smaller set can be looked up using a single
|
||||
// AND operation of the query term's hash rather than needing to perform a 2-step
|
||||
// translation of the query term that mirrors the stored content's reprojections.
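// For example, with bloomSize = 2047 (0x7FF, eleven ones) a hash of 4100 maps to
// 4100 & 2047 == 4, the same result as 4100 % 2048 but computed with a single AND.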
|
||||
static final int usableBitSetSizes[];
|
||||
static
|
||||
{
|
||||
usableBitSetSizes=new int[30];
|
||||
int mask=1;
|
||||
int size=mask;
|
||||
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||
size=(size<<1)|mask;
|
||||
usableBitSetSizes[i]=size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Rounds down required maxNumberOfBits to the nearest number that is made up
|
||||
* of all ones as a binary number.
|
||||
* Use this method where controlling memory use is paramount.
|
||||
*/
|
||||
public static int getNearestSetSize(int maxNumberOfBits)
|
||||
{
|
||||
int result=usableBitSetSizes[0];
|
||||
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||
if(usableBitSetSizes[i]<=maxNumberOfBits)
|
||||
{
|
||||
result=usableBitSetSizes[i];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
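// Illustrative examples (not part of this patch): getNearestSetSize(1000) == 511 and
// getNearestSetSize(100000) == 65535, the largest "all ones" sizes that fit the budget.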
|
||||
|
||||
/**
|
||||
* Use this method to choose a set size where accuracy (low content saturation) is more important
|
||||
* than deciding how much memory to throw at the problem.
|
||||
* @param maxNumberOfValuesExpected
|
||||
* @param desiredSaturation A number between 0 and 1 expressing the % of bits set once all values have been recorded
|
||||
* @return The size of the set nearest to the required size
|
||||
*/
|
||||
public static int getNearestSetSize(int maxNumberOfValuesExpected,
|
||||
float desiredSaturation) {
|
||||
// Iterate around the various scales of bitset from smallest to largest looking for the first that
|
||||
// satisfies value volumes at the chosen saturation level
|
||||
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||
int numSetBitsAtDesiredSaturation = (int) (usableBitSetSizes[i] * desiredSaturation);
|
||||
int estimatedNumUniqueValues = getEstimatedNumberUniqueValuesAllowingForCollisions(
|
||||
usableBitSetSizes[i], numSetBitsAtDesiredSaturation);
|
||||
if (estimatedNumUniqueValues > maxNumberOfValuesExpected) {
|
||||
return usableBitSetSizes[i];
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public static FuzzySet createSetBasedOnMaxMemory(int maxNumBytes, HashFunction hashFunction)
|
||||
{
|
||||
int setSize=getNearestSetSize(maxNumBytes);
|
||||
return new FuzzySet(new FixedBitSet(setSize+1),setSize,hashFunction);
|
||||
}
|
||||
|
||||
public static FuzzySet createSetBasedOnQuality(int maxNumUniqueValues, float desiredMaxSaturation, HashFunction hashFunction)
|
||||
{
|
||||
int setSize=getNearestSetSize(maxNumUniqueValues,desiredMaxSaturation);
|
||||
return new FuzzySet(new FixedBitSet(setSize+1),setSize,hashFunction);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private FuzzySet(FixedBitSet filter, int bloomSize, HashFunction hashFunction) {
|
||||
super();
|
||||
this.filter = filter;
|
||||
this.bloomSize = bloomSize;
|
||||
this.hashFunction=hashFunction;
|
||||
}
|
||||
|
||||
/**
|
||||
* The main method required for a Bloom filter which, given a value, determines set membership.
|
||||
* Unlike a conventional set, the fuzzy set returns NO or MAYBE rather than true or false.
|
||||
* @param value
|
||||
* @return NO or MAYBE
|
||||
*/
|
||||
public ContainsResult contains(BytesRef value) {
|
||||
int hash = hashFunction.hash(value);
|
||||
if (hash < 0) {
|
||||
hash = hash * -1;
|
||||
}
|
||||
return mayContainValue(hash);
|
||||
}
|
||||
|
||||
/**
|
||||
* Serializes the data set to file using the following format:
|
||||
* <ul>
|
||||
* <li>FuzzySet --> FuzzySetVersion,HashFunctionName,BloomSize,
|
||||
* NumBitSetWords,BitSetWord<sup>NumBitSetWords</sup></li>
|
||||
* <li>HashFunctionName --> {@link DataOutput#writeString(String) String} The
|
||||
* name of a ServiceProvider registered {@link HashFunction}</li>
|
||||
* <li>FuzzySetVersion --> {@link DataOutput#writeInt Uint32} The version number of the {@link FuzzySet} class</li>
|
||||
* <li>BloomSize --> {@link DataOutput#writeInt Uint32} The modulo value used
|
||||
* to project hashes into the field's Bitset</li>
|
||||
* <li>NumBitSetWords --> {@link DataOutput#writeInt Uint32} The number of
|
||||
* longs (as returned from {@link FixedBitSet#getBits})</li>
|
||||
* <li>BitSetWord --> {@link DataOutput#writeLong Long} A long from the array
|
||||
* returned by {@link FixedBitSet#getBits}</li>
|
||||
* </ul>
|
||||
* @param out Data output stream
|
||||
* @throws IOException
|
||||
*/
|
||||
public void serialize(DataOutput out) throws IOException
|
||||
{
|
||||
out.writeInt(FUZZY_SERIALIZATION_VERSION);
|
||||
out.writeString(hashFunction.getName());
|
||||
out.writeInt(bloomSize);
|
||||
long[] bits = filter.getBits();
|
||||
out.writeInt(bits.length);
|
||||
for (int i = 0; i < bits.length; i++) {
|
||||
// Can't use VLong encoding because it can't cope with the negative numbers
// output by FixedBitSet
|
||||
out.writeLong(bits[i]);
|
||||
}
|
||||
}
|
||||
public static FuzzySet deserialize(DataInput in) throws IOException
|
||||
{
|
||||
int version=in.readInt();
|
||||
if(version!=FUZZY_SERIALIZATION_VERSION)
|
||||
{
|
||||
throw new IOException("Error deserializing: set version is not "+FUZZY_SERIALIZATION_VERSION);
|
||||
}
|
||||
HashFunction hashFunction=HashFunction.forName(in.readString());
|
||||
int bloomSize=in.readInt();
|
||||
int numLongs=in.readInt();
|
||||
long[]longs=new long[numLongs];
|
||||
for (int i = 0; i < numLongs; i++) {
|
||||
longs[i]=in.readLong();
|
||||
}
|
||||
FixedBitSet bits = new FixedBitSet(longs,bloomSize+1);
|
||||
return new FuzzySet(bits,bloomSize,hashFunction);
|
||||
}
|
||||
|
||||
private ContainsResult mayContainValue(int positiveHash) {
|
||||
assert positiveHash >= 0;
|
||||
// Bloom sizes are always (2^n)-1, so hashes can be ANDed with them for a fast modulo
|
||||
int pos = positiveHash & bloomSize;
|
||||
if (filter.get(pos)) {
|
||||
// This term may be recorded in this index (but could be a collision)
|
||||
return ContainsResult.MAYBE;
|
||||
}
|
||||
// definitely NOT in this segment
|
||||
return ContainsResult.NO;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records a value in the set. The referenced bytes are hashed and then modulo n'd where n is the
|
||||
* chosen size of the internal bitset.
|
||||
* @param value the key value to be hashed
|
||||
* @throws IOException
|
||||
*/
|
||||
public void addValue(BytesRef value) throws IOException {
|
||||
int hash = hashFunction.hash(value);
|
||||
if (hash < 0) {
|
||||
hash = hash * -1;
|
||||
}
|
||||
// Bitmasking using bloomSize is effectively a modulo operation.
|
||||
int bloomPos = hash & bloomSize;
|
||||
filter.set(bloomPos);
|
||||
}
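// Illustrative round trip (a sketch, not part of this patch):
//
//   FuzzySet set = FuzzySet.createSetBasedOnQuality(1000, 0.10f, new MurmurHash2());
//   set.addValue(new BytesRef("lucene"));
//   set.contains(new BytesRef("lucene"));  // always MAYBE - no false negatives
//   set.contains(new BytesRef("unseen"));  // usually NO, occasionally MAYBE on a collision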
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param targetMaxSaturation A number between 0 and 1 describing the % of bits that would ideally be set in the
|
||||
* result. Lower values have better accuracy but require more space.
|
||||
* @return a smaller FuzzySet or null if the current set is already over-saturated
|
||||
*/
|
||||
public FuzzySet downsize(float targetMaxSaturation)
|
||||
{
|
||||
int numBitsSet = filter.cardinality();
|
||||
FixedBitSet rightSizedBitSet = filter;
|
||||
int rightSizedBitSetSize = bloomSize;
|
||||
// Hopefully find a smaller bitset into which we can project the accumulated values while maintaining the desired saturation level
|
||||
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||
int candidateBitsetSize = usableBitSetSizes[i];
|
||||
float candidateSaturation = (float) numBitsSet
|
||||
/ (float) candidateBitsetSize;
|
||||
if (candidateSaturation <= targetMaxSaturation) {
|
||||
rightSizedBitSetSize = candidateBitsetSize;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Re-project the numbers to a smaller space if necessary
|
||||
if (rightSizedBitSetSize < bloomSize) {
|
||||
// Reset the choice of bitset to the smaller version
|
||||
rightSizedBitSet = new FixedBitSet(rightSizedBitSetSize + 1);
|
||||
// Map across the bits from the large set to the smaller one
|
||||
int bitIndex = 0;
|
||||
do {
|
||||
bitIndex = filter.nextSetBit(bitIndex);
|
||||
if (bitIndex >= 0) {
|
||||
// Project the larger number into a smaller one effectively
|
||||
// modulo-ing by using the target bitset size as a mask
|
||||
int downSizedBitIndex = bitIndex & rightSizedBitSetSize;
|
||||
rightSizedBitSet.set(downSizedBitIndex);
|
||||
bitIndex++;
|
||||
}
|
||||
} while ( (bitIndex >= 0)&&(bitIndex<=bloomSize));
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return new FuzzySet(rightSizedBitSet,rightSizedBitSetSize, hashFunction);
|
||||
}
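// Downsizing illustration (a sketch, not part of this patch): a filter built with
// bloomSize 2^20-1 whose saturation would stay under targetMaxSaturation at 2^12-1
// has each set bit re-projected as bit & (2^12-1), mirroring the masking done in
// mayContainValue(), so lookups keep working against the smaller bitset.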
|
||||
|
||||
public int getEstimatedUniqueValues()
|
||||
{
|
||||
return getEstimatedNumberUniqueValuesAllowingForCollisions(bloomSize, filter.cardinality());
|
||||
}
|
||||
|
||||
// Given a set size and the number of set bits, produces an estimate of the number of unique values recorded
|
||||
public static int getEstimatedNumberUniqueValuesAllowingForCollisions(
|
||||
int setSize, int numRecordedBits) {
|
||||
double setSizeAsDouble = setSize;
|
||||
double numRecordedBitsAsDouble = numRecordedBits;
|
||||
double saturation = numRecordedBitsAsDouble / setSizeAsDouble;
|
||||
double logInverseSaturation = Math.log(1 - saturation) * -1;
|
||||
return (int) (setSizeAsDouble * logInverseSaturation);
|
||||
}
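// Worked example (not part of this patch): the estimate is n ~= -m * ln(1 - k/m) for a
// set of m bits with k bits set; m = 1023 and k = 100 gives 1023 * -ln(0.902) ~= 105
// unique values - slightly more than k, accounting for values that collided onto
// already-set bits.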
|
||||
|
||||
public float getSaturation() {
|
||||
int numBitsSet = filter.cardinality();
|
||||
return (float) numBitsSet / (float) bloomSize;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
package org.apache.lucene.util.hash;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NamedSPILoader;
|
||||
|
||||
|
||||
/**
|
||||
* Base class for hashing functions that can be referred to by name.
|
||||
* Subclasses are expected to provide threadsafe implementations of the hash function
|
||||
* on the range of bytes referenced in the provided {@link BytesRef}
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class HashFunction implements NamedSPILoader.NamedSPI {
|
||||
|
||||
/**
|
||||
* Hashes the contents of the referenced bytes
|
||||
* @param bytes the data to be hashed
|
||||
* @return the hash of the bytes referenced by bytes.offset and length bytes.length
|
||||
*/
|
||||
public abstract int hash(BytesRef bytes);
|
||||
|
||||
private static final NamedSPILoader<HashFunction> loader =
|
||||
new NamedSPILoader<HashFunction>(HashFunction.class);
|
||||
|
||||
private final String name;
|
||||
|
||||
public HashFunction(String name) {
|
||||
NamedSPILoader.checkServiceName(name);
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/** Returns this codec's name */
|
||||
@Override
|
||||
public final String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/** looks up a hash function by name */
|
||||
public static HashFunction forName(String name) {
|
||||
return loader.lookup(name);
|
||||
}
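//   e.g. HashFunction.forName("MurmurHash2") returns the MurmurHash2 instance registered
//   via SPI in this commit (a sketch, not part of this patch).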
|
||||
|
||||
/** returns a list of all available hash function names */
|
||||
public static Set<String> availableHashFunctionNames() {
|
||||
return loader.availableServices();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.util.hash;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* This is a very fast, non-cryptographic hash suitable for general hash-based
|
||||
* lookup. See http://murmurhash.googlepages.com/ for more details.
|
||||
* <p>
|
||||
* The C version of MurmurHash 2.0 found at that site was ported to Java by
|
||||
* Andrzej Bialecki (ab at getopt org).
|
||||
* </p>
|
||||
* <p>
|
||||
* The code from getopt.org was adapted by Mark Harwood in the form here as one of a pluggable choice of
|
||||
* hashing functions as the core function had to be adapted to work with BytesRefs with offsets and lengths
|
||||
* rather than raw byte arrays.
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class MurmurHash2 extends HashFunction{
|
||||
|
||||
|
||||
public static final String HASH_NAME="MurmurHash2";
|
||||
|
||||
public MurmurHash2() {
|
||||
super(HASH_NAME);
|
||||
}
|
||||
|
||||
public static int hash(byte[] data, int seed, int offset, int len) {
|
||||
int m = 0x5bd1e995;
|
||||
int r = 24;
|
||||
int h = seed ^ len;
|
||||
int len_4 = len >> 2;
|
||||
for (int i = 0; i < len_4; i++) {
|
||||
int i_4 = offset + (i << 2);
|
||||
int k = data[i_4 + 3];
|
||||
k = k << 8;
|
||||
k = k | (data[i_4 + 2] & 0xff);
|
||||
k = k << 8;
|
||||
k = k | (data[i_4 + 1] & 0xff);
|
||||
k = k << 8;
|
||||
k = k | (data[i_4 + 0] & 0xff);
|
||||
k *= m;
|
||||
k ^= k >>> r;
|
||||
k *= m;
|
||||
h *= m;
|
||||
h ^= k;
|
||||
}
|
||||
int len_m = len_4 << 2;
|
||||
int left = len - len_m;
|
||||
if (left != 0) {
|
||||
if (left >= 3) {
|
||||
h ^= data[offset + len - 3] << 16;
|
||||
}
|
||||
if (left >= 2) {
|
||||
h ^= data[offset + len - 2] << 8;
|
||||
}
|
||||
if (left >= 1) {
|
||||
h ^= data[offset + len - 1];
|
||||
}
|
||||
h *= m;
|
||||
}
|
||||
h ^= h >>> 13;
|
||||
h *= m;
|
||||
h ^= h >>> 15;
|
||||
return h;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates 32 bit hash from byte array with default seed value.
|
||||
*
|
||||
* @param data
|
||||
* byte array to hash
|
||||
* @param offset
|
||||
* the start position in the array to hash
|
||||
* @param len
|
||||
* length of the array elements to hash
|
||||
* @return 32 bit hash of the given array
|
||||
*/
|
||||
public static final int hash32(final byte[] data, int offset, int len) {
|
||||
return MurmurHash2.hash(data, 0x9747b28c, offset, len);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public final int hash(BytesRef br) {
|
||||
return hash32(br.bytes, br.offset, br.length);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head>
|
||||
<body>
|
||||
Hashing functions loadable via the SPI service
|
||||
</body>
|
||||
</html>
|
|
@ -18,6 +18,7 @@ org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
|
|||
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
|
||||
org.apache.lucene.codecs.memory.MemoryPostingsFormat
|
||||
org.apache.lucene.codecs.bulkvint.BulkVIntPostingsFormat
|
||||
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
|
||||
org.apache.lucene.codecs.memory.DirectPostingsFormat
|
||||
org.apache.lucene.codecs.block.BlockPostingsFormat
|
||||
org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat
|
||||
org.apache.lucene.codecs.memory.DirectPostingsFormat
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.lucene.util.hash.MurmurHash2
|
|
@ -290,33 +290,4 @@ public class TestDocument extends LuceneTestCase {
|
|||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
public void testBoost() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
IndexWriter iw = new IndexWriter(dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field1", "sometext", Field.Store.YES));
|
||||
doc.add(new TextField("field2", "sometext", Field.Store.NO));
|
||||
doc.add(new StringField("foo", "bar", Field.Store.NO));
|
||||
iw.addDocument(doc); // add an 'ok' document
|
||||
try {
|
||||
doc = new Document();
|
||||
// try to boost with norms omitted
|
||||
StringField field = new StringField("foo", "baz", Field.Store.NO);
|
||||
field.setBoost(5.0f);
|
||||
doc.add(field);
|
||||
iw.addDocument(doc);
|
||||
fail("didn't get any exception, boost silently discarded");
|
||||
} catch (UnsupportedOperationException expected) {
|
||||
// expected
|
||||
}
|
||||
DirectoryReader ir = DirectoryReader.open(iw, false);
|
||||
assertEquals(1, ir.numDocs());
|
||||
assertEquals("sometext", ir.document(0).get("field1"));
|
||||
ir.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,613 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
// sanity check some basics of fields
|
||||
public class TestField extends LuceneTestCase {
|
||||
|
||||
public void testByteDocValuesField() throws Exception {
|
||||
ByteDocValuesField field = new ByteDocValuesField("foo", (byte) 5);
|
||||
|
||||
trySetBoost(field);
|
||||
field.setByteValue((byte) 6); // ok
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6, field.numericValue().byteValue());
|
||||
}
|
||||
|
||||
public void testDerefBytesDocValuesField() throws Exception {
|
||||
DerefBytesDocValuesField field = new DerefBytesDocValuesField("foo", new BytesRef("bar"));
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
field.setBytesValue("fubar".getBytes("UTF-8"));
|
||||
field.setBytesValue(new BytesRef("baz"));
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(new BytesRef("baz"), field.binaryValue());
|
||||
}
|
||||
|
||||
public void testDoubleField() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new DoubleField("foo", 5d, Field.Store.NO),
|
||||
new DoubleField("foo", 5d, Field.Store.YES)
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
field.setDoubleValue(6d); // ok
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6d, field.numericValue().doubleValue(), 0.0d);
|
||||
}
|
||||
}
|
||||
|
||||
public void testDoubleDocValuesField() throws Exception {
|
||||
DoubleDocValuesField field = new DoubleDocValuesField("foo", 5d);
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
field.setDoubleValue(6d); // ok
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6d, field.numericValue().doubleValue(), 0.0d);
|
||||
}
|
||||
|
||||
public void testFloatDocValuesField() throws Exception {
|
||||
FloatDocValuesField field = new FloatDocValuesField("foo", 5f);
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
field.setFloatValue(6f); // ok
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6f, field.numericValue().floatValue(), 0.0f);
|
||||
}
|
||||
|
||||
public void testFloatField() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new FloatField("foo", 5f, Field.Store.NO),
|
||||
new FloatField("foo", 5f, Field.Store.YES)
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
field.setFloatValue(6f); // ok
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6f, field.numericValue().floatValue(), 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
public void testIntDocValuesField() throws Exception {
|
||||
IntDocValuesField field = new IntDocValuesField("foo", 5);
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
field.setIntValue(6); // ok
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6, field.numericValue().intValue());
|
||||
}
|
||||
|
||||
public void testIntField() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new IntField("foo", 5, Field.Store.NO),
|
||||
new IntField("foo", 5, Field.Store.YES)
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
field.setIntValue(6); // ok
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6, field.numericValue().intValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void testLongDocValuesField() throws Exception {
|
||||
LongDocValuesField field = new LongDocValuesField("foo", 5L);
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
field.setLongValue(6); // ok
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6L, field.numericValue().longValue());
|
||||
}
|
||||
|
||||
public void testLongField() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new LongField("foo", 5L, Field.Store.NO),
|
||||
new LongField("foo", 5L, Field.Store.YES)
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
field.setLongValue(6); // ok
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6L, field.numericValue().longValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void testPackedLongDocValuesField() throws Exception {
|
||||
PackedLongDocValuesField field = new PackedLongDocValuesField("foo", 5L);
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
field.setLongValue(6); // ok
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(6L, field.numericValue().longValue());
|
||||
}
|
||||
|
||||
public void testShortDocValuesField() throws Exception {
|
||||
ShortDocValuesField field = new ShortDocValuesField("foo", (short)5);
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
field.setShortValue((short) 6); // ok
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals((short)6, field.numericValue().shortValue());
|
||||
}
|
||||
|
||||
public void testSortedBytesDocValuesField() throws Exception {
|
||||
SortedBytesDocValuesField field = new SortedBytesDocValuesField("foo", new BytesRef("bar"));
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
field.setBytesValue("fubar".getBytes("UTF-8"));
|
||||
field.setBytesValue(new BytesRef("baz"));
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(new BytesRef("baz"), field.binaryValue());
|
||||
}
|
||||
|
||||
public void testStraightBytesDocValuesField() throws Exception {
|
||||
StraightBytesDocValuesField field = new StraightBytesDocValuesField("foo", new BytesRef("bar"));
|
||||
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
field.setBytesValue("fubar".getBytes("UTF-8"));
|
||||
field.setBytesValue(new BytesRef("baz"));
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(new BytesRef("baz"), field.binaryValue());
|
||||
}
|
||||
|
||||
public void testStringField() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new StringField("foo", "bar", Field.Store.NO),
|
||||
new StringField("foo", "bar", Field.Store.YES)
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
field.setStringValue("baz");
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals("baz", field.stringValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void testTextFieldString() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new TextField("foo", "bar", Field.Store.NO),
|
||||
new TextField("foo", "bar", Field.Store.YES)
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
field.setBoost(5f);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
field.setStringValue("baz");
|
||||
field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
|
||||
|
||||
assertEquals("baz", field.stringValue());
|
||||
assertEquals(5f, field.boost(), 0f);
|
||||
}
|
||||
}
|
||||
|
||||
public void testTextFieldReader() throws Exception {
|
||||
Field field = new TextField("foo", new StringReader("bar"));
|
||||
|
||||
field.setBoost(5f);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
field.setReaderValue(new StringReader("foobar"));
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
|
||||
|
||||
assertNotNull(field.readerValue());
|
||||
assertEquals(5f, field.boost(), 0f);
|
||||
}
|
||||
|
||||
/* TODO: this is pretty expert and crazy
|
||||
* see if we can fix it up later
|
||||
public void testTextFieldTokenStream() throws Exception {
|
||||
}
|
||||
*/
|
||||
|
||||
public void testStoredFieldBytes() throws Exception {
|
||||
Field fields[] = new Field[] {
|
||||
new StoredField("foo", "bar".getBytes("UTF-8")),
|
||||
new StoredField("foo", "bar".getBytes("UTF-8"), 0, 3),
|
||||
new StoredField("foo", new BytesRef("bar")),
|
||||
};
|
||||
|
||||
for (Field field : fields) {
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
field.setBytesValue("baz".getBytes("UTF-8"));
|
||||
field.setBytesValue(new BytesRef("baz"));
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(new BytesRef("baz"), field.binaryValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void testStoredFieldString() throws Exception {
|
||||
Field field = new StoredField("foo", "bar");
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
field.setStringValue("baz");
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals("baz", field.stringValue());
|
||||
}
|
||||
|
||||
public void testStoredFieldInt() throws Exception {
|
||||
Field field = new StoredField("foo", 1);
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
field.setIntValue(5);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(5, field.numericValue().intValue());
|
||||
}
|
||||
|
||||
public void testStoredFieldDouble() throws Exception {
|
||||
Field field = new StoredField("foo", 1D);
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
field.setDoubleValue(5D);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(5D, field.numericValue().doubleValue(), 0.0D);
|
||||
}
|
||||
|
||||
public void testStoredFieldFloat() throws Exception {
|
||||
Field field = new StoredField("foo", 1F);
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
field.setFloatValue(5f);
|
||||
trySetLongValue(field);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(5f, field.numericValue().floatValue(), 0.0f);
|
||||
}
|
||||
|
||||
public void testStoredFieldLong() throws Exception {
|
||||
Field field = new StoredField("foo", 1L);
|
||||
trySetBoost(field);
|
||||
trySetByteValue(field);
|
||||
trySetBytesValue(field);
|
||||
trySetBytesRefValue(field);
|
||||
trySetDoubleValue(field);
|
||||
trySetIntValue(field);
|
||||
trySetFloatValue(field);
|
||||
field.setLongValue(5);
|
||||
trySetReaderValue(field);
|
||||
trySetShortValue(field);
|
||||
trySetStringValue(field);
|
||||
trySetTokenStreamValue(field);
|
||||
|
||||
assertEquals(5L, field.numericValue().longValue());
|
||||
}
|
||||
|
||||
private void trySetByteValue(Field f) {
|
||||
try {
|
||||
f.setByteValue((byte) 10);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetBytesValue(Field f) {
|
||||
try {
|
||||
f.setBytesValue(new byte[] { 5, 5 });
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetBytesRefValue(Field f) {
|
||||
try {
|
||||
f.setBytesValue(new BytesRef("bogus"));
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetDoubleValue(Field f) {
|
||||
try {
|
||||
f.setDoubleValue(Double.MAX_VALUE);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetIntValue(Field f) {
|
||||
try {
|
||||
f.setIntValue(Integer.MAX_VALUE);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetLongValue(Field f) {
|
||||
try {
|
||||
f.setLongValue(Long.MAX_VALUE);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetFloatValue(Field f) {
|
||||
try {
|
||||
f.setFloatValue(Float.MAX_VALUE);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetReaderValue(Field f) {
|
||||
try {
|
||||
f.setReaderValue(new StringReader("BOO!"));
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetShortValue(Field f) {
|
||||
try {
|
||||
f.setShortValue(Short.MAX_VALUE);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetStringValue(Field f) {
|
||||
try {
|
||||
f.setStringValue("BOO!");
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetTokenStreamValue(Field f) {
|
||||
try {
|
||||
f.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
private void trySetBoost(Field f) {
|
||||
try {
|
||||
f.setBoost(5.0f);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
}
|
|
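A note for readers of the test file above (not part of the commit): every trySet* helper follows the same try/fail/catch idiom, asserting that setting a value of the wrong type on a strongly typed field throws IllegalArgumentException. A minimal, self-contained sketch of that idiom follows; the class and method names are hypothetical.

import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.util.LuceneTestCase;

public class WrongTypeSetterSketch extends LuceneTestCase {
  public void testIntFieldRejectsFloatValue() {
    Field field = new IntField("foo", 5, Field.Store.NO);
    field.setIntValue(6); // legal: matches the field's numeric type
    try {
      field.setFloatValue(6f); // illegal: wrong value type for an IntField
      fail("expected IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
      // expected
    }
  }
}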
@@ -40,24 +40,41 @@ public class TestByteSlices extends LuceneTestCase {
counters[stream] = 0;
|
||||
}
|
||||
|
||||
int num = atLeast(10000);
|
||||
int num = atLeast(3000);
|
||||
for (int iter = 0; iter < num; iter++) {
|
||||
int stream = random().nextInt(NUM_STREAM);
|
||||
if (VERBOSE)
|
||||
int stream;
|
||||
if (random().nextBoolean()) {
|
||||
stream = random().nextInt(3);
|
||||
} else {
|
||||
stream = random().nextInt(NUM_STREAM);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("write stream=" + stream);
|
||||
}
|
||||
|
||||
if (starts[stream] == -1) {
|
||||
final int spot = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
|
||||
starts[stream] = uptos[stream] = spot + pool.byteOffset;
|
||||
if (VERBOSE)
|
||||
if (VERBOSE) {
|
||||
System.out.println(" init to " + starts[stream]);
|
||||
}
|
||||
}
|
||||
|
||||
writer.init(uptos[stream]);
|
||||
int numValue = random().nextInt(20);
|
||||
int numValue;
|
||||
if (random().nextInt(10) == 3) {
|
||||
numValue = random().nextInt(100);
|
||||
} else if (random().nextInt(5) == 3) {
|
||||
numValue = random().nextInt(3);
|
||||
} else {
|
||||
numValue = random().nextInt(20);
|
||||
}
|
||||
|
||||
for(int j=0;j<numValue;j++) {
|
||||
if (VERBOSE)
|
||||
if (VERBOSE) {
|
||||
System.out.println(" write " + (counters[stream]+j));
|
||||
}
|
||||
// write some large (incl. negative) ints:
|
||||
writer.writeVInt(random().nextInt());
|
||||
writer.writeVInt(counters[stream]+j);
|
||||
|
|
|
@@ -186,7 +186,7 @@ public class TestDoc extends LuceneTestCase {
      File file = new File(workDir, fileName);
      Document doc = new Document();
      InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8");
      doc.add(new TextField("contents", is, Field.Store.NO));
      doc.add(new TextField("contents", is));
      writer.addDocument(doc);
      writer.commit();
      is.close();

@@ -891,9 +891,12 @@ public class TestIndexWriterDelete extends LuceneTestCase {
  }

  public void testIndexingThenDeleting() throws Exception {
    // TODO: move this test to its own class and just @SuppressCodecs?
    // TODO: is it enough to just use newFSDirectory?
    final String fieldFormat = _TestUtil.getPostingsFormat("field");
    assumeFalse("This test cannot run with Memory codec", fieldFormat.equals("Memory"));
    assumeFalse("This test cannot run with SimpleText codec", fieldFormat.equals("SimpleText"));
    assumeFalse("This test cannot run with Direct codec", fieldFormat.equals("Direct"));
    final Random r = random();
    Directory dir = newDirectory();
    // note this test explicitly disables payloads

@@ -1542,4 +1542,73 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testBoostOmitNorms() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
IndexWriter iw = new IndexWriter(dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field1", "sometext", Field.Store.YES));
|
||||
doc.add(new TextField("field2", "sometext", Field.Store.NO));
|
||||
doc.add(new StringField("foo", "bar", Field.Store.NO));
|
||||
iw.addDocument(doc); // add an 'ok' document
|
||||
try {
|
||||
doc = new Document();
|
||||
// try to boost with norms omitted
|
||||
List<IndexableField> list = new ArrayList<IndexableField>();
|
||||
list.add(new IndexableField() {
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "foo";
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexableFieldType fieldType() {
|
||||
return StringField.TYPE_NOT_STORED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float boost() {
|
||||
return 5f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef binaryValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String stringValue() {
|
||||
return "baz";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader readerValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number numericValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
|
||||
return null;
|
||||
}
|
||||
});
|
||||
iw.addDocument(list);
|
||||
fail("didn't get any exception, boost silently discarded");
|
||||
} catch (UnsupportedOperationException expected) {
|
||||
// expected
|
||||
}
|
||||
DirectoryReader ir = DirectoryReader.open(iw, false);
|
||||
assertEquals(1, ir.numDocs());
|
||||
assertEquals("sometext", ir.document(0).get("field1"));
|
||||
ir.close();
|
||||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.junit.Test;
|
||||
|
@ -48,10 +51,13 @@ public class TestRollingUpdates extends LuceneTestCase {
|
|||
final int SIZE = atLeast(20);
|
||||
int id = 0;
|
||||
IndexReader r = null;
|
||||
IndexSearcher s = null;
|
||||
final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: numUpdates=" + numUpdates);
|
||||
}
|
||||
int updateCount = 0;
|
||||
// TODO: sometimes update ids not in order...
|
||||
for(int docIter=0;docIter<numUpdates;docIter++) {
|
||||
final Document doc = docs.nextDoc();
|
||||
final String myID = ""+id;
|
||||
|
@ -60,16 +66,59 @@ public class TestRollingUpdates extends LuceneTestCase {
|
|||
} else {
|
||||
id++;
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println(" docIter=" + docIter + " id=" + id);
|
||||
}
|
||||
((Field) doc.getField("docid")).setStringValue(myID);
|
||||
w.updateDocument(new Term("docid", myID), doc);
|
||||
|
||||
Term idTerm = new Term("docid", myID);
|
||||
|
||||
final boolean doUpdate;
|
||||
if (s != null && updateCount < SIZE) {
|
||||
TopDocs hits = s.search(new TermQuery(idTerm), 1);
|
||||
assertEquals(1, hits.totalHits);
|
||||
doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
|
||||
if (VERBOSE) {
|
||||
if (doUpdate) {
|
||||
System.out.println(" tryDeleteDocument failed");
|
||||
} else {
|
||||
System.out.println(" tryDeleteDocument succeeded");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
doUpdate = true;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" no searcher: doUpdate=true");
|
||||
}
|
||||
}
|
||||
|
||||
updateCount++;
|
||||
|
||||
if (doUpdate) {
|
||||
w.updateDocument(idTerm, doc);
|
||||
} else {
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (docIter >= SIZE && random().nextInt(50) == 17) {
|
||||
if (r != null) {
|
||||
r.close();
|
||||
}
|
||||
|
||||
final boolean applyDeletions = random().nextBoolean();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
|
||||
}
|
||||
|
||||
r = w.getReader(applyDeletions);
|
||||
if (applyDeletions) {
|
||||
s = new IndexSearcher(r);
|
||||
} else {
|
||||
s = null;
|
||||
}
|
||||
assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
|
||||
updateCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -22,6 +22,7 @@ import java.util.Arrays;
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -189,6 +190,41 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
public void test2() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
|
||||
addDoc("LANGE", writer);
|
||||
addDoc("LUETH", writer);
|
||||
addDoc("PIRSING", writer);
|
||||
addDoc("RIEGEL", writer);
|
||||
addDoc("TRZECZIAK", writer);
|
||||
addDoc("WALKER", writer);
|
||||
addDoc("WBR", writer);
|
||||
addDoc("WE", writer);
|
||||
addDoc("WEB", writer);
|
||||
addDoc("WEBE", writer);
|
||||
addDoc("WEBER", writer);
|
||||
addDoc("WEBERE", writer);
|
||||
addDoc("WEBREE", writer);
|
||||
addDoc("WEBEREI", writer);
|
||||
addDoc("WBRE", writer);
|
||||
addDoc("WITTKOPF", writer);
|
||||
addDoc("WOJNAROWSKI", writer);
|
||||
addDoc("WRICKE", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
|
||||
//query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(8, hits.length);
|
||||
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* MultiTermQuery provides (via attribute) information about which values
|
||||
* must be competitive to enter the priority queue.
|
||||
|
|
|
@@ -206,7 +206,7 @@ public class TestPositionIncrement extends LuceneTestCase {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
    Document doc = new Document();
    doc.add(new TextField("content", new StringReader(
        "a a b c d e a f g h i j a b k k"), Field.Store.NO));
        "a a b c d e a f g h i j a b k k")));
    writer.addDocument(doc);

    final IndexReader readerFromWriter = writer.getReader();

@@ -77,8 +77,7 @@ public class TestShardSearching extends ShardSearchingTestBase {
      System.out.println("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
    }

    start(_TestUtil.getTempDir("TestShardSearching").toString(),
          numNodes,
    start(numNodes,
          runTimeSec,
          maxSearcherAgeSeconds
          );

@@ -196,7 +196,7 @@ public class TestSort extends LuceneTestCase {
    if (data[i][11] != null) doc.add (new StringField ("parser", data[i][11], Field.Store.NO));

    for(IndexableField f : doc.getFields()) {
      if (!f.fieldType().omitNorms()) {
      if (f.fieldType().indexed() && !f.fieldType().omitNorms()) {
        ((Field) f).setBoost(2.0f);
      }
    }
@@ -239,7 +239,7 @@ public class TestSort extends LuceneTestCase {
    doc.add(new SortedBytesDocValuesField("string2", new BytesRef(num2)));
    doc.add (new Field ("tracer2", num2, onlyStored));
    for(IndexableField f2 : doc.getFields()) {
      if (!f2.fieldType().omitNorms()) {
      if (f2.fieldType().indexed() && !f2.fieldType().omitNorms()) {
        ((Field) f2).setBoost(2.0f);
      }
    }
@@ -255,7 +255,7 @@ public class TestSort extends LuceneTestCase {
    doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored));

    for(IndexableField f2 : doc.getFields()) {
      if (!f2.fieldType().omitNorms()) {
      if (f2.fieldType().indexed() && !f2.fieldType().omitNorms()) {
        ((Field) f2).setBoost(2.0f);
      }
    }

@@ -256,7 +256,7 @@ public class TestPayloadSpans extends LuceneTestCase {
        newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k"), Field.Store.NO));
    doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
    writer.addDocument(doc);

    IndexReader reader = writer.getReader();
@@ -293,7 +293,7 @@ public class TestPayloadSpans extends LuceneTestCase {
        newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k"), Field.Store.NO));
    doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k")));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    IndexSearcher is = newSearcher(reader);
@@ -328,7 +328,7 @@ public class TestPayloadSpans extends LuceneTestCase {
        newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a"), Field.Store.NO));
    doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    IndexSearcher is = newSearcher(reader);

@@ -1096,13 +1096,6 @@ public class TestFSTs extends LuceneTestCase {
  // file, up until a time limit
  public void testRealTerms() throws Exception {

    // TODO: is this necessary? we use the annotation...
    final String defaultFormat = _TestUtil.getPostingsFormat("abracadabra");
    if (defaultFormat.equals("SimpleText") || defaultFormat.equals("Memory")) {
      // no
      Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
    }

    final LineFileDocs docs = new LineFileDocs(random(), true);
    final int RUN_TIME_MSEC = atLeast(500);
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);

@@ -0,0 +1,43 @@
package org.apache.lucene.util.junitcompat;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.Directory;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;

public class TestFailIfDirectoryNotClosed extends WithNestedTests {
  public TestFailIfDirectoryNotClosed() {
    super(true);
  }

  public static class Nested1 extends WithNestedTests.AbstractNestedTest {
    public void testDummy() {
      Directory dir = newDirectory();
      System.out.println(dir.toString());
    }
  }

  @Test
  public void testFailIfDirectoryNotClosed() {
    Result r = JUnitCore.runClasses(Nested1.class);
    Assert.assertEquals(1, r.getFailureCount());
  }
}

@@ -190,7 +190,7 @@ public class IndexFiles {
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")), Field.Store.NO));
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):

@@ -440,21 +440,25 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
assertEquals(1, hits.length);
|
||||
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
// TODO: cannot really be supported given the legacy scoring
|
||||
// system which scores negative, if the distance > min term len,
|
||||
// so such matches were always impossible with lucene 3.x, etc
|
||||
//
|
||||
//q = new SlowFuzzyQuery(new Term("field", "t"), 3);
|
||||
//hits = searcher.search(q, 10).scoreDocs;
|
||||
//assertEquals(1, hits.length);
|
||||
//assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
// q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
|
||||
// hits = searcher.search(q, 10).scoreDocs;
|
||||
// assertEquals(1, hits.length);
|
||||
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
||||
// q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||
// hits = searcher.search(q, 10).scoreDocs;
|
||||
// assertEquals(2, hits.length);
|
||||
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
// assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
||||
|
||||
reader.close();
|
||||
index.close();
|
||||
|
|
|
@@ -0,0 +1,77 @@
package org.apache.lucene.codecs.bloom;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.MurmurHash2;

/**
 * A class used for testing {@link BloomFilteringPostingsFormat} with a concrete
 * delegate (Lucene40). Creates a Bloom filter on ALL fields and with tiny
 * amounts of memory reserved for the filter. DO NOT USE IN A PRODUCTION
 * APPLICATION This is not a realistic application of Bloom Filters as they
 * ordinarily are larger and operate on only primary key type fields.
 */
public class TestBloomFilteredLucene40Postings extends PostingsFormat {

  private BloomFilteringPostingsFormat delegate;

  // Special class used to avoid OOM exceptions where Junit tests create many
  // fields.
  static class LowMemoryBloomFactory extends BloomFilterFactory {
    @Override
    public FuzzySet getSetForField(SegmentWriteState state,FieldInfo info) {
      return FuzzySet.createSetBasedOnMaxMemory(1024, new MurmurHash2());
    }

    @Override
    public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
      // For test purposes always maintain the BloomFilter - even past the point
      // of usefulness when all bits are set
      return false;
    }
  }

  public TestBloomFilteredLucene40Postings() {
    super("TestBloomFilteredLucene40Postings");
    delegate = new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(),
        new LowMemoryBloomFactory());
  }

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state)
      throws IOException {
    return delegate.fieldsConsumer(state);
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state)
      throws IOException {
    return delegate.fieldsProducer(state);
  }
}

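For orientation (not part of the commit): because the format above simply wraps Lucene40PostingsFormat, a test can force it onto every field the same way TestFSTs pins a postings format earlier in this diff, via Codec.setDefault and _TestUtil.alwaysPostingsFormat. The sketch below is a minimal, hypothetical smoke test built only from calls that appear elsewhere in this patch; the class and method names are made up.

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class BloomPostingsSmokeSketch extends LuceneTestCase {
  public void testIndexWithBloomPostings() throws Exception {
    // Route every field's postings through the bloom-wrapped Lucene40 format,
    // mirroring the Codec.setDefault(...) pattern used in TestFSTs above.
    Codec.setDefault(_TestUtil.alwaysPostingsFormat(new TestBloomFilteredLucene40Postings()));
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.NO));
    w.addDocument(doc);
    w.close();
    dir.close();
  }
}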
@@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Support for generating test indexes using the BloomFilteringPostingsFormat
</body>
</html>

@@ -29,6 +29,7 @@ import java.util.Set;

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds;
@@ -98,6 +99,10 @@ public class RandomCodec extends Lucene40Codec {
        new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
        // add pulsing again with (usually) different parameters
        new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
        //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene40Postings to be constructed
        //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
        //with such "wrapper" classes?
        new TestBloomFilteredLucene40Postings(),
        new MockSepPostingsFormat(),
        new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)),
        new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)),

@@ -18,7 +18,6 @@ package org.apache.lucene.search;
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -27,15 +26,17 @@ import java.util.Random;
|
|||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.PrintStreamInfoStream;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
// TODO
|
||||
// - doc blocks? so we can test joins/grouping...
|
||||
|
@ -423,11 +424,16 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
|||
|
||||
private volatile ShardIndexSearcher currentShardSearcher;
|
||||
|
||||
public NodeState(Random random, String baseDir, int nodeID, int numNodes) throws IOException {
|
||||
public NodeState(Random random, int nodeID, int numNodes) throws IOException {
|
||||
myNodeID = nodeID;
|
||||
dir = newFSDirectory(new File(baseDir + "." + myNodeID));
|
||||
dir = newFSDirectory(_TestUtil.getTempDir("ShardSearchingTestBase"));
|
||||
// TODO: set warmer
|
||||
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
|
||||
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
||||
if (VERBOSE) {
|
||||
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
|
||||
}
|
||||
writer = new IndexWriter(dir, iwc);
|
||||
mgr = new SearcherManager(writer, true, null);
|
||||
searchers = new SearcherLifetimeManager();
|
||||
|
||||
|
@ -556,14 +562,14 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
|||
long endTimeNanos;
|
||||
private Thread changeIndicesThread;
|
||||
|
||||
protected void start(String baseDirName, int numNodes, double runTimeSec, int maxSearcherAgeSeconds) throws IOException {
|
||||
protected void start(int numNodes, double runTimeSec, int maxSearcherAgeSeconds) throws IOException {
|
||||
|
||||
endTimeNanos = System.nanoTime() + (long) (runTimeSec*1000000000);
|
||||
this.maxSearcherAgeSeconds = maxSearcherAgeSeconds;
|
||||
|
||||
nodes = new NodeState[numNodes];
|
||||
for(int nodeID=0;nodeID<numNodes;nodeID++) {
|
||||
nodes[nodeID] = new NodeState(random(), baseDirName, nodeID, numNodes);
|
||||
nodes[nodeID] = new NodeState(random(), nodeID, numNodes);
|
||||
}
|
||||
|
||||
long[] nodeVersions = new long[nodes.length];
|
||||
|
|
|
@@ -31,8 +31,6 @@ import org.apache.lucene.util._TestUtil;
public class BaseDirectoryWrapper extends Directory {
|
||||
/** our in directory */
|
||||
protected final Directory delegate;
|
||||
/** best effort: base on in Directory is volatile */
|
||||
protected boolean open;
|
||||
|
||||
private boolean checkIndexOnClose = true;
|
||||
private boolean crossCheckTermVectorsOnClose = true;
|
||||
|
@ -43,7 +41,7 @@ public class BaseDirectoryWrapper extends Directory {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
open = false;
|
||||
isOpen = false;
|
||||
if (checkIndexOnClose && indexPossiblyExists()) {
|
||||
_TestUtil.checkIndex(this, crossCheckTermVectorsOnClose);
|
||||
}
|
||||
|
@ -51,7 +49,7 @@ public class BaseDirectoryWrapper extends Directory {
|
|||
}
|
||||
|
||||
public boolean isOpen() {
|
||||
return open;
|
||||
return isOpen;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@@ -551,7 +551,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
if (noDeleteOpenFile && openLocks.size() > 0) {
|
||||
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks);
|
||||
}
|
||||
open = false;
|
||||
isOpen = false;
|
||||
if (getCheckIndexOnClose()) {
|
||||
if (indexPossiblyExists()) {
|
||||
if (LuceneTestCase.VERBOSE) {
|
||||
|
@ -614,11 +614,6 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
|
|||
public synchronized void removeIndexInput(IndexInput in, String name) {
|
||||
removeOpenFile(in, name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean isOpen() {
|
||||
return open;
|
||||
}
|
||||
|
||||
/**
|
||||
* Objects that represent fail-able conditions. Objects of a derived
|
||||
|
|
|
@@ -20,5 +20,6 @@ org.apache.lucene.codecs.mocksep.MockSepPostingsFormat
org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings
org.apache.lucene.codecs.asserting.AssertingPostingsFormat

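As an aside (not part of the patch): listing the class in this META-INF/services file is what makes the test format resolvable by name through the codec SPI. A hedged sketch of such a lookup follows, assuming PostingsFormat.forName's usual SPI behaviour; the class and method names are hypothetical.

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.util.LuceneTestCase;

public class PostingsFormatSpiSketch extends LuceneTestCase {
  public void testBloomFormatIsRegistered() {
    // Resolve the format by the name passed to its super() constructor above.
    PostingsFormat format = PostingsFormat.forName("TestBloomFilteredLucene40Postings");
    assertEquals("TestBloomFilteredLucene40Postings", format.getName());
  }
}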
solr/CHANGES.txt
@ -32,7 +32,7 @@ Apache Tika 1.1
|
|||
Carrot2 3.5.0
|
||||
Velocity 1.6.4 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.3.5
|
||||
Apache ZooKeeper 3.3.6
|
||||
|
||||
Upgrading from Solr 4.0.0-ALPHA
|
||||
----------------------
|
||||
|
@ -134,7 +134,7 @@ Bug Fixes
|
|||
* SOLR-1781: Replication index directories not always cleaned up.
|
||||
(Markus Jelsma, Terje Sten Bjerkseth, Mark Miller)
|
||||
|
||||
* SOLR-3639: Update ZooKeeper to 3.3.5 for a variety of bug fixes. (Mark Miller)
|
||||
* SOLR-3639: Update ZooKeeper to 3.3.6 for a variety of bug fixes. (Mark Miller)
|
||||
|
||||
* SOLR-3629: Typo in solr.xml persistence when overriding the solrconfig.xml
|
||||
file name using the "config" attribute prevented the override file from being
|
||||
|
@ -173,6 +173,9 @@ Bug Fixes
|
|||
* SOLR-3677: Fixed misleading error message in web ui to distinguish between
|
||||
no SolrCores loaded vs. no /admin/ handler available.
|
||||
(hossman, steffkes)
|
||||
|
||||
* SOLR-3428: SolrCmdDistributor flushAdds/flushDeletes can cause repeated
|
||||
adds/deletes to be sent (Mark Miller, Per Steffensen)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
@ -207,6 +210,14 @@ Other Changes
|
|||
|
||||
* SOLR-3682: Fail to parse schema.xml if uniqueKeyField is multivalued (hossman)
|
||||
|
||||
* SOLR-2115: DIH no longer requires the "config" parameter to be specified in solrconfig.xml.
|
||||
Instead, the configuration is loaded and parsed with every import. This allows the use of
|
||||
a different configuration with each import, and makes correcting configuration errors simpler.
|
||||
Also, the configuration itself can be passed using the "dataConfig" parameter rather than
|
||||
using a file (this previously worked in debug mode only). When configuration errors are
|
||||
encountered, the error message is returned in XML format. (James Dyer)
|
||||
|
||||
|
||||
================== 4.0.0-ALPHA ==================
|
||||
More information about this release, including any errata related to the
|
||||
release notes, upgrade instructions, or other changes may be found online at:
|
||||
|
@ -709,6 +720,13 @@ Bug Fixes
|
|||
* SOLR-3470: contrib/clustering: custom Carrot2 tokenizer and stemmer factories
|
||||
are respected now (Stanislaw Osinski, Dawid Weiss)
|
||||
|
||||
* SOLR-3430: Added a new DIH test against a real SQL database. Fixed problems
|
||||
revealed by this new test related to the expanded cache support added to
|
||||
3.6/SOLR-2382 (James Dyer)
|
||||
|
||||
* SOLR-1958: When using the MailEntityProcessor, import would fail if
|
||||
fetchMailsSince was not specified. (Max Lynch via James Dyer)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
@ -862,7 +880,13 @@ Other Changes
|
|||
* SOLR-3534: The Dismax and eDismax query parsers will fall back on the 'df' parameter
|
||||
when 'qf' is absent. And if neither is present nor the schema default search field
|
||||
then an exception will be thrown now. (dsmiley)
|
||||
|
||||
|
||||
* SOLR-3262: The "threads" feature of DIH is removed (deprecated in Solr 3.6)
|
||||
(James Dyer)
|
||||
|
||||
* SOLR-3422: Refactored DIH internal data classes. All entities in
|
||||
data-config.xml must have a name (James Dyer)
|
||||
|
||||
Documentation
|
||||
----------------------
|
||||
|
||||
|
@ -898,6 +922,17 @@ Bug Fixes:
|
|||
* SOLR-3470: contrib/clustering: custom Carrot2 tokenizer and stemmer factories
|
||||
are respected now (Stanislaw Osinski, Dawid Weiss)
|
||||
|
||||
* SOLR-3360: More DIH bug fixes for the deprecated "threads" parameter.
|
||||
(Mikhail Khludnev, Claudio R, via James Dyer)
|
||||
|
||||
* SOLR-3430: Added a new DIH test against a real SQL database. Fixed problems
|
||||
revealed by this new test related to the expanded cache support added to
|
||||
3.6/SOLR-2382 (James Dyer)
|
||||
|
||||
* SOLR-3336: SolrEntityProcessor substitutes most variables at query time.
|
||||
(Michael Kroh, Lance Norskog, via Martijn van Groningen)
|
||||
|
||||
|
||||
================== 3.6.0 ==================
|
||||
More information about this release, including any errata related to the
|
||||
release notes, upgrade instructions, or other changes may be found online at:
|
||||
|
@ -1050,6 +1085,27 @@ New Features
|
|||
auto detector cannot detect encoding, especially the text file is too short
|
||||
to detect encoding. (koji)
|
||||
|
||||
* SOLR-1499: Added SolrEntityProcessor that imports data from another Solr core
|
||||
or instance based on a specified query.
|
||||
(Lance Norskog, Erik Hatcher, Pulkit Singhal, Ahmet Arslan, Luca Cavanna,
|
||||
Martijn van Groningen)
|
||||
|
||||
* SOLR-3190: Minor improvements to SolrEntityProcessor. Add more consistency
|
||||
between solr parameters and parameters used in SolrEntityProcessor and
|
||||
ability to specify a custom HttpClient instance.
|
||||
(Luca Cavanna via Martijn van Groningen)
|
||||
|
||||
* SOLR-2382: Added pluggable cache support to DIH so that any Entity can be
|
||||
made cache-able by adding the "cacheImpl" parameter. Include
|
||||
"SortedMapBackedCache" to provide in-memory caching (as previously this was
|
||||
the only option when using CachedSqlEntityProcessor). Users can provide
|
||||
their own implementations of DIHCache for other caching strategies.
|
||||
Deprecate CachedSqlEntityProcessor in favor of specifying "cacheImpl" with
|
||||
SqlEntityProcessor. Make SolrWriter implement DIHWriter and allow the
|
||||
possibility of pluggable Writers (DIH writing to something other than Solr).
|
||||
(James Dyer, Noble Paul)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter
|
||||
|
@ -1296,6 +1352,10 @@ Other Changes
|
|||
extracting request handler and are willing to use java 6, just add the jar.
|
||||
(rmuir)
|
||||
|
||||
* SOLR-3142: DIH Imports no longer default optimize to true, instead false.
|
||||
If you want to force all segments to be merged into one, you can specify
|
||||
this parameter yourself. NOTE: this can be very expensive operation and
|
||||
usually does not make sense for delta-imports. (Robert Muir)
|
||||
|
||||
Build
|
||||
----------------------
|
||||
|
@ -1393,6 +1453,9 @@ Bug Fixes
|
|||
a wrong number of collation results in the response.
|
||||
(Bastiaan Verhoef, James Dyer via Simon Willnauer)
|
||||
|
||||
* SOLR-2875: Fix the incorrect url in DIH example tika-data-config.xml
|
||||
(Shinichiro Abe via koji)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -1585,6 +1648,24 @@ Bug Fixes
|
|||
* SOLR-2692: contrib/clustering: Typo in param name fixed: "carrot.fragzise"
|
||||
changed to "carrot.fragSize" (Stanislaw Osinski).
|
||||
|
||||
* SOLR-2644: When using DIH with threads=2 the default logging is set too high
|
||||
(Bill Bell via shalin)
|
||||
|
||||
* SOLR-2492: DIH does not commit if only deletes are processed
|
||||
(James Dyer via shalin)
|
||||
|
||||
* SOLR-2186: DataImportHandler's multi-threaded option throws NPE
|
||||
(Lance Norskog, Frank Wesemann, shalin)
|
||||
|
||||
* SOLR-2655: DIH multi threaded mode does not resolve attributes correctly
|
||||
(Frank Wesemann, shalin)
|
||||
|
||||
* SOLR-2695: DIH: Documents are collected in unsynchronized list in
|
||||
multi-threaded debug mode (Michael McCandless, shalin)
|
||||
|
||||
* SOLR-2668: DIH multithreaded mode does not rollback on errors from
|
||||
EntityProcessor (Frank Wesemann, shalin)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -1697,6 +1778,9 @@ Bug Fixes
|
|||
* SOLR-2581: UIMAToSolrMapper wrongly instantiates Type with reflection.
|
||||
(Tommaso Teofili via koji)
|
||||
|
||||
* SOLR-2551: Check dataimport.properties for write access (if delta-import is
|
||||
supported in DIH configuration) before starting an import (C S, shalin)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -2141,6 +2225,30 @@ New Features
|
|||
|
||||
* SOLR-2237: Added StempelPolishStemFilterFactory to contrib/analysis-extras (rmuir)
|
||||
|
||||
* SOLR-1525: allow DIH to refer to core properties (noble)
|
||||
|
||||
* SOLR-1547: DIH TemplateTransformer copy objects more intelligently when the
|
||||
template is a single variable (noble)
|
||||
|
||||
* SOLR-1627: DIH VariableResolver should be fetched just in time (noble)
|
||||
|
||||
* SOLR-1583: DIH Create DataSources that return InputStream (noble)
|
||||
|
||||
* SOLR-1358: Integration of Tika and DataImportHandler (Akshay Ukey, noble)
|
||||
|
||||
* SOLR-1654: TikaEntityProcessor example added DIHExample
|
||||
(Akshay Ukey via noble)
|
||||
|
||||
* SOLR-1678: Move onError handling to DIH framework (noble)
|
||||
|
||||
* SOLR-1352: Multi-threaded implementation of DIH (noble)
|
||||
|
||||
* SOLR-1721: Add explicit option to run DataImportHandler in synchronous mode
|
||||
(Alexey Serba via noble)
|
||||
|
||||
* SOLR-1737: Added FieldStreamDataSource (noble)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
@ -2166,6 +2274,9 @@ Optimizations
|
|||
SolrIndexSearcher.doc(int, Set<String>) method b/c it can use the document
|
||||
cache (gsingers)
|
||||
|
||||
* SOLR-2200: Improve the performance of DataImportHandler for large
|
||||
delta-import updates. (Mark Waddle via rmuir)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-1769: Solr 1.4 Replication - Repeater throwing NullPointerException (Jörgen Rydenius via noble)
|
||||
|
@ -2428,6 +2539,61 @@ Bug Fixes
|
|||
does not properly use the same iterator instance.
|
||||
(Christoph Brill, Mark Miller)
|
||||
|
||||
* SOLR-1638: Fixed NullPointerException during DIH import if uniqueKey is not
|
||||
specified in schema (Akshay Ukey via shalin)
|
||||
|
||||
* SOLR-1639: Fixed misleading error message when dataimport.properties is not
|
||||
writable (shalin)
|
||||
|
||||
* SOLR-1598: DIH: Reader used in PlainTextEntityProcessor is not explicitly
|
||||
closed (Sascha Szott via noble)
|
||||
|
||||
* SOLR-1759: DIH: $skipDoc was not working correctly
|
||||
(Gian Marco Tagliani via noble)
|
||||
|
||||
* SOLR-1762: DIH: DateFormatTransformer does not work correctly with
|
||||
non-default locale dates (tommy chheng via noble)
|
||||
|
||||
* SOLR-1757: DIH multithreading sometimes throws NPE (noble)
|
||||
|
||||
* SOLR-1766: DIH with threads enabled doesn't respond to the abort command
|
||||
(Michael Henson via noble)
|
||||
|
||||
* SOLR-1767: dataimporter.functions.escapeSql() does not escape backslash
|
||||
character (Sean Timm via noble)
|
||||
|
||||
* SOLR-1811: formatDate should use the current NOW value always
|
||||
(Sean Timm via noble)
|
||||
|
||||
* SOLR-1794: Dataimport of CLOB fields fails when getCharacterStream() is
|
||||
defined in a superclass. (Gunnar Gauslaa Bergem via rmuir)
|
||||
|
||||
* SOLR-2057: DataImportHandler never calls UpdateRequestProcessor.finish()
|
||||
(Drew Farris via koji)
|
||||
|
||||
* SOLR-1973: Empty fields in XML update messages confuse DataImportHandler.
|
||||
(koji)
|
||||
|
||||
* SOLR-2221: Use StrUtils.parseBool() to get values of boolean options in DIH.
|
||||
true/on/yes (for TRUE) and false/off/no (for FALSE) can be used for
|
||||
sub-options (debug, verbose, synchronous, commit, clean, optimize) for
|
||||
full/delta-import commands. (koji)
|
||||
|
||||
* SOLR-2310: DIH: getTimeElapsedSince() returns incorrect hour value when
|
||||
the elapse is over 60 hours (tom liu via koji)
|
||||
|
||||
* SOLR-2252: DIH: When a child entity in nested entities is rootEntity="true",
|
||||
delta-import doesn't work. (koji)
|
||||
|
||||
* SOLR-2330: solrconfig.xml files in example-DIH are broken. (Matt Parker, koji)
|
||||
|
||||
* SOLR-1191: resolve DataImportHandler deltaQuery column against pk when pk
|
||||
has a prefix (e.g. pk="book.id" deltaQuery="select id from ..."). More
|
||||
useful error reporting when no match found (previously failed with a
|
||||
NullPointerException in log and no clear user feedback). (gthb via yonik)
|
||||
|
||||
* SOLR-2116: Fix TikaConfig classloader bug in TikaEntityProcessor
|
||||
(Martijn van Groningen via hossman)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
@ -2561,6 +2727,12 @@ Other Changes
|
|||
* SOLR-1813: Add ICU4j to contrib/extraction libs and add tests for Arabic
|
||||
extraction (Robert Muir via gsingers)
|
||||
|
||||
* SOLR-1821: Fix TimeZone-dependent test failure in TestEvaluatorBag.
|
||||
(Chris Male via rmuir)
|
||||
|
||||
* SOLR-2367: Reduced noise in test output by ensuring the properties file
|
||||
can be written. (Gunnlaugur Thor Briem via rmuir)
|
||||
|
||||
Build
|
||||
----------------------
|
||||
|
||||
|
@ -2645,6 +2817,33 @@ error. See SOLR-1410 for more information.
|
|||
* RussianLowerCaseFilterFactory
|
||||
* RussianLetterTokenizerFactory
|
||||
|
||||
DIH: Evaluator API has been changed in a non back-compatible way. Users who
|
||||
have developed custom Evaluators will need to change their code according to
|
||||
the new API for it to work. See SOLR-996 for details.
|
||||
|
||||
DIH: The formatDate evaluator's syntax has been changed. The new syntax is
|
||||
formatDate(<variable>, '<format_string>'). For example,
|
||||
formatDate(x.date, 'yyyy-MM-dd'). In the old syntax, the date string was
|
||||
written without a single-quotes. The old syntax has been deprecated and will
|
||||
be removed in 1.5, until then, using the old syntax will log a warning.
|
||||
|
||||
DIH: The Context API has been changed in a non back-compatible way. In
|
||||
particular, the Context.currentProcess() method now returns a String
|
||||
describing the type of the current import process instead of an int.
|
||||
Similarly, the public constants in Context viz. FULL_DUMP, DELTA_DUMP and
|
||||
FIND_DELTA are changed to a String type. See SOLR-969 for details.
|
||||
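Illustration only (not part of CHANGES.txt): under the new Context API described above, custom DIH components compare the String returned by currentProcess() against the String constants. A minimal hedged sketch; the helper class and method are hypothetical.

import org.apache.solr.handler.dataimport.Context;

final class DihProcessCheck {
  // currentProcess() now returns a String, so compare against the String
  // constants (Context.FULL_DUMP, DELTA_DUMP, FIND_DELTA) rather than the
  // old int values.
  static boolean isFullImport(Context context) {
    return Context.FULL_DUMP.equals(context.currentProcess());
  }
}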
|
||||
DIH: The EntityProcessor API has been simplified by moving the logic for applying
transformers and handling multi-row outputs from Transformers into an
EntityProcessorWrapper class. EntityProcessor#destroy is now called once
per parent row at the end of that row (end of data). A new method,
EntityProcessor#close, is added which is called at the end of the import.

DIH: In Solr 1.3, if last_index_time was not available (first import) and
a delta-import was requested, a full-import was run instead. This is no longer
the case: in Solr 1.4 a delta-import is run with last_index_time set to the epoch
date (January 1, 1970, 00:00:00 GMT) if last_index_time is not available.

Versions of Major Components
----------------------------
Apache Lucene 2.9.1 (r832363 on 2.9 branch)

@ -2936,6 +3135,141 @@ New Features
|
|||
86. SOLR-1274: Added text serialization output for extractOnly
|
||||
(Peter Wolanin, gsingers)
|
||||
|
||||
87. SOLR-768: DIH: Set last_index_time variable in full-import command.
|
||||
(Wojtek Piaseczny, Noble Paul via shalin)
|
||||
|
||||
88. SOLR-811: Allow a "deltaImportQuery" attribute in SqlEntityProcessor
|
||||
which is used for delta imports instead of DataImportHandler manipulating
|
||||
the SQL itself. (Noble Paul via shalin)
|
||||
|
||||
89. SOLR-842: Better error handling in DataImportHandler with options to
|
||||
abort, skip and continue imports. (Noble Paul, shalin)
|
||||
|
||||
90. SOLR-833: DIH: A DataSource to read data from a field as a reader. This
|
||||
can be used, for example, to read XMLs residing as CLOBs or BLOBs in
|
||||
databases. (Noble Paul via shalin)
|
||||
|
||||
91. SOLR-887: A DIH Transformer to strip HTML tags. (Ahmed Hammad via shalin)
|
||||
|
||||
92. SOLR-886: DataImportHandler should rollback when an import fails or it is
|
||||
aborted (shalin)
|
||||
|
||||
93. SOLR-891: A DIH Transformer to read strings from Clob type.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
94. SOLR-812: Configurable JDBC settings in JdbcDataSource including optimized
|
||||
defaults for read only mode. (David Smiley, Glen Newton, shalin)
|
||||
|
||||
95. SOLR-910: Add a few utility commands to the DIH admin page such as full
|
||||
import, delta import, status, reload config. (Ahmed Hammad via shalin)
|
||||
|
||||
96. SOLR-938: Add event listener API for DIH import start and end.
|
||||
(Kay Kay, Noble Paul via shalin)
|
||||
|
||||
97. SOLR-801: DIH: Add support for configurable pre-import and post-import
|
||||
delete query per root-entity. (Noble Paul via shalin)
|
||||
|
||||
98. SOLR-988: Add a new scope for session data stored in Context to store
|
||||
objects across imports. (Noble Paul via shalin)
|
||||
|
||||
99. SOLR-980: A PlainTextEntityProcessor which can read from any
|
||||
DataSource<Reader> and output a String.
|
||||
(Nathan Adams, Noble Paul via shalin)
|
||||
|
||||
100.SOLR-1003: XPathEntityprocessor must allow slurping all text from a given
|
||||
xml node and its children. (Noble Paul via shalin)
|
||||
|
||||
101.SOLR-1001: Allow variables in various attributes of RegexTransformer,
|
||||
HTMLStripTransformer and NumberFormatTransformer.
|
||||
(Fergus McMenemie, Noble Paul, shalin)
|
||||
|
||||
102.SOLR-989: DIH: Expose running statistics from the Context API.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
103.SOLR-996: DIH: Expose Context to Evaluators. (Noble Paul, shalin)
|
||||
|
||||
104.SOLR-783: DIH: Enhance delta-imports by maintaining separate
|
||||
last_index_time for each entity. (Jon Baer, Noble Paul via shalin)
|
||||
|
||||
105.SOLR-1033: Current entity's namespace is made available to all DIH
|
||||
Transformers. This allows one to use an output field of TemplateTransformer
|
||||
in other transformers, among other things.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
106.SOLR-1066: New methods in DIH Context to expose Script details.
|
||||
ScriptTransformer changed to read scripts through the new API methods.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
107.SOLR-1062: A DIH LogTransformer which can log data in a given template
|
||||
format. (Jon Baer, Noble Paul via shalin)
|
||||
|
||||
108.SOLR-1065: A DIH ContentStreamDataSource which can accept HTTP POST data
|
||||
in a content stream. This can be used to push data to Solr instead of
|
||||
just pulling it from DB/Files/URLs. (Noble Paul via shalin)
|
||||
|
||||
109.SOLR-1061: Improve DIH RegexTransformer to create multiple columns from
|
||||
regex groups. (Noble Paul via shalin)
|
||||
|
||||
110.SOLR-1059: Special DIH flags introduced for deleting documents by query or
|
||||
id, skipping rows and stopping further transforms. Use $deleteDocById,
|
||||
$deleteDocByQuery for deleting by id and query respectively. Use $skipRow
|
||||
to skip the current row but continue with the document. Use $stopTransform
|
||||
to stop further transformers. New methods are introduced in Context for
|
||||
deleting by id and query. (Noble Paul, Fergus McMenemie, shalin)
|
||||
|
||||
111.SOLR-1076: JdbcDataSource should resolve DIH variables in all its
|
||||
configuration parameters. (shalin)
|
||||
|
||||
112.SOLR-1055: Make DIH JdbcDataSource easily extensible by making the
|
||||
createConnectionFactory method protected and return a
|
||||
Callable<Connection> object. (Noble Paul, shalin)
|
||||
|
||||
113.SOLR-1058: DIH: JdbcDataSource can lookup javax.sql.DataSource using JNDI.
|
||||
Use a jndiName attribute to specify the location of the data source.
|
||||
(Jason Shepherd, Noble Paul via shalin)
|
||||
|
||||
114.SOLR-1083: A DIH Evaluator for escaping query characters.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
115.SOLR-934: A MailEntityProcessor to enable indexing mails from
|
||||
POP/IMAP sources into a solr index. (Preetam Rao, shalin)
|
||||
|
||||
116.SOLR-1060: A DIH LineEntityProcessor which can stream lines of text from a
|
||||
given file to be indexed directly or for processing with transformers and
|
||||
child entities.
|
||||
(Fergus McMenemie, Noble Paul, shalin)
|
||||
|
||||
117.SOLR-1127: Add support for DIH field name to be templatized.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
118.SOLR-1092: Added a new DIH command named 'import' which does not
|
||||
automatically clean the index. This is useful and more appropriate when one
|
||||
needs to import only some of the entities.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
119.SOLR-1153: DIH 'deltaImportQuery' is honored on child entities as well
|
||||
(noble)
|
||||
|
||||
120.SOLR-1230: Enhanced dataimport.jsp to work with all DataImportHandler
|
||||
request handler configurations, rather than just a hardcoded /dataimport
|
||||
handler. (ehatcher)
|
||||
|
||||
121.SOLR-1235: disallow period (.) in DIH entity names (noble)
|
||||
|
||||
122.SOLR-1234: Multiple DIH does not work because all of them write to
|
||||
dataimport.properties. Use the handler name as the properties file name
|
||||
(noble)
|
||||
|
||||
123.SOLR-1348: Support binary field type in convertType logic in DIH
|
||||
JdbcDataSource (shalin)
|
||||
|
||||
124.SOLR-1406: DIH: Make FileDataSource and FileListEntityProcessor to be more
|
||||
extensible (Luke Forehand, shalin)
|
||||
|
||||
125.SOLR-1437: DIH: XPathEntityProcessor can deal with xpath syntaxes such as
|
||||
//tagname , /root//tagname (Fergus McMenemie via noble)
|
||||
|
||||
|
||||
Optimizations
----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

@@ -2993,6 +3327,21 @@ Optimizations

17. SOLR-1296: Enables setting IndexReader's termInfosIndexDivisor via a new attribute to StandardIndexReaderFactory.
    Enables setting termIndexInterval to IndexWriter via SolrIndexConfig. (Jason Rutherglen, hossman, gsingers)

18. SOLR-846: DIH: Reduce memory consumption during delta import by removing
    keys once they have been used. (Ricky Leung, Noble Paul via shalin)

19. SOLR-974: DataImportHandler skips the commit if no data has been updated.
    (Wojtek Piaseczny, shalin)

20. SOLR-1004: DIH: Check for abort more frequently during delta-imports.
    (Marc Sturlese, shalin)

21. SOLR-1098: DIH DateFormatTransformer can cache the format objects
    (illustrated after this list). (Noble Paul via shalin)

22. SOLR-1465: Replaced string concatenations with StringBuilder append
    calls in DIH XPathRecordReader. (Mark Miller, shalin)

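Entry 21 describes the familiar cache-the-formatter pattern. The sketch below illustrates the idea only; the class and field names are assumptions, not the transformer's actual code, and it relies on a DIH transformer instance being used by a single import thread, since SimpleDateFormat is not thread-safe.

import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

// Illustrative cache of SimpleDateFormat objects keyed by pattern and locale.
public final class DateFormatCacheSketch {
  private final Map<String, SimpleDateFormat> cache = new HashMap<String, SimpleDateFormat>();

  public SimpleDateFormat getFormat(String pattern, Locale locale) {
    String key = pattern + '/' + locale;            // cache key: pattern plus locale
    SimpleDateFormat fmt = cache.get(key);
    if (fmt == null) {
      fmt = new SimpleDateFormat(pattern, locale);  // created once per key, then reused
      cache.put(key, fmt);
    }
    return fmt;
  }
}
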
Bug Fixes
----------------------
1. SOLR-774: Fixed logging level display (Sean Timm via Otis Gospodnetic)

@ -3210,6 +3559,103 @@ Bug Fixes
|
|||
caused an error to be returned, although the deletes were
|
||||
still executed. (asmodean via yonik)
|
||||
|
||||
76. SOLR-800: Deep copy collections to avoid ConcurrentModificationException
|
||||
in XPathEntityprocessor while streaming
|
||||
(Kyle Morrison, Noble Paul via shalin)
|
||||
|
||||
77. SOLR-823: Request parameter variables ${dataimporter.request.xxx} are not
|
||||
resolved in DIH (Mck SembWever, Noble Paul, shalin)
|
||||
|
||||
78. SOLR-728: Add synchronization to avoid race condition of multiple DIH
|
||||
imports working concurrently (Walter Ferrara, shalin)
|
||||
|
||||
79. SOLR-742: Add ability to create dynamic fields with custom
|
||||
DataImportHandler transformers (Wojtek Piaseczny, Noble Paul, shalin)
|
||||
|
||||
80. SOLR-832: Rows parameter is not honored in DIH non-debug mode and can
|
||||
abort a running import in debug mode. (Akshay Ukey, shalin)
|
||||
|
||||
81. SOLR-838: The DIH VariableResolver obtained from a DataSource's context
|
||||
does not have current data. (Noble Paul via shalin)
|
||||
|
||||
82. SOLR-864: DataImportHandler does not catch and log Errors (shalin)
|
||||
|
||||
83. SOLR-873: Fix case-sensitive field names and columns (Jon Baer, shalin)
|
||||
|
||||
84. SOLR-893: Unable to delete documents via SQL and deletedPkQuery with
|
||||
deltaimport (Dan Rosher via shalin)
|
||||
|
||||
85. SOLR-888: DIH DateFormatTransformer cannot convert non-string type
|
||||
(Amit Nithian via shalin)
|
||||
|
||||
86. SOLR-841: DataImportHandler should throw exception if a field does not
|
||||
have column attribute (Michael Henson, shalin)
|
||||
|
||||
87. SOLR-884: CachedSqlEntityProcessor should check if the cache key is
|
||||
present in the query results (Noble Paul via shalin)
|
||||
|
||||
88. SOLR-985: Fix thread-safety issue with DIH TemplateString for concurrent
|
||||
imports with multiple cores. (Ryuuichi Kumai via shalin)
|
||||
|
||||
89. SOLR-999: DIH XPathRecordReader fails on XMLs with nodes mixed with
|
||||
CDATA content. (Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
90. SOLR-1000: DIH FileListEntityProcessor should not apply fileName filter to
|
||||
directory names. (Fergus McMenemie via shalin)
|
||||
|
||||
91. SOLR-1009: Repeated column names result in duplicate values.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
92. SOLR-1017: Fix DIH thread-safety issue with last_index_time for concurrent
|
||||
imports in multiple cores due to unsafe usage of SimpleDateFormat by
|
||||
multiple threads. (Ryuuichi Kumai via shalin)
|
||||
|
||||
93. SOLR-1024: Calling abort on DataImportHandler import commits data instead
|
||||
of calling rollback. (shalin)
|
||||
|
||||
94. SOLR-1037: DIH should not add null values in a row returned by
|
||||
EntityProcessor to documents. (shalin)
|
||||
|
||||
95. SOLR-1040: DIH XPathEntityProcessor fails with an xpath like
|
||||
/feed/entry/link[@type='text/html']/@href (Noble Paul via shalin)
|
||||
|
||||
96. SOLR-1042: Fix memory leak in DIH by making TemplateString non-static
|
||||
member in VariableResolverImpl (Ryuuichi Kumai via shalin)
|
||||
|
||||
97. SOLR-1053: IndexOutOfBoundsException in DIH SolrWriter.getResourceAsString
|
||||
when size of data-config.xml is a multiple of 1024 bytes.
|
||||
(Herb Jiang via shalin)
|
||||
|
||||
98. SOLR-1077: IndexOutOfBoundsException with useSolrAddSchema in DIH
|
||||
XPathEntityProcessor. (Sam Keen, Noble Paul via shalin)
|
||||
|
||||
99. SOLR-1080: DIH RegexTransformer should not replace if regex is not matched.
|
||||
(Noble Paul, Fergus McMenemie via shalin)
|
||||
|
||||
100.SOLR-1090: DataImportHandler should load the data-config.xml using UTF-8
|
||||
encoding. (Rui Pereira, shalin)
|
||||
|
||||
101.SOLR-1146: ConcurrentModificationException in DataImporter.getStatusMessages
|
||||
(Walter Ferrara, Noble Paul via shalin)
|
||||
|
||||
102.SOLR-1229: Fixes for DIH deletedPkQuery, particularly when using
|
||||
transformed Solr unique id's
|
||||
(Lance Norskog, Noble Paul via ehatcher)
|
||||
|
||||
103.SOLR-1286: Fix the IH commit parameter always defaulting to "true" even
|
||||
if "false" is explicitly passed in. (Jay Hill, Noble Paul via ehatcher)
|
||||
|
||||
104.SOLR-1323: Reset XPathEntityProcessor's $hasMore/$nextUrl when fetching
|
||||
next URL (noble, ehatcher)
|
||||
|
||||
105.SOLR-1450: DIH: Jdbc connection properties such as batchSize are not
|
||||
applied if the driver jar is placed in solr_home/lib.
|
||||
(Steve Sun via shalin)
|
||||
|
||||
106.SOLR-1474: DIH Delta-import should run even if last_index_time is not set.
|
||||
(shalin)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
1. Upgraded to Lucene 2.4.0 (yonik)
|
||||
|
@ -3357,6 +3803,55 @@ Other Changes
|
|||
for discussion on language detection.
|
||||
See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. (gsingers)
|
||||
|
||||
53. SOLR-782: DIH: Refactored SolrWriter to make it a concrete class and
|
||||
removed wrappers over SolrInputDocument. Refactored to load Evaluators
|
||||
lazily. Removed multiple document nodes in the configuration xml. Removed
|
||||
support for 'default' variables, they are automatically available as
|
||||
request parameters. (Noble Paul via shalin)
|
||||
|
||||
54. SOLR-964: DIH: XPathEntityProcessor now ignores DTD validations
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
55. SOLR-1029: DIH: Standardize Evaluator parameter parsing and added helper
|
||||
functions for parsing all evaluator parameters in a standard way.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
56. SOLR-1081: Change DIH EventListener to be an interface so that components
|
||||
such as an EntityProcessor or a Transformer can act as an event listener.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
57. SOLR-1027: DIH: Alias the 'dataimporter' namespace to a shorter name 'dih'.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
58. SOLR-1084: Better error reporting when DIH entity name is a reserved word
|
||||
and data-config.xml root node is not <dataConfig>.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
59. SOLR-1087: Deprecate 'where' attribute in CachedSqlEntityProcessor in
|
||||
favor of cacheKey and cacheLookup. (Noble Paul via shalin)
|
||||
|
||||
60. SOLR-969: Change the FULL_DUMP, DELTA_DUMP, FIND_DELTA constants in DIH
|
||||
Context to String. Change Context.currentProcess() to return a string
|
||||
instead of an integer. (Kay Kay, Noble Paul, shalin)
|
||||
|
||||
61. SOLR-1120: Simplified DIH EntityProcessor API by moving logic for applying
|
||||
transformers and handling multi-row outputs from Transformers into an
|
||||
EntityProcessorWrapper class. The behavior of the method
|
||||
EntityProcessor#destroy has been modified to be called once per parent-row
|
||||
at the end of row. A new method EntityProcessor#close is added which is
|
||||
called at the end of import. A new method
|
||||
Context#getResolvedEntityAttribute is added which returns the resolved
|
||||
value of an entity's attribute. Introduced a DocWrapper which takes care
|
||||
of maintaining document level session variables.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
62. SOLR-1265: Add DIH variable resolving for URLDataSource properties like
|
||||
baseUrl. (Chris Eldredge via ehatcher)
|
||||
|
||||
63. SOLR-1269: Better error messages from DIH JdbcDataSource when JDBC Driver
|
||||
name or SQL is incorrect. (ehatcher, shalin)
|
||||
|
||||
|
||||
Build
|
||||
----------------------
|
||||
1. SOLR-776: Added in ability to sign artifacts via Ant for releases (gsingers)
|
||||
|
@ -3382,6 +3877,10 @@ Documentation
|
|||
|
||||
3. SOLR-1409: Added Solr Powered By Logos
|
||||
|
||||
4. SOLR-1369: Add HSQLDB Jar to example-DIH, unzip database and update
|
||||
instructions.
|
||||
|
||||
|
||||
================== Release 1.3.0 ==================
|
||||
|
||||
Upgrading from Solr 1.2
|
||||
|
@ -3727,7 +4226,10 @@ New Features
|
|||
71. SOLR-1129 : Support binding dynamic fields to beans in SolrJ (Avlesh Singh , noble)
|
||||
|
||||
72. SOLR-920 : Cache and reuse IndexSchema . A new attribute added in solr.xml called 'shareSchema' (noble)
|
||||
|
||||
|
||||
73. SOLR-700: DIH: Allow configurable locales through a locale attribute in
|
||||
fields for NumberFormatTransformer. (Stefan Oestreicher, shalin)
|
||||
|
||||
Changes in runtime behavior
|
||||
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
|
||||
removes the maxBufferedDeletes parameter added by SOLR-310 as Lucene
|
||||
|
@ -3942,6 +4444,18 @@ Bug Fixes
|
|||
|
||||
50. SOLR-749: Allow QParser and ValueSourceParsers to be extended with same name (hossman, gsingers)
|
||||
|
||||
51. SOLR-704: DIH NumberFormatTransformer can silently ignore part of the
|
||||
string while parsing. Now it tries to use the complete string for parsing.
|
||||
Failure to do so will result in an exception.
|
||||
(Stefan Oestreicher via shalin)
|
||||
|
||||
52. SOLR-729: DIH Context.getDataSource(String) gives current entity's
|
||||
DataSource instance regardless of argument. (Noble Paul, shalin)
|
||||
|
||||
53. SOLR-726: DIH: Jdbc Drivers and DataSources fail to load if placed in
|
||||
multicore sharedLib or core's lib directory.
|
||||
(Walter Ferrara, Noble Paul, shalin)
|
||||
|
||||
Other Changes
|
||||
1. SOLR-135: Moved common classes to org.apache.solr.common and altered the
|
||||
build scripts to make two jars: apache-solr-1.3.jar and
|
||||
|
|
|
@@ -402,11 +402,11 @@
        prefix="${fullnamever}"
        includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/**
                   client/README.txt client/ruby/solr-ruby/** contrib/**/lib/**
                   contrib/**/README.txt contrib/**/CHANGES.txt"
                   contrib/**/README.txt licenses/**"
        excludes="lib/README.committers.txt **/data/ **/logs/*
                   **/classes/ **/*.sh **/ivy.xml **/build.xml
                   **/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml
                   **/*pom.xml.template **/*.sha1" />
                   **/*pom.xml.template" />
      <tarfileset dir="${dest}/contrib-lucene-libs-to-package"
        prefix="${fullnamever}"
        includes="**" />

@@ -763,4 +763,8 @@
    </delete>
  </target>

  <target name="jar-checksums" depends="clean-jars,resolve">
    <jar-checksum-macro srcdir="${common-solr.dir}" dstdir="${common-solr.dir}/licenses"/>
  </target>

</project>

@@ -7,6 +7,7 @@ rm -r -f example2
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/collection1/data
rm -f example/example.log

ant example dist

@@ -9,6 +9,7 @@ rm -r -f example4
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/collection1/data
rm -f example/example.log

ant example dist

@@ -9,6 +9,7 @@ rm -r -f example4
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/collection1/data
rm -f example/example.log

ant example dist

@@ -13,7 +13,7 @@ rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -r -f example/solr/collection1/data
rm -f example/example.log

ant example dist

@@ -13,7 +13,7 @@ rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -r -f example/solr/collection1/data
rm -f example/example.log

ant example dist

@@ -2,9 +2,6 @@

cd ..

rm -r -f dist
rm -r -f build

cd example
java -DzkRun -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar 1>example.log 2>&1 &

@@ -11,7 +11,7 @@ rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -r -f example/solr/collection1/data
rm -f example/example.log

ant example dist

@@ -1,547 +0,0 @@
                    Apache Solr - DataImportHandler
                            Release Notes

Introduction
------------
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
HTTP data sources quick and easy.

$Id$
==================  5.0.0 ==================

(No changes)

==================  4.0.0-ALPHA ==================
Bug Fixes
----------------------
* SOLR-3430: Added a new test against a real SQL database. Fixed problems revealed by this new test
  related to the expanded cache support added to 3.6/SOLR-2382. (James Dyer)

* SOLR-1958: When using the MailEntityProcessor, import would fail if fetchMailsSince was not specified.
  (Max Lynch via James Dyer)

Other Changes
----------------------
* SOLR-3262: The "threads" feature is removed (deprecated in Solr 3.6). (James Dyer)

* SOLR-3422: Refactored internal data classes.
  All entities in data-config.xml must have a name. (James Dyer)

==================  3.6.1 ==================

Bug Fixes
----------------------
* SOLR-3360: More bug fixes for the deprecated "threads" parameter. (Mikhail Khludnev, Claudio R, via James Dyer)

* SOLR-3430: Added a new test against a real SQL database. Fixed problems revealed by this new test
  related to the expanded cache support added to 3.6/SOLR-2382. (James Dyer)

* SOLR-3336: SolrEntityProcessor substitutes most variables at query time.
  (Michael Kroh, Lance Norskog, via Martijn van Groningen)

==================  3.6.0 ==================

New Features
----------------------
* SOLR-1499: Added SolrEntityProcessor that imports data from another Solr core or instance based on a specified query.
  (Lance Norskog, Erik Hatcher, Pulkit Singhal, Ahmet Arslan, Luca Cavanna, Martijn van Groningen)
  Additional work:
  SOLR-3190: Minor improvements to SolrEntityProcessor. Add more consistency between Solr parameters
  and the parameters used in SolrEntityProcessor, and the ability to specify a custom HttpClient instance.
  (Luca Cavanna via Martijn van Groningen)
* SOLR-2382: Added pluggable cache support so that any entity can be made cacheable by adding the "cacheImpl" parameter.
  Include "SortedMapBackedCache" to provide in-memory caching (previously this was the only option when
  using CachedSqlEntityProcessor). Users can provide their own implementations of DIHCache for other
  caching strategies. Deprecate CachedSqlEntityProcessor in favor of specifying "cacheImpl" with
  SqlEntityProcessor. Make SolrWriter implement DIHWriter and allow the possibility of pluggable Writers
  (DIH writing to something other than Solr). (James Dyer, Noble Paul)

Changes in Runtime Behavior
----------------------
* SOLR-3142: Imports no longer default optimize to true; it now defaults to false. If you want to force all segments
  to be merged into one, you can specify this parameter yourself. NOTE: this can be a very expensive operation and
  usually does not make sense for delta-imports. (Robert Muir)

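Given SOLR-3142, an import that previously relied on the implicit optimize must now request it explicitly. Below is a minimal SolrJ sketch, assuming a handler registered at /dataimport and a server at the default example URL (both assumptions, not taken from this changelog).

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.params.ModifiableSolrParams;

// Sketch: explicitly pass optimize=true on a full-import now that it defaults to false.
public final class FullImportWithOptimize {
  public static void main(String[] args) throws Exception {
    SolrServer server = new HttpSolrServer("http://localhost:8983/solr"); // assumed URL
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("command", "full-import");
    params.set("optimize", "true");              // no longer implied
    QueryRequest request = new QueryRequest(params);
    request.setPath("/dataimport");              // assumed handler path
    request.process(server);
  }
}
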
================== 3.5.0 ==================
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-2875: Fix the incorrect url in tika-data-config.xml (Shinichiro Abe via koji)
|
||||
|
||||
================== 3.4.0 ==================
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-2644: When using threads=2 the default logging is set too high (Bill Bell via shalin)
|
||||
* SOLR-2492: DIH does not commit if only deletes are processed (James Dyer via shalin)
|
||||
* SOLR-2186: DataImportHandler's multi-threaded option throws NPE (Lance Norskog, Frank Wesemann, shalin)
|
||||
* SOLR-2655: DIH multi threaded mode does not resolve attributes correctly (Frank Wesemann, shalin)
|
||||
* SOLR-2695: Documents are collected in unsynchronized list in multi-threaded debug mode (Michael McCandless, shalin)
|
||||
* SOLR-2668: DIH multithreaded mode does not rollback on errors from EntityProcessor (Frank Wesemann, shalin)
|
||||
|
||||
================== 3.3.0 ==================
|
||||
|
||||
* SOLR-2551: Check dataimport.properties for write access (if delta-import is supported
|
||||
in DIH configuration) before starting an import (C S, shalin)
|
||||
|
||||
================== 3.2.0 ==================
|
||||
|
||||
(No Changes)
|
||||
|
||||
================== 3.1.0 ==================
|
||||
Upgrading from Solr 1.4
|
||||
----------------------
|
||||
|
||||
Versions of Major Components
|
||||
---------------------
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
* SOLR-1525 : allow DIH to refer to core properties (noble)
|
||||
|
||||
* SOLR-1547 : TemplateTransformer copy objects more intelligently when there when the template is a single variable (noble)
|
||||
|
||||
* SOLR-1627 : VariableResolver should be fetched just in time (noble)
|
||||
|
||||
* SOLR-1583 : Create DataSources that return InputStream (noble)
|
||||
|
||||
* SOLR-1358 : Integration of Tika and DataImportHandler ( Akshay Ukey, noble)
|
||||
|
||||
* SOLR-1654 : TikaEntityProcessor example added DIHExample (Akshay Ukey via noble)
|
||||
|
||||
* SOLR-1678 : Move onError handling to DIH framework (noble)
|
||||
|
||||
* SOLR-1352 : Multi-threaded implementation of DIH (noble)
|
||||
|
||||
* SOLR-1721 : Add explicit option to run DataImportHandler in synchronous mode (Alexey Serba via noble)
|
||||
|
||||
* SOLR-1737 : Added FieldStreamDataSource (noble)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
* SOLR-2200: Improve the performance of DataImportHandler for large delta-import
|
||||
updates. (Mark Waddle via rmuir)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-1638: Fixed NullPointerException during import if uniqueKey is not specified
|
||||
in schema (Akshay Ukey via shalin)
|
||||
|
||||
* SOLR-1639: Fixed misleading error message when dataimport.properties is not writable (shalin)
|
||||
|
||||
* SOLR-1598: Reader used in PlainTextEntityProcessor is not explicitly closed (Sascha Szott via noble)
|
||||
|
||||
* SOLR-1759: $skipDoc was not working correctly (Gian Marco Tagliani via noble)
|
||||
|
||||
* SOLR-1762: DateFormatTransformer does not work correctly with non-default locale dates (tommy chheng via noble)
|
||||
|
||||
* SOLR-1757: DIH multithreading sometimes throws NPE (noble)
|
||||
|
||||
* SOLR-1766: DIH with threads enabled doesn't respond to the abort command (Michael Henson via noble)
|
||||
|
||||
* SOLR-1767: dataimporter.functions.escapeSql() does not escape backslash character (Sean Timm via noble)
|
||||
|
||||
* SOLR-1811: formatDate should use the current NOW value always (Sean Timm via noble)
|
||||
|
||||
* SOLR-1794: Dataimport of CLOB fields fails when getCharacterStream() is
|
||||
defined in a superclass. (Gunnar Gauslaa Bergem via rmuir)
|
||||
|
||||
* SOLR-2057: DataImportHandler never calls UpdateRequestProcessor.finish()
|
||||
(Drew Farris via koji)
|
||||
|
||||
* SOLR-1973: Empty fields in XML update messages confuse DataImportHandler. (koji)
|
||||
|
||||
* SOLR-2221: Use StrUtils.parseBool() to get values of boolean options in DIH.
|
||||
true/on/yes (for TRUE) and false/off/no (for FALSE) can be used for sub-options
|
||||
(debug, verbose, synchronous, commit, clean, optimize) for full/delta-import commands. (koji)
|
||||
|
||||
* SOLR-2310: getTimeElapsedSince() returns incorrect hour value when the elapse is over 60 hours
|
||||
(tom liu via koji)
|
||||
|
||||
* SOLR-2252: When a child entity in nested entities is rootEntity="true", delta-import doesn't work.
|
||||
(koji)
|
||||
|
||||
* SOLR-2330: solrconfig.xml files in example-DIH are broken. (Matt Parker, koji)
|
||||
|
||||
* SOLR-1191: resolve DataImportHandler deltaQuery column against pk when pk
|
||||
has a prefix (e.g. pk="book.id" deltaQuery="select id from ..."). More
|
||||
useful error reporting when no match found (previously failed with a
|
||||
NullPointerException in log and no clear user feedback). (gthb via yonik)
|
||||
|
||||
* SOLR-2116: Fix TikaConfig classloader bug in TikaEntityProcessor
|
||||
(Martijn van Groningen via hossman)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
* SOLR-1821: Fix TimeZone-dependent test failure in TestEvaluatorBag.
|
||||
(Chris Male via rmuir)
|
||||
|
||||
* SOLR-2367: Reduced noise in test output by ensuring the properties file can be written.
|
||||
(Gunnlaugur Thor Briem via rmuir)
|
||||
|
||||
|
||||
Build
|
||||
----------------------
|
||||
|
||||
|
||||
Documentation
|
||||
----------------------
|
||||
|
||||
================== Release 1.4.0 ==================
|
||||
|
||||
Upgrading from Solr 1.3
|
||||
-----------------------
|
||||
|
||||
Evaluator API has been changed in a non back-compatible way. Users who have developed custom Evaluators will need
|
||||
to change their code according to the new API for it to work. See SOLR-996 for details.
|
||||
|
||||
The formatDate evaluator's syntax has been changed. The new syntax is formatDate(<variable>, '<format_string>').
|
||||
For example, formatDate(x.date, 'yyyy-MM-dd'). In the old syntax, the date string was written without a single-quotes.
|
||||
The old syntax has been deprecated and will be removed in 1.5, until then, using the old syntax will log a warning.
|
||||
|
||||
The Context API has been changed in a non back-compatible way. In particular, the Context.currentProcess() method
|
||||
now returns a String describing the type of the current import process instead of an int. Similarily, the public
|
||||
constants in Context viz. FULL_DUMP, DELTA_DUMP and FIND_DELTA are changed to a String type. See SOLR-969 for details.
|
||||
|
||||
The EntityProcessor API has been simplified by moving logic for applying transformers and handling multi-row outputs
|
||||
from Transformers into an EntityProcessorWrapper class. The EntityProcessor#destroy is now called once per
|
||||
parent-row at the end of row (end of data). A new method EntityProcessor#close is added which is called at the end
|
||||
of import.
|
||||
|
||||
In Solr 1.3, if the last_index_time was not available (first import) and a delta-import was requested, a full-import
|
||||
was run instead. This is no longer the case. In Solr 1.4 delta import is run with last_index_time as the epoch
|
||||
date (January 1, 1970, 00:00:00 GMT) if last_index_time is not available.
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
1. SOLR-768: Set last_index_time variable in full-import command.
|
||||
(Wojtek Piaseczny, Noble Paul via shalin)
|
||||
|
||||
2. SOLR-811: Allow a "deltaImportQuery" attribute in SqlEntityProcessor which is used for delta imports
|
||||
instead of DataImportHandler manipulating the SQL itself.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
3. SOLR-842: Better error handling in DataImportHandler with options to abort, skip and continue imports.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
4. SOLR-833: A DataSource to read data from a field as a reader. This can be used, for example, to read XMLs
|
||||
residing as CLOBs or BLOBs in databases.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
5. SOLR-887: A Transformer to strip HTML tags.
|
||||
(Ahmed Hammad via shalin)
|
||||
|
||||
6. SOLR-886: DataImportHandler should rollback when an import fails or it is aborted
|
||||
(shalin)
|
||||
|
||||
7. SOLR-891: A Transformer to read strings from Clob type.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
8. SOLR-812: Configurable JDBC settings in JdbcDataSource including optimized defaults for read only mode.
|
||||
(David Smiley, Glen Newton, shalin)
|
||||
|
||||
9. SOLR-910: Add a few utility commands to the DIH admin page such as full import, delta import, status, reload config.
|
||||
(Ahmed Hammad via shalin)
|
||||
|
||||
10.SOLR-938: Add event listener API for import start and end.
|
||||
(Kay Kay, Noble Paul via shalin)
|
||||
|
||||
11.SOLR-801: Add support for configurable pre-import and post-import delete query per root-entity.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
12.SOLR-988: Add a new scope for session data stored in Context to store objects across imports.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
13.SOLR-980: A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String.
|
||||
(Nathan Adams, Noble Paul via shalin)
|
||||
|
||||
14.SOLR-1003: XPathEntityprocessor must allow slurping all text from a given xml node and its children.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
15.SOLR-1001: Allow variables in various attributes of RegexTransformer, HTMLStripTransformer
|
||||
and NumberFormatTransformer.
|
||||
(Fergus McMenemie, Noble Paul, shalin)
|
||||
|
||||
16.SOLR-989: Expose running statistics from the Context API.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
17.SOLR-996: Expose Context to Evaluators.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
|
||||
(Jon Baer, Noble Paul via shalin)
|
||||
|
||||
19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
|
||||
of TemplateTransformer in other transformers, among other things.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
20.SOLR-1066: New methods in Context to expose Script details. ScriptTransformer changed to read scripts
|
||||
through the new API methods.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
21.SOLR-1062: A LogTransformer which can log data in a given template format.
|
||||
(Jon Baer, Noble Paul via shalin)
|
||||
|
||||
22.SOLR-1065: A ContentStreamDataSource which can accept HTTP POST data in a content stream. This can be used to
|
||||
push data to Solr instead of just pulling it from DB/Files/URLs.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
23.SOLR-1061: Improve RegexTransformer to create multiple columns from regex groups.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
24.SOLR-1059: Special flags introduced for deleting documents by query or id, skipping rows and stopping further
|
||||
transforms. Use $deleteDocById, $deleteDocByQuery for deleting by id and query respectively.
|
||||
Use $skipRow to skip the current row but continue with the document. Use $stopTransform to stop
|
||||
further transformers. New methods are introduced in Context for deleting by id and query.
|
||||
(Noble Paul, Fergus McMenemie, shalin)
|
||||
|
||||
25.SOLR-1076: JdbcDataSource should resolve variables in all its configuration parameters.
|
||||
(shalin)
|
||||
|
||||
26.SOLR-1055: Make DIH JdbcDataSource easily extensible by making the createConnectionFactory method protected and
|
||||
return a Callable<Connection> object.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
27.SOLR-1058: JdbcDataSource can lookup javax.sql.DataSource using JNDI. Use a jndiName attribute to specify the
|
||||
location of the data source.
|
||||
(Jason Shepherd, Noble Paul via shalin)
|
||||
|
||||
28.SOLR-1083: An Evaluator for escaping query characters.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
29.SOLR-934: A MailEntityProcessor to enable indexing mails from POP/IMAP sources into a solr index.
|
||||
(Preetam Rao, shalin)
|
||||
|
||||
30.SOLR-1060: A LineEntityProcessor which can stream lines of text from a given file to be indexed directly or
|
||||
for processing with transformers and child entities.
|
||||
(Fergus McMenemie, Noble Paul, shalin)
|
||||
|
||||
31.SOLR-1127: Add support for field name to be templatized.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
32.SOLR-1092: Added a new command named 'import' which does not automatically clean the index. This is useful and
|
||||
more appropriate when one needs to import only some of the entities.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
33.SOLR-1153: 'deltaImportQuery' is honored on child entities as well (noble)
|
||||
|
||||
34.SOLR-1230: Enhanced dataimport.jsp to work with all DataImportHandler request handler configurations,
|
||||
rather than just a hardcoded /dataimport handler. (ehatcher)
|
||||
|
||||
35.SOLR-1235: disallow period (.) in entity names (noble)
|
||||
|
||||
36.SOLR-1234: Multiple DIH does not work because all of them write to dataimport.properties.
|
||||
Use the handler name as the properties file name (noble)
|
||||
|
||||
37.SOLR-1348: Support binary field type in convertType logic in JdbcDataSource (shalin)
|
||||
|
||||
38.SOLR-1406: Make FileDataSource and FileListEntityProcessor to be more extensible (Luke Forehand, shalin)
|
||||
|
||||
39.SOLR-1437 : XPathEntityProcessor can deal with xpath syntaxes such as //tagname , /root//tagname (Fergus McMenemie via noble)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
|
||||
(Ricky Leung, Noble Paul via shalin)
|
||||
|
||||
2. SOLR-974: DataImportHandler skips commit if no data has been updated.
|
||||
(Wojtek Piaseczny, shalin)
|
||||
|
||||
3. SOLR-1004: Check for abort more frequently during delta-imports.
|
||||
(Marc Sturlese, shalin)
|
||||
|
||||
4. SOLR-1098: DateFormatTransformer can cache the format objects.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
5. SOLR-1465: Replaced string concatenations with StringBuilder append calls in XPathRecordReader.
|
||||
(Mark Miller, shalin)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
1. SOLR-800: Deep copy collections to avoid ConcurrentModificationException in XPathEntityprocessor while streaming
|
||||
(Kyle Morrison, Noble Paul via shalin)
|
||||
|
||||
2. SOLR-823: Request parameter variables ${dataimporter.request.xxx} are not resolved
|
||||
(Mck SembWever, Noble Paul, shalin)
|
||||
|
||||
3. SOLR-728: Add synchronization to avoid race condition of multiple imports working concurrently
|
||||
(Walter Ferrara, shalin)
|
||||
|
||||
4. SOLR-742: Add ability to create dynamic fields with custom DataImportHandler transformers
|
||||
(Wojtek Piaseczny, Noble Paul, shalin)
|
||||
|
||||
5. SOLR-832: Rows parameter is not honored in non-debug mode and can abort a running import in debug mode.
|
||||
(Akshay Ukey, shalin)
|
||||
|
||||
6. SOLR-838: The VariableResolver obtained from a DataSource's context does not have current data.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
7. SOLR-864: DataImportHandler does not catch and log Errors (shalin)
|
||||
|
||||
8. SOLR-873: Fix case-sensitive field names and columns (Jon Baer, shalin)
|
||||
|
||||
9. SOLR-893: Unable to delete documents via SQL and deletedPkQuery with deltaimport
|
||||
(Dan Rosher via shalin)
|
||||
|
||||
10. SOLR-888: DateFormatTransformer cannot convert non-string type
|
||||
(Amit Nithian via shalin)
|
||||
|
||||
11. SOLR-841: DataImportHandler should throw exception if a field does not have column attribute
|
||||
(Michael Henson, shalin)
|
||||
|
||||
12. SOLR-884: CachedSqlEntityProcessor should check if the cache key is present in the query results
|
||||
(Noble Paul via shalin)
|
||||
|
||||
13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores.
|
||||
(Ryuuichi Kumai via shalin)
|
||||
|
||||
14. SOLR-999: XPathRecordReader fails on XMLs with nodes mixed with CDATA content.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
15.SOLR-1000: FileListEntityProcessor should not apply fileName filter to directory names.
|
||||
(Fergus McMenemie via shalin)
|
||||
|
||||
16.SOLR-1009: Repeated column names result in duplicate values.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
17.SOLR-1017: Fix thread-safety issue with last_index_time for concurrent imports in multiple cores due to unsafe usage
|
||||
of SimpleDateFormat by multiple threads.
|
||||
(Ryuuichi Kumai via shalin)
|
||||
|
||||
18.SOLR-1024: Calling abort on DataImportHandler import commits data instead of calling rollback.
|
||||
(shalin)
|
||||
|
||||
19.SOLR-1037: DIH should not add null values in a row returned by EntityProcessor to documents.
|
||||
(shalin)
|
||||
|
||||
20.SOLR-1040: XPathEntityProcessor fails with an xpath like /feed/entry/link[@type='text/html']/@href
|
||||
(Noble Paul via shalin)
|
||||
|
||||
21.SOLR-1042: Fix memory leak in DIH by making TemplateString non-static member in VariableResolverImpl
|
||||
(Ryuuichi Kumai via shalin)
|
||||
|
||||
22.SOLR-1053: IndexOutOfBoundsException in SolrWriter.getResourceAsString when size of data-config.xml is a
|
||||
multiple of 1024 bytes.
|
||||
(Herb Jiang via shalin)
|
||||
|
||||
23.SOLR-1077: IndexOutOfBoundsException with useSolrAddSchema in XPathEntityProcessor.
|
||||
(Sam Keen, Noble Paul via shalin)
|
||||
|
||||
24.SOLR-1080: RegexTransformer should not replace if regex is not matched.
|
||||
(Noble Paul, Fergus McMenemie via shalin)
|
||||
|
||||
25.SOLR-1090: DataImportHandler should load the data-config.xml using UTF-8 encoding.
|
||||
(Rui Pereira, shalin)
|
||||
|
||||
26.SOLR-1146: ConcurrentModificationException in DataImporter.getStatusMessages
|
||||
(Walter Ferrara, Noble Paul via shalin)
|
||||
|
||||
27.SOLR-1229: Fixes for deletedPkQuery, particularly when using transformed Solr unique id's
|
||||
(Lance Norskog, Noble Paul via ehatcher)
|
||||
|
||||
28.SOLR-1286: Fix the commit parameter always defaulting to "true" even if "false" is explicitly passed in.
|
||||
(Jay Hill, Noble Paul via ehatcher)
|
||||
|
||||
29.SOLR-1323: Reset XPathEntityProcessor's $hasMore/$nextUrl when fetching next URL (noble, ehatcher)
|
||||
|
||||
30.SOLR-1450: Jdbc connection properties such as batchSize are not applied if the driver jar is placed
|
||||
in solr_home/lib.
|
||||
(Steve Sun via shalin)
|
||||
|
||||
31.SOLR-1474: Delta-import should run even if last_index_time is not set.
|
||||
(shalin)
|
||||
|
||||
|
||||
Documentation
|
||||
----------------------
|
||||
1. SOLR-1369: Add HSQLDB Jar to example-DIH, unzip database and update instructions.
|
||||
|
||||
Other
|
||||
----------------------
|
||||
1. SOLR-782: Refactored SolrWriter to make it a concrete class and removed wrappers over SolrInputDocument.
|
||||
Refactored to load Evaluators lazily. Removed multiple document nodes in the configuration xml.
|
||||
Removed support for 'default' variables, they are automatically available as request parameters.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
2. SOLR-964: XPathEntityProcessor now ignores DTD validations
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
3. SOLR-1029: Standardize Evaluator parameter parsing and added helper functions for parsing all evaluator
|
||||
parameters in a standard way.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
4. SOLR-1081: Change EventListener to be an interface so that components such as an EntityProcessor or a Transformer
|
||||
can act as an event listener.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
5. SOLR-1027: Alias the 'dataimporter' namespace to a shorter name 'dih'.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
6. SOLR-1084: Better error reporting when entity name is a reserved word and data-config.xml root node
|
||||
is not <dataConfig>.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
7. SOLR-1087: Deprecate 'where' attribute in CachedSqlEntityProcessor in favor of cacheKey and cacheLookup.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
8. SOLR-969: Change the FULL_DUMP, DELTA_DUMP, FIND_DELTA constants in Context to String.
|
||||
Change Context.currentProcess() to return a string instead of an integer.
|
||||
(Kay Kay, Noble Paul, shalin)
|
||||
|
||||
9. SOLR-1120: Simplified EntityProcessor API by moving logic for applying transformers and handling multi-row outputs
|
||||
from Transformers into an EntityProcessorWrapper class. The behavior of the method
|
||||
EntityProcessor#destroy has been modified to be called once per parent-row at the end of row. A new
|
||||
method EntityProcessor#close is added which is called at the end of import. A new method
|
||||
Context#getResolvedEntityAttribute is added which returns the resolved value of an entity's attribute.
|
||||
Introduced a DocWrapper which takes care of maintaining document level session variables.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
10.SOLR-1265: Add variable resolving for URLDataSource properties like baseUrl. (Chris Eldredge via ehatcher)
|
||||
|
||||
11.SOLR-1269: Better error messages from JdbcDataSource when JDBC Driver name or SQL is incorrect.
|
||||
(ehatcher, shalin)
|
||||
|
||||
================== Release 1.3.0 ==================
|
||||
|
||||
Status
|
||||
------
|
||||
This is the first release since DataImportHandler was added to the contrib solr distribution.
|
||||
The following changes list changes since the code was introduced, not since
|
||||
the first official release.
|
||||
|
||||
|
||||
Detailed Change List
|
||||
--------------------
|
||||
|
||||
New Features
|
||||
1. SOLR-700: Allow configurable locales through a locale attribute in fields for NumberFormatTransformer.
|
||||
(Stefan Oestreicher, shalin)
|
||||
|
||||
Changes in runtime behavior
|
||||
|
||||
Bug Fixes
|
||||
1. SOLR-704: NumberFormatTransformer can silently ignore part of the string while parsing. Now it tries to
|
||||
use the complete string for parsing. Failure to do so will result in an exception.
|
||||
(Stefan Oestreicher via shalin)
|
||||
|
||||
2. SOLR-729: Context.getDataSource(String) gives current entity's DataSource instance regardless of argument.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
3. SOLR-726: Jdbc Drivers and DataSources fail to load if placed in multicore sharedLib or core's lib directory.
|
||||
(Walter Ferrara, Noble Paul, shalin)
|
||||
|
||||
Other Changes
|
||||
|
||||
|
|
@ -1,3 +1,12 @@
|
|||
Apache Solr - DataImportHandler
|
||||
|
||||
Introduction
|
||||
------------
|
||||
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
|
||||
HTTP data sources quick and easy.
|
||||
|
||||
Important Note
|
||||
--------------
|
||||
Although Solr strives to be agnostic of the Locale where the server is
|
||||
running, some code paths in DataImportHandler are known to depend on the
|
||||
System default Locale, Timezone, or Charset. It is recommended that when
|
||||
|
|
|
@@ -152,7 +152,7 @@ public class ContextImpl extends Context {
        }
      } else if (SCOPE_SOLR_CORE.equals(scope)){
        if(dataImporter != null) {
          dataImporter.getCoreScopeSession().put(name, val);
          dataImporter.putToCoreScopeSession(name, val);
        }
      }
    }

@@ -171,7 +171,7 @@ public class ContextImpl extends Context {
      DocBuilder.DocWrapper doc = getDocument();
      return doc == null ? null: doc.getSessionAttribute(name);
    } else if (SCOPE_SOLR_CORE.equals(scope)){
      return dataImporter == null ? null : dataImporter.getCoreScopeSession().get(name);
      return dataImporter == null ? null : dataImporter.getFromCoreScopeSession(name);
    }
    return null;
  }

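The two hunks above replace direct access to the core-scope session map with a pair of accessors on DataImporter. A minimal sketch of what such accessors look like, assuming the ConcurrentHashMap-backed field that appears later in this commit; the class wrapper and method bodies here are illustrative, not the committed code.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Sketch of the accessor pair that ContextImpl now calls on DataImporter.
public class CoreScopeSessionSketch {
  private final Map<String, Object> coreScopeSession = new ConcurrentHashMap<String, Object>();

  public void putToCoreScopeSession(String key, Object val) {
    coreScopeSession.put(key, val);
  }

  public Object getFromCoreScopeSession(String key) {
    return coreScopeSession.get(key);
  }
}
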
|
|
|
@ -71,14 +71,10 @@ public class DataImportHandler extends RequestHandlerBase implements
|
|||
|
||||
private DataImporter importer;
|
||||
|
||||
private Map<String, Properties> dataSources = new HashMap<String, Properties>();
|
||||
|
||||
private boolean debugEnabled = true;
|
||||
|
||||
private String myName = "dataimport";
|
||||
|
||||
private Map<String , Object> coreScopeSession = new HashMap<String, Object>();
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void init(NamedList args) {
|
||||
|
@ -102,21 +98,10 @@ public class DataImportHandler extends RequestHandlerBase implements
|
|||
}
|
||||
}
|
||||
debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true);
|
||||
NamedList defaults = (NamedList) initArgs.get("defaults");
|
||||
if (defaults != null) {
|
||||
String configLoc = (String) defaults.get("config");
|
||||
if (configLoc != null && configLoc.length() != 0) {
|
||||
processConfiguration(defaults);
|
||||
final InputSource is = new InputSource(core.getResourceLoader().openResource(configLoc));
|
||||
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(configLoc));
|
||||
importer = new DataImporter(is, core,
|
||||
dataSources, coreScopeSession, myName);
|
||||
}
|
||||
}
|
||||
importer = new DataImporter(core, myName);
|
||||
} catch (Throwable e) {
|
||||
LOG.error( DataImporter.MSG.LOAD_EXP, e);
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
DataImporter.MSG.INVALID_CONFIG, e);
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,48 +121,35 @@ public class DataImportHandler extends RequestHandlerBase implements
|
|||
}
|
||||
}
|
||||
SolrParams params = req.getParams();
|
||||
NamedList defaultParams = (NamedList) initArgs.get("defaults");
|
||||
RequestInfo requestParams = new RequestInfo(getParamsMap(params), contentStream);
|
||||
String command = requestParams.getCommand();
|
||||
|
||||
|
||||
if (DataImporter.SHOW_CONF_CMD.equals(command)) {
|
||||
// Modify incoming request params to add wt=raw
|
||||
ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
|
||||
rawParams.set(CommonParams.WT, "raw");
|
||||
req.setParams(rawParams);
|
||||
String dataConfigFile = defaults.get("config");
|
||||
ContentStreamBase content = new ContentStreamBase.StringStream(SolrWriter
|
||||
.getResourceAsString(req.getCore().getResourceLoader().openResource(
|
||||
dataConfigFile)));
|
||||
rsp.add(RawResponseWriter.CONTENT, content);
|
||||
if (DataImporter.SHOW_CONF_CMD.equals(command)) {
|
||||
String dataConfigFile = params.get("config");
|
||||
String dataConfig = params.get("dataConfig");
|
||||
if(dataConfigFile != null) {
|
||||
dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile));
|
||||
}
|
||||
if(dataConfig==null) {
|
||||
rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND);
|
||||
} else {
|
||||
// Modify incoming request params to add wt=raw
|
||||
ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
|
||||
rawParams.set(CommonParams.WT, "raw");
|
||||
req.setParams(rawParams);
|
||||
ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig);
|
||||
rsp.add(RawResponseWriter.CONTENT, content);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
rsp.add("initArgs", initArgs);
|
||||
String message = "";
|
||||
|
||||
if (command != null)
|
||||
if (command != null) {
|
||||
rsp.add("command", command);
|
||||
|
||||
if (requestParams.isDebug() && (importer == null || !importer.isBusy())) {
|
||||
// Reload the data-config.xml
|
||||
importer = null;
|
||||
if (requestParams.getDataConfig() != null) {
|
||||
try {
|
||||
processConfiguration((NamedList) initArgs.get("defaults"));
|
||||
importer = new DataImporter(new InputSource(new StringReader(requestParams.getDataConfig())), req.getCore()
|
||||
, dataSources, coreScopeSession, myName);
|
||||
} catch (RuntimeException e) {
|
||||
rsp.add("exception", DebugLogger.getStacktraceString(e));
|
||||
importer = null;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
inform(req.getCore());
|
||||
}
|
||||
message = DataImporter.MSG.CONFIG_RELOADED;
|
||||
}
|
||||
|
||||
// If importer is still null
|
||||
if (importer == null) {
|
||||
rsp.add("status", DataImporter.MSG.NO_INIT);
|
||||
|
@ -192,7 +164,7 @@ public class DataImportHandler extends RequestHandlerBase implements
|
|||
if (DataImporter.FULL_IMPORT_CMD.equals(command)
|
||||
|| DataImporter.DELTA_IMPORT_CMD.equals(command) ||
|
||||
IMPORT_CMD.equals(command)) {
|
||||
|
||||
importer.maybeReloadConfiguration(requestParams, defaultParams);
|
||||
UpdateRequestProcessorChain processorChain =
|
||||
req.getCore().getUpdateProcessingChain(params.get(UpdateParams.UPDATE_CHAIN));
|
||||
UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
|
||||
|
@ -219,10 +191,12 @@ public class DataImportHandler extends RequestHandlerBase implements
|
|||
importer.runCmd(requestParams, sw);
|
||||
}
|
||||
}
|
||||
} else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
|
||||
importer = null;
|
||||
inform(req.getCore());
|
||||
message = DataImporter.MSG.CONFIG_RELOADED;
|
||||
} else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
|
||||
if(importer.maybeReloadConfiguration(requestParams, defaultParams)) {
|
||||
message = DataImporter.MSG.CONFIG_RELOADED;
|
||||
} else {
|
||||
message = DataImporter.MSG.CONFIG_NOT_RELOADED;
|
||||
}
|
||||
}
|
||||
}
|
||||
rsp.add("status", importer.isBusy() ? "busy" : "idle");
|
||||
|
@ -248,36 +222,6 @@ public class DataImportHandler extends RequestHandlerBase implements
|
|||
return result;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void processConfiguration(NamedList defaults) {
|
||||
if (defaults == null) {
|
||||
LOG.info("No configuration specified in solrconfig.xml for DataImportHandler");
|
||||
return;
|
||||
}
|
||||
|
||||
LOG.info("Processing configuration from solrconfig.xml: " + defaults);
|
||||
|
||||
dataSources = new HashMap<String, Properties>();
|
||||
|
||||
int position = 0;
|
||||
|
||||
while (position < defaults.size()) {
|
||||
if (defaults.getName(position) == null)
|
||||
break;
|
||||
|
||||
String name = defaults.getName(position);
|
||||
if (name.equals("datasource")) {
|
||||
NamedList dsConfig = (NamedList) defaults.getVal(position);
|
||||
Properties props = new Properties();
|
||||
for (int i = 0; i < dsConfig.size(); i++)
|
||||
props.put(dsConfig.getName(i), dsConfig.getVal(i));
|
||||
LOG.info("Adding properties to datasource: " + props);
|
||||
dataSources.put((String) dsConfig.get("name"), props);
|
||||
}
|
||||
position++;
|
||||
}
|
||||
}
|
||||
|
||||
private SolrWriter getSolrWriter(final UpdateRequestProcessor processor,
|
||||
final SolrResourceLoader loader, final RequestInfo requestParams, SolrQueryRequest req) {
|
||||
|
||||
|
|
|
@@ -22,6 +22,8 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.SystemIdResolver;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XMLErrorLogger;
import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
import org.apache.solr.handler.dataimport.config.ConfigParseUtil;

@@ -41,9 +43,12 @@ import org.apache.commons.io.IOUtils;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import java.io.IOException;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

@@ -67,14 +72,14 @@ public class DataImporter {
private DIHConfiguration config;
private Date indexStartTime;
private Properties store = new Properties();
private Map<String, Properties> dataSourceProps = new HashMap<String, Properties>();
private Map<String, Map<String,String>> requestLevelDataSourceProps = new HashMap<String, Map<String,String>>();
private IndexSchema schema;
public DocBuilder docBuilder;
public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
private SolrCore core;
private Map<String, Object> coreScopeSession = new ConcurrentHashMap<String,Object>();
private DIHPropertiesWriter propWriter;
private ReentrantLock importLock = new ReentrantLock();
private final Map<String , Object> coreScopeSession;
private boolean isDeltaImportSupported = false;
private final String handlerName;
private Map<String, SchemaField> lowerNameVsSchemaField = new HashMap<String, SchemaField>();

@@ -83,12 +88,19 @@ public class DataImporter {
* Only for testing purposes
*/
DataImporter() {
coreScopeSession = new HashMap<String, Object>();
createPropertyWriter();
propWriter.init(this);
this.handlerName = "dataimport" ;
}

DataImporter(SolrCore core, String handlerName) {
this.handlerName = handlerName;
this.core = core;
this.schema = core.getSchema();
loadSchemaFieldMap();
createPropertyWriter();
}

private void createPropertyWriter() {
if (this.core == null
|| !this.core.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {

@@ -99,27 +111,58 @@ public class DataImporter {
propWriter.init(this);
}

DataImporter(InputSource dataConfig, SolrCore core, Map<String, Properties> ds, Map<String, Object> session, String handlerName) {
this.handlerName = handlerName;
if (dataConfig == null) {
throw new DataImportHandlerException(SEVERE, "Configuration not found");
}
this.core = core;
this.schema = core.getSchema();
loadSchemaFieldMap();
createPropertyWriter();

dataSourceProps = ds;
if (session == null)
session = new HashMap<String, Object>();
coreScopeSession = session;
loadDataConfig(dataConfig);

for (Entity e : config.getEntities()) {
if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
isDeltaImportSupported = true;
break;

boolean maybeReloadConfiguration(RequestInfo params,
NamedList<?> defaultParams) throws IOException {
if (importLock.tryLock()) {
boolean success = false;
try {
String dataConfigText = params.getDataConfig();
String dataconfigFile = (String) params.getConfigFile();
InputSource is = null;
if(dataConfigText!=null && dataConfigText.length()>0) {
is = new InputSource(new StringReader(dataConfigText));
} else if(dataconfigFile!=null) {
is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
LOG.info("Loading DIH Configuration: " + dataconfigFile);
}
if(is!=null) {
loadDataConfig(is);
success = true;
}

Map<String,Map<String,String>> dsProps = new HashMap<String,Map<String,String>>();
if(defaultParams!=null) {
int position = 0;
while (position < defaultParams.size()) {
if (defaultParams.getName(position) == null) {
break;
}
String name = defaultParams.getName(position);
if (name.equals("datasource")) {
success = true;
NamedList dsConfig = (NamedList) defaultParams.getVal(position);
LOG.info("Getting configuration for Global Datasource...");
Map<String,String> props = new HashMap<String,String>();
for (int i = 0; i < dsConfig.size(); i++) {
props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
}
LOG.info("Adding properties to datasource: " + props);
dsProps.put((String) dsConfig.get("name"), props);
}
position++;
}
}
requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
} catch(IOException ioe) {
throw ioe;
} finally {
importLock.unlock();
}
return success;
} else {
return false;
}
}

@@ -188,7 +231,13 @@ public class DataImporter {
LOG.info("Data Configuration loaded successfully");
} catch (Exception e) {
throw new DataImportHandlerException(SEVERE,
"Exception occurred while initializing context", e);
"Data Config problem: " + e.getMessage(), e);
}
for (Entity e : config.getEntities()) {
if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
isDeltaImportSupported = true;
break;
}
}
}

@@ -196,7 +245,7 @@ public class DataImporter {
DIHConfiguration config;
List<Map<String, String >> functions = new ArrayList<Map<String ,String>>();
Script script = null;
Map<String, Properties> dataSources = new HashMap<String, Properties>();
Map<String, Map<String,String>> dataSources = new HashMap<String, Map<String,String>>();

NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
if(dataConfigTags == null || dataConfigTags.getLength() == 0) {

@@ -232,16 +281,16 @@ public class DataImporter {
List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, DATA_SRC);
if (!dataSourceTags.isEmpty()) {
for (Element element : dataSourceTags) {
Properties p = new Properties();
Map<String,String> p = new HashMap<String,String>();
HashMap<String, String> attrs = ConfigParseUtil.getAllAttributes(element);
for (Map.Entry<String, String> entry : attrs.entrySet()) {
p.setProperty(entry.getKey(), entry.getValue());
p.put(entry.getKey(), entry.getValue());
}
dataSources.put(p.getProperty("name"), p);
dataSources.put(p.get("name"), p);
}
}
if(dataSources.get(null) == null){
for (Properties properties : dataSources.values()) {
for (Map<String,String> properties : dataSources.values()) {
dataSources.put(null,properties);
break;
}

@@ -270,17 +319,17 @@ public class DataImporter {
}

DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
Properties p = dataSourceProps.get(name);
Map<String,String> p = requestLevelDataSourceProps.get(name);
if (p == null)
p = config.getDataSources().get(name);
if (p == null)
p = dataSourceProps.get(null);// for default data source
p = requestLevelDataSourceProps.get(null);// for default data source
if (p == null)
p = config.getDataSources().get(null);
if (p == null)
throw new DataImportHandlerException(SEVERE,
"No dataSource :" + name + " available for entity :" + key.getName());
String type = p.getProperty(TYPE);
String type = p.get(TYPE);
DataSource dataSrc = null;
if (type == null) {
dataSrc = new JdbcDataSource();

@@ -458,6 +507,8 @@ public class DataImporter {
public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";

public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully";

public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";

public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";

@@ -476,13 +527,16 @@ public class DataImporter {
return schema;
}

Map<String, Object> getCoreScopeSession() {
return coreScopeSession;
}

SolrCore getCore() {
return core;
}

void putToCoreScopeSession(String key, Object val) {
coreScopeSession.put(key, val);
}
Object getFromCoreScopeSession(String key) {
return coreScopeSession.get(key);
}

public static final String COLUMN = "column";
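For context on the DataImporter hunks above: after this patch a named dataSource is resolved from the per-request properties first, then from the parsed data-config, with the unnamed (null-keyed) entry acting as the default at each level. A minimal standalone sketch of that lookup order, using plain maps and hypothetical names rather than the Solr classes themselves:

import java.util.HashMap;
import java.util.Map;

public class DataSourcePrecedenceSketch {
  // Mirrors the precedence in getDataSourceInstance: request-level wins,
  // then the parsed config, then the null-keyed defaults of each.
  static Map<String, String> resolve(String name,
                                     Map<String, Map<String, String>> requestLevel,
                                     Map<String, Map<String, String>> configLevel) {
    Map<String, String> p = requestLevel.get(name);
    if (p == null) p = configLevel.get(name);
    if (p == null) p = requestLevel.get(null);   // request-level default
    if (p == null) p = configLevel.get(null);    // config-level default
    if (p == null) throw new IllegalArgumentException("No dataSource: " + name);
    return p;
  }

  public static void main(String[] args) {
    Map<String, Map<String, String>> request = new HashMap<String, Map<String, String>>();
    Map<String, Map<String, String>> config = new HashMap<String, Map<String, String>>();
    Map<String, String> ds = new HashMap<String, String>();
    ds.put("driver", "org.hsqldb.jdbcDriver");   // hypothetical property
    config.put("db", ds);
    config.put(null, ds);                         // unnamed entry becomes the default
    System.out.println(resolve("db", request, config));
    System.out.println(resolve(null, request, config));
  }
}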
@@ -36,6 +36,7 @@ public class RequestInfo {
private final boolean clean;
private final List<String> entitiesToRun;
private final Map<String,Object> rawParams;
private final String configFile;
private final String dataConfig;

//TODO: find a different home for these two...

@@ -98,7 +99,8 @@ public class RequestInfo {
} else {
entitiesToRun = null;
}

String configFileParam = (String) requestParams.get("config");
configFile = configFileParam;
String dataConfigParam = (String) requestParams.get("dataConfig");
if (dataConfigParam != null && dataConfigParam.trim().length() == 0) {
// Empty data-config param is not valid, change it to null

@@ -161,4 +163,8 @@ public class RequestInfo {
public DebugInfo getDebugInfo() {
return debugInfo;
}

public String getConfigFile() {
return configFile;
}
}
@@ -4,7 +4,6 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.solr.handler.dataimport.DataImporter;
import org.w3c.dom.Element;

@@ -49,8 +48,8 @@ public class DIHConfiguration {
private final String onImportEnd;
private final List<Map<String, String>> functions;
private final Script script;
private final Map<String, Properties> dataSources;
public DIHConfiguration(Element element, DataImporter di, List<Map<String, String>> functions, Script script, Map<String, Properties> dataSources) {
private final Map<String, Map<String,String>> dataSources;
public DIHConfiguration(Element element, DataImporter di, List<Map<String, String>> functions, Script script, Map<String, Map<String,String>> dataSources) {
this.deleteQuery = ConfigParseUtil.getStringAttribute(element, "deleteQuery", null);
this.onImportStart = ConfigParseUtil.getStringAttribute(element, "onImportStart", null);
this.onImportEnd = ConfigParseUtil.getStringAttribute(element, "onImportEnd", null);

@@ -90,7 +89,7 @@ public class DIHConfiguration {
public List<Map<String,String>> getFunctions() {
return functions;
}
public Map<String,Properties> getDataSources() {
public Map<String,Map<String,String>> getDataSources() {
return dataSources;
}
public Script getScript() {
@@ -31,11 +31,7 @@
<str name="echoParams">explicit</str>
</lst>
</requestHandler>
<requestHandler name="/dataimport-end-to-end" class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">data-config-end-to-end.xml</str>
</lst>
</requestHandler>
<requestHandler name="/dataimport-end-to-end" class="org.apache.solr.handler.dataimport.DataImportHandler" />
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
@@ -31,7 +31,8 @@ public class TestDIHEndToEnd extends AbstractDIHJdbcTestCase {
}
@Test
public void testEndToEnd() throws Exception {
LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import",
LocalSolrQueryRequest request = lrf.makeRequest(
"command", "full-import", "config", "data-config-end-to-end.xml",
"clean", "true", "commit", "true", "synchronous", "true", "indent", "true");
h.query("/dataimport-end-to-end", request);
assertQ(req("*:*"), "//*[@numFound='20']");
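Taken together, the solrconfig.xml and test hunks above move the DIH config file out of the handler's solrconfig.xml defaults and into a per-request "config" parameter. Assuming a stock single-core setup (host, port, and core name here are placeholders, not part of this patch), the equivalent HTTP request would look roughly like:

http://localhost:8983/solr/collection1/dataimport-end-to-end?command=full-import&config=data-config-end-to-end.xml&clean=true&commit=true&synchronous=true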
@@ -250,10 +250,10 @@ sb.append("(group_name=").append(tg.getName()).append(")");

/*** Isn't core specific... prob better logged from zkController
if (info != null) {
CloudState cloudState = zkController.getCloudState();
if (info.cloudState != cloudState) {
ClusterState clusterState = zkController.getClusterState();
if (info.clusterState != clusterState) {
// something has changed in the matrix...
sb.append(zkController.getBaseUrl() + " sees new CloudState:");
sb.append(zkController.getBaseUrl() + " sees new ClusterState:");
}
}
***/

@@ -263,7 +263,7 @@ sb.append("(group_name=").append(tg.getName()).append(")");

private Map<String,String> getCoreProps(ZkController zkController, SolrCore core) {
final String collection = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
ZkNodeProps props = zkController.getCloudState().getShardProps(collection, ZkStateReader.getCoreNodeName(zkController.getNodeName(), core.getName()));
ZkNodeProps props = zkController.getClusterState().getShardProps(collection, ZkStateReader.getCoreNodeName(zkController.getNodeName(), core.getName()));
if(props!=null) {
return props.getProperties();
}
@@ -24,7 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;

public class AssignShard {

@@ -36,7 +36,7 @@ public class AssignShard {
* @param state
* @return the assigned shard id
*/
public static String assignShard(String collection, CloudState state, Integer numShards) {
public static String assignShard(String collection, ClusterState state, Integer numShards) {
if (numShards == null) {
numShards = 1;
}
@@ -5,7 +5,7 @@ import java.util.Map;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;

@@ -13,7 +13,6 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;

@@ -195,8 +194,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}

private boolean shouldIBeLeader(ZkNodeProps leaderProps) {
CloudState cloudState = zkController.getZkStateReader().getCloudState();
Map<String,Slice> slices = cloudState.getSlices(this.collection);
ClusterState clusterState = zkController.getZkStateReader().getClusterState();
Map<String,Slice> slices = clusterState.getSlices(this.collection);
Slice slice = slices.get(shardId);
Map<String,ZkNodeProps> shards = slice.getShards();
boolean foundSomeoneElseActive = false;

@@ -206,7 +205,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
if (new ZkCoreNodeProps(shard.getValue()).getCoreUrl().equals(
new ZkCoreNodeProps(leaderProps).getCoreUrl())) {
if (state.equals(ZkStateReader.ACTIVE)
&& cloudState.liveNodesContain(shard.getValue().get(
&& clusterState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))) {
// we are alive
return true;

@@ -214,7 +213,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}

if ((state.equals(ZkStateReader.ACTIVE))
&& cloudState.liveNodesContain(shard.getValue().get(
&& clusterState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))
&& !new ZkCoreNodeProps(shard.getValue()).getCoreUrl().equals(
new ZkCoreNodeProps(leaderProps).getCoreUrl())) {

@@ -226,8 +225,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}

private boolean anyoneElseActive() {
CloudState cloudState = zkController.getZkStateReader().getCloudState();
Map<String,Slice> slices = cloudState.getSlices(this.collection);
ClusterState clusterState = zkController.getZkStateReader().getClusterState();
Map<String,Slice> slices = clusterState.getSlices(this.collection);
Slice slice = slices.get(shardId);
Map<String,ZkNodeProps> shards = slice.getShards();

@@ -236,7 +235,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {

if ((state.equals(ZkStateReader.ACTIVE))
&& cloudState.liveNodesContain(shard.getValue().get(
&& clusterState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))) {
return true;
}

@@ -250,16 +249,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
final class OverseerElectionContext extends ElectionContext {

private final SolrZkClient zkClient;
private final ZkStateReader stateReader;
private ShardHandler shardHandler;
private String adminPath;
private Overseer overseer;

public OverseerElectionContext(ShardHandler shardHandler, String adminPath, final String zkNodeName, ZkStateReader stateReader) {
super(zkNodeName, "/overseer_elect", "/overseer_elect/leader", null, stateReader.getZkClient());
this.stateReader = stateReader;
this.shardHandler = shardHandler;
this.adminPath = adminPath;
this.zkClient = stateReader.getZkClient();

public OverseerElectionContext(SolrZkClient zkClient, Overseer overseer, final String zkNodeName) {
super(zkNodeName, "/overseer_elect", "/overseer_elect/leader", null, zkClient);
this.overseer = overseer;
this.zkClient = zkClient;
}

@Override

@@ -281,7 +277,7 @@ final class OverseerElectionContext extends ElectionContext {
CreateMode.EPHEMERAL, true);
}

new Overseer(shardHandler, adminPath, stateReader, id);
overseer.start(id);
}

}
@@ -24,7 +24,7 @@ import java.util.Map;
import java.util.Map.Entry;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;

@@ -47,7 +47,7 @@ public class Overseer {

private static Logger log = LoggerFactory.getLogger(Overseer.class);

private static class CloudStateUpdater implements Runnable {
private class ClusterStateUpdater implements Runnable {

private static final String DELETECORE = "deletecore";
private final ZkStateReader reader;

@@ -59,7 +59,7 @@ public class Overseer {
//If Overseer dies while extracting the main queue a new overseer will start from this queue
private final DistributedQueue workQueue;

public CloudStateUpdater(final ZkStateReader reader, final String myId) {
public ClusterStateUpdater(final ZkStateReader reader, final String myId) {
this.zkClient = reader.getZkClient();
this.stateUpdateQueue = getInQueue(zkClient);
this.workQueue = getInternalQueue(zkClient);

@@ -70,7 +70,7 @@ public class Overseer {
@Override
public void run() {

if(amILeader()) {
if(amILeader() && !Overseer.this.isClosed) {
// see if there's something left from the previous Overseer and re
// process all events that were not persisted into cloud state
synchronized (reader.getUpdateLock()) { //XXX this only protects against edits inside single node

@@ -78,17 +78,17 @@ public class Overseer {
byte[] head = workQueue.peek();

if (head != null) {
reader.updateCloudState(true);
CloudState cloudState = reader.getCloudState();
reader.updateClusterState(true);
ClusterState clusterState = reader.getClusterState();
log.info("Replaying operations from work queue.");

while (head != null && amILeader()) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message
.get(QUEUE_OPERATION);
cloudState = processMessage(cloudState, message, operation);
clusterState = processMessage(clusterState, message, operation);
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(cloudState), true);
ZkStateReader.toJSON(clusterState), true);
workQueue.remove();
head = workQueue.peek();
}

@@ -110,26 +110,26 @@ public class Overseer {
}

log.info("Starting to work on the main queue");
while (amILeader()) {
while (amILeader() && !isClosed) {
synchronized (reader.getUpdateLock()) {
try {
byte[] head = stateUpdateQueue.peek();

if (head != null) {
reader.updateCloudState(true);
CloudState cloudState = reader.getCloudState();
reader.updateClusterState(true);
ClusterState clusterState = reader.getClusterState();

while (head != null) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message.get(QUEUE_OPERATION);

cloudState = processMessage(cloudState, message, operation);
clusterState = processMessage(clusterState, message, operation);
byte[] processed = stateUpdateQueue.remove();
workQueue.offer(processed);
head = stateUpdateQueue.peek();
}
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(cloudState), true);
ZkStateReader.toJSON(clusterState), true);
}
// clean work queue
while (workQueue.poll() != null);

@@ -157,12 +157,12 @@ public class Overseer {
}
}

private CloudState processMessage(CloudState cloudState,
private ClusterState processMessage(ClusterState clusterState,
final ZkNodeProps message, final String operation) {
if ("state".equals(operation)) {
cloudState = updateState(cloudState, message);
clusterState = updateState(clusterState, message);
} else if (DELETECORE.equals(operation)) {
cloudState = removeCore(cloudState, message);
clusterState = removeCore(clusterState, message);
} else if (ZkStateReader.LEADER_PROP.equals(operation)) {
StringBuilder sb = new StringBuilder();
String baseUrl = message.get(ZkStateReader.BASE_URL_PROP);

@@ -172,14 +172,14 @@ public class Overseer {
sb.append(coreName == null ? "" : coreName);
if (!(sb.substring(sb.length() - 1).equals("/"))) sb
.append("/");
cloudState = setShardLeader(cloudState,
clusterState = setShardLeader(clusterState,
message.get(ZkStateReader.COLLECTION_PROP),
message.get(ZkStateReader.SHARD_ID_PROP), sb.toString());
} else {
throw new RuntimeException("unknown operation:" + operation
+ " contents:" + message.getProperties());
}
return cloudState;
return clusterState;
}

private boolean amILeader() {

@@ -199,7 +199,7 @@ public class Overseer {
/**
* Try to assign core to the cluster.
*/
private CloudState updateState(CloudState state, final ZkNodeProps message) {
private ClusterState updateState(ClusterState state, final ZkNodeProps message) {
final String collection = message.get(ZkStateReader.COLLECTION_PROP);
final String zkCoreNodeName = message.get(ZkStateReader.NODE_NAME_PROP) + "_" + message.get(ZkStateReader.CORE_NAME_PROP);
final Integer numShards = message.get(ZkStateReader.NUM_SHARDS_PROP)!=null?Integer.parseInt(message.get(ZkStateReader.NUM_SHARDS_PROP)):null;

@@ -214,7 +214,7 @@ public class Overseer {
String shardId = message.get(ZkStateReader.SHARD_ID_PROP);
if (shardId == null) {
String nodeName = message.get(ZkStateReader.NODE_NAME_PROP);
//get shardId from CloudState
//get shardId from ClusterState
shardId = getAssignedId(state, nodeName, message);
}
if(shardId == null) {

@@ -242,11 +242,11 @@ public class Overseer {
shardProps.put(zkCoreNodeName, zkProps);

slice = new Slice(shardId, shardProps);
CloudState newCloudState = updateSlice(state, collection, slice);
return newCloudState;
ClusterState newClusterState = updateSlice(state, collection, slice);
return newClusterState;
}

private CloudState createCollection(CloudState state, String collectionName, int numShards) {
private ClusterState createCollection(ClusterState state, String collectionName, int numShards) {
Map<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String, Slice>>();
Map<String, Slice> newSlices = new LinkedHashMap<String,Slice>();
newStates.putAll(state.getCollectionStates());

@@ -255,14 +255,14 @@ public class Overseer {
newSlices.put(sliceName, new Slice(sliceName, Collections.EMPTY_MAP));
}
newStates.put(collectionName, newSlices);
CloudState newCloudState = new CloudState(state.getLiveNodes(), newStates);
return newCloudState;
ClusterState newClusterState = new ClusterState(state.getLiveNodes(), newStates);
return newClusterState;
}

/*
* Return an already assigned id or null if not assigned
*/
private String getAssignedId(final CloudState state, final String nodeName,
private String getAssignedId(final ClusterState state, final String nodeName,
final ZkNodeProps coreState) {
final String key = coreState.get(ZkStateReader.NODE_NAME_PROP) + "_" + coreState.get(ZkStateReader.CORE_NAME_PROP);
Map<String, Slice> slices = state.getSlices(coreState.get(ZkStateReader.COLLECTION_PROP));

@@ -276,7 +276,7 @@ public class Overseer {
return null;
}

private CloudState updateSlice(CloudState state, String collection, Slice slice) {
private ClusterState updateSlice(ClusterState state, String collection, Slice slice) {

final Map<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String,Slice>>();
newStates.putAll(state.getCollectionStates());

@@ -306,10 +306,10 @@ public class Overseer {
final Slice updatedSlice = new Slice(slice.getName(), shards);
slices.put(slice.getName(), updatedSlice);
}
return new CloudState(state.getLiveNodes(), newStates);
return new ClusterState(state.getLiveNodes(), newStates);
}

private CloudState setShardLeader(CloudState state, String collection, String sliceName, String leaderUrl) {
private ClusterState setShardLeader(ClusterState state, String collection, String sliceName, String leaderUrl) {

final Map<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String,Slice>>();
newStates.putAll(state.getCollectionStates());

@@ -341,21 +341,21 @@ public class Overseer {
Slice slice = new Slice(sliceName, newShards);
slices.put(sliceName, slice);
}
return new CloudState(state.getLiveNodes(), newStates);
return new ClusterState(state.getLiveNodes(), newStates);
}

/*
* Remove core from cloudstate
*/
private CloudState removeCore(final CloudState cloudState, ZkNodeProps message) {
private ClusterState removeCore(final ClusterState clusterState, ZkNodeProps message) {

final String coreNodeName = message.get(ZkStateReader.NODE_NAME_PROP) + "_" + message.get(ZkStateReader.CORE_NAME_PROP);
final String collection = message.get(ZkStateReader.COLLECTION_PROP);

final LinkedHashMap<String, Map<String, Slice>> newStates = new LinkedHashMap<String,Map<String,Slice>>();
for(String collectionName: cloudState.getCollections()) {
for(String collectionName: clusterState.getCollections()) {
if(collection.equals(collectionName)) {
Map<String, Slice> slices = cloudState.getSlices(collection);
Map<String, Slice> slices = clusterState.getSlices(collection);
LinkedHashMap<String, Slice> newSlices = new LinkedHashMap<String, Slice>();
for(Slice slice: slices.values()) {
if(slice.getShards().containsKey(coreNodeName)) {

@@ -393,29 +393,53 @@ public class Overseer {
}
}
} else {
newStates.put(collectionName, cloudState.getSlices(collectionName));
newStates.put(collectionName, clusterState.getSlices(collectionName));
}
}
CloudState newState = new CloudState(cloudState.getLiveNodes(), newStates);
ClusterState newState = new ClusterState(clusterState.getLiveNodes(), newStates);
return newState;
}

}

private Thread ccThread;

private Thread updaterThread;

private volatile boolean isClosed;

private ZkStateReader reader;

private ShardHandler shardHandler;

private String adminPath;

public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader, final String id) throws KeeperException, InterruptedException {
public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader) throws KeeperException, InterruptedException {
this.reader = reader;
this.shardHandler = shardHandler;
this.adminPath = adminPath;
}

public void start(String id) {
log.info("Overseer (id=" + id + ") starting");
createOverseerNode(reader.getZkClient());
//launch cluster state updater thread
ThreadGroup tg = new ThreadGroup("Overseer state updater.");
Thread updaterThread = new Thread(tg, new CloudStateUpdater(reader, id));
updaterThread = new Thread(tg, new ClusterStateUpdater(reader, id));
updaterThread.setDaemon(true);
updaterThread.start();

ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
Thread ccThread = new Thread(ccTg, new OverseerCollectionProcessor(reader, id, shardHandler, adminPath));
ccThread = new Thread(ccTg, new OverseerCollectionProcessor(reader, id, shardHandler, adminPath),
"Overseer-" + id);
ccThread.setDaemon(true);

updaterThread.start();
ccThread.start();
}

public void close() {
isClosed = true;
}

/**
* Get queue that can be used to send messages to Overseer.
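The Overseer hunks above split construction from startup and add a volatile isClosed flag that the updater and collection-processor loops check, so close() can stop them cooperatively instead of relying on leadership loss alone. A minimal standalone sketch of that shutdown pattern, using plain threads and hypothetical names rather than the Solr classes:

public class CooperativeCloseSketch {
  private volatile boolean isClosed;   // written by close(), read by the worker loop
  private Thread updaterThread;

  public void start(final String id) {
    updaterThread = new Thread(new Runnable() {
      public void run() {
        while (!isClosed) {            // loop exits once close() flips the flag
          System.out.println("worker " + id + " processing queue...");
          try { Thread.sleep(100); } catch (InterruptedException e) { return; }
        }
      }
    }, "Overseer-" + id);
    updaterThread.setDaemon(true);
    updaterThread.start();
  }

  public void close() {
    isClosed = true;                   // signal the loop; no interrupt needed
  }

  public static void main(String[] args) throws InterruptedException {
    CooperativeCloseSketch sketch = new CooperativeCloseSketch();
    sketch.start("demo");
    Thread.sleep(300);
    sketch.close();
  }
}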
@@ -25,7 +25,7 @@ import java.util.Set;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;

@@ -64,6 +64,8 @@ public class OverseerCollectionProcessor implements Runnable {
private String adminPath;

private ZkStateReader zkStateReader;

private boolean isClosed;

public OverseerCollectionProcessor(ZkStateReader zkStateReader, String myId, ShardHandler shardHandler, String adminPath) {
this.zkStateReader = zkStateReader;

@@ -76,7 +78,7 @@ public class OverseerCollectionProcessor implements Runnable {
@Override
public void run() {
log.info("Process current queue of collection creations");
while (amILeader()) {
while (amILeader() && !isClosed) {
try {
byte[] head = workQueue.peek(true);

@@ -108,6 +110,10 @@ public class OverseerCollectionProcessor implements Runnable {
}
}

public void close() {
isClosed = true;
}

private boolean amILeader() {
try {
ZkNodeProps props = ZkNodeProps.load(zkStateReader.getZkClient().getData(

@@ -126,22 +132,22 @@ public class OverseerCollectionProcessor implements Runnable {

private boolean processMessage(ZkNodeProps message, String operation) {
if (CREATECOLLECTION.equals(operation)) {
return createCollection(zkStateReader.getCloudState(), message);
return createCollection(zkStateReader.getClusterState(), message);
} else if (DELETECOLLECTION.equals(operation)) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString());
params.set(CoreAdminParams.DELETE_INSTANCE_DIR, true);
return collectionCmd(zkStateReader.getCloudState(), message, params);
return collectionCmd(zkStateReader.getClusterState(), message, params);
} else if (RELOADCOLLECTION.equals(operation)) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.RELOAD.toString());
return collectionCmd(zkStateReader.getCloudState(), message, params);
return collectionCmd(zkStateReader.getClusterState(), message, params);
}
// unknown command, toss it from our queue
return true;
}

private boolean createCollection(CloudState cloudState, ZkNodeProps message) {
private boolean createCollection(ClusterState clusterState, ZkNodeProps message) {

// look at the replication factor and see if it matches reality
// if it does not, find best nodes to create more cores

@@ -176,7 +182,7 @@ public class OverseerCollectionProcessor implements Runnable {

// TODO: add smarter options that look at the current number of cores per node?
// for now we just go random
Set<String> nodes = cloudState.getLiveNodes();
Set<String> nodes = clusterState.getLiveNodes();
List<String> nodeList = new ArrayList<String>(nodes.size());
nodeList.addAll(nodes);
Collections.shuffle(nodeList);

@@ -229,11 +235,11 @@ public class OverseerCollectionProcessor implements Runnable {
return true;
}

private boolean collectionCmd(CloudState cloudState, ZkNodeProps message, ModifiableSolrParams params) {
private boolean collectionCmd(ClusterState clusterState, ZkNodeProps message, ModifiableSolrParams params) {
log.info("Executing Collection Cmd : " + params);
String name = message.get("name");

Map<String,Slice> slices = cloudState.getCollectionStates().get(name);
Map<String,Slice> slices = clusterState.getCollectionStates().get(name);

if (slices == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Could not find collection:" + name);

@@ -245,7 +251,7 @@ public class OverseerCollectionProcessor implements Runnable {
Set<Map.Entry<String,ZkNodeProps>> shardEntries = shards.entrySet();
for (Map.Entry<String,ZkNodeProps> shardEntry : shardEntries) {
final ZkNodeProps node = shardEntry.getValue();
if (cloudState.liveNodesContain(node.get(ZkStateReader.NODE_NAME_PROP))) {
if (clusterState.liveNodesContain(node.get(ZkStateReader.NODE_NAME_PROP))) {
params.set(CoreAdminParams.CORE, node.get(ZkStateReader.CORE_NAME_PROP));

String replica = node.get(ZkStateReader.BASE_URL_PROP);