SOLR-2452: merged with trunk up to r1131485

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1131486 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2011-06-04 20:08:08 +00:00
commit be0b3062a3
62 changed files with 3366 additions and 1140 deletions

128
dev-tools/scripts/poll-mirrors.pl Executable file
View File

@ -0,0 +1,128 @@
#!/usr/bin/perl
#
# poll-mirrors.pl
#
# This script polls the download sites after a release has been posted
# and prints a notice as each one starts serving it. The RM can use this
# script to delay the release announcement until the release can be
# downloaded.
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use strict;
use warnings;
use Getopt::Long;
use POSIX qw/strftime/;
use LWP::UserAgent;
# Command-line options:
#   --version  (required) release version to poll for; must contain a dotted
#              number such as 3.2.0
#   --interval (optional) seconds between polling rounds; default 300
my $version;
my $interval = 300;
my $result = GetOptions("version=s" => \$version, "interval=i" => \$interval);
my $usage = "$0 -v version [ -i interval (seconds; default: 300) ]";
unless ($result) {
    # Terminate the usage line so the shell prompt does not end up glued to it.
    print STDERR "$usage\n";
    exit(1);
}
unless (defined($version) && $version =~ /\d+(?:\.\d+)+/) {
    print STDERR "You must specify the release version.\n$usage\n";
    exit(1);
}
# Turn off buffering of STDOUT, so status is printed immediately.
# The inner select() makes STDOUT current and returns the previously selected
# handle; $| is then set for STDOUT, and the list slice hands the previous
# handle to the outer select(), which restores it.
select((select(STDOUT), $| = 1)[0]);
# Page listing the Apache mirrors together with their health status.
my $apache_mirrors_list_url = "http://www.apache.org/mirrors/";

# Path of the release signature, appended to each mirror's base URL.
my $apache_url_suffix = "lucene/java/$version/lucene-$version.zip.asc";

# Signature of the lucene-core POM on Maven Central.
my $maven_url = "http://repo1.maven.org/maven2/org/apache/lucene/lucene-core/$version/lucene-core-$version.pom.asc";

# Shared HTTP client; each request gives up after 2 seconds.
my $agent = LWP::UserAgent->new(timeout => 2);

# Polling state: has Maven Central served the release yet, and which mirror
# URLs are still to be checked.
my @apache_mirrors = ();
my $maven_available = 0;
# Fetch the Apache mirror status page and, for every mirror the page reports
# as "ok", record the URL at which this release's signature file is expected.
#
# The HTTP status is checked explicitly: a failed request still yields a
# response object whose content is usually an error page rather than undef,
# so testing the content alone would never detect the failure.
my $apache_mirrors_list_response = $agent->get($apache_mirrors_list_url);
my $apache_mirrors_list_page = $apache_mirrors_list_response->is_success
    ? $apache_mirrors_list_response->decoded_content
    : undef;
if (defined($apache_mirrors_list_page)) {
    #<TR>
    # <TD ALIGN=RIGHT><A HREF="http://apache.dattatec.com/">apache.dattatec.com</A>&nbsp;&nbsp;<A HREF="http://apache.dattatec.com/">@</A></TD>
    #
    # <TD>http</TD>
    # <TD ALIGN=RIGHT>8 hours<BR><IMG BORDER=1 SRC="icons/mms14.gif" ALT=""></TD>
    # <TD ALIGN=RIGHT>5 hours<BR><IMG BORDER=1 SRC="icons/mms14.gif" ALT=""></TD>
    # <TD>ok</TD>
    #</TR>
    while ($apache_mirrors_list_page =~ m~<TR>(.*?)</TR>~gis) {
        my $mirror_entry = $1;
        next unless ($mirror_entry =~ m~<TD>\s*ok\s*</TD>\s*$~i); # skip mirrors with problems
        if ($mirror_entry =~ m~<A\s+HREF\s*=\s*"([^"]+)"\s*>~i) {
            my $mirror_url = $1;
            # Mirror base URLs normally end in "/"; strip it so that the
            # joined URL does not contain an empty "//" path segment.
            $mirror_url =~ s{/+\z}{};
            push @apache_mirrors, "$mirror_url/$apache_url_suffix";
        }
    }
} else {
    print STDERR "Error fetching Apache mirrors list $apache_mirrors_list_url: ",
        $apache_mirrors_list_response->status_line, "\n";
    exit(1);
}
# Main polling loop: once per interval, probe Maven Central (until it has the
# release) and every mirror that has not served the release yet, print a
# progress summary, and stop once everything is downloadable.
#
# Counts are taken with scalar(@array).  $#array is the index of the LAST
# element (count - 1); using it as a count under-reports the mirrors, ends
# the loop while one mirror is still pending, and makes the percentage
# divide by zero when exactly one mirror is listed.
my $num_apache_mirrors = scalar @apache_mirrors;
my $sleep_interval = 0;
while (1) {
    print "\n", strftime('%d-%b-%Y %H:%M:%S', localtime);
    print "\nPolling ", scalar(@apache_mirrors), " Apache Mirrors";
    print " and Maven Central" unless ($maven_available);
    print "...\n";
    my $start = time();
    # Once Maven Central has answered 200 it is not polled again.
    $maven_available = (200 == $agent->get($maven_url)->code)
        unless ($maven_available);
    # check_mirrors() returns only the mirrors that are still NOT serving the
    # release, so the pending list shrinks from round to round.
    @apache_mirrors = check_mirrors();
    my $stop = time();
    # Subtract the time spent polling so that rounds start $interval apart.
    $sleep_interval = $interval - ($stop - $start);
    my $num_downloadable_apache_mirrors = $num_apache_mirrors - scalar(@apache_mirrors);
    # Guard the percentage against an empty mirror list.
    my $percent_downloadable = $num_apache_mirrors
        ? $num_downloadable_apache_mirrors * 100 / $num_apache_mirrors
        : 0;
    print "$version is ", ($maven_available ? "" : "not "),
        "downloadable from Maven Central.\n";
    # $version is passed as an argument rather than interpolated into the
    # format, so a stray '%' in it cannot be read as a conversion.
    printf "%s is downloadable from %d/%d Apache Mirrors (%0.1f%%)\n",
        $version, $num_downloadable_apache_mirrors, $num_apache_mirrors,
        $percent_downloadable;
    last if ($maven_available && !@apache_mirrors);
    if ($sleep_interval > 0) {
        print "Sleeping for $sleep_interval seconds...\n";
        sleep($sleep_interval)
    }
}
# Requests every URL currently held in @apache_mirrors, printing one "." per
# request as a progress indicator, and returns the URLs that did not answer
# with HTTP status 200 (i.e. the mirrors that still lack the release).
sub check_mirrors {
    my @still_pending;
    foreach my $url (@apache_mirrors) {
        my $status = $agent->get($url)->code;
        if ($status != 200) {
            push @still_pending, $url;
        }
        print ".";
    }
    print "\n";
    return @still_pending;
}

View File

@ -0,0 +1,407 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import hashlib
import httplib
import re
import urllib2
import urlparse
import sys
import HTMLParser
# This tool expects to find /lucene and /solr off the base URL. You
# must have a working gpg, tar, unzip in your path. This has only
# been tested on Linux so far!
# http://s.apache.org/lusolr32rc2
# TODO
# + verify KEYS contains key that signed the release
# + make sure changes HTML looks ok
# - verify license/notice of all dep jars
# - check maven
# - check JAR manifest version
# - check license/notice exist
# - check no "extra" files
# - make sure jars exist inside bin release
# - run "ant test"
# - make sure docs exist
# - use java5 for lucene/modules
# Matches anchors written exactly as <a href="URL">TEXT</a> and captures the
# URL and the link text; both groups are non-greedy so that several anchors on
# one line are matched separately.
reHREF = re.compile('<a href="(.*?)">(.*?)</a>')
# Set to True to avoid re-downloading the packages...
DEBUG = False
def getHREFs(urlString):
    """Return a list of (link text, absolute URL) pairs for a page's anchors.

    Redirects are followed by hand first, so that relative hrefs on the page
    are resolved against the final location rather than against the URL that
    was originally requested.

    Raises RuntimeError when the redirect chain does not end within
    maxRedirects hops (for example a redirect cycle).
    """
    maxRedirects = 20

    # Deref any redirects
    for _ in range(maxRedirects):
        url = urlparse.urlparse(urlString)
        if url.scheme == 'https':
            h = httplib.HTTPSConnection(url.netloc)
        else:
            h = httplib.HTTPConnection(url.netloc)
        try:
            # An empty path is not a valid request target, and the query
            # string is part of the resource being requested.
            target = url.path or '/'
            if url.query:
                target += '?' + url.query
            h.request('GET', target)
            newLoc = h.getresponse().getheader('location')
        finally:
            h.close()
        if newLoc is None:
            break
        # The Location header may be relative; resolve it against the URL
        # that produced it.
        urlString = urlparse.urljoin(urlString, newLoc)
    else:
        raise RuntimeError('too many redirects while fetching %s' % urlString)

    page = urllib2.urlopen(urlString)
    try:
        html = page.read()
    finally:
        page.close()

    links = []
    for subUrl, text in reHREF.findall(html):
        fullURL = urlparse.urljoin(urlString, subUrl)
        links.append((text, fullURL))
    return links
def download(name, urlString, tmpDir):
    """Save the content of urlString as tmpDir/name.

    With DEBUG set, a file that already exists is left alone.  A partially
    written file is removed when the transfer fails.  The resulting size is
    printed unless the file name contains '.asc'.
    """
    fileName = '%s/%s' % (tmpDir, name)
    reportSize = fileName.find('.asc') == -1

    if DEBUG and os.path.exists(fileName):
        if reportSize:
            print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
        return

    fIn = urllib2.urlopen(urlString)
    fOut = open(fileName, 'wb')
    success = False
    try:
        chunk = fIn.read(65536)
        while chunk != '':
            fOut.write(chunk)
            chunk = fIn.read(65536)
        # Close here as well, so that a failure while flushing still counts
        # as an unsuccessful download.
        fOut.close()
        fIn.close()
        success = True
    finally:
        fIn.close()
        fOut.close()
        if not success:
            os.remove(fileName)

    if reportSize:
        print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
def load(urlString):
    """Fetch urlString and return the complete response body.

    The response object is closed before returning, also when reading fails,
    so the underlying connection is not left open.
    """
    response = urllib2.urlopen(urlString)
    try:
        return response.read()
    finally:
        response.close()
# Checks the release directory listing of one project ('lucene' or 'solr'):
#   1. walks the listing, grouping each artifact with the '<artifact>.<ext>'
#      entries that follow it, and requires the extensions to be exactly
#      asc, md5, sha1 in that order;
#   2. requires the artifact list to equal the expected src.tgz/tgz/zip names;
#   3. requires KEYS and maven/ entries (and, for lucene, a changes entry);
#   4. imports KEYS into a fresh gpg home under tmpDir;
#   5. downloads every artifact, verifies its digests and its gpg signature.
# Raises RuntimeError on the first violation found.
def checkSigs(project, urlString, version, tmpDir):
print ' test basics...'
ents = getDirEntries(urlString)
artifact = None
keysURL = None
changesURL = None
mavenURL = None
expectedSigs = ['asc', 'md5', 'sha1']
artifacts = []
# NOTE(review): the grouping below relies on the listing order - each
# artifact must be directly followed by its signature/digest entries.
for text, subURL in ents:
if text == 'KEYS':
keysURL = subURL
elif text == 'maven/':
mavenURL = subURL
elif text.startswith('changes'):
if text not in ('changes/', 'changes-%s/' % version):
raise RuntimeError('%s: found %s vs expected changes-%s/' % (project, text, version))
changesURL = subURL
elif artifact == None:
# First entry that is not KEYS/maven/changes: it starts the first artifact.
artifact = text
artifactURL = subURL
if project == 'solr':
expected = 'apache-solr-%s' % version
else:
expected = 'lucene-%s' % version
if not artifact.startswith(expected):
raise RuntimeError('%s: unknown artifact %s: expected prefix %s' % (project, text, expected))
sigs = []
elif text.startswith(artifact + '.'):
# '<artifact>.<ext>' entry: remember only the extension.
sigs.append(text[len(artifact)+1:])
else:
# A new artifact begins: the previous one must be complete by now.
# The list comparison is order-sensitive.
if sigs != expectedSigs:
raise RuntimeError('%s: artifact %s has wrong sigs: expected %s but got %s' % (project, artifact, expectedSigs, sigs))
artifacts.append((artifact, artifactURL))
artifact = text
artifactURL = subURL
sigs = []
# Flush the last artifact. NOTE(review): 'sigs' is only bound once an
# artifact has been seen, so a listing without any artifact entry would
# fail here with a NameError - confirm whether that can happen.
if sigs != []:
artifacts.append((artifact, artifactURL))
if sigs != expectedSigs:
raise RuntimeError('%s: artifact %s has wrong sigs: expected %s but got %s' % (project, artifact, expectedSigs, sigs))
if project == 'lucene':
expected = ['lucene-%s-src.tgz' % version,
'lucene-%s.tgz' % version,
'lucene-%s.zip' % version]
else:
expected = ['apache-solr-%s-src.tgz' % version,
'apache-solr-%s.tgz' % version,
'apache-solr-%s.zip' % version]
actual = [x[0] for x in artifacts]
# Order-sensitive comparison: the artifacts must be listed in this order.
if expected != actual:
raise RuntimeError('%s: wrong artifacts: expected %s but got %s' % (project, expected, actual))
if keysURL is None:
raise RuntimeError('%s is missing KEYS' % project)
download('%s.KEYS' % project, keysURL, tmpDir)
keysFile = '%s/%s.KEYS' % (tmpDir, project)
# Set up clean gpg world; import keys file:
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
os.makedirs(gpgHomeDir, 0700)
# NOTE(review): the log file argument already ends in ' 2>&1' and run()
# appends its own redirection, so the redirection appears twice in the
# shell command and in any error message.
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
if mavenURL is None:
raise RuntimeError('%s is missing maven' % project)
# The changes HTML is only required and checked for lucene.
if project == 'lucene':
if changesURL is None:
raise RuntimeError('%s is missing changes-%s' % (project, version))
testChanges(project, version, changesURL)
for artifact, urlString in artifacts:
print ' download %s...' % artifact
download(artifact, urlString, tmpDir)
verifyDigests(artifact, urlString, tmpDir)
print ' verify sig'
# Test sig
download(artifact + '.asc', urlString + '.asc', tmpDir)
sigFile = '%s/%s.asc' % (tmpDir, artifact)
artifactFile = '%s/%s' % (tmpDir, artifact)
logFile = '%s/%s.%s.gpg.verify.log' % (tmpDir, project, artifact)
# run() raises when gpg exits non-zero, i.e. when verification fails.
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings:
f = open(logFile, 'rb')
for line in f.readlines():
if line.lower().find('warning') != -1:
print ' GPG: %s' % line.strip()
f.close()
def testChanges(project, version, changesURLString):
    """Check the changes directory of a release.

    The listing at changesURLString must link to both Changes.html and
    Contrib-Changes.html, and Changes.html must contain the text
    'Release <version>'.  Raises RuntimeError otherwise.
    """
    print(' check changes HTML...')

    wanted = ('Changes.html', 'Contrib-Changes.html')
    found = {}
    for text, subURL in getDirEntries(changesURLString):
        if text in wanted:
            found[text] = subURL

    changesURL = found.get('Changes.html')
    if changesURL is None:
        raise RuntimeError('did not see Changes.html link from %s' % changesURLString)

    contribChangesURL = found.get('Contrib-Changes.html')
    if contribChangesURL is None:
        raise RuntimeError('did not see Contrib-Changes.html link from %s' % changesURLString)

    s = load(changesURL)
    if ('Release %s' % version) not in s:
        raise RuntimeError('did not see "Release %s" in %s' % (version, changesURL))
def run(command, logFile):
    """Run command through the shell, sending stdout and stderr to logFile.

    Raises RuntimeError when the command reports a non-zero status.
    """
    shellLine = '%s > %s 2>&1' % (command, logFile)
    status = os.system(shellLine)
    if status != 0:
        raise RuntimeError('command "%s" failed; see log file %s' % (command, logFile))
def verifyDigests(artifact, urlString, tmpDir):
    """Compare the downloaded artifact with its published MD5 and SHA1 digests.

    urlString + '.md5' and urlString + '.sha1' are expected to hold two
    whitespace-separated fields: the hex digest and '*<artifact>'.  The file
    tmpDir/artifact is hashed locally and compared with both digests.

    Raises RuntimeError when a digest file names a different artifact or when
    a digest does not match.
    """
    print(' verify md5/sha1 digests')
    md5Expected, t = load(urlString + '.md5').strip().split()
    if t != '*'+artifact:
        raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))

    sha1Expected, t = load(urlString + '.sha1').strip().split()
    if t != '*'+artifact:
        raise RuntimeError('SHA1 %s.sha1 lists artifact %s but expected *%s' % (urlString, t, artifact))

    m = hashlib.md5()
    s = hashlib.sha1()
    # The artifacts are binary archives: read them in binary mode so that no
    # newline translation can alter the bytes being hashed.
    f = open('%s/%s' % (tmpDir, artifact), 'rb')
    try:
        while True:
            x = f.read(65536)
            if not x:
                break
            m.update(x)
            s.update(x)
    finally:
        # Release the handle even when reading or hashing fails.
        f.close()

    md5Actual = m.hexdigest()
    sha1Actual = s.hexdigest()
    if md5Actual != md5Expected:
        raise RuntimeError('MD5 digest mismatch for %s: expected %s but got %s' % (artifact, md5Expected, md5Actual))
    if sha1Actual != sha1Expected:
        raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
def getDirEntries(urlString):
    """Return the (text, URL) links that follow 'Parent Directory' on a page.

    Everything up to and including the 'Parent Directory' link is dropped, so
    only the links listed after it are returned.

    Raises RuntimeError when the page has no 'Parent Directory' link.  Without
    this check the function would fall off the end and return None, and
    callers would fail later with an unhelpful "NoneType is not iterable".
    """
    links = getHREFs(urlString)
    for i, (text, subURL) in enumerate(links):
        if text == 'Parent Directory':
            return links[(i+1):]
    raise RuntimeError('did not see "Parent Directory" link in %s; is it a directory listing?' % urlString)
def unpack(project, tmpDir, artifact, version):
    """Extract tmpDir/artifact into a fresh tmpDir/unpack and verify it.

    The archive must unpack to exactly one top-level entry, named
    'apache-solr-<version>' for solr and '<project>-<version>' otherwise;
    that directory is then handed to verifyUnpacked.  Changes the current
    working directory to the unpack directory.
    """
    destDir = '%s/unpack' % tmpDir
    if os.path.exists(destDir):
        shutil.rmtree(destDir)
    os.makedirs(destDir)
    os.chdir(destDir)

    print(' unpack %s...' % artifact)
    unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
    isTarball = artifact.endswith('.tar.gz') or artifact.endswith('.tgz')
    if isTarball:
        run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
    elif artifact.endswith('.zip'):
        run('unzip %s/%s' % (tmpDir, artifact), unpackLogFile)

    # make sure it unpacks to proper subdir
    if project != 'solr':
        expected = '%s-%s' % (project, version)
    else:
        expected = 'apache-%s-%s' % (project, version)
    entries = os.listdir(destDir)
    if entries != [expected]:
        raise RuntimeError('unpack produced entries %s; expected only %s' % (entries, expected))

    unpackPath = '%s/%s' % (destDir, expected)
    verifyUnpacked(project, artifact, unpackPath, version)
# Checks the content of an unpacked release directory (unpackPath) and then
# runs the tests/demo for it:
#   - the expected *.txt files must exist at the top level;
#   - for binary lucene artifacts the core and test-framework JARs (plus their
#     javadoc JARs) must exist;
#   - for lucene the expected directories/build files must exist and nothing
#     else may be left over;
#   - source artifacts are built and tested with ant, lucene artifacts also
#     run the demo.
# Each entry that was expected and found is removed from the listing 'l', so
# whatever remains at the end is unexpected.  Raises RuntimeError on the first
# problem.  Changes the current working directory to unpackPath.
def verifyUnpacked(project, artifact, unpackPath, version):
os.chdir(unpackPath)
# An artifact counts as a source release when its name contains '-src'.
isSrc = artifact.find('-src') != -1
l = os.listdir(unpackPath)
textFiles = ['LICENSE', 'NOTICE', 'README']
if project == 'lucene':
textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES'))
if isSrc:
textFiles.append('BUILD')
for fileName in textFiles:
fileName += '.txt'
if fileName not in l:
raise RuntimeError('file "%s" is missing from artifact %s' % (fileName, artifact))
l.remove(fileName)
# JARs are only expected in binary artifacts; none are required for solr.
if not isSrc:
if project == 'lucene':
expectedJARs = ('lucene-core-%s' % version,
'lucene-core-%s-javadoc' % version,
'lucene-test-framework-%s' % version,
'lucene-test-framework-%s-javadoc' % version)
else:
expectedJARs = ()
for fileName in expectedJARs:
fileName += '.jar'
if fileName not in l:
raise RuntimeError('%s: file "%s" is missing from artifact %s' % (project, fileName, artifact))
l.remove(fileName)
# NOTE(review): with indentation lost, the 'else' below is read as pairing
# with 'if project == ...' (extras is needed for every project) - confirm.
if project == 'lucene':
extras = ('lib', 'docs', 'contrib')
if isSrc:
extras += ('build.xml', 'index.html', 'common-build.xml', 'src', 'backwards')
else:
extras = ()
for e in extras:
if e not in l:
raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
l.remove(e)
# Leftover entries are only treated as an error for lucene.
if project == 'lucene':
if len(l) > 0:
raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
# NOTE(review): the JAVA_HOME locations below are hard-coded paths; they
# must exist on the machine running this check.
if isSrc:
if project == 'lucene':
print ' run tests w/ Java 5...'
run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; ant test', '%s/test.log' % unpackPath)
run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; ant jar', '%s/compile.log' % unpackPath)
testDemo(isSrc)
else:
print ' run tests w/ Java 6...'
run('export JAVA_HOME=/usr/local/src/jdk1.6.0_21; ant test', '%s/test.log' % unpackPath)
else:
if project == 'lucene':
testDemo(isSrc)
def testDemo(isSrc):
    """Run the Lucene demo indexer and searcher in the current directory.

    IndexFiles indexes 'src' (source release) or 'docs' (binary release) into
    ./index, then SearchFiles queries it for "lucene".  Raises RuntimeError
    when search.log reports no hit count or fewer than 100 hits.
    """
    print(' test demo...')
    if not isSrc:
        cp = 'lucene-core-3.2.0.jar:contrib/demo/lucene-demo-3.2.0.jar'
        docsDir = 'docs'
    else:
        cp = 'build/lucene-core-3.2-SNAPSHOT.jar:build/contrib/demo/lucene-demo-3.2-SNAPSHOT.jar'
        docsDir = 'src'

    run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; java -cp %s org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir), 'index.log')
    run('export JAVA_HOME=/usr/local/src/jdk1.5.0_22; java -cp %s org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp, 'search.log')

    reMatchingDocs = re.compile('(\d+) total matching documents')
    searchOutput = open('search.log', 'rb').read()
    m = reMatchingDocs.search(searchOutput)
    if m is None:
        raise RuntimeError('lucene demo\'s SearchFiles found no results')

    numHits = int(m.group(1))
    if numHits < 100:
        raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
    print(' got %d hits for query "lucene"' % numHits)
def main():
    """Command-line entry point: <BaseURL> <version> <tmpDir>.

    Locates the lucene and solr sub-directories below BaseURL, then checks the
    signatures of each project and unpacks and verifies its binary artifacts
    followed by its source artifact.
    """
    if len(sys.argv) != 4:
        print('')
        print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
        print('')
        sys.exit(1)

    baseURL, version, tmpDirArg = sys.argv[1:4]
    tmpDir = os.path.abspath(tmpDirArg)

    # Outside DEBUG mode the run must start from a directory it creates itself.
    if not DEBUG:
        if os.path.exists(tmpDir):
            raise RuntimeError('temp dir %s exists; please remove first' % tmpDir)
        os.makedirs(tmpDir)

    lucenePath = None
    solrPath = None
    print('Load release URL...')
    for text, subURL in getDirEntries(baseURL):
        lowered = text.lower()
        if 'lucene' in lowered:
            lucenePath = subURL
        elif 'solr' in lowered:
            solrPath = subURL

    if lucenePath is None:
        raise RuntimeError('could not find lucene subdir')
    if solrPath is None:
        raise RuntimeError('could not find solr subdir')

    print('')
    print('Test Lucene...')
    checkSigs('lucene', lucenePath, version, tmpDir)
    for suffix in ('tgz', 'zip'):
        unpack('lucene', tmpDir, 'lucene-%s.%s' % (version, suffix), version)
    unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)

    print('')
    print('Test Solr...')
    checkSigs('solr', solrPath, version, tmpDir)
    for suffix in ('tgz', 'zip'):
        unpack('solr', tmpDir, 'apache-solr-%s.%s' % (version, suffix), version)
    unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)


if __name__ == '__main__':
    main()

View File

@ -433,6 +433,10 @@ Bug fixes
with more document deletions is requested before a reader with fewer with more document deletions is requested before a reader with fewer
deletions, provided they share some segments. (yonik) deletions, provided they share some segments. (yonik)
* LUCENE-2645: Fix false assertion error when same token was added one
after another with 0 posIncr. (Kurosaka Teruhiko via Mike
McCandless)
======================= Lucene 3.x (not yet released) ================ ======================= Lucene 3.x (not yet released) ================
Changes in backwards compatibility policy Changes in backwards compatibility policy
@ -458,6 +462,9 @@ Bug fixes
including locks, and fails if the test fails to release all of them. including locks, and fails if the test fails to release all of them.
(Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer) (Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
* LUCENE-3102: CachingCollector.replay was failing to call setScorer
per-segment (Martijn van Groningen via Mike McCandless)
New Features New Features
* LUCENE-3140: Added experimental FST implementation to Lucene. * LUCENE-3140: Added experimental FST implementation to Lucene.

View File

@ -75,6 +75,10 @@ API Changes
* LUCENE-3141: add getter method to access fragInfos in FieldFragList. * LUCENE-3141: add getter method to access fragInfos in FieldFragList.
(Sujit Pal via Koji Sekiguchi) (Sujit Pal via Koji Sekiguchi)
* LUCENE-3099: Allow subclasses to determine the group value for
First/SecondPassGroupingCollector. (Martijn van Groningen, Mike
McCandless)
Build Build
* LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8. * LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.

View File

@ -181,9 +181,9 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
// term text into textStart address // term text into textStart address
// Get the text & hash of this term. // Get the text & hash of this term.
int termID; int termID;
try{ try {
termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef()); termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
}catch (MaxBytesLengthExceededException e) { } catch (MaxBytesLengthExceededException e) {
// Not enough room in current block // Not enough room in current block
// Just skip this term, to remain as robust as // Just skip this term, to remain as robust as
// possible during indexing. A TokenFilter // possible during indexing. A TokenFilter

View File

@ -230,7 +230,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
assert !omitTF; assert !omitTF;
final int delta = position - lastPosition; final int delta = position - lastPosition;
assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it) assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position; lastPosition = position;
if (storePayloads) { if (storePayloads) {

View File

@ -194,7 +194,7 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
final int delta = position - lastPosition; final int delta = position - lastPosition;
assert delta > 0 || position == 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it) assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition;
lastPosition = position; lastPosition = position;

View File

@ -168,10 +168,10 @@ public abstract class CachingCollector extends Collector {
int curUpto = 0; int curUpto = 0;
int curBase = 0; int curBase = 0;
int chunkUpto = 0; int chunkUpto = 0;
other.setScorer(cachedScorer);
curDocs = EMPTY_INT_ARRAY; curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) { for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext); other.setNextReader(seg.readerContext);
other.setScorer(cachedScorer);
while (curBase + curUpto < seg.end) { while (curBase + curUpto < seg.end) {
if (curUpto == curDocs.length) { if (curUpto == curDocs.length) {
curBase += curDocs.length; curBase += curDocs.length;

View File

@ -21,6 +21,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import java.io.IOException; import java.io.IOException;
@ -69,6 +70,42 @@ public class Builder<T> {
// current "frontier" // current "frontier"
private UnCompiledNode<T>[] frontier; private UnCompiledNode<T>[] frontier;
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, Outputs)} with
* pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs)
{
this(inputType, 0, 0, true, outputs);
}
/**
* Instantiates an FST/FSA builder with all the possible tuning and construction
* tweaks. Read parameter documentation carefully.
*
* @param inputType
* The input type (transition labels). Can be anything from {@link INPUT_TYPE}
* enumeration. Shorter types will consume less memory. Strings (character sequences) are
* represented as {@link INPUT_TYPE#BYTE4} (full unicode codepoints).
*
* @param minSuffixCount1
* If pruning the input graph during construction, this threshold is used for telling
* if a node is kept or pruned. If transition_count(node) &gt;= minSuffixCount1, the node
* is kept.
*
* @param minSuffixCount2
* (Note: only Mike McCandless knows what this one is really doing...)
*
* @param doMinSuffix
* If <code>true</code>, the shared suffixes will be compacted into unique paths.
* This requires an additional hash map for lookups in memory. Setting this parameter to
* <code>false</code> creates a single path for all input sequences. This will result in a larger
* graph, but may require less memory and will speed up construction.
* @param outputs The output type for each input sequence. Applies only if building an FST. For
* FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
* singleton output object.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doMinSuffix, Outputs<T> outputs) { public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doMinSuffix, Outputs<T> outputs) {
this.minSuffixCount1 = minSuffixCount1; this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2; this.minSuffixCount2 = minSuffixCount2;

View File

@ -147,7 +147,7 @@ public class FST<T> {
return flag(BIT_LAST_ARC); return flag(BIT_LAST_ARC);
} }
boolean isFinal() { public boolean isFinal() {
return flag(BIT_FINAL_ARC); return flag(BIT_FINAL_ARC);
} }
}; };

View File

@ -28,7 +28,7 @@ import org.apache.lucene.store.DataOutput;
public final class NoOutputs extends Outputs<Object> { public final class NoOutputs extends Outputs<Object> {
final Object NO_OUTPUT = new Object() { static final Object NO_OUTPUT = new Object() {
// NodeHash calls hashCode for this output; we fix this // NodeHash calls hashCode for this output; we fix this
// so we get deterministic hashing. // so we get deterministic hashing.
@Override @Override

View File

@ -60,6 +60,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory; import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity; import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
import org.junit.*; import org.junit.*;
import org.junit.rules.TestWatchman; import org.junit.rules.TestWatchman;
@ -160,6 +161,8 @@ public abstract class LuceneTestCase extends Assert {
public static final String TEST_LINE_DOCS_FILE = System.getProperty("tests.linedocsfile", "europarl.lines.txt.gz"); public static final String TEST_LINE_DOCS_FILE = System.getProperty("tests.linedocsfile", "europarl.lines.txt.gz");
/** whether or not to clean threads between test invocations: "false", "perMethod", "perClass" */ /** whether or not to clean threads between test invocations: "false", "perMethod", "perClass" */
public static final String TEST_CLEAN_THREADS = System.getProperty("tests.cleanthreads", "perClass"); public static final String TEST_CLEAN_THREADS = System.getProperty("tests.cleanthreads", "perClass");
/** throttling mode set on the MockDirectoryWrapper instances created by this class: Throttling.SOMETIMES for nightly runs, Throttling.NEVER otherwise */
public static final Throttling TEST_THROTTLING = TEST_NIGHTLY ? Throttling.SOMETIMES : Throttling.NEVER;
private static final Pattern codecWithParam = Pattern.compile("(.*)\\(\\s*(\\d+)\\s*\\)"); private static final Pattern codecWithParam = Pattern.compile("(.*)\\(\\s*(\\d+)\\s*\\)");
@ -938,8 +941,9 @@ public abstract class LuceneTestCase extends Assert {
Directory impl = newDirectoryImpl(r, TEST_DIRECTORY); Directory impl = newDirectoryImpl(r, TEST_DIRECTORY);
MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl); MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl);
stores.put(dir, Thread.currentThread().getStackTrace()); stores.put(dir, Thread.currentThread().getStackTrace());
dir.setThrottling(TEST_THROTTLING);
return dir; return dir;
} }
/** /**
* Returns a new Directory instance, with contents copied from the * Returns a new Directory instance, with contents copied from the
@ -985,6 +989,7 @@ public abstract class LuceneTestCase extends Assert {
dir.setLockFactory(lf); dir.setLockFactory(lf);
} }
stores.put(dir, Thread.currentThread().getStackTrace()); stores.put(dir, Thread.currentThread().getStackTrace());
dir.setThrottling(TEST_THROTTLING);
return dir; return dir;
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
@ -1003,6 +1008,7 @@ public abstract class LuceneTestCase extends Assert {
} }
MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl); MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl);
stores.put(dir, Thread.currentThread().getStackTrace()); stores.put(dir, Thread.currentThread().getStackTrace());
dir.setThrottling(TEST_THROTTLING);
return dir; return dir;
} }

View File

@ -0,0 +1,82 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestSameTokenSamePosition extends LuceneTestCase {

  /**
   * Indexes one document through {@link BugReproAnalyzer}, whose token stream
   * emits each term twice, the second time with a position increment of 0.
   * Attempts to reproduce an assertion error that happened only with the
   * trunk version around April 2011; the test passes when indexing completes
   * without an error.
   */
  public void test() throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new BugReproAnalyzer();
    RandomIndexWriter writer =
        new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    Field field = new Field("eng", "Six drunken" /*This shouldn't matter. */,
                            Field.Store.YES, Field.Index.ANALYZED);
    Document document = new Document();
    document.add(field);

    writer.addDocument(document);
    writer.close();
    directory.close();
  }
}
/**
 * Analyzer that ignores both the field name and the reader and always
 * returns a new {@link BugReproAnalyzerTokenizer}.
 */
final class BugReproAnalyzer extends Analyzer {

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream stream = new BugReproAnalyzerTokenizer();
    return stream;
  }
}
/**
 * Token stream that replays a fixed sequence of four tokens: "six" and
 * "drunken", each emitted twice, where the second occurrence has a position
 * increment of 0 and the same offsets as the first.
 */
final class BugReproAnalyzerTokenizer extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

  // Number of tokens to emit and index of the next one.
  int tokenCount = 4;
  int nextTokenIndex = 0;

  // Parallel arrays: term text, start offset, end offset and position
  // increment of each token.
  String[] terms = {"six", "six", "drunken", "drunken"};
  int[] starts = {0, 0, 4, 4};
  int[] ends = {3, 3, 11, 11};
  int[] incs = {1, 0, 1, 0};

  @Override
  public boolean incrementToken() throws IOException {
    if (nextTokenIndex >= tokenCount) {
      return false;
    }
    final int current = nextTokenIndex;
    termAtt.setEmpty().append(terms[current]);
    offsetAtt.setOffset(starts[current], ends[current]);
    posIncAtt.setPositionIncrement(incs[current]);
    nextTokenIndex = current + 1;
    return true;
  }
}

View File

@ -0,0 +1,67 @@
package org.apache.lucene.search.grouping;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
/**
* A collector that collects all groups that match the
* query. Only the group value is collected, and the order
* is undefined. This collector does not determine
* the most relevant document of a group.
*
* <p/>
* This is an abstract version. Concrete implementations define
* what a group actually is and how it is internally collected.
*
* @lucene.experimental
*/
public abstract class AbstractAllGroupsCollector<GROUP_VALUE_TYPE> extends Collector {

  /**
   * Reports how many groups were collected for the executed search.
   * Equivalent to calling {@code getGroups().size()}.
   *
   * @return the number of collected groups
   */
  public int getGroupCount() {
    final Collection<GROUP_VALUE_TYPE> collectedGroups = getGroups();
    return collectedGroups.size();
  }

  /**
   * Returns the collected group values.
   * <p/>
   * The collection carries no ordering guarantee. It holds one value of type
   * {@code GROUP_VALUE_TYPE} for every group that matched the query.
   *
   * @return the group values
   */
  public abstract Collection<GROUP_VALUE_TYPE> getGroups();

  /**
   * This base implementation always accepts documents out of order.
   *
   * @return always {@code true}
   */
  public boolean acceptsDocsOutOfOrder() {
    return true;
  }

  // Intentionally a no-op: this base class makes no use of the scorer.
  public void setScorer(Scorer scorer) throws IOException {
  }
}

View File

@ -17,56 +17,39 @@ package org.apache.lucene.search.grouping;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector; import org.apache.lucene.search.*;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator; import java.io.IOException;
import org.apache.lucene.search.Scorer; import java.util.*;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
/** FirstPassGroupingCollector is the first of two passes necessary /** FirstPassGroupingCollector is the first of two passes necessary
* to collect grouped hits. This pass gathers the top N sorted * to collect grouped hits. This pass gathers the top N sorted
* groups. * groups. Concrete subclasses define what a group is and how it
* is internally collected.
* *
* <p>See {@link org.apache.lucene.search.grouping} for more * <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p> * details including a full code example.</p>
* *
* @lucene.experimental * @lucene.experimental
*/ */
abstract public class AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {
public class FirstPassGroupingCollector extends Collector {
private final String groupField;
private final Sort groupSort; private final Sort groupSort;
private final FieldComparator[] comparators; private final FieldComparator[] comparators;
private final int[] reversed; private final int[] reversed;
private final int topNGroups; private final int topNGroups;
private final HashMap<BytesRef, CollectedSearchGroup> groupMap; private final HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>> groupMap;
private final BytesRef scratchBytesRef = new BytesRef();
private final int compIDXEnd; private final int compIDXEnd;
// Set once we reach topNGroups unique groups: // Set once we reach topNGroups unique groups:
private TreeSet<CollectedSearchGroup> orderedGroups; private TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>> orderedGroups;
private int docBase; private int docBase;
private int spareSlot; private int spareSlot;
private FieldCache.DocTermsIndex index;
/** /**
* Create the first pass collector. * Create the first pass collector.
* *
* @param groupField The field used to group
* documents. This field must be single-valued and
* indexed (FieldCache is used to access its value
* per-document).
* @param groupSort The {@link Sort} used to sort the * @param groupSort The {@link Sort} used to sort the
* groups. The top sorted document within each group * groups. The top sorted document within each group
* according to groupSort, determines how that group * according to groupSort, determines how that group
@ -74,13 +57,13 @@ public class FirstPassGroupingCollector extends Collector {
* ie, if you want to groupSort by relevance use * ie, if you want to groupSort by relevance use
* Sort.RELEVANCE. * Sort.RELEVANCE.
* @param topNGroups How many top groups to keep. * @param topNGroups How many top groups to keep.
* @throws IOException If I/O related errors occur
*/ */
public FirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException { public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException {
if (topNGroups < 1) { if (topNGroups < 1) {
throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")"); throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
} }
this.groupField = groupField;
// TODO: allow null groupSort to mean "by relevance", // TODO: allow null groupSort to mean "by relevance",
// and specialize it? // and specialize it?
this.groupSort = groupSort; this.groupSort = groupSort;
@ -100,13 +83,19 @@ public class FirstPassGroupingCollector extends Collector {
} }
spareSlot = topNGroups; spareSlot = topNGroups;
groupMap = new HashMap<BytesRef, CollectedSearchGroup>(topNGroups); groupMap = new HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>>(topNGroups);
} }
/** Returns top groups, starting from offset. This may /**
* return null, if no groups were collected, or if the * Returns top groups, starting from offset. This may
* number of unique groups collected is <= offset. */ * return null, if no groups were collected, or if the
public Collection<SearchGroup> getTopGroups(int groupOffset, boolean fillFields) { * number of unique groups collected is <= offset.
*
* @param groupOffset The offset in the collected groups
* @param fillFields Whether to fill to {@link SearchGroup#sortValues}
* @return top groups, starting from offset
*/
public Collection<SearchGroup<GROUP_VALUE_TYPE>> getTopGroups(int groupOffset, boolean fillFields) {
//System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size()); //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
@ -122,15 +111,15 @@ public class FirstPassGroupingCollector extends Collector {
buildSortedSet(); buildSortedSet();
} }
final Collection<SearchGroup> result = new ArrayList<SearchGroup>(); final Collection<SearchGroup<GROUP_VALUE_TYPE>> result = new ArrayList<SearchGroup<GROUP_VALUE_TYPE>>();
int upto = 0; int upto = 0;
final int sortFieldCount = groupSort.getSort().length; final int sortFieldCount = groupSort.getSort().length;
for(CollectedSearchGroup group : orderedGroups) { for(CollectedSearchGroup<GROUP_VALUE_TYPE> group : orderedGroups) {
if (upto++ < groupOffset) { if (upto++ < groupOffset) {
continue; continue;
} }
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
SearchGroup searchGroup = new SearchGroup(); SearchGroup<GROUP_VALUE_TYPE> searchGroup = new SearchGroup<GROUP_VALUE_TYPE>();
searchGroup.groupValue = group.groupValue; searchGroup.groupValue = group.groupValue;
if (fillFields) { if (fillFields) {
searchGroup.sortValues = new Comparable[sortFieldCount]; searchGroup.sortValues = new Comparable[sortFieldCount];
@ -144,10 +133,6 @@ public class FirstPassGroupingCollector extends Collector {
return result; return result;
} }
public String getGroupField() {
return groupField;
}
@Override @Override
public void setScorer(Scorer scorer) throws IOException { public void setScorer(Scorer scorer) throws IOException {
for (FieldComparator comparator : comparators) { for (FieldComparator comparator : comparators) {
@ -189,13 +174,9 @@ public class FirstPassGroupingCollector extends Collector {
// TODO: should we add option to mean "ignore docs that // TODO: should we add option to mean "ignore docs that
// don't have the group field" (instead of stuffing them // don't have the group field" (instead of stuffing them
// under null group)? // under null group)?
final int ord = index.getOrd(doc); final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
//System.out.println(" ord=" + ord);
final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef); final CollectedSearchGroup<GROUP_VALUE_TYPE> group = groupMap.get(groupValue);
//System.out.println(" group=" + (br == null ? "null" : br.utf8ToString()));
final CollectedSearchGroup group = groupMap.get(br);
if (group == null) { if (group == null) {
@ -210,8 +191,8 @@ public class FirstPassGroupingCollector extends Collector {
// just keep collecting them // just keep collecting them
// Add a new CollectedSearchGroup: // Add a new CollectedSearchGroup:
CollectedSearchGroup sg = new CollectedSearchGroup(); CollectedSearchGroup<GROUP_VALUE_TYPE> sg = new CollectedSearchGroup<GROUP_VALUE_TYPE>();
sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef); sg.groupValue = copyDocGroupValue(groupValue, null);
sg.comparatorSlot = groupMap.size(); sg.comparatorSlot = groupMap.size();
sg.topDoc = docBase + doc; sg.topDoc = docBase + doc;
for (FieldComparator fc : comparators) { for (FieldComparator fc : comparators) {
@ -233,20 +214,14 @@ public class FirstPassGroupingCollector extends Collector {
// the bottom group with this new group. // the bottom group with this new group.
// java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast(); // java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
final CollectedSearchGroup bottomGroup = orderedGroups.last(); final CollectedSearchGroup<GROUP_VALUE_TYPE> bottomGroup = orderedGroups.last();
orderedGroups.remove(bottomGroup); orderedGroups.remove(bottomGroup);
assert orderedGroups.size() == topNGroups -1; assert orderedGroups.size() == topNGroups -1;
groupMap.remove(bottomGroup.groupValue); groupMap.remove(bottomGroup.groupValue);
// reuse the removed CollectedSearchGroup // reuse the removed CollectedSearchGroup
if (br == null) { bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
bottomGroup.groupValue = null;
} else if (bottomGroup.groupValue != null) {
bottomGroup.groupValue.copy(br);
} else {
bottomGroup.groupValue = new BytesRef(br);
}
bottomGroup.topDoc = docBase + doc; bottomGroup.topDoc = docBase + doc;
for (FieldComparator fc : comparators) { for (FieldComparator fc : comparators) {
@ -291,7 +266,7 @@ public class FirstPassGroupingCollector extends Collector {
// Remove before updating the group since lookup is done via comparators // Remove before updating the group since lookup is done via comparators
// TODO: optimize this // TODO: optimize this
final CollectedSearchGroup prevLast; final CollectedSearchGroup<GROUP_VALUE_TYPE> prevLast;
if (orderedGroups != null) { if (orderedGroups != null) {
prevLast = orderedGroups.last(); prevLast = orderedGroups.last();
orderedGroups.remove(group); orderedGroups.remove(group);
@ -336,7 +311,7 @@ public class FirstPassGroupingCollector extends Collector {
} }
}; };
orderedGroups = new TreeSet<CollectedSearchGroup>(comparator); orderedGroups = new TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>>(comparator);
orderedGroups.addAll(groupMap.values()); orderedGroups.addAll(groupMap.values());
assert orderedGroups.size() > 0; assert orderedGroups.size() > 0;
@ -353,15 +328,31 @@ public class FirstPassGroupingCollector extends Collector {
@Override @Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException { public void setNextReader(AtomicReaderContext readerContext) throws IOException {
docBase = readerContext.docBase; docBase = readerContext.docBase;
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
for (int i=0; i<comparators.length; i++) { for (int i=0; i<comparators.length; i++) {
comparators[i] = comparators[i].setNextReader(readerContext); comparators[i] = comparators[i].setNextReader(readerContext);
} }
} }
/**
* Returns the group value for the specified doc.
*
* @param doc The specified doc
* @return the group value for the specified doc
*/
protected abstract GROUP_VALUE_TYPE getDocGroupValue(int doc);
/**
* Returns a copy of the specified group value by creating a new instance and copying the value from the specified
* groupValue in the new instance. Or optionally the reuse argument can be used to copy the group value in.
*
* @param groupValue The group value to copy
* @param reuse Optionally a reuse instance to prevent a new instance creation
* @return a copy of the specified group value
*/
protected abstract GROUP_VALUE_TYPE copyDocGroupValue(GROUP_VALUE_TYPE groupValue, GROUP_VALUE_TYPE reuse);
} }
class CollectedSearchGroup extends SearchGroup { class CollectedSearchGroup<T> extends SearchGroup<T> {
int topDoc; int topDoc;
int comparatorSlot; int comparatorSlot;
} }

View File

@ -0,0 +1,156 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/**
* SecondPassGroupingCollector is the second of two passes
* necessary to collect grouped docs. This pass gathers the
* top N documents per top group computed from the
* first pass. Concrete subclasses define what a group is and how it
* is internally collected.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public abstract class AbstractSecondPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {

  /** Per-group state keyed by group value; filled once in the constructor from the supplied groups. */
  protected final Map<GROUP_VALUE_TYPE, SearchGroupDocs<GROUP_VALUE_TYPE>> groupMap;
  private final int maxDocsPerGroup;
  /**
   * Neither assigned nor read by this base class.
   * NOTE(review): presumably a lookup table that concrete subclasses fill in to
   * implement {@link #retrieveGroup(int)} -- confirm against the implementations.
   */
  protected SearchGroupDocs<GROUP_VALUE_TYPE>[] groupDocs;
  private final Collection<SearchGroup<GROUP_VALUE_TYPE>> groups;
  private final Sort withinGroupSort;
  private final Sort groupSort;
  /** Number of documents passed to {@link #collect(int)}. */
  private int totalHitCount;
  /** Number of collected documents for which {@link #retrieveGroup(int)} returned a group. */
  private int totalGroupedHitCount;

  /**
   * Creates the collector and one {@link TopDocsCollector} per supplied group.
   *
   * @param groups          The groups to collect documents for; must not be empty
   * @param groupSort       The sort whose fields are reported by {@link #getTopGroups(int)};
   *                        it is dereferenced there unconditionally, so it must not be <code>null</code>
   * @param withinGroupSort The sort used inside each group, or <code>null</code> to sort by score
   * @param maxDocsPerGroup How many documents each per-group collector keeps
   * @param getScores       Forwarded to {@link TopFieldCollector}; ignored when withinGroupSort is <code>null</code>
   * @param getMaxScores    Forwarded to {@link TopFieldCollector}; ignored when withinGroupSort is <code>null</code>
   * @param fillSortFields  Forwarded to {@link TopFieldCollector}; ignored when withinGroupSort is <code>null</code>
   * @throws IllegalArgumentException If groups is empty
   * @throws IOException If creating a per-group collector fails
   */
  public AbstractSecondPassGroupingCollector(Collection<SearchGroup<GROUP_VALUE_TYPE>> groups, Sort groupSort, Sort withinGroupSort,
                                             int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
    throws IOException {

    if (groups.size() == 0) {
      throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
    }

    this.groupSort = groupSort;
    this.withinGroupSort = withinGroupSort;
    this.groups = groups;
    this.maxDocsPerGroup = maxDocsPerGroup;

    groupMap = new HashMap<GROUP_VALUE_TYPE, SearchGroupDocs<GROUP_VALUE_TYPE>>(groups.size());

    for (SearchGroup<GROUP_VALUE_TYPE> group : groups) {
      final TopDocsCollector collector;
      if (withinGroupSort == null) {
        // Sort by score
        collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
      } else {
        // Sort by fields
        collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
      }
      groupMap.put(group.groupValue,
          new SearchGroupDocs<GROUP_VALUE_TYPE>(group.groupValue,
              collector));
    }
  }

  /** Hands the scorer to every per-group collector. */
  @Override
  public void setScorer(Scorer scorer) throws IOException {
    for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
      group.collector.setScorer(scorer);
    }
  }

  /**
   * Counts the hit and, if the document belongs to one of the tracked groups,
   * delegates it to that group's collector.
   */
  @Override
  public void collect(int doc) throws IOException {
    totalHitCount++;
    SearchGroupDocs<GROUP_VALUE_TYPE> group = retrieveGroup(doc);
    if (group != null) {
      totalGroupedHitCount++;
      group.collector.collect(doc);
    }
  }

  /**
   * Returns the group the specified doc belongs to or <code>null</code> if no group could be retrieved.
   *
   * @param doc The specified doc
   * @return the group the specified doc belongs to or <code>null</code> if no group could be retrieved
   * @throws IOException If an I/O related error occurred
   */
  protected abstract SearchGroupDocs<GROUP_VALUE_TYPE> retrieveGroup(int doc) throws IOException;

  /** Forwards the new reader context to every per-group collector. */
  @Override
  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
    for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
      group.collector.setNextReader(readerContext);
    }
  }

  /**
   * @return always <code>false</code>
   */
  @Override
  public boolean acceptsDocsOutOfOrder() {
    return false;
  }

  /**
   * Builds the result from the documents collected so far. Groups are reported in
   * the iteration order of the collection passed to the constructor.
   *
   * @param withinGroupOffset The offset passed to each per-group collector when fetching its top docs
   * @return the top groups, carrying the total and the grouped hit counts
   */
  public TopGroups<GROUP_VALUE_TYPE> getTopGroups(int withinGroupOffset) {
    @SuppressWarnings("unchecked")
    final GroupDocs<GROUP_VALUE_TYPE>[] groupDocsResult = (GroupDocs<GROUP_VALUE_TYPE>[]) new GroupDocs[groups.size()];

    int groupIDX = 0;
    // Parameterized loop variable (not the raw SearchGroup type), matching the constructor loop.
    for (SearchGroup<GROUP_VALUE_TYPE> group : groups) {
      // Deliberately not named "groupDocs": that would shadow the protected field.
      final SearchGroupDocs<GROUP_VALUE_TYPE> docsForGroup = groupMap.get(group.groupValue);
      final TopDocs topDocs = docsForGroup.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
      groupDocsResult[groupIDX++] = new GroupDocs<GROUP_VALUE_TYPE>(topDocs.getMaxScore(),
                                                                    topDocs.totalHits,
                                                                    topDocs.scoreDocs,
                                                                    docsForGroup.groupValue,
                                                                    group.sortValues);
    }

    return new TopGroups<GROUP_VALUE_TYPE>(groupSort.getSort(),
                                           withinGroupSort == null ? null : withinGroupSort.getSort(),
                                           totalHitCount, totalGroupedHitCount, groupDocsResult);
  }


  // TODO: merge with SearchGroup or not?
  // ad: don't need to build a new hashmap
  // disad: blows up the size of SearchGroup if we need many of them, and couples implementations
  /**
   * Pairs a group value with the collector that gathers that group's documents.
   * Note that this inner class declares its own type parameter, which hides the
   * enclosing class's parameter of the same name.
   */
  public class SearchGroupDocs<GROUP_VALUE_TYPE> {

    /** The value identifying the group. */
    public final GROUP_VALUE_TYPE groupValue;
    /** The collector receiving the documents of this group. */
    public final TopDocsCollector collector;

    public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) {
      this.groupValue = groupValue;
      this.collector = collector;
    }
  }
}

View File

@ -49,7 +49,7 @@ import org.apache.lucene.util.PriorityQueue;
* being that the documents in each group must always be * being that the documents in each group must always be
* indexed as a block. This collector also fills in * indexed as a block. This collector also fills in
* TopGroups.totalGroupCount without requiring the separate * TopGroups.totalGroupCount without requiring the separate
* {@link AllGroupsCollector}. However, this collector does * {@link TermAllGroupsCollector}. However, this collector does
* not fill in the groupValue of each group; this field * not fill in the groupValue of each group; this field
* will always be null. * will always be null.
* *
@ -212,7 +212,7 @@ public class BlockGroupingCollector extends Collector {
// Swap pending scores // Swap pending scores
final float[] savScores = og.scores; final float[] savScores = og.scores;
og.scores = pendingSubScores; og.scores = pendingSubScores;
pendingSubScores = og.scores; pendingSubScores = savScores;
} }
og.readerContext = currentReaderContext; og.readerContext = currentReaderContext;
//og.groupOrd = lastGroupOrd; //og.groupOrd = lastGroupOrd;
@ -317,7 +317,8 @@ public class BlockGroupingCollector extends Collector {
final FakeScorer fakeScorer = new FakeScorer(); final FakeScorer fakeScorer = new FakeScorer();
final GroupDocs[] groups = new GroupDocs[groupQueue.size() - groupOffset]; @SuppressWarnings("unchecked")
final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) { for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
final OneGroup og = groupQueue.pop(); final OneGroup og = groupQueue.pop();
@ -360,7 +361,7 @@ public class BlockGroupingCollector extends Collector {
final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup); final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);
groups[downTo] = new GroupDocs(topDocs.getMaxScore(), groups[downTo] = new GroupDocs<Object>(topDocs.getMaxScore(),
og.count, og.count,
topDocs.scoreDocs, topDocs.scoreDocs,
null, null,
@ -375,7 +376,7 @@ public class BlockGroupingCollector extends Collector {
} }
*/ */
return new TopGroups(new TopGroups(groupSort.getSort(), return new TopGroups<Object>(new TopGroups<Object>(groupSort.getSort(),
withinGroupSort == null ? null : withinGroupSort.getSort(), withinGroupSort == null ? null : withinGroupSort.getSort(),
totalHitCount, totalGroupedHitCount, groups), totalHitCount, totalGroupedHitCount, groups),
totalGroupCount); totalGroupCount);

View File

@ -18,15 +18,14 @@ package org.apache.lucene.search.grouping;
*/ */
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.util.BytesRef;
/** Represents one group in the results. /** Represents one group in the results.
* *
* @lucene.experimental */ * @lucene.experimental */
public class GroupDocs { public class GroupDocs<GROUP_VALUE_TYPE> {
/** The groupField value for all docs in this group; this /** The groupField value for all docs in this group; this
* may be null if hits did not have the groupField. */ * may be null if hits did not have the groupField. */
public final BytesRef groupValue; public final GROUP_VALUE_TYPE groupValue;
/** Max score in this group */ /** Max score in this group */
public final float maxScore; public final float maxScore;
@ -40,13 +39,13 @@ public class GroupDocs {
public final int totalHits; public final int totalHits;
/** Matches the groupSort passed to {@link /** Matches the groupSort passed to {@link
* FirstPassGroupingCollector}. */ * AbstractFirstPassGroupingCollector}. */
public final Comparable[] groupSortValues; public final Comparable[] groupSortValues;
public GroupDocs(float maxScore, public GroupDocs(float maxScore,
int totalHits, int totalHits,
ScoreDoc[] scoreDocs, ScoreDoc[] scoreDocs,
BytesRef groupValue, GROUP_VALUE_TYPE groupValue,
Comparable[] groupSortValues) { Comparable[] groupSortValues) {
this.maxScore = maxScore; this.maxScore = maxScore;
this.totalHits = totalHits; this.totalHits = totalHits;

View File

@ -17,10 +17,16 @@ package org.apache.lucene.search.grouping;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.util.BytesRef; /**
* Represents a group that is found during the first pass search.
*
* @lucene.experimental
*/
public class SearchGroup<GROUP_VALUE_TYPE> {
/** @lucene.experimental */ /** The value that defines this group */
public class SearchGroup { public GROUP_VALUE_TYPE groupValue;
public BytesRef groupValue;
/** The sort values used during sorting. Can be <code>null</code>. */
public Comparable[] sortValues; public Comparable[] sortValues;
} }

View File

@ -1,172 +0,0 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.BytesRef;
/**
* SecondPassGroupingCollector is the second of two passes
* necessary to collect grouped docs. This pass gathers the
* top N documents per top group computed from the
* first pass.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public class SecondPassGroupingCollector extends Collector {
private final HashMap<BytesRef, SearchGroupDocs> groupMap;
private FieldCache.DocTermsIndex index;
private final String groupField;
private final int maxDocsPerGroup;
private final SentinelIntSet ordSet;
private final SearchGroupDocs[] groupDocs;
private final BytesRef spareBytesRef = new BytesRef();
private final Collection<SearchGroup> groups;
private final Sort withinGroupSort;
private final Sort groupSort;
private int totalHitCount;
private int totalGroupedHitCount;
public SecondPassGroupingCollector(String groupField, Collection<SearchGroup> groups, Sort groupSort, Sort withinGroupSort,
int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
throws IOException {
//System.out.println("SP init");
if (groups.size() == 0) {
throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
}
this.groupSort = groupSort;
this.withinGroupSort = withinGroupSort;
this.groups = groups;
this.groupField = groupField;
this.maxDocsPerGroup = maxDocsPerGroup;
groupMap = new HashMap<BytesRef, SearchGroupDocs>(groups.size());
for (SearchGroup group : groups) {
//System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
final TopDocsCollector collector;
if (withinGroupSort == null) {
// Sort by score
collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
} else {
// Sort by fields
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
}
groupMap.put(group.groupValue,
new SearchGroupDocs(group.groupValue,
collector));
}
ordSet = new SentinelIntSet(groupMap.size(), -1);
groupDocs = new SearchGroupDocs[ordSet.keys.length];
}
@Override
public void setScorer(Scorer scorer) throws IOException {
for (SearchGroupDocs group : groupMap.values()) {
group.collector.setScorer(scorer);
}
}
@Override
public void collect(int doc) throws IOException {
final int slot = ordSet.find(index.getOrd(doc));
//System.out.println("SP.collect doc=" + doc + " slot=" + slot);
totalHitCount++;
if (slot >= 0) {
totalGroupedHitCount++;
groupDocs[slot].collector.collect(doc);
}
}
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
//System.out.println("SP.setNextReader");
for (SearchGroupDocs group : groupMap.values()) {
group.collector.setNextReader(readerContext);
}
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
// Rebuild ordSet
ordSet.clear();
for (SearchGroupDocs group : groupMap.values()) {
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef);
if (ord >= 0) {
groupDocs[ordSet.put(ord)] = group;
}
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
public TopGroups getTopGroups(int withinGroupOffset) {
final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()];
int groupIDX = 0;
for(SearchGroup group : groups) {
final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
topDocs.totalHits,
topDocs.scoreDocs,
groupDocs.groupValue,
group.sortValues);
}
return new TopGroups(groupSort.getSort(),
withinGroupSort == null ? null : withinGroupSort.getSort(),
totalHitCount, totalGroupedHitCount, groupDocsResult);
}
}
// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
/**
 * Immutable pairing of a group value with the collector that gathers the
 * documents belonging to that group.
 */
class SearchGroupDocs {

  /** Value identifying the group; may be <code>null</code>. */
  public final BytesRef groupValue;

  /** Collector receiving the documents of this group. */
  public final TopDocsCollector collector;

  public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) {
    this.collector = collector;
    this.groupValue = groupValue;
  }
}

View File

@ -18,9 +18,7 @@ package org.apache.lucene.search.grouping;
*/ */
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import java.io.IOException; import java.io.IOException;
@ -43,47 +41,44 @@ import java.util.List;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class AllGroupsCollector extends Collector { public class TermAllGroupsCollector extends AbstractAllGroupsCollector<BytesRef> {
private static final int DEFAULT_INITIAL_SIZE = 128; private static final int DEFAULT_INITIAL_SIZE = 128;
private final String groupField; private final String groupField;
private final SentinelIntSet ordSet; private final SentinelIntSet ordSet;
private final List<BytesRef> groups; private final List<BytesRef> groups;
private final BytesRef spareBytesRef = new BytesRef();
private FieldCache.DocTermsIndex index; private FieldCache.DocTermsIndex index;
private final BytesRef spareBytesRef = new BytesRef();
/** /**
* Expert: Constructs a {@link AllGroupsCollector} * Expert: Constructs a {@link AbstractAllGroupsCollector}
* *
* @param groupField The field to group by * @param groupField The field to group by
* @param initialSize The initial allocation size of the * @param initialSize The initial allocation size of the
* internal int set and group list * internal int set and group list
* which should roughly match the total * which should roughly match the total
* number of expected unique groups. Be aware that the * number of expected unique groups. Be aware that the
* heap usage is 4 bytes * initialSize. * heap usage is 4 bytes * initialSize.
*/ */
public AllGroupsCollector(String groupField, int initialSize) { public TermAllGroupsCollector(String groupField, int initialSize) {
this.groupField = groupField;
ordSet = new SentinelIntSet(initialSize, -1); ordSet = new SentinelIntSet(initialSize, -1);
groups = new ArrayList<BytesRef>(initialSize); groups = new ArrayList<BytesRef>(initialSize);
this.groupField = groupField;
} }
/** /**
* Constructs a {@link AllGroupsCollector}. This sets the * Constructs a {@link AbstractAllGroupsCollector}. This sets the
* initial allocation size for the internal int set and group * initial allocation size for the internal int set and group
* list to 128. * list to 128.
* *
* @param groupField The field to group by * @param groupField The field to group by
*/ */
public AllGroupsCollector(String groupField) { public TermAllGroupsCollector(String groupField) {
this(groupField, DEFAULT_INITIAL_SIZE); this(groupField, DEFAULT_INITIAL_SIZE);
} }
public void setScorer(Scorer scorer) throws IOException {
}
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
int key = index.getOrd(doc); int key = index.getOrd(doc);
if (!ordSet.exists(key)) { if (!ordSet.exists(key)) {
@ -94,22 +89,7 @@ public class AllGroupsCollector extends Collector {
} }
/** /**
* Returns the total number of groups for the executed search. * {@inheritDoc}
* This is a convenience method. The following code snippet has the same effect: <pre>getGroups().size()</pre>
*
* @return The total number of groups for the executed search
*/
public int getGroupCount() {
return groups.size();
}
/**
* Returns the group values
* <p/>
* This is an unordered collections of group values. For each group that matched the query there is a {@link BytesRef}
* representing a group value.
*
* @return the group values
*/ */
public Collection<BytesRef> getGroups() { public Collection<BytesRef> getGroups() {
return groups; return groups;
@ -128,7 +108,4 @@ public class AllGroupsCollector extends Collector {
} }
} }
public boolean acceptsDocsOutOfOrder() {
return true;
}
} }

View File

@ -0,0 +1,85 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
/**
 * Concrete implementation of {@link AbstractFirstPassGroupingCollector} that groups based on
 * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTermsIndex}
 * to collect groups.
 *
 * @lucene.experimental
 */
public class TermFirstPassGroupingCollector extends AbstractFirstPassGroupingCollector<BytesRef> {

  // Scratch instance passed to index.lookup() on every getDocGroupValue() call.
  private final BytesRef scratchBytesRef = new BytesRef();

  // Name of the field whose terms define the groups; assigned once in the constructor.
  private final String groupField;

  // Terms index of the current reader; replaced on every setNextReader() call.
  private FieldCache.DocTermsIndex index;

  /**
   * Create the first pass collector.
   *
   * @param groupField The field used to group
   *    documents. This field must be single-valued and
   *    indexed (FieldCache is used to access its value
   *    per-document).
   * @param groupSort The {@link Sort} used to sort the
   *    groups. The top sorted document within each group
   *    according to groupSort, determines how that group
   *    sorts against other groups. This must be non-null,
   *    ie, if you want to groupSort by relevance use
   *    Sort.RELEVANCE.
   * @param topNGroups How many top groups to keep.
   * @throws IOException When I/O related errors occur
   */
  public TermFirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
    super(groupSort, topNGroups);
    this.groupField = groupField;
  }

  /**
   * Looks up the group value of a document through the current terms index.
   *
   * @param doc the document id, as accepted by the current reader's terms index
   * @return <code>null</code> when the document's ord is 0 (treated here as "no group value"),
   *         otherwise whatever <code>index.lookup</code> returns for that ord
   */
  @Override
  protected BytesRef getDocGroupValue(int doc) {
    final int ord = index.getOrd(doc);
    // NOTE(review): the lookup is handed the shared scratch instance, so the returned
    // value presumably must be copied (see copyDocGroupValue) before being retained.
    return ord == 0 ? null : index.lookup(ord, scratchBytesRef);
  }

  /**
   * Copies a group value, reusing the supplied instance when there is one.
   *
   * @param groupValue the value to copy; may be <code>null</code>
   * @param reuse an instance to copy into, or <code>null</code> to allocate a new one
   * @return <code>null</code> if <code>groupValue</code> is <code>null</code>; otherwise
   *         <code>reuse</code> (overwritten with the value) when non-null, else a new copy
   */
  @Override
  protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) {
    if (groupValue == null) {
      return null;
    } else if (reuse != null) {
      reuse.copy(groupValue);
      return reuse;
    } else {
      return new BytesRef(groupValue);
    }
  }

  /**
   * Delegates to the superclass and then loads the terms index of the
   * group field for the new reader from the default {@link FieldCache}.
   */
  @Override
  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
  }
}

View File

@ -0,0 +1,76 @@
package org.apache.lucene.search.grouping;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
/**
 * Term based {@link AbstractSecondPassGroupingCollector}: the documents of each
 * requested group are located through the ords of a
 * {@link org.apache.lucene.search.FieldCache.DocTermsIndex} loaded for the group field.
 *
 * @lucene.experimental
 */
public class TermSecondPassGroupingCollector extends AbstractSecondPassGroupingCollector<BytesRef> {

  // Field whose terms identify the groups.
  private final String groupField;
  // Scratch instance handed to binarySearchLookup() while resolving group ords.
  private final BytesRef spareBytesRef = new BytesRef();
  // Ords (in the current reader) of the requested groups; a slot in this set
  // doubles as the position of the matching entry in groupDocs.
  private final SentinelIntSet ordSet;
  // Terms index of the current reader; replaced in setNextReader().
  private FieldCache.DocTermsIndex index;

  /**
   * Creates the second pass collector.
   *
   * @param groupField the field whose terms index is used to resolve groups
   * @param groups forwarded unchanged to the superclass constructor
   * @param groupSort forwarded unchanged to the superclass constructor
   * @param withinGroupSort forwarded unchanged to the superclass constructor
   * @param maxDocsPerGroup forwarded unchanged to the superclass constructor
   * @param getScores forwarded unchanged to the superclass constructor
   * @param getMaxScores forwarded unchanged to the superclass constructor
   * @param fillSortFields forwarded unchanged to the superclass constructor
   * @throws IOException declared by the superclass constructor
   */
  @SuppressWarnings("unchecked")
  public TermSecondPassGroupingCollector(String groupField, Collection<SearchGroup<BytesRef>> groups, Sort groupSort, Sort withinGroupSort,
                                         int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
      throws IOException {
    super(groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
    this.groupField = groupField;
    ordSet = new SentinelIntSet(groupMap.size(), -1);
    // One entry per slot of the ord set, so a slot number can index this array directly.
    groupDocs = (SearchGroupDocs<BytesRef>[]) new SearchGroupDocs[ordSet.keys.length];
  }

  /**
   * Delegates to the superclass, loads the terms index for the new reader and
   * re-registers every requested group under its ord in that reader.
   */
  @Override
  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);

    // Ords are resolved against the index just loaded, so start from an empty set.
    ordSet.clear();
    for (SearchGroupDocs<BytesRef> candidate : groupMap.values()) {
      final int groupOrd;
      if (candidate.groupValue == null) {
        // The null group is registered under ord 0.
        groupOrd = 0;
      } else {
        groupOrd = index.binarySearchLookup(candidate.groupValue, spareBytesRef);
      }
      if (groupOrd < 0) {
        // Negative lookup result: presumably the value does not occur in this reader.
        continue;
      }
      groupDocs[ordSet.put(groupOrd)] = candidate;
    }
  }

  /**
   * Finds the requested group the given document belongs to.
   *
   * @param doc the document id, as accepted by the current reader's terms index
   * @return the matching group, or <code>null</code> if the document's ord is not
   *         registered in the ord set
   */
  @Override
  protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
    final int slot = ordSet.find(index.getOrd(doc));
    return slot < 0 ? null : groupDocs[slot];
  }
}

View File

@ -22,7 +22,7 @@ import org.apache.lucene.search.SortField;
/** Represents result returned by a grouping search. /** Represents result returned by a grouping search.
* *
* @lucene.experimental */ * @lucene.experimental */
public class TopGroups { public class TopGroups<GROUP_VALUE_TYPE> {
/** Number of documents matching the search */ /** Number of documents matching the search */
public final int totalHitCount; public final int totalHitCount;
@ -33,7 +33,7 @@ public class TopGroups {
public final Integer totalGroupCount; public final Integer totalGroupCount;
/** Group results in groupSort order */ /** Group results in groupSort order */
public final GroupDocs[] groups; public final GroupDocs<GROUP_VALUE_TYPE>[] groups;
/** How groups are sorted against each other */ /** How groups are sorted against each other */
public final SortField[] groupSort; public final SortField[] groupSort;
@ -41,7 +41,7 @@ public class TopGroups {
/** How docs are sorted within each group */ /** How docs are sorted within each group */
public final SortField[] withinGroupSort; public final SortField[] withinGroupSort;
public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<GROUP_VALUE_TYPE>[] groups) {
this.groupSort = groupSort; this.groupSort = groupSort;
this.withinGroupSort = withinGroupSort; this.withinGroupSort = withinGroupSort;
this.totalHitCount = totalHitCount; this.totalHitCount = totalHitCount;
@ -50,7 +50,7 @@ public class TopGroups {
this.totalGroupCount = null; this.totalGroupCount = null;
} }
public TopGroups(TopGroups oldTopGroups, Integer totalGroupCount) { public TopGroups(TopGroups<GROUP_VALUE_TYPE> oldTopGroups, Integer totalGroupCount) {
this.groupSort = oldTopGroups.groupSort; this.groupSort = oldTopGroups.groupSort;
this.withinGroupSort = oldTopGroups.withinGroupSort; this.withinGroupSort = oldTopGroups.withinGroupSort;
this.totalHitCount = oldTopGroups.totalHitCount; this.totalHitCount = oldTopGroups.totalHitCount;

View File

@ -43,55 +43,37 @@ field fall into a single group.</p>
</ul> </ul>
<p>The implementation is two-pass: the first pass ({@link
org.apache.lucene.search.grouping.TermFirstPassGroupingCollector})
gathers the top groups, and the second pass ({@link
org.apache.lucene.search.grouping.TermSecondPassGroupingCollector})
gathers documents within those groups. If the search is costly to
run you may want to use the {@link
org.apache.lucene.search.CachingCollector} class, which
caches hits and can (quickly) replay them for the second pass. This
way you only run the query once, but you pay a RAM cost to (briefly)
hold all hits. Results are returned as a {@link
org.apache.lucene.search.grouping.TopGroups} instance.</p>
<p> <p>
There are two grouping implementations here: This module abstracts away what defines a group and how it is collected. All grouping collectors
<ul> are abstract and currently have term-based implementations. One can implement
<li> collectors that, for example, group on multiple fields.
Arbitrary grouping that can group by any single-valued indexed </p>
field, implemented as a two-pass collector: the first pass ({@link
org.apache.lucene.search.grouping.FirstPassGroupingCollector})
gathers the top groups, and the second pass ({@link
org.apache.lucene.search.grouping.SecondPassGroupingCollector})
gathers documents within those groups. If the search is costly to
run you may want to use the {@link
org.apache.lucene.search.CachingCollector} class, which caches
hits and can (quickly) replay them for the second pass. This way
you only run the query once, but you pay a RAM cost to (briefly)
hold all hits. Results are returned as a {@link
org.apache.lucene.search.grouping.TopGroups} instance.</p>
</li>
<li>
Indexed groups, using a single pass collector (<code>BlockGroupingCollectorDoc</code>) that
is able to group according to the doc blocks created during
indexing using <code>IndexWriter</code>'s <code>add/updateDocuments</code> API.
This is faster (~25% faster QPS) than the generic two-pass
collector, but it only works for doc blocks so you must statically
commit (during indexing) to which grouping you'll need at search
time.
<p>This implementation does not rely on a single valued grouping <p>
field; rather, the blocks in the index define the groups, so your This module abstracts away what defines a group and how it is collected. All grouping collectors
application is free to determine what the grouping criteria is. are abstract and currently have term-based implementations. One can implement
At search time, you must provide a <code>Filter</code> that marks collectors that, for example, group on multiple fields.
the last document in each group. This is a substantial memory </p>
savings because this collector does not load
a <code>DocTermsIndex</code> from the
<code>FieldCache</code>.
</li>
</ul>
<p>The benefit of the arbitrary grouping implementation is you don't have
to commit at indexing time to a static grouping of your documents.
But the downside is it's somewhat slower to run, and requires more RAM
(a <code>FieldCache.DocTermsIndex</code> entry is created).
<p>Known limitations:</p> <p>Known limitations:</p>
<ul> <ul>
<li> For the two-pass grouping collector, the group field must be a <li> For the two-pass grouping collector, the group field must be a
single-valued indexed field. single-valued indexed field.
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field. {@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
<li> Unlike Solr's implementation, this module cannot group by <li> Although Solr supports grouping by function and this module has an abstraction of what a group is, there are currently only
function query values nor by arbitrary queries. implementations for grouping based on terms.
<li> Sharding is not directly supported, though is not too <li> Sharding is not directly supported, though is not too
difficult, if you can merge the top groups and top documents per difficult, if you can merge the top groups and top documents per
group yourself. group yourself.
@ -101,14 +83,14 @@ But the downside is it's somewhat slower to run, and requires more RAM
(using the {@link org.apache.lucene.search.CachingCollector}):</p> (using the {@link org.apache.lucene.search.CachingCollector}):</p>
<pre class="prettyprint"> <pre class="prettyprint">
FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups); TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
boolean cacheScores = true; boolean cacheScores = true;
double maxCacheRAMMB = 4.0; double maxCacheRAMMB = 4.0;
CachingCollector cachedCollector = CachingCollector.create(c1, cacheScores, maxCacheRAMMB); CachingCollector cachedCollector = CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector); s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector);
Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields); Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
if (topGroups == null) { if (topGroups == null) {
// No groups matched // No groups matched
@ -118,12 +100,12 @@ But the downside is it's somewhat slower to run, and requires more RAM
boolean getScores = true; boolean getScores = true;
boolean getMaxScores = true; boolean getMaxScores = true;
boolean fillFields = true; boolean fillFields = true;
SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields); TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
//Optionally compute total group count //Optionally compute total group count
AllGroupsCollector allGroupsCollector = null; TermAllGroupsCollector allGroupsCollector = null;
if (requiredTotalGroupCount) { if (requiredTotalGroupCount) {
allGroupsCollector = new AllGroupsCollector("author"); allGroupsCollector = new TermAllGroupsCollector("author");
c2 = MultiCollector.wrap(c2, allGroupsCollector); c2 = MultiCollector.wrap(c2, allGroupsCollector);
} }
@ -135,9 +117,9 @@ But the downside is it's somewhat slower to run, and requires more RAM
s.search(new TermQuery(new Term("content", searchTerm)), c2); s.search(new TermQuery(new Term("content", searchTerm)), c2);
} }
TopGroups groupsResult = c2.getTopGroups(docOffset); TopGroups<BytesRef> groupsResult = c2.getTopGroups(docOffset);
if (requiredTotalGroupCount) { if (requiredTotalGroupCount) {
groupResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount()); groupsResult = new TopGroups<BytesRef>(groupsResult, allGroupsCollector.getGroupCount());
} }
// Render groupsResult... // Render groupsResult...

View File

@ -27,7 +27,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
public class AllGroupsCollectorTest extends LuceneTestCase { public class TermAllGroupsCollectorTest extends LuceneTestCase {
public void testTotalGroupCount() throws Exception { public void testTotalGroupCount() throws Exception {
@ -91,15 +91,15 @@ public class AllGroupsCollectorTest extends LuceneTestCase {
IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close(); w.close();
AllGroupsCollector c1 = new AllGroupsCollector(groupField); TermAllGroupsCollector c1 = new TermAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1); indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
assertEquals(4, c1.getGroupCount()); assertEquals(4, c1.getGroupCount());
AllGroupsCollector c2 = new AllGroupsCollector(groupField); TermAllGroupsCollector c2 = new TermAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "some")), c2); indexSearcher.search(new TermQuery(new Term("content", "some")), c2);
assertEquals(3, c2.getGroupCount()); assertEquals(3, c2.getGroupCount());
AllGroupsCollector c3 = new AllGroupsCollector(groupField); TermAllGroupsCollector c3 = new TermAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "blob")), c3); indexSearcher.search(new TermQuery(new Term("content", "blob")), c3);
assertEquals(2, c3.getGroupCount()); assertEquals(2, c3.getGroupCount());

View File

@ -17,9 +17,6 @@
package org.apache.lucene.search.grouping; package org.apache.lucene.search.grouping;
import java.util.*;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
@ -33,6 +30,9 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import java.io.IOException;
import java.util.*;
// TODO // TODO
// - should test relevance sort too // - should test relevance sort too
// - test null // - test null
@ -103,10 +103,10 @@ public class TestGrouping extends LuceneTestCase {
w.close(); w.close();
final Sort groupSort = Sort.RELEVANCE; final Sort groupSort = Sort.RELEVANCE;
final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10); final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1); indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true); final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2); indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups groups = c2.getTopGroups(0); final TopGroups groups = c2.getTopGroups(0);
@ -154,7 +154,10 @@ public class TestGrouping extends LuceneTestCase {
final BytesRef group; final BytesRef group;
final BytesRef sort1; final BytesRef sort1;
final BytesRef sort2; final BytesRef sort2;
// content must be "realN ..."
final String content; final String content;
float score;
float score2;
public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, String content) { public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, String content) {
this.id = id; this.id = id;
@ -167,16 +170,21 @@ public class TestGrouping extends LuceneTestCase {
private Sort getRandomSort() { private Sort getRandomSort() {
final List<SortField> sortFields = new ArrayList<SortField>(); final List<SortField> sortFields = new ArrayList<SortField>();
if (random.nextBoolean()) { if (random.nextInt(7) == 2) {
sortFields.add(SortField.FIELD_SCORE);
} else {
if (random.nextBoolean()) { if (random.nextBoolean()) {
if (random.nextBoolean()) {
sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));
} else {
sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));
}
} else if (random.nextBoolean()) {
sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean())); sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));
} else {
sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean())); sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));
} }
} else if (random.nextBoolean()) {
sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));
sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));
} }
// Break ties:
sortFields.add(new SortField("id", SortField.INT)); sortFields.add(new SortField("id", SortField.INT));
return new Sort(sortFields.toArray(new SortField[sortFields.size()])); return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
} }
@ -188,7 +196,15 @@ public class TestGrouping extends LuceneTestCase {
public int compare(GroupDoc d1, GroupDoc d2) { public int compare(GroupDoc d1, GroupDoc d2) {
for(SortField sf : sortFields) { for(SortField sf : sortFields) {
final int cmp; final int cmp;
if (sf.getField().equals("sort1")) { if (sf.getType() == SortField.SCORE) {
if (d1.score > d2.score) {
cmp = -1;
} else if (d1.score < d2.score) {
cmp = 1;
} else {
cmp = 0;
}
} else if (sf.getField().equals("sort1")) {
cmp = d1.sort1.compareTo(d2.sort1); cmp = d1.sort1.compareTo(d2.sort1);
} else if (sf.getField().equals("sort2")) { } else if (sf.getField().equals("sort2")) {
cmp = d1.sort2.compareTo(d2.sort2); cmp = d1.sort2.compareTo(d2.sort2);
@ -213,7 +229,9 @@ public class TestGrouping extends LuceneTestCase {
for(int fieldIDX=0;fieldIDX<sortFields.length;fieldIDX++) { for(int fieldIDX=0;fieldIDX<sortFields.length;fieldIDX++) {
final Comparable<?> c; final Comparable<?> c;
final SortField sf = sortFields[fieldIDX]; final SortField sf = sortFields[fieldIDX];
if (sf.getField().equals("sort1")) { if (sf.getType() == SortField.SCORE) {
c = new Float(d.score);
} else if (sf.getField().equals("sort1")) {
c = d.sort1; c = d.sort1;
} else if (sf.getField().equals("sort2")) { } else if (sf.getField().equals("sort2")) {
c = d.sort2; c = d.sort2;
@ -236,18 +254,18 @@ public class TestGrouping extends LuceneTestCase {
} }
*/ */
private TopGroups slowGrouping(GroupDoc[] groupDocs, private TopGroups<BytesRef> slowGrouping(GroupDoc[] groupDocs,
String searchTerm, String searchTerm,
boolean fillFields, boolean fillFields,
boolean getScores, boolean getScores,
boolean getMaxScores, boolean getMaxScores,
boolean doAllGroups, boolean doAllGroups,
Sort groupSort, Sort groupSort,
Sort docSort, Sort docSort,
int topNGroups, int topNGroups,
int docsPerGroup, int docsPerGroup,
int groupOffset, int groupOffset,
int docOffset) { int docOffset) {
final Comparator<GroupDoc> groupSortComp = getComparator(groupSort); final Comparator<GroupDoc> groupSortComp = getComparator(groupSort);
@ -262,11 +280,11 @@ public class TestGrouping extends LuceneTestCase {
//System.out.println("TEST: slowGrouping"); //System.out.println("TEST: slowGrouping");
for(GroupDoc d : groupDocs) { for(GroupDoc d : groupDocs) {
// TODO: would be better to filter by searchTerm before sorting! // TODO: would be better to filter by searchTerm before sorting!
if (!d.content.equals(searchTerm)) { if (!d.content.startsWith(searchTerm)) {
continue; continue;
} }
totalHitCount++; totalHitCount++;
//System.out.println(" match id=" + d.id); //System.out.println(" match id=" + d.id + " score=" + d.score);
if (doAllGroups) { if (doAllGroups) {
if (!knownGroups.contains(d.group)) { if (!knownGroups.contains(d.group)) {
@ -296,7 +314,8 @@ public class TestGrouping extends LuceneTestCase {
final int limit = Math.min(groupOffset + topNGroups, groups.size()); final int limit = Math.min(groupOffset + topNGroups, groups.size());
final Comparator<GroupDoc> docSortComp = getComparator(docSort); final Comparator<GroupDoc> docSortComp = getComparator(docSort);
final GroupDocs[] result = new GroupDocs[limit-groupOffset]; @SuppressWarnings("unchecked")
final GroupDocs<BytesRef>[] result = new GroupDocs[limit-groupOffset];
int totalGroupedHitCount = 0; int totalGroupedHitCount = 0;
for(int idx=groupOffset;idx < limit;idx++) { for(int idx=groupOffset;idx < limit;idx++) {
final BytesRef group = sortedGroups.get(idx); final BytesRef group = sortedGroups.get(idx);
@ -311,9 +330,9 @@ public class TestGrouping extends LuceneTestCase {
final GroupDoc d = docs.get(docIDX); final GroupDoc d = docs.get(docIDX);
final FieldDoc fd; final FieldDoc fd;
if (fillFields) { if (fillFields) {
fd = new FieldDoc(d.id, 0.0f, fillFields(d, docSort)); fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN, fillFields(d, docSort));
} else { } else {
fd = new FieldDoc(d.id, 0.0f); fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN);
} }
hits[docIDX-docOffset] = fd; hits[docIDX-docOffset] = fd;
} }
@ -321,7 +340,7 @@ public class TestGrouping extends LuceneTestCase {
hits = new ScoreDoc[0]; hits = new ScoreDoc[0];
} }
result[idx-groupOffset] = new GroupDocs(0.0f, result[idx-groupOffset] = new GroupDocs<BytesRef>(0.0f,
docs.size(), docs.size(),
hits, hits,
group, group,
@ -329,12 +348,12 @@ public class TestGrouping extends LuceneTestCase {
} }
if (doAllGroups) { if (doAllGroups) {
return new TopGroups( return new TopGroups<BytesRef>(
new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result), new TopGroups<BytesRef>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),
knownGroups.size() knownGroups.size()
); );
} else { } else {
return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result); return new TopGroups<BytesRef>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
} }
} }
@ -372,7 +391,7 @@ public class TestGrouping extends LuceneTestCase {
doc.add(newField("sort1", groupValue.sort1.utf8ToString(), Field.Index.NOT_ANALYZED)); doc.add(newField("sort1", groupValue.sort1.utf8ToString(), Field.Index.NOT_ANALYZED));
doc.add(newField("sort2", groupValue.sort2.utf8ToString(), Field.Index.NOT_ANALYZED)); doc.add(newField("sort2", groupValue.sort2.utf8ToString(), Field.Index.NOT_ANALYZED));
doc.add(new NumericField("id").setIntValue(groupValue.id)); doc.add(new NumericField("id").setIntValue(groupValue.id));
doc.add(newField("content", groupValue.content, Field.Index.NOT_ANALYZED)); doc.add(newField("content", groupValue.content, Field.Index.ANALYZED));
//System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id); //System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id);
} }
// So we can pull filter marking last doc in block: // So we can pull filter marking last doc in block:
@ -420,7 +439,22 @@ public class TestGrouping extends LuceneTestCase {
groups.add(new BytesRef(_TestUtil.randomRealisticUnicodeString(random))); groups.add(new BytesRef(_TestUtil.randomRealisticUnicodeString(random)));
//groups.add(new BytesRef(_TestUtil.randomSimpleString(random))); //groups.add(new BytesRef(_TestUtil.randomSimpleString(random)));
} }
final String[] contentStrings = new String[] {"a", "b", "c", "d"}; final String[] contentStrings = new String[_TestUtil.nextInt(random, 2, 20)];
if (VERBOSE) {
System.out.println("TEST: create fake content");
}
for(int contentIDX=0;contentIDX<contentStrings.length;contentIDX++) {
final StringBuilder sb = new StringBuilder();
sb.append("real" + random.nextInt(3)).append(' ');
final int fakeCount = random.nextInt(10);
for(int fakeIDX=0;fakeIDX<fakeCount;fakeIDX++) {
sb.append("fake ");
}
contentStrings[contentIDX] = sb.toString();
if (VERBOSE) {
System.out.println(" content=" + sb.toString());
}
}
Directory dir = newDirectory(); Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter( RandomIndexWriter w = new RandomIndexWriter(
@ -439,7 +473,7 @@ public class TestGrouping extends LuceneTestCase {
Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED); Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED);
doc.add(sort2); doc.add(sort2);
docNoGroup.add(sort2); docNoGroup.add(sort2);
Field content = newField("content", "", Field.Index.NOT_ANALYZED); Field content = newField("content", "", Field.Index.ANALYZED);
doc.add(content); doc.add(content);
docNoGroup.add(content); docNoGroup.add(content);
NumericField id = new NumericField("id"); NumericField id = new NumericField("id");
@ -479,40 +513,100 @@ public class TestGrouping extends LuceneTestCase {
} }
} }
final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
final IndexReader r = w.getReader(); final IndexReader r = w.getReader();
w.close(); w.close();
// Build 2nd index, where docs are added in blocks by // NOTE: intentional but temporary field cache insanity!
// group, so we can use single pass collector
final Directory dir2 = newDirectory();
final IndexReader r2 = getDocBlockReader(dir2, groupDocs);
final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
final IndexSearcher s = new IndexSearcher(r);
final IndexSearcher s2 = new IndexSearcher(r2);
final int[] docIDToID = FieldCache.DEFAULT.getInts(r, "id"); final int[] docIDToID = FieldCache.DEFAULT.getInts(r, "id");
final int[] docIDToID2 = FieldCache.DEFAULT.getInts(r2, "id"); IndexReader r2 = null;
Directory dir2 = null;
try { try {
final IndexSearcher s = new IndexSearcher(r);
for(int contentID=0;contentID<3;contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
final GroupDoc gd = groupDocs[docIDToID[hit.doc]];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
assertEquals(gd.id, docIDToID[hit.doc]);
//System.out.println(" score=" + hit.score + " id=" + docIDToID[hit.doc]);
}
}
for(GroupDoc gd : groupDocs) {
assertTrue(gd.score != 0.0);
}
// Build 2nd index, where docs are added in blocks by
// group, so we can use single pass collector
dir2 = newDirectory();
r2 = getDocBlockReader(dir2, groupDocs);
final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
final int[] docIDToID2 = FieldCache.DEFAULT.getInts(r2, "id");
final IndexSearcher s2 = new IndexSearcher(r2);
// Reader2 only increases maxDoc() vs reader, which
// means a monotonic shift in scores, so we can
// reliably remap them w/ Map:
final Map<String,Map<Float,Float>> scoreMap = new HashMap<String,Map<Float,Float>>();
// Tricky: must separately set .score2, because the doc
// block index was created with possible deletions!
//System.out.println("fixup score2");
for(int contentID=0;contentID<3;contentID++) {
//System.out.println(" term=real" + contentID);
final Map<Float,Float> termScoreMap = new HashMap<Float,Float>();
scoreMap.put("real"+contentID, termScoreMap);
//System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
//" dfnew=" + s2.docFreq(new Term("content", "real"+contentID)));
final ScoreDoc[] hits = s2.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
final GroupDoc gd = groupDocsByID[docIDToID2[hit.doc]];
assertTrue(gd.score2 == 0.0);
gd.score2 = hit.score;
assertEquals(gd.id, docIDToID2[hit.doc]);
//System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToID2[hit.doc]);
termScoreMap.put(gd.score, gd.score2);
}
}
for(int searchIter=0;searchIter<100;searchIter++) { for(int searchIter=0;searchIter<100;searchIter++) {
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter); System.out.println("TEST: searchIter=" + searchIter);
} }
final String searchTerm = contentStrings[random.nextInt(contentStrings.length)]; final String searchTerm = "real" + random.nextInt(3);
final boolean fillFields = random.nextBoolean(); final boolean fillFields = random.nextBoolean();
final boolean getScores = random.nextBoolean(); boolean getScores = random.nextBoolean();
final boolean getMaxScores = random.nextBoolean(); final boolean getMaxScores = random.nextBoolean();
final Sort groupSort = getRandomSort(); final Sort groupSort = getRandomSort();
//final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)}); //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
// TODO: also test null (= sort by relevance) // TODO: also test null (= sort by relevance)
final Sort docSort = getRandomSort(); final Sort docSort = getRandomSort();
for(SortField sf : docSort.getSort()) {
if (sf.getType() == SortField.SCORE) {
getScores = true;
}
}
for(SortField sf : groupSort.getSort()) {
if (sf.getType() == SortField.SCORE) {
getScores = true;
}
}
final int topNGroups = _TestUtil.nextInt(random, 1, 30); final int topNGroups = _TestUtil.nextInt(random, 1, 30);
//final int topNGroups = 4; //final int topNGroups = 4;
final int docsPerGroup = _TestUtil.nextInt(random, 1, 50); final int docsPerGroup = _TestUtil.nextInt(random, 1, 50);
final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2); final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2);
//final int groupOffset = 0; //final int groupOffset = 0;
@ -522,17 +616,17 @@ public class TestGrouping extends LuceneTestCase {
final boolean doCache = random.nextBoolean(); final boolean doCache = random.nextBoolean();
final boolean doAllGroups = random.nextBoolean(); final boolean doAllGroups = random.nextBoolean();
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups); System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
} }
final AllGroupsCollector allGroupsCollector; final TermAllGroupsCollector allGroupsCollector;
if (doAllGroups) { if (doAllGroups) {
allGroupsCollector = new AllGroupsCollector("group"); allGroupsCollector = new TermAllGroupsCollector("group");
} else { } else {
allGroupsCollector = null; allGroupsCollector = null;
} }
final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups); final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache; final CachingCollector cCache;
final Collector c; final Collector c;
@ -583,19 +677,19 @@ public class TestGrouping extends LuceneTestCase {
} }
} }
final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields); final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult; final TopGroups groupsResult;
if (topGroups != null) { if (topGroups != null) {
if (VERBOSE) { if (VERBOSE) {
System.out.println("TEST: topGroups"); System.out.println("TEST: topGroups");
for (SearchGroup searchGroup : topGroups) { for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues)); System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
} }
} }
final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields); final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) { if (doCache) {
if (cCache.isCached()) { if (cCache.isCached()) {
if (VERBOSE) { if (VERBOSE) {
@ -613,8 +707,8 @@ public class TestGrouping extends LuceneTestCase {
} }
if (doAllGroups) { if (doAllGroups) {
TopGroups tempTopGroups = c2.getTopGroups(docOffset); TopGroups<BytesRef> tempTopGroups = c2.getTopGroups(docOffset);
groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount()); groupsResult = new TopGroups<BytesRef>(tempTopGroups, allGroupsCollector.getGroupCount());
} else { } else {
groupsResult = c2.getTopGroups(docOffset); groupsResult = c2.getTopGroups(docOffset);
} }
@ -625,49 +719,93 @@ public class TestGrouping extends LuceneTestCase {
} }
} }
final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset); final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) { if (VERBOSE) {
if (expectedGroups == null) { if (expectedGroups == null) {
System.out.println("TEST: no expected groups"); System.out.println("TEST: no expected groups");
} else { } else {
System.out.println("TEST: expected groups"); System.out.println("TEST: expected groups");
for(GroupDocs gd : expectedGroups.groups) { for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString())); System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()));
for(ScoreDoc sd : gd.scoreDocs) { for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc); System.out.println(" id=" + sd.doc + " score=" + sd.score);
} }
} }
} }
} }
// NOTE: intentional but temporary field cache insanity! assertEquals(docIDToID, expectedGroups, groupsResult, true, getScores);
assertEquals(docIDToID, expectedGroups, groupsResult, true);
final boolean needsScores = getScores || getMaxScores || docSort == null; final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock); final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);
final AllGroupsCollector allGroupsCollector2; final TermAllGroupsCollector allGroupsCollector2;
final Collector c4; final Collector c4;
if (doAllGroups) { if (doAllGroups) {
allGroupsCollector2 = new AllGroupsCollector("group"); allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2); c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else { } else {
allGroupsCollector2 = null; allGroupsCollector2 = null;
c4 = c3; c4 = c3;
} }
s2.search(new TermQuery(new Term("content", searchTerm)), c4); s2.search(new TermQuery(new Term("content", searchTerm)), c4);
final TopGroups tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields); @SuppressWarnings("unchecked")
final TopGroups<BytesRef> tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
final TopGroups groupsResult2; final TopGroups groupsResult2;
if (doAllGroups && tempTopGroups2 != null) { if (doAllGroups && tempTopGroups2 != null) {
assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount()); assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount());
groupsResult2 = new TopGroups(tempTopGroups2, allGroupsCollector2.getGroupCount()); groupsResult2 = new TopGroups<BytesRef>(tempTopGroups2, allGroupsCollector2.getGroupCount());
} else { } else {
groupsResult2 = tempTopGroups2; groupsResult2 = tempTopGroups2;
} }
assertEquals(docIDToID2, expectedGroups, groupsResult2, false);
if (expectedGroups != null) {
// Fixup scores for reader2
for (GroupDocs groupDocsHits : expectedGroups.groups) {
for(ScoreDoc hit : groupDocsHits.scoreDocs) {
final GroupDoc gd = groupDocsByID[hit.doc];
assertEquals(gd.id, hit.doc);
//System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
hit.score = gd.score2;
}
}
final SortField[] sortFields = groupSort.getSort();
final Map<Float,Float> termScoreMap = scoreMap.get(searchTerm);
for(int groupSortIDX=0;groupSortIDX<sortFields.length;groupSortIDX++) {
if (sortFields[groupSortIDX].getType() == SortField.SCORE) {
for (GroupDocs groupDocsHits : expectedGroups.groups) {
if (groupDocsHits.groupSortValues != null) {
//System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
}
}
}
}
final SortField[] docSortFields = docSort.getSort();
for(int docSortIDX=0;docSortIDX<docSortFields.length;docSortIDX++) {
if (docSortFields[docSortIDX].getType() == SortField.SCORE) {
for (GroupDocs groupDocsHits : expectedGroups.groups) {
for(ScoreDoc _hit : groupDocsHits.scoreDocs) {
FieldDoc hit = (FieldDoc) _hit;
if (hit.fields != null) {
hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
assertNotNull(hit.fields[docSortIDX]);
}
}
}
}
}
}
assertEquals(docIDToID2, expectedGroups, groupsResult2, false, getScores);
} }
} finally { } finally {
FieldCache.DEFAULT.purge(r); FieldCache.DEFAULT.purge(r);
FieldCache.DEFAULT.purge(r2); if (r2 != null) {
FieldCache.DEFAULT.purge(r2);
}
} }
r.close(); r.close();
@ -678,7 +816,7 @@ public class TestGrouping extends LuceneTestCase {
} }
} }
private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues) { private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean testScores) {
if (expected == null) { if (expected == null) {
assertNull(actual); assertNull(actual);
return; return;
@ -714,9 +852,14 @@ public class TestGrouping extends LuceneTestCase {
for(int docIDX=0;docIDX<expectedFDs.length;docIDX++) { for(int docIDX=0;docIDX<expectedFDs.length;docIDX++) {
final FieldDoc expectedFD = (FieldDoc) expectedFDs[docIDX]; final FieldDoc expectedFD = (FieldDoc) expectedFDs[docIDX];
final FieldDoc actualFD = (FieldDoc) actualFDs[docIDX]; final FieldDoc actualFD = (FieldDoc) actualFDs[docIDX];
//System.out.println(" actual doc=" + docIDtoID[actualFD.doc] + " score=" + actualFD.score);
assertEquals(expectedFD.doc, docIDtoID[actualFD.doc]); assertEquals(expectedFD.doc, docIDtoID[actualFD.doc]);
// TODO if (testScores) {
// assertEquals(expectedFD.score, actualFD.score); assertEquals(expectedFD.score, actualFD.score);
} else {
// TODO: too anal for now
//assertEquals(Float.NaN, actualFD.score);
}
assertArrayEquals(expectedFD.fields, actualFD.fields); assertArrayEquals(expectedFD.fields, actualFD.fields);
} }
} }

View File

@ -144,6 +144,10 @@ New Features
to IndexReader.open (in the case you have a custom IndexReaderFactory). to IndexReader.open (in the case you have a custom IndexReaderFactory).
(simonw via rmuir) (simonw via rmuir)
* SOLR-2136: Boolean type added to function queries, along with
new functions exists(), if(), and(), or(), xor(), not(), def(),
and true and false constants. (yonik)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -0,0 +1 @@
<!-- admin-extra.menu-bottom.html -->

View File

@ -0,0 +1 @@
<!-- admin-extra.menu-top.html -->

View File

@ -17,12 +17,16 @@
package org.apache.solr.schema; package org.apache.solr.schema;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
import org.apache.solr.search.MutableValue;
import org.apache.solr.search.MutableValueBool;
import org.apache.solr.search.MutableValueInt;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.*;
import org.apache.solr.search.function.OrdFieldSource;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -50,7 +54,7 @@ public class BoolField extends FieldType {
@Override @Override
public ValueSource getValueSource(SchemaField field, QParser qparser) { public ValueSource getValueSource(SchemaField field, QParser qparser) {
field.checkFieldCacheSource(qparser); field.checkFieldCacheSource(qparser);
return new OrdFieldSource(field.name); return new BoolFieldSource(field.name);
} }
// avoid instantiating every time... // avoid instantiating every time...
@ -121,7 +125,7 @@ public class BoolField extends FieldType {
@Override @Override
public Object toObject(SchemaField sf, BytesRef term) { public Object toObject(SchemaField sf, BytesRef term) {
return term.bytes[0] == 'T'; return term.bytes[term.offset] == 'T';
} }
@Override @Override
@ -145,6 +149,83 @@ public class BoolField extends FieldType {
@Override @Override
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeBool(name, f.stringValue().charAt(0) =='T'); writer.writeBool(name, f.stringValue().charAt(0) == 'T');
} }
} }
// TODO - this can be much more efficient - use OpenBitSet or Bits
/**
 * ValueSource that exposes a field as booleans by reading the FieldCache
 * terms index for that field.
 *
 * A document is considered true when its ord equals the ord of the
 * single-byte term 'T'. Ord 0 is treated as "no value" (see
 * {@code exists} below).
 */
class BoolFieldSource extends ValueSource {
  protected String field;

  public BoolFieldSource(String field) {
    this.field = field;
  }

  @Override
  public String description() {
    return "bool(" + field + ')';
  }

  @Override
  public DocValues getValues(Map context, IndexReader.AtomicReaderContext readerContext) throws IOException {
    final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, field);

    // figure out what ord maps to true
    // (the scan starts at 1 because ord 0 is treated as "missing" below)
    int nord = sindex.numOrd();
    BytesRef br = new BytesRef();
    int tord = -1;
    for (int i=1; i<nord; i++) {
      sindex.lookup(i, br);
      if (br.length==1 && br.bytes[br.offset]=='T') {
        tord = i;
        break;
      }
    }

    // stays -1 when no 'T' term was found, in which case no doc is true
    final int trueOrd = tord;

    return new BoolDocValues(this) {
      @Override
      public boolean boolVal(int doc) {
        return sindex.getOrd(doc) == trueOrd;
      }

      @Override
      public boolean exists(int doc) {
        return sindex.getOrd(doc) != 0;
      }

      @Override
      public ValueFiller getValueFiller() {
        return new ValueFiller() {
          private final MutableValueBool mval = new MutableValueBool();

          @Override
          public MutableValue getValue() {
            return mval;
          }

          @Override
          public void fillValue(int doc) {
            // single ord lookup serves both the value and the exists flag
            int ord = sindex.getOrd(doc);
            mval.value = (ord == trueOrd);
            mval.exists = (ord != 0);
          }
        };
      }
    };
  }

  @Override
  public boolean equals(Object o) {
    // null-safe: equals(null) must return false rather than throw
    return o != null
        && o.getClass() == BoolFieldSource.class
        && this.field.equals(((BoolFieldSource)o).field);
  }

  // seed the hash with this class (not OrdFieldSource) so the two source
  // types over the same field do not systematically collide
  private static final int hcode = BoolFieldSource.class.hashCode();

  @Override
  public int hashCode() {
    return hcode + field.hashCode();
  }
}

View File

@ -364,8 +364,14 @@ public class FunctionQParser extends QParser {
sp.expect(")"); sp.expect(")");
} }
else { else {
SchemaField f = req.getSchema().getField(id); if ("true".equals(id)) {
valueSource = f.getType().getValueSource(f, this); valueSource = new BoolConstValueSource(true);
} else if ("false".equals(id)) {
valueSource = new BoolConstValueSource(false);
} else {
SchemaField f = req.getSchema().getField(id);
valueSource = f.getType().getValueSource(f, this);
}
} }
} }

View File

@ -0,0 +1,60 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/**
 * Mutable holder for a boolean value together with the inherited
 * {@code exists} flag.
 */
public class MutableValueBool extends MutableValue {
  /** The current boolean value. */
  public boolean value;

  /** Returns the boxed value, or {@code null} when {@code exists} is false. */
  @Override
  public Object toObject() {
    return exists ? value : null;
  }

  /** Copies {@code value} and {@code exists} from {@code source}, which must be a MutableValueBool. */
  @Override
  public void copy(MutableValue source) {
    MutableValueBool s = (MutableValueBool) source;
    value = s.value;
    exists = s.exists;
  }

  /** Returns a new instance carrying the same {@code value} and {@code exists}. */
  @Override
  public MutableValue duplicate() {
    MutableValueBool v = new MutableValueBool();
    v.value = this.value;
    v.exists = this.exists;
    return v;
  }

  @Override
  public boolean equalsSameType(Object other) {
    MutableValueBool b = (MutableValueBool)other;
    return value == b.value && exists == b.exists;
  }

  /**
   * Orders false before true; for equal values, a non-existing entry
   * sorts before an existing one.
   */
  @Override
  public int compareSameType(Object other) {
    MutableValueBool b = (MutableValueBool)other;
    // must be negative (not 0) when this is false and other is true,
    // otherwise a.compare(b) and b.compare(a) disagree
    if (value != b.value) return value ? 1 : -1;
    if (exists == b.exists) return 0;
    return exists ? 1 : -1;
  }

  @Override
  public int hashCode() {
    return value ? 2 : (exists ? 1 : 0);
  }
}

View File

@ -32,6 +32,8 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
@ -382,6 +384,22 @@ public class QueryParsing {
} }
} }
/**
 * Appends a printable form of the indexed term {@code val} to {@code out}.
 *
 * With a field type, the term is converted through
 * {@code indexedToReadable}; if that conversion (or the append) throws,
 * an {@code EXCEPTION(val=...)} marker with the UTF-8 decoded term is
 * appended instead. Without a field type the term is decoded as UTF-8.
 */
static void writeFieldVal(BytesRef val, FieldType ft, Appendable out, int flags) throws IOException {
  if (ft == null) {
    // no field type available: fall back to the raw UTF-8 decoding
    out.append(val.utf8ToString());
    return;
  }

  try {
    final CharsRef printable = new CharsRef();
    ft.indexedToReadable(val, printable);
    out.append(printable);
  } catch (Exception e) {
    out.append("EXCEPTION(val=");
    out.append(val.utf8ToString());
    out.append(")");
  }
}
/** /**
* @see #toString(Query,IndexSchema) * @see #toString(Query,IndexSchema)
*/ */
@ -392,14 +410,14 @@ public class QueryParsing {
TermQuery q = (TermQuery) query; TermQuery q = (TermQuery) query;
Term t = q.getTerm(); Term t = q.getTerm();
FieldType ft = writeFieldName(t.field(), schema, out, flags); FieldType ft = writeFieldName(t.field(), schema, out, flags);
writeFieldVal(t.text(), ft, out, flags); writeFieldVal(t.bytes(), ft, out, flags);
} else if (query instanceof TermRangeQuery) { } else if (query instanceof TermRangeQuery) {
TermRangeQuery q = (TermRangeQuery) query; TermRangeQuery q = (TermRangeQuery) query;
String fname = q.getField(); String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags); FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesLower() ? '[' : '{'); out.append(q.includesLower() ? '[' : '{');
String lt = q.getLowerTerm().utf8ToString(); BytesRef lt = q.getLowerTerm();
String ut = q.getUpperTerm().utf8ToString(); BytesRef ut = q.getUpperTerm();
if (lt == null) { if (lt == null) {
out.append('*'); out.append('*');
} else { } else {

View File

@ -579,6 +579,134 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
return new NumDocsValueSource(); return new NumDocsValueSource();
} }
}); });
// "true": constant boolean source whose value is always true.
addParser("true", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
return new BoolConstValueSource(true);
}
});
// "false": constant boolean source whose value is always false.
addParser("false", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
return new BoolConstValueSource(false);
}
});
// "exists": parses one value source and reports DocValues.exists(doc) for it.
addParser("exists", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource vs = fp.parseValueSource();
return new SimpleBoolFunction(vs) {
@Override
protected String name() {
return "exists";
}
@Override
protected boolean func(int doc, DocValues vals) {
return vals.exists(doc);
}
};
}
});
// "not": parses one value source and negates its boolVal(doc).
addParser("not", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource vs = fp.parseValueSource();
return new SimpleBoolFunction(vs) {
@Override
protected boolean func(int doc, DocValues vals) {
return !vals.boolVal(doc);
}
@Override
protected String name() {
return "not";
}
};
}
});
// "and": n-ary; true only when every argument's boolVal(doc) is true.
// Stops at the first false argument; an empty argument array yields true.
addParser("and", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
List<ValueSource> sources = fp.parseValueSourceList();
return new MultiBoolFunction(sources) {
@Override
protected String name() {
return "and";
}
@Override
protected boolean func(int doc, DocValues[] vals) {
for (DocValues dv : vals)
if (!dv.boolVal(doc)) return false;
return true;
}
};
}
});
// "or": n-ary; true when any argument's boolVal(doc) is true.
// Stops at the first true argument; an empty argument array yields false.
addParser("or", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
List<ValueSource> sources = fp.parseValueSourceList();
return new MultiBoolFunction(sources) {
@Override
protected String name() {
return "or";
}
@Override
protected boolean func(int doc, DocValues[] vals) {
for (DocValues dv : vals)
if (dv.boolVal(doc)) return true;
return false;
}
};
}
});
// "xor": n-ary; true when the arguments are not all equal, i.e. at least
// one is true and at least one is false. With exactly two arguments this
// is the usual exclusive-or.
addParser("xor", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
List<ValueSource> sources = fp.parseValueSourceList();
return new MultiBoolFunction(sources) {
@Override
protected String name() {
return "xor";
}
@Override
protected boolean func(int doc, DocValues[] vals) {
int nTrue=0, nFalse=0;
for (DocValues dv : vals) {
if (dv.boolVal(doc)) nTrue++;
else nFalse++;
}
return nTrue != 0 && nFalse != 0;
}
};
}
});
// "if": parses three value sources in order (condition, true branch,
// false branch) and hands them to IfFunction.
// NOTE(review): the selection logic lives in IfFunction, not shown here.
addParser("if", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource ifValueSource = fp.parseValueSource();
ValueSource trueValueSource = fp.parseValueSource();
ValueSource falseValueSource = fp.parseValueSource();
return new IfFunction(ifValueSource, trueValueSource, falseValueSource);
}
});
// "def": wraps the parsed list of value sources in a DefFunction.
addParser("def", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws ParseException {
return new DefFunction(fp.parseValueSourceList());
}
});
} }
private static TInfo parseTerm(FunctionQParser fp) throws ParseException { private static TInfo parseTerm(FunctionQParser fp) throws ParseException {
@ -857,6 +985,11 @@ class LongConstValueSource extends ConstNumberSource {
public Number getNumber() { public Number getNumber() {
return constant; return constant;
} }
/** Boolean view of this constant: true when the constant is non-zero. */
@Override
public boolean getBool() {
return constant != 0;
}
} }
@ -981,3 +1114,69 @@ abstract class Double2Parser extends NamedParser {
} }
} }
/**
 * Constant source for a fixed boolean. The numeric accessors report the
 * constant as 1 (true) or 0 (false).
 */
class BoolConstValueSource extends ConstNumberSource {
  final boolean constant;

  public BoolConstValueSource(boolean constant) {
    this.constant = constant;
  }

  @Override
  public String description() {
    return "const(" + constant + ")";
  }

  /** Returns DocValues whose boolVal is the constant for every document. */
  @Override
  public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    return new BoolDocValues(this) {
      @Override
      public boolean boolVal(int doc) {
        return constant;
      }
    };
  }

  @Override
  public int hashCode() {
    return constant ? 0x12345678 : 0x87654321;
  }

  @Override
  public boolean equals(Object o) {
    // null-safe: equals(null) must return false rather than throw
    if (o == null || BoolConstValueSource.class != o.getClass()) return false;
    BoolConstValueSource other = (BoolConstValueSource) o;
    return this.constant == other.constant;
  }

  @Override
  public int getInt() {
    return constant ? 1 : 0;
  }

  @Override
  public long getLong() {
    return constant ? 1 : 0;
  }

  @Override
  public float getFloat() {
    return constant ? 1 : 0;
  }

  @Override
  public double getDouble() {
    return constant ? 1 : 0;
  }

  @Override
  public Number getNumber() {
    return constant ? 1 : 0;
  }

  @Override
  public boolean getBool() {
    return constant;
  }
}

View File

@ -0,0 +1,79 @@
package org.apache.solr.search.function;
import org.apache.solr.search.MutableValue;
import org.apache.solr.search.MutableValueBool;
import org.apache.solr.search.MutableValueInt;
/**
 * DocValues base class for boolean-valued sources. Subclasses implement
 * {@link #boolVal(int)}; every numeric accessor maps true to 1 and false
 * to 0, and {@link #strVal(int)} yields "true" or "false".
 */
public abstract class BoolDocValues extends DocValues {
  protected final ValueSource vs;

  public BoolDocValues(ValueSource vs) {
    this.vs = vs;
  }

  @Override
  public abstract boolean boolVal(int doc);

  @Override
  public byte byteVal(int doc) {
    if (boolVal(doc)) {
      return (byte) 1;
    }
    return (byte) 0;
  }

  @Override
  public short shortVal(int doc) {
    if (boolVal(doc)) {
      return (short) 1;
    }
    return (short) 0;
  }

  @Override
  public float floatVal(int doc) {
    return boolVal(doc) ? 1.0f : 0.0f;
  }

  @Override
  public int intVal(int doc) {
    if (boolVal(doc)) {
      return 1;
    }
    return 0;
  }

  @Override
  public long longVal(int doc) {
    return boolVal(doc) ? 1L : 0L;
  }

  @Override
  public double doubleVal(int doc) {
    return boolVal(doc) ? 1.0d : 0.0d;
  }

  @Override
  public String strVal(int doc) {
    return String.valueOf(boolVal(doc));
  }

  /** Boxed boolean for the doc, or null when exists(doc) is false. */
  @Override
  public Object objectVal(int doc) {
    if (!exists(doc)) {
      return null;
    }
    return boolVal(doc);
  }

  @Override
  public String toString(int doc) {
    final String label = vs.description();
    return label + '=' + strVal(doc);
  }

  /** Filler that writes boolVal/exists for a doc into one reused MutableValueBool. */
  @Override
  public ValueFiller getValueFiller() {
    return new ValueFiller() {
      private final MutableValueBool holder = new MutableValueBool();

      @Override
      public MutableValue getValue() {
        return holder;
      }

      @Override
      public void fillValue(int doc) {
        holder.value = boolVal(doc);
        holder.exists = exists(doc);
      }
    };
  }
}

View File

@ -0,0 +1,23 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.function;
/**
 * Abstract ValueSource subtype that currently declares no members of its own.
 * NOTE(review): presumably the common base for boolean-valued functions
 * (e.g. SimpleBoolFunction / MultiBoolFunction) - confirm against those classes.
 */
public abstract class BoolFunction extends ValueSource {
// TODO: placeholder to return type, among other common future functionality
}

View File

@ -26,4 +26,5 @@ public abstract class ConstNumberSource extends ValueSource {
public abstract float getFloat(); public abstract float getFloat();
public abstract double getDouble(); public abstract double getDouble();
public abstract Number getNumber(); public abstract Number getNumber();
public abstract boolean getBool();
} }

View File

@ -66,6 +66,10 @@ public class ConstValueSource extends ConstNumberSource {
public Object objectVal(int doc) { public Object objectVal(int doc) {
return constant; return constant;
} }
/** True when the constant differs from 0.0f; the doc argument is not consulted. */
@Override
public boolean boolVal(int doc) {
return constant != 0.0f;
}
}; };
} }
@ -105,4 +109,9 @@ public class ConstValueSource extends ConstNumberSource {
public Number getNumber() { public Number getNumber() {
return constant; return constant;
} }
/** Boolean view of this constant: true when it differs from 0.0f. */
@Override
public boolean getBool() {
return constant != 0.0f;
}
} }

View File

@ -0,0 +1,124 @@
package org.apache.solr.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * The "def" function: for each document, delegates to the first source
 * (other than the last) whose DocValues report exists(doc); when none does,
 * delegates to the last source unconditionally.
 */
public class DefFunction extends MultiFunction {
  public DefFunction(List<ValueSource> sources) {
    super(sources);
  }

  @Override
  protected String name() {
    return "def";
  }

  @Override
  public DocValues getValues(Map fcontext, AtomicReaderContext readerContext) throws IOException {
    return new Values(valsArr(sources, fcontext, readerContext)) {
      // index of the final source, which acts as the fallback
      final int lastIdx = valsArr.length - 1;

      /** Chooses the DocValues to delegate to for the given doc. */
      private DocValues pick(int doc) {
        DocValues chosen = null;
        for (int i = 0; i < lastIdx && chosen == null; i++) {
          if (valsArr[i].exists(doc)) {
            chosen = valsArr[i];
          }
        }
        return chosen != null ? chosen : valsArr[lastIdx];
      }

      @Override
      public byte byteVal(int doc) {
        return pick(doc).byteVal(doc);
      }

      @Override
      public short shortVal(int doc) {
        return pick(doc).shortVal(doc);
      }

      @Override
      public float floatVal(int doc) {
        return pick(doc).floatVal(doc);
      }

      @Override
      public int intVal(int doc) {
        return pick(doc).intVal(doc);
      }

      @Override
      public long longVal(int doc) {
        return pick(doc).longVal(doc);
      }

      @Override
      public double doubleVal(int doc) {
        return pick(doc).doubleVal(doc);
      }

      @Override
      public String strVal(int doc) {
        return pick(doc).strVal(doc);
      }

      @Override
      public boolean boolVal(int doc) {
        return pick(doc).boolVal(doc);
      }

      @Override
      public boolean bytesVal(int doc, BytesRef target) {
        return pick(doc).bytesVal(doc, target);
      }

      @Override
      public Object objectVal(int doc) {
        return pick(doc).objectVal(doc);
      }

      @Override
      public boolean exists(int doc) {
        // true as soon as any source (including the last) has a value
        for (int i = 0; i < valsArr.length; i++) {
          if (valsArr[i].exists(doc)) {
            return true;
          }
        }
        return false;
      }

      @Override
      public ValueFiller getValueFiller() {
        // TODO: need ValueSource.type() to determine correct type
        return super.getValueFiller();
      }
    };
  }
}

View File

@ -48,6 +48,10 @@ public abstract class DocValues {
// TODO: should we make a termVal, returns BytesRef? // TODO: should we make a termVal, returns BytesRef?
public String strVal(int doc) { throw new UnsupportedOperationException(); } public String strVal(int doc) { throw new UnsupportedOperationException(); }
/** Boolean view of the value for {@code doc}: true when {@code intVal(doc)} is non-zero. */
public boolean boolVal(int doc) {
return intVal(doc) != 0;
}
/** returns the bytes representation of the string val - TODO: should this return the indexed raw bytes not? */ /** returns the bytes representation of the string val - TODO: should this return the indexed raw bytes not? */
public boolean bytesVal(int doc, BytesRef target) { public boolean bytesVal(int doc, BytesRef target) {
String s = strVal(doc); String s = strVal(doc);

View File

@ -115,4 +115,9 @@ public class DoubleConstValueSource extends ConstNumberSource {
public Number getNumber() { public Number getNumber() {
return constant; return constant;
} }
/** Boolean view of this constant: true when {@code constant} is not equal to zero. */
@Override
public boolean getBool() {
return constant != 0;
}
} }

View File

@ -35,6 +35,11 @@ public abstract class DoubleDocValues extends DocValues {
return (long)doubleVal(doc); return (long)doubleVal(doc);
} }
/**
 * Boolean view of the double value for {@code doc}: true when
 * {@code doubleVal(doc)} is not equal to zero.
 */
@Override
public boolean boolVal(int doc) {
// Note: under Java's comparison rules NaN != 0 is true, so a NaN value reads as true.
return doubleVal(doc) != 0;
}
@Override @Override
public abstract double doubleVal(int doc); public abstract double doubleVal(int doc);

View File

@ -53,40 +53,15 @@ public class DoubleFieldSource extends NumericFieldCacheSource<DoubleValues> {
final double[] arr = vals.values; final double[] arr = vals.values;
final Bits valid = vals.valid; final Bits valid = vals.valid;
return new DocValues() { return new DoubleDocValues(this) {
@Override
public float floatVal(int doc) {
return (float) arr[doc];
}
@Override
public int intVal(int doc) {
return (int) arr[doc];
}
@Override
public long longVal(int doc) {
return (long) arr[doc];
}
@Override @Override
public double doubleVal(int doc) { public double doubleVal(int doc) {
return arr[doc]; return arr[doc];
} }
@Override @Override
public String strVal(int doc) { public boolean exists(int doc) {
return Double.toString(arr[doc]); return valid.get(doc);
}
@Override
public Object objectVal(int doc) {
return valid.get(doc) ? arr[doc] : null;
}
@Override
public String toString(int doc) {
return description() + '=' + doubleVal(doc);
} }
@Override @Override
@ -147,7 +122,7 @@ public class DoubleFieldSource extends NumericFieldCacheSource<DoubleValues> {
} }
} }
@Override @Override
public ValueFiller getValueFiller() { public ValueFiller getValueFiller() {
return new ValueFiller() { return new ValueFiller() {
private final double[] doubleArr = arr; private final double[] doubleArr = arr;

View File

@ -0,0 +1,148 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * The "if" function: evaluates {@code ifSource} as a boolean for each document
 * and delegates every typed accessor to {@code trueSource} when it is true and
 * to {@code falseSource} otherwise.
 */
public class IfFunction extends BoolFunction {
  // All three operands are assigned exactly once, in the constructor.
  private final ValueSource ifSource;
  private final ValueSource trueSource;
  private final ValueSource falseSource;

  /**
   * @param ifSource    condition, read through {@code boolVal(doc)}
   * @param trueSource  values used when the condition is true
   * @param falseSource values used when the condition is false
   */
  public IfFunction(ValueSource ifSource, ValueSource trueSource, ValueSource falseSource) {
    this.ifSource = ifSource;
    this.trueSource = trueSource;
    this.falseSource = falseSource;
  }

  /**
   * Resolves the per-reader values of all three operands up front and returns
   * a view that chooses between the true/false values on each call.
   */
  @Override
  public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final DocValues ifVals = ifSource.getValues(context, readerContext);
    final DocValues trueVals = trueSource.getValues(context, readerContext);
    final DocValues falseVals = falseSource.getValues(context, readerContext);

    return new DocValues() {
      @Override
      public byte byteVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.byteVal(doc) : falseVals.byteVal(doc);
      }

      @Override
      public short shortVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.shortVal(doc) : falseVals.shortVal(doc);
      }

      @Override
      public float floatVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.floatVal(doc) : falseVals.floatVal(doc);
      }

      @Override
      public int intVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.intVal(doc) : falseVals.intVal(doc);
      }

      @Override
      public long longVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.longVal(doc) : falseVals.longVal(doc);
      }

      @Override
      public double doubleVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.doubleVal(doc) : falseVals.doubleVal(doc);
      }

      @Override
      public String strVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.strVal(doc) : falseVals.strVal(doc);
      }

      @Override
      public boolean boolVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.boolVal(doc) : falseVals.boolVal(doc);
      }

      @Override
      public boolean bytesVal(int doc, BytesRef target) {
        return ifVals.boolVal(doc) ? trueVals.bytesVal(doc, target) : falseVals.bytesVal(doc, target);
      }

      @Override
      public Object objectVal(int doc) {
        return ifVals.boolVal(doc) ? trueVals.objectVal(doc) : falseVals.objectVal(doc);
      }

      @Override
      public boolean exists(int doc) {
        return true; // TODO: flow through to any sub-sources?
      }

      @Override
      public ValueFiller getValueFiller() {
        // TODO: we need types of trueSource / falseSource to handle this
        // for now, use float.
        return super.getValueFiller();
      }

      @Override
      public String toString(int doc) {
        return "if(" + ifVals.toString(doc) + ',' + trueVals.toString(doc) + ',' + falseVals.toString(doc) + ')';
      }
    };
  }

  /** Renders as {@code if(<cond>,<true>,<false>)} using each operand's description. */
  @Override
  public String description() {
    // All three operands go through description(); the original concatenated
    // falseSource directly, relying on its toString() instead.
    return "if(" + ifSource.description() + ',' + trueSource.description() + ',' + falseSource.description() + ')';
  }

  @Override
  public int hashCode() {
    int h = ifSource.hashCode();
    h = h * 31 + trueSource.hashCode();
    h = h * 31 + falseSource.hashCode();
    return h;
  }

  /** Two IfFunctions are equal when all three operands are equal. */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof IfFunction)) return false;
    IfFunction other = (IfFunction)o;
    return ifSource.equals(other.ifSource)
        && trueSource.equals(other.trueSource)
        && falseSource.equals(other.falseSource);
  }

  /** Propagates weight creation to all three operands. */
  @Override
  public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    ifSource.createWeight(context, searcher);
    trueSource.createWeight(context, searcher);
    falseSource.createWeight(context, searcher);
  }
}

View File

@ -38,6 +38,11 @@ public abstract class LongDocValues extends DocValues {
return (double)longVal(doc); return (double)longVal(doc);
} }
/** Boolean view of the long value for {@code doc}: true when {@code longVal(doc)} is non-zero. */
@Override
public boolean boolVal(int doc) {
return longVal(doc) != 0;
}
@Override @Override
public String strVal(int doc) { public String strVal(int doc) {
return Long.toString(longVal(doc)); return Long.toString(longVal(doc));

View File

@ -0,0 +1,105 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.function;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Base class for boolean functions over a list of sources. Subclasses supply
 * the function {@link #name()} and the combining logic in
 * {@link #func(int, DocValues[])}.
 */
public abstract class MultiBoolFunction extends BoolFunction {
  protected final List<ValueSource> sources;

  public MultiBoolFunction(List<ValueSource> sources) {
    this.sources = sources;
  }

  /** Function name used in {@link #description()}, toString output and hashCode. */
  protected abstract String name();

  /**
   * Computes the boolean result for {@code doc}.
   *
   * @param doc  document id
   * @param vals per-source values, in the same order as {@link #sources}
   */
  protected abstract boolean func(int doc, DocValues[] vals);

  /** Resolves the values of every source and wraps them in a boolean view backed by func(). */
  @Override
  public BoolDocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final DocValues[] vals = new DocValues[sources.size()];
    int i=0;
    for (ValueSource source : sources) {
      vals[i++] = source.getValues(context, readerContext);
    }

    return new BoolDocValues(this) {
      @Override
      public boolean boolVal(int doc) {
        return func(doc, vals);
      }

      @Override
      public String toString(int doc) {
        StringBuilder sb = new StringBuilder(name());
        sb.append('(');
        boolean first = true;
        for (DocValues dv : vals) {
          if (first) {
            first = false;
          } else {
            sb.append(',');
          }
          sb.append(dv.toString(doc));
        }
        // close the argument list (the original left the parenthesis unbalanced)
        sb.append(')');
        return sb.toString();
      }
    };
  }

  /** Renders as {@code name(desc1,desc2,...)}. */
  @Override
  public String description() {
    StringBuilder sb = new StringBuilder(name());
    sb.append('(');
    boolean first = true;
    for (ValueSource source : sources) {
      if (first) {
        first = false;
      } else {
        sb.append(',');
      }
      sb.append(source.description());
    }
    // close the argument list (the original left the parenthesis unbalanced)
    sb.append(')');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return sources.hashCode() + name().hashCode();
  }

  /** Equal when {@code o} has exactly the same class and an equal source list. */
  @Override
  public boolean equals(Object o) {
    // null must compare unequal rather than throw
    if (o == null || this.getClass() != o.getClass()) return false;
    MultiBoolFunction other = (MultiBoolFunction)o;
    return this.sources.equals(other.sources);
  }

  /** Propagates weight creation to every source. */
  @Override
  public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    for (ValueSource source : sources) {
      source.createWeight(context, searcher);
    }
  }
}

View File

@ -0,0 +1,122 @@
package org.apache.solr.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * Base class for functions over a list of sources. Provides shared helpers for
 * resolving per-source values and for rendering {@code name(arg1,arg2,...)}
 * strings; subclasses supply {@link #name()} and the value logic.
 */
public abstract class MultiFunction extends ValueSource {
  protected final List<ValueSource> sources;

  public MultiFunction(List<ValueSource> sources) {
    this.sources = sources;
  }

  /** Function name used in descriptions, toString output and hashCode. */
  abstract protected String name();

  @Override
  public String description() {
    return description(name(), sources);
  }

  /** Builds {@code name(src1,src2,...)} from the string form of each source. */
  public static String description(String name, List<ValueSource> sources) {
    StringBuilder sb = new StringBuilder();
    sb.append(name).append('(');
    boolean firstTime=true;
    for (ValueSource source : sources) {
      if (firstTime) {
        firstTime=false;
      } else {
        sb.append(',');
      }
      sb.append(source);
    }
    sb.append(')');
    return sb.toString();
  }

  /** Resolves the values of every source, returned in the same order as {@code sources}. */
  public static DocValues[] valsArr(List<ValueSource> sources, Map fcontext, AtomicReaderContext readerContext) throws IOException {
    final DocValues[] valsArr = new DocValues[sources.size()];
    int i=0;
    for (ValueSource source : sources) {
      valsArr[i++] = source.getValues(fcontext, readerContext);
    }
    return valsArr;
  }

  /**
   * DocValues base that holds the per-source values and renders toString via
   * the enclosing function's {@link #name()}.
   */
  public class Values extends DocValues {
    final DocValues[] valsArr;

    public Values(DocValues[] valsArr) {
      this.valsArr = valsArr;
    }

    @Override
    public String toString(int doc) {
      return MultiFunction.toString(name(), valsArr, doc);
    }

    @Override
    public ValueFiller getValueFiller() {
      // TODO: need ValueSource.type() to determine correct type
      return super.getValueFiller();
    }
  }

  /** Builds {@code name(v1,v2,...)} from each source's toString(doc). */
  public static String toString(String name, DocValues[] valsArr, int doc) {
    StringBuilder sb = new StringBuilder();
    sb.append(name).append('(');
    boolean firstTime=true;
    for (DocValues vals : valsArr) {
      if (firstTime) {
        firstTime=false;
      } else {
        sb.append(',');
      }
      sb.append(vals.toString(doc));
    }
    sb.append(')');
    return sb.toString();
  }

  /** Propagates weight creation to every source. */
  @Override
  public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    for (ValueSource source : sources) {
      source.createWeight(context, searcher);
    }
  }

  @Override
  public int hashCode() {
    return sources.hashCode() + name().hashCode();
  }

  /** Equal when {@code o} has exactly the same class and an equal source list. */
  @Override
  public boolean equals(Object o) {
    // null must compare unequal rather than throw
    if (o == null || this.getClass() != o.getClass()) return false;
    MultiFunction other = (MultiFunction)o;
    return this.sources.equals(other.sources);
  }
}

View File

@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.function;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
import java.util.Map;
/**
 * Base class for boolean functions over a single source. Subclasses supply the
 * function {@link #name()} and the logic in {@link #func(int, DocValues)}.
 */
public abstract class SimpleBoolFunction extends BoolFunction {
  protected final ValueSource source;

  public SimpleBoolFunction(ValueSource source) {
    this.source = source;
  }

  /** Function name used in {@link #description()}, toString output and hashCode. */
  protected abstract String name();

  /**
   * Computes the boolean result for {@code doc}.
   *
   * @param doc  document id
   * @param vals values of the wrapped source
   */
  protected abstract boolean func(int doc, DocValues vals);

  /** Resolves the source's values and wraps them in a boolean view backed by func(). */
  @Override
  public BoolDocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final DocValues vals = source.getValues(context, readerContext);
    return new BoolDocValues(this) {
      @Override
      public boolean boolVal(int doc) {
        return func(doc, vals);
      }

      @Override
      public String toString(int doc) {
        return name() + '(' + vals.toString(doc) + ')';
      }
    };
  }

  /** Renders as {@code name(<source description>)}. */
  @Override
  public String description() {
    return name() + '(' + source.description() + ')';
  }

  @Override
  public int hashCode() {
    return source.hashCode() + name().hashCode();
  }

  /** Equal when {@code o} has exactly the same class and an equal source. */
  @Override
  public boolean equals(Object o) {
    // null must compare unequal rather than throw
    if (o == null || this.getClass() != o.getClass()) return false;
    // Cast to this hierarchy: the original cast to SingleFunction, which the
    // getClass() check above guarantees o is not, so it always threw
    // ClassCastException for same-class comparisons.
    SimpleBoolFunction other = (SimpleBoolFunction)o;
    return this.source.equals(other.source);
  }

  /** Propagates weight creation to the wrapped source. */
  @Override
  public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    source.createWeight(context, searcher);
  }
}

View File

@ -21,6 +21,11 @@ public abstract class StrDocValues extends DocValues {
return exists(doc) ? strVal(doc) : null; return exists(doc) ? strVal(doc) : null;
} }
/** Boolean view of a string value: true exactly when a value exists for {@code doc}. */
@Override
public boolean boolVal(int doc) {
return exists(doc);
}
@Override @Override
public String toString(int doc) { public String toString(int doc) {
return vs.description() + "='" + strVal(doc) + "'"; return vs.description() + "='" + strVal(doc) + "'";

View File

@ -78,6 +78,10 @@ public abstract class StringIndexDocValues extends DocValues {
return spareChars.toString(); return spareChars.toString();
} }
/** Boolean view of the value: true exactly when {@code exists(doc)} is true. */
@Override
public boolean boolVal(int doc) {
return exists(doc);
}
@Override @Override
public abstract Object objectVal(int doc); // force subclasses to override public abstract Object objectVal(int doc); // force subclasses to override

View File

@ -120,6 +120,28 @@ public class TestQueryTypes extends AbstractSolrTestCase {
,"//result[@numFound='1']" ,"//result[@numFound='1']"
); );
// exists()
assertQ(req( "fq","id:999", "q", "{!frange l=1 u=1}if(exists("+f+"),1,0)" )
,"//result[@numFound='1']"
);
// boolean value of non-zero values (just leave off the exists from the prev test)
assertQ(req( "fq","id:999", "q", "{!frange l=1 u=1}if("+f+",1,0)" )
,"//result[@numFound='1']"
);
if (!"id".equals(f)) {
assertQ(req( "fq","id:1", "q", "{!frange l=1 u=1}if(exists("+f+"),1,0)" )
,"//result[@numFound='0']"
);
// boolean value of zero/missing values (just leave off the exists from the prev test)
assertQ(req( "fq","id:1", "q", "{!frange l=1 u=1}if("+f+",1,0)" )
,"//result[@numFound='0']"
);
}
// function query... just make sure it doesn't throw an exception // function query... just make sure it doesn't throw an exception
if ("v_s".equals(f)) continue; // in this context, functions must be able to be interpreted as a float if ("v_s".equals(f)) continue; // in this context, functions must be able to be interpreted as a float
assertQ(req( "q", "+id:999 _val_:\"" + f + "\"") assertQ(req( "q", "+id:999 _val_:\"" + f + "\"")

View File

@ -581,4 +581,56 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity
} }
/**
 * Exercises the boolean function queries through the "fl" pseudo-field
 * syntax: true/false constants, exists(), if(), and/or/xor/not, def(), and
 * conversion of numeric and string constants to booleans. Every request
 * selects document id:1 and checks the first returned doc.
 */
@Test
public void testBooleanFunctions() throws Exception {
// doc 1 carries the fields the assertions read; doc 2 is indexed but never selected below
assertU(adoc("id", "1", "text", "hello", "foo_s","A", "foo_ti", "0", "foo_tl","0"));
assertU(adoc("id", "2" , "foo_ti","10", "foo_tl","11"));
assertU(commit());
// true and false functions and constants
assertJQ(req("q", "id:1", "fl", "t:true(),f:false(),tt:{!func}true,ff:{!func}false")
, "/response/docs/[0]=={'t':true,'f':false,'tt':true,'ff':false}");
// test that exists(query) depends on the query matching the document
assertJQ(req("q", "id:1", "fl", "t:exists(query($q1)),f:exists(query($q2))", "q1","text:hello", "q2","text:there")
, "/response/docs/[0]=={'t':true,'f':false}");
// test if()
assertJQ(req("q", "id:1", "fl", "a1:if(true,'A','B')", "fl","b1:if(false,'A','B')")
, "/response/docs/[0]=={'a1':'A', 'b1':'B'}");
// test boolean operators
assertJQ(req("q", "id:1", "fl", "t1:and(true,true)", "fl","f1:and(true,false)", "fl","f2:and(false,true)", "fl","f3:and(false,false)")
, "/response/docs/[0]=={'t1':true, 'f1':false, 'f2':false, 'f3':false}");
assertJQ(req("q", "id:1", "fl", "t1:or(true,true)", "fl","t2:or(true,false)", "fl","t3:or(false,true)", "fl","f1:or(false,false)")
, "/response/docs/[0]=={'t1':true, 't2':true, 't3':true, 'f1':false}");
assertJQ(req("q", "id:1", "fl", "f1:xor(true,true)", "fl","t1:xor(true,false)", "fl","t2:xor(false,true)", "fl","f2:xor(false,false)")
, "/response/docs/[0]=={'t1':true, 't2':true, 'f1':false, 'f2':false}");
assertJQ(req("q", "id:1", "fl", "t:not(false),f:not(true)")
, "/response/docs/[0]=={'t':true, 'f':false}");
// def(), the default function that returns the first value that exists
// (doc 1 has no foo_f or missing_s value, so the second argument is expected there)
assertJQ(req("q", "id:1", "fl", "x:def(id,123.0), y:def(foo_f,234.0)")
, "/response/docs/[0]=={'x':1.0, 'y':234.0}");
assertJQ(req("q", "id:1", "fl", "x:def(foo_s,'Q'), y:def(missing_s,'W')")
, "/response/docs/[0]=={'x':'A', 'y':'W'}");
// test constant conversion to boolean
// (zero constants are expected to read as false, non-zero numbers and the string 'A' as true)
assertJQ(req("q", "id:1", "fl", "a:not(0), b:not(1), c:not(0.0), d:not(1.1), e:not('A')")
, "/response/docs/[0]=={'a':true, 'b':false, 'c':true, 'd':false, 'e':false}");
}
/**
 * Checks that constants and function queries can be requested as "fl"
 * pseudo-fields, and that a function over a field with no value for the
 * document (bar_s here) contributes no pseudo-field to the response: the
 * expected doc has keys a-d but no 'e'.
 */
@Test
public void testPseudoFieldFunctions() throws Exception {
assertU(adoc("id", "1", "text", "hello", "foo_s","A"));
assertU(adoc("id", "2"));
assertU(commit());
assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s") // if exists() is false, no pseudo-field should be added
, "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}");
}
} }

View File

@ -462,6 +462,7 @@ ul
#content #dashboard .block #content #dashboard .block
{ {
background-image: none;
width: 49%; width: 49%;
} }
@ -550,85 +551,13 @@ ul
display: block; display: block;
} }
#content #dashboard #replication.is-master .slave #content #dashboard #replication #details table thead td span
{ {
display: none; display: none;
} }
#content #dashboard #replication table
{
border-collapse: collapse;
}
#content #dashboard #replication table th,
#content #dashboard #replication table td
{
border: 1px solid #f0f0f0;
padding: 2px 5px;
}
#content #dashboard #replication table thead td
{
border: 0;
}
#content #dashboard #replication table thead th,
#content #dashboard #replication table tbody td
{
border-right: 0;
}
#content #dashboard #replication table thead th
{
border-top: 0;
font-weight: bold;
}
#content #dashboard #replication table tbody th,
#content #dashboard #replication table tbody td
{
border-bottom: 0;
text-align: right;
}
#content #dashboard #replication table tbody th
{
border-left: 0;
}
#content #dashboard #replication table tbody th,
#content #dashboard #replication dt
{
width: 100px;
}
#content #dashboard #replication dl
{
display: none;
margin-top: 10px;
}
#content #dashboard #replication dt,
#content #dashboard #replication dd
{
display: block;
padding-top: 1px;
padding-bottom: 1px;
}
#content #dashboard #replication dt
{
border-right: 1px solid #f0f0f0;
float: left;
padding-left: 5px;
padding-right: 5px;
margin-right: 3px;
text-align: right;
}
#content #dashboard #dataimport #content #dashboard #dataimport
{ {
background-color: #0ff;
float: right; float: right;
} }
@ -711,6 +640,19 @@ ul
max-width: 99%; max-width: 99%;
} }
#content #analysis #analysis-error
{
background-color: #f00;
background-image: url( ../img/ico/construction.png );
background-position: 10px 50%;
color: #fff;
display: none;
font-weight: bold;
margin-bottom: 20px;
padding: 10px;
padding-left: 35px;
}
#content #analysis .analysis-result h2 #content #analysis .analysis-result h2
{ {
position: relative; position: relative;
@ -1334,6 +1276,12 @@ ul
padding-left: 10px; padding-left: 10px;
} }
#content #schema-browser #related #f-df-t
{
border-bottom: 1px solid #f0f0f0;
padding-bottom: 15px;
}
#content #schema-browser #related dl #content #schema-browser #related dl
{ {
margin-top: 15px; margin-top: 15px;
@ -1367,7 +1315,9 @@ ul
#content #schema-browser #related .dynamic-field .dynamic-field, #content #schema-browser #related .dynamic-field .dynamic-field,
#content #schema-browser #related .dynamic-field .dynamic-field a, #content #schema-browser #related .dynamic-field .dynamic-field a,
#content #schema-browser #related .type .type, #content #schema-browser #related .type .type,
#content #schema-browser #related .type .type a #content #schema-browser #related .type .type a,
#content #schema-browser #related .active,
#content #schema-browser #related .active a
{ {
color: #333; color: #333;
} }
@ -1378,6 +1328,11 @@ ul
color: #666; color: #666;
} }
#content #schema-browser #data
{
display: none;
}
#content #schema-browser #data #index dt #content #schema-browser #data #index dt
{ {
display: none; display: none;
@ -1491,6 +1446,7 @@ ul
#content #schema-browser #data #field .topterms-holder #content #schema-browser #data #field .topterms-holder
{ {
display: none;
float: left; float: left;
} }
@ -2830,6 +2786,7 @@ ul
#content #replication #details table tbody .size #content #replication #details table tbody .size
{ {
text-align: right; text-align: right;
white-space: nowrap;
} }
#content #replication #details table tbody .generation div #content #replication #details table tbody .generation div

View File

@ -35,11 +35,11 @@
<div id="wip-notice"> <div id="wip-notice">
<p>This interface is work in progress. It works best in Chrome.</p> <p>This interface is work in progress. It works best in Chrome.</p>
<p><a href="admin/">Use the <span>old admin interface</span> if there are problems with this one.</a></p> <p><a href="admin">Use the <span>old admin interface</span> if there are problems with this one.</a></p>
<p><a href="https://issues.apache.org/jira/browse/SOLR-2399">Bugs/Requests/Suggestions: <span>SOLR-2399</span></a></p> <p><a href="https://issues.apache.org/jira/browse/SOLR-2399">Bugs/Requests/Suggestions: <span>SOLR-2399</span></a></p>
</div> </div>
<p id="environment">&nbsp;</p> <p id="environment">&nbsp;</p>
</div> </div>

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,11 @@
<div id="analysis"> <div id="analysis">
<div class="block" id="analysis-error">
This Functionality requires the <code>/analysis/field</code> Handler to be registered and active!
</div>
<div class="block" id="field-analysis"> <div class="block" id="field-analysis">
<h2><span>Field Analysis</span></h2> <h2><span>Field Analysis</span></h2>

View File

@ -18,8 +18,10 @@
<form> <form>
<input type="hidden" name="action" value="RENAME">
<p class="clearfix"><label for="rename_core">from:</label> <p class="clearfix"><label for="rename_core">from:</label>
<input type="text" name="core" id="rename_core" disabled="disabled"></p> <input type="text" name="core" id="rename_core" readonly="readonly"></p>
<p class="clearfix"><label for="rename_other">to:</label> <p class="clearfix"><label for="rename_other">to:</label>
<input type="text" name="other" id="rename_other"></p> <input type="text" name="other" id="rename_other"></p>
@ -42,12 +44,15 @@
<form> <form>
<input type="hidden" name="action" value="SWAP">
<input type="hidden" name="core">
<p class="clearfix"><label for="swap_core">this:</label> <p class="clearfix"><label for="swap_core">this:</label>
<select name="core" id="swap_core" class="core" disabled="disabled"> <select id="swap_core" class="core" disabled="disabled">
</select></p> </select></p>
<p class="clearfix"><label for="swap_other">and:</label> <p class="clearfix"><label for="swap_other">and:</label>
<select class="other" id="swap_other" class="other"> <select name="other" id="swap_other" class="other">
</select></p> </select></p>
<p class="clearfix buttons"> <p class="clearfix buttons">
@ -181,6 +186,8 @@
<form> <form>
<input type="hidden" name="action" value="CREATE">
<p class="clearfix"><label for="add_name">name:</label> <p class="clearfix"><label for="add_name">name:</label>
<input type="text" name="name" id="add_name"></p> <input type="text" name="name" id="add_name"></p>

View File

@ -63,96 +63,52 @@
<div class="message"></div> <div class="message"></div>
</div> </div>
<div class="content"> <div class="content clearfix"id="details">
<table border="0" cellspacing="0" cellpadding="0"> <table border="0" cellspacing="0" cellpadding="0">
<thead> <thead>
<tr> <tr>
<td>&nbsp;</td>
<th class="slave">slave</th> <td><span>Index</span></td>
<th>master</th> <th>Version</th>
<th><abbr title="Generation">Gen</abbr></th>
<th>Size</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
<tr>
<th>indexVersion</th> <tr class="master">
<td class="slave value details_slave_master-details_index-version"></td>
<td class="value details_index-version"></td> <th>Master:</th>
<td class="version"><div>x</div></td>
<td class="generation"><div>y</div></td>
<td class="size"><div>z</div></td>
</tr> </tr>
<tr>
<th>generation</th> <tr class="slave slaveOnly">
<td class="slave value details_slave_master-details_generation"></td>
<td class="value details_generation"></td> <th>Slave:</th>
</tr> <td class="version"><div>a</div></td>
<tr> <td class="generation"><div>c</div></td>
<th>indexSize</th> <td class="size"><div>c</div></td>
<td class="slave value details_slave_master-details_index-size"></td>
<td class="value details_index-size"></td>
</tr> </tr>
</tbody> </tbody>
</table> </table>
<dl class="clearfix slave">
<dt class="details_slave_master-url">masterUrl</dt>
<dd class="value details_slave_master-url"></dd>
<dt class="details_slave_poll-interval">poll every</dt>
<dd class="value details_slave_poll-interval"></dd>
<dt class="details_slave_index-replicated-at">last replicated</dt>
<dd class="value timeago details_slave_index-replicated-at"></dd>
<dt class="details_slave_next-execution-at">replicate next</dt>
<dd class="value timeago details_slave_next-execution-at"></dd>
<dt class="details_slave_replication-failed-at">last failed</dt>
<dd class="value timeago details_slave_replication-failed-at"></dd>
</dl>
<!--
indexVersion:
1295900553587
generation:
2
indexSize:
4.25 KB
// slave
indexVersion:
1295900553587
generation:
2
indexSize:
4.25 KB
masterUrl:
http://localhost:8985/solr/replication
pollInterval:
00:00:60
indexReplicatedAt:
Tue Mar 01 19:37:00 UTC 2011
nextExecutionAt:
Tue Mar 01 19:38:00 UTC 2011
replicationFailedAt:
Tue Mar 01 19:37:00 UTC 2011
lastCycleBytesDownloaded:
0
previousCycleTimeInSeconds:
0
isPollingDisabled:
false
isReplicating:
false
-->
</div> </div>
</div> </div>
<div class="block" id="dataimport"> <div class="block" id="dataimport">
<h2><span>DataImport-Handler</span></h2> <h2><span>Dataimport</span></h2>
<div class="message-container"> <div class="message-container">
<div class="message"></div> <div class="message"></div>

View File

@ -4,17 +4,133 @@
<div id="data"> <div id="data">
#data <div id="field">
<div class="field-options">
<dl class="options clearfix">
<dt class="field-type">Field-Type:</dt>
<dt class="properties">Properties:</dt>
<dt class="schema">Schema:</dt>
<dt class="index">Index:</dt>
<dt class="position-increment-gap"><abbr title="Position Increment Gap">PI Gap</abbr>:</dt>
<dt class="docs">Docs:</dt>
<dt class="distinct">Distinct:</dt>
</dl>
<ul class="analyzer">
<li class="clearfix index">
<p>Index Analyzer:</p>
<dl>
<dt></dt>
</dl>
<ul>
<li class="clearfix tokenizer">
<p>Tokenizer:</p>
<dl>
</dl>
</li>
<li class="clearfix filters">
<p>Filters:</p>
<dl>
</dl>
</li>
</ul>
</li>
<li class="clearfix query">
<p>Query Analyzer:</p>
<dl>
<dt></dt>
</dl>
<ul>
<li class="clearfix tokenizer">
<p>Tokenizer:</p>
<dl>
</dl>
</li>
<li class="clearfix filters">
<p>Filters:</p>
<dl>
</dl>
</li>
</ul>
</li>
</ul>
</div>
<div class="topterms-holder">
<p class="head">Top <span class="shown"></span><span class="max-holder">/<span class="max"></span></span> Terms:</p>
<table border="0" cellspacing="0" cellpadding="0">
<thead>
<tr>
<th class="position" title="Position">&nbsp;</th>
<th class="term">Term</th>
<th class="frequency" title="Frequency">Frq</th>
</tr>
</thead>
</table>
<p class="navi clearfix">
<a class="less"><span>less</span></a>
<a class="more"><span>more</span></a>
</p>
</div>
<div class="histogram-holder">
<p class="head">Histogram:</p>
<div class="histogram"></div>
<dl class="clearfix">
</dl>
</div>
</div>
</div> </div>
<div id="related"> <div id="related">
<select> <select>
<option value="" selected="selected">Please select ..</option> <option value="" selected="selected">Please select </option>
</select> </select>
<dl> <dl id="f-df-t">
</dl>
<dl class="ukf-dsf">
<dt class="unique-key-field">Unique Key Field</dt>
<dt class="default-search-field">Default Search Field</dt>
</dl> </dl>
</div> </div>

View File

@ -1,109 +0,0 @@
<div id="field">
<div class="field-options">
<dl class="options clearfix">
<dt class="field-type">Field-Type:</dt>
<dt class="properties">Properties:</dt>
<dt class="schema">Schema:</dt>
<dt class="index">Index:</dt>
<dt class="position-increment-gap"><abbr title="Position Increment Gap">PI Gap</abbr>:</dt>
<dt class="docs">Docs:</dt>
<dt class="distinct">Distinct:</dt>
</dl>
<ul class="analyzer">
<li class="clearfix index">
<p>Index Analyzer:</p>
<dl>
<dt></dt>
</dl>
<ul>
<li class="clearfix tokenizer">
<p>Tokenizer:</p>
<dl>
</dl>
</li>
<li class="clearfix filters">
<p>Filters:</p>
<dl>
</dl>
</li>
</ul>
</li>
<li class="clearfix query">
<p>Query Analyzer:</p>
<dl>
<dt></dt>
</dl>
<ul>
<li class="clearfix tokenizer">
<p>Tokenizer:</p>
<dl>
</dl>
</li>
<li class="clearfix filters">
<p>Filters:</p>
<dl>
</dl>
</li>
</ul>
</li>
</ul>
</div>
<div class="topterms-holder">
<p class="head">Top <span class="shown"></span><span class="max-holder">/<span class="max"></span></span> Terms:</p>
<table border="0" cellspacing="0" cellpadding="0">
<thead>
<tr>
<th class="position" title="Position">&nbsp;</th>
<th class="term">Term</th>
<th class="frequency" title="Frequency">Frq</th>
</tr>
</thead>
</table>
<p class="navi clearfix">
<a class="less"><span>less</span></a>
<a class="more"><span>more</span></a>
</p>
</div>
<div class="histogram-holder">
<p class="head">Histogram:</p>
<div class="histogram"></div>
<dl class="clearfix">
</dl>
</div>
</div>

View File

@ -1,11 +0,0 @@
<div id="index">
<dl class="clearfix">
<dt class="unique-key-field">Unique Key Field:</dt>
<dt class="default-search-field">Default Search Field:</dt>
</dl>
</div>