mirror of https://github.com/apache/lucene.git
SOLR-2452: Merged with trunk up to r1137125
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1137126 13f79535-47bb-0310-9956-ffa450edef68
Commit: 4c9c9259e8
@@ -22,6 +22,7 @@
    <orderEntry type="library" name="Solr library" level="project" />
    <orderEntry type="library" name="Solr example library" level="project" />
    <orderEntry type="module" module-name="spatial" />
    <orderEntry type="module" module-name="grouping" />
    <orderEntry type="module" module-name="highlighter" />
    <orderEntry type="module" module-name="icu" />
    <orderEntry type="module" module-name="queries" />
@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr/contrib/analysis-extras</module-directory>
    <build-directory>../../build/contrib/analysis-extras</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr/contrib/clustering</module-directory>
    <build-directory>../../build/contrib/clustering</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr/contrib/dataimporthandler-extras</module-directory>
    <build-directory>../../build/contrib/dataimporthandler-extras</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr/contrib/dataimporthandler</module-directory>
    <build-directory>../../build/contrib/dataimporthandler</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -37,7 +37,6 @@
  <properties>
    <module-directory>solr/contrib/extraction</module-directory>
    <build-directory>../../build/contrib/extraction</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr/contrib/uima</module-directory>
    <build-directory>../../build/contrib/uima</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -39,7 +39,7 @@
    <module>contrib</module>
  </modules>
  <properties>
    <java.compat.version>1.6</java.compat.version>
    <tests.luceneMatchVersion>LUCENE_CURRENT</tests.luceneMatchVersion>
  </properties>
  <issueManagement>
    <system>JIRA</system>

@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr</module-directory>
    <build-directory>../build</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>

@@ -92,6 +91,11 @@
      <artifactId>lucene-suggest</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-grouping</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-commons-csv</artifactId>

@@ -34,7 +34,6 @@
  <properties>
    <module-directory>solr/src/solrj</module-directory>
    <build-directory>../../build/solrj</build-directory>
    <tests.luceneMatchVersion>4.0</tests.luceneMatchVersion>
  </properties>
  <dependencies>
    <dependency>
@@ -543,6 +543,12 @@ New Features
* LUCENE-3191: Added TopDocs.merge, to facilitate merging results from
  different shards (Uwe Schindler, Mike McCandless)

* LUCENE-3179: Added OpenBitSet.prevSetBit (Paul Elschot via Mike McCandless)

* LUCENE-3210: Made TieredMergePolicy more aggressive in reclaiming
  segments with deletions; added new methods
  set/getReclaimDeletesWeight to control this. (Mike McCandless)

Build

* LUCENE-1344: Create OSGi bundle using dev-tools/maven.

@@ -79,6 +79,10 @@ New Features
  facilitate doing grouping in a distributed environment (Uwe
  Schindler, Mike McCandless)

* LUCENE-2919: Added PKIndexSplitter, that splits an index according
  to a middle term in a specified field. (Jason Rutherglen via Mike
  McCandless)

API Changes

* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
@@ -0,0 +1,136 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.Version;

/**
 * Split an index based on a given primary key term
 * and a 'middle' term.  If the middle term is present, it's
 * sent to dir2.
 */
public class PKIndexSplitter {
  private Term midTerm;
  Directory input;
  Directory dir1;
  Directory dir2;

  public PKIndexSplitter(Term midTerm, Directory input,
      Directory dir1, Directory dir2) {
    this.midTerm = midTerm;
    this.input = input;
    this.dir1 = dir1;
    this.dir2 = dir2;
  }

  public void split() throws IOException {
    IndexReader reader = IndexReader.open(input);
    OpenBitSet lowDels = setDeletes(reader, null, midTerm.bytes());
    OpenBitSet hiDels = setDeletes(reader, midTerm.bytes(), null);

    createIndex(dir1, reader, lowDels);
    createIndex(dir2, reader, hiDels);
    reader.close();
  }

  private void createIndex(Directory target, IndexReader reader, OpenBitSet bv) throws IOException {
    IndexWriter w = new IndexWriter(target, new IndexWriterConfig(
        Version.LUCENE_CURRENT,
        new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
        .setOpenMode(OpenMode.CREATE));
    w.addIndexes(new DeletesIndexReader(reader, bv));
    w.close();
  }

  private OpenBitSet setDeletes(IndexReader reader, BytesRef startTerm,
      BytesRef endTermExcl) throws IOException {
    OpenBitSet incl = new OpenBitSet(reader.maxDoc());
    Terms terms = MultiFields.getTerms(reader, midTerm.field());
    TermsEnum te = terms.iterator();
    if (startTerm != null) {
      te.seek(startTerm);
    }
    while (true) {
      final BytesRef term = te.next();
      if (term == null) {
        break;
      }
      if (endTermExcl != null && term.compareTo(endTermExcl) >= 0) {
        break;
      }
      DocsEnum docs = MultiFields.getTermDocsEnum(reader,
          MultiFields.getDeletedDocs(reader), midTerm.field(), term);
      while (true) {
        final int doc = docs.nextDoc();
        if (doc != DocsEnum.NO_MORE_DOCS) {
          incl.set(doc);
        } else break;
      }
    }
    OpenBitSet dels = new OpenBitSet(reader.maxDoc());
    for (int x = 0; x < reader.maxDoc(); x++) {
      if (!incl.get(x)) {
        dels.set(x);
      }
    }
    return dels;
  }

  public static class DeletesIndexReader extends FilterIndexReader {
    OpenBitSet readerDels;

    public DeletesIndexReader(IndexReader reader, OpenBitSet deletes) {
      super(new SlowMultiReaderWrapper(reader));
      readerDels = new OpenBitSet(reader.maxDoc());
      if (in.hasDeletions()) {
        final Bits oldDelBits = MultiFields.getDeletedDocs(in);
        assert oldDelBits != null;
        for (int i = 0; i < in.maxDoc(); i++) {
          if (oldDelBits.get(i) || deletes.get(i)) {
            readerDels.set(i);
          }
        }
      } else {
        readerDels = deletes;
      }
    }

    @Override
    public int numDocs() {
      return in.maxDoc() - (int) readerDels.cardinality();
    }

    @Override
    public boolean hasDeletions() {
      return (int) readerDels.cardinality() > 0;
    }

    @Override
    public Bits getDeletedDocs() {
      return readerDels;
    }
  }
}
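For orientation, a minimal sketch of driving the new PKIndexSplitter against on-disk indexes; the directory paths and the "id" field are illustrative assumptions, not part of this commit. Documents whose primary-key term sorts before the middle term land in the first output directory, the rest in the second, mirroring the split() code above:

import java.io.File;
import org.apache.lucene.index.PKIndexSplitter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class PKSplitDemo {
  public static void main(String[] args) throws Exception {
    Directory input = FSDirectory.open(new File("index"));        // existing source index (assumed path)
    Directory below = FSDirectory.open(new File("index-below"));  // receives docs whose id term sorts before the middle term
    Directory above = FSDirectory.open(new File("index-above"));  // receives docs at or after the middle term
    new PKIndexSplitter(new Term("id", "000000005"), input, below, above).split();
  }
}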
@@ -0,0 +1,93 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import java.text.DecimalFormat;
import java.text.NumberFormat;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.LuceneTestCase;

public class TestPKIndexSplitter extends LuceneTestCase {
  public void testSplit() throws Exception {
    NumberFormat format = new DecimalFormat("000000000");

    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_CURRENT,
        new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
        .setOpenMode(OpenMode.CREATE));
    for (int x = 0; x < 10; x++) {
      Document doc = createDocument(x, "1", 3, format);
      w.addDocument(doc);
    }
    for (int x = 15; x < 20; x++) {
      Document doc = createDocument(x, "2", 3, format);
      w.addDocument(doc);
    }
    w.close();

    Directory dir1 = newDirectory();
    Directory dir2 = newDirectory();
    Term splitTerm = new Term("id", new BytesRef(format.format(11)));
    PKIndexSplitter splitter = new PKIndexSplitter(splitTerm,
        dir, dir1, dir2);
    splitter.split();

    IndexReader ir1 = IndexReader.open(dir1);
    IndexReader ir2 = IndexReader.open(dir2);
    assertEquals(10, ir1.maxDoc());
    assertEquals(4, ir2.maxDoc());

    ir1.close();
    ir2.close();

    dir1.close();
    dir2.close();
    dir.close();
  }

  public Document createDocument(int n, String indexName,
      int numFields, NumberFormat format) {
    StringBuilder sb = new StringBuilder();
    Document doc = new Document();
    String id = format.format(n);
    doc.add(new Field("id", id, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    sb.append("a");
    sb.append(n);
    doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    sb.append(" b");
    sb.append(n);
    for (int i = 1; i < numFields; i++) {
      doc.add(new Field("field" + (i + 1), sb.toString(), Store.YES,
          Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    }
    return doc;
  }
}
@@ -30,7 +30,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@@ -82,6 +82,7 @@ public class TieredMergePolicy extends MergePolicy {
  private double expungeDeletesPctAllowed = 10.0;
  private boolean useCompoundFile = true;
  private double noCFSRatio = 0.1;
  private double reclaimDeletesWeight = 2.0;

  /** Maximum number of segments to be merged at a time
   *  during "normal" merging.  For explicit merging (eg,

@@ -133,6 +134,23 @@ public class TieredMergePolicy extends MergePolicy {
    return maxMergedSegmentBytes/1024/1024.;
  }

  /** Controls how aggressively merges that reclaim more
   *  deletions are favored.  Higher values favor selecting
   *  merges that reclaim deletions.  A value of 0.0 means
   *  deletions don't impact merge selection. */
  public TieredMergePolicy setReclaimDeletesWeight(double v) {
    if (v < 0.0) {
      throw new IllegalArgumentException("reclaimDeletesWeight must be >= 0.0 (got " + v + ")");
    }
    reclaimDeletesWeight = v;
    return this;
  }

  /** See {@link #setReclaimDeletesWeight}. */
  public double getReclaimDeletesWeight() {
    return reclaimDeletesWeight;
  }

  /** Segments smaller than this are "rounded up" to this
   *  size, ie treated as equal (floor) size for merge
   *  selection.  This is to prevent frequent flushing of

@@ -435,7 +453,7 @@ public class TieredMergePolicy extends MergePolicy {

      // Strongly favor merges that reclaim deletes:
      final double nonDelRatio = ((double) totAfterMergeBytes)/totBeforeMergeBytes;
      mergeScore *= nonDelRatio;
      mergeScore *= Math.pow(nonDelRatio, reclaimDeletesWeight);

      final double finalMergeScore = mergeScore;
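A hedged sketch (a fragment, not part of this commit) of wiring the new reclaimDeletesWeight knob into an IndexWriterConfig; the directory variable and the weight value are illustrative assumptions:

// assumes an existing Directory 'dir'
TieredMergePolicy tmp = new TieredMergePolicy();
tmp.setReclaimDeletesWeight(4.0);   // above the 2.0 default shown in the diff; 0.0 makes merge selection ignore deletions
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT,
    new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setMergePolicy(tmp);
IndexWriter writer = new IndexWriter(dir, iwc);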
@@ -17,22 +17,22 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Caches all docs, and optionally also scores, coming from
 * a search, and is then able to replay them to another
 * collector.  You specify the max RAM this class may use.
 * Once the collection is done, call {@link #isCached}. If
 * this returns true, you can use {@link #replay} against a
 * new collector.  If it returns false, this means too much
 * RAM was required and you must instead re-run the original
 * search.
 * Once the collection is done, call {@link #isCached}. If
 * this returns true, you can use {@link #replay(Collector)}
 * against a new collector.  If it returns false, this means
 * too much RAM was required and you must instead re-run the
 * original search.
 *
 * <p><b>NOTE</b>: this class consumes 4 (or 8 bytes, if
 * scoring is cached) per collected document.  If the result

@@ -105,7 +105,16 @@ public abstract class CachingCollector extends Collector {

      cachedScorer = new CachedScorer();
      cachedScores = new ArrayList<float[]>();
      curScores = new float[128];
      curScores = new float[INITIAL_ARRAY_SIZE];
      cachedScores.add(curScores);
    }

    ScoreCachingCollector(Collector other, int maxDocsToCache) {
      super(other, maxDocsToCache);

      cachedScorer = new CachedScorer();
      cachedScores = new ArrayList<float[]>();
      curScores = new float[INITIAL_ARRAY_SIZE];
      cachedScores.add(curScores);
    }

@@ -210,7 +219,11 @@ public abstract class CachingCollector extends Collector {
    NoScoreCachingCollector(Collector other, double maxRAMMB) {
      super(other, maxRAMMB, false);
    }

    NoScoreCachingCollector(Collector other, int maxDocsToCache) {
      super(other, maxDocsToCache);
    }

    @Override
    public void collect(int doc) throws IOException {

@@ -353,7 +366,25 @@ public abstract class CachingCollector extends Collector {
   */
  public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
    return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
  }
  }

  /**
   * Create a new {@link CachingCollector} that wraps the given collector and
   * caches documents and scores up to the specified max docs threshold.
   *
   * @param other
   *          the Collector to wrap and delegate calls to.
   * @param cacheScores
   *          whether to cache scores in addition to document IDs. Note that
   *          this increases the RAM consumed per doc
   * @param maxDocsToCache
   *          the maximum number of documents for caching the documents and
   *          possible the scores. If the collector exceeds the threshold,
   *          no documents and scores are cached.
   */
  public static CachingCollector create(Collector other, boolean cacheScores, int maxDocsToCache) {
    return cacheScores ? new ScoreCachingCollector(other, maxDocsToCache) : new NoScoreCachingCollector(other, maxDocsToCache);
  }

  // Prevent extension from non-internal classes
  private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {

@@ -369,6 +400,15 @@ public abstract class CachingCollector extends Collector {
    }
    maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
  }

  private CachingCollector(Collector other, int maxDocsToCache) {
    this.other = other;

    cachedDocs = new ArrayList<int[]>();
    curDocs = new int[INITIAL_ARRAY_SIZE];
    cachedDocs.add(curDocs);
    this.maxDocsToCache = maxDocsToCache;
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
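A hedged sketch (a fragment) of the new doc-count-bounded factory; 'searcher', 'query', 'firstCollector' and 'secondCollector' are assumed to exist, and the 10000 cap is arbitrary for illustration:

CachingCollector cache = CachingCollector.create(firstCollector, true, 10000); // cache docs and scores, up to 10000 docs
searcher.search(query, cache);
if (cache.isCached()) {
  cache.replay(secondCollector);           // re-deliver the cached docs/scores without re-searching
} else {
  searcher.search(query, secondCollector); // cap exceeded, nothing was cached: run the search again
}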
@@ -778,6 +778,28 @@ public final class BitUtil {
    return n - (y & 1);
  }

  /** table of number of leading zeros in a byte */
  public static final byte[] nlzTable = {8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

  /** Returns the number of leading zero bits.
   */
  public static int nlz(long x) {
    int n = 0;
    // do the first step as a long
    int y = (int)(x>>>32);
    if (y==0) {n+=32; y = (int)(x); }
    if ((y & 0xFFFF0000) == 0) { n+=16; y<<=16; }
    if ((y & 0xFF000000) == 0) { n+=8; y<<=8; }
    return n + nlzTable[y >>> 24];
    /* implementation without table:
    if ((y & 0xF0000000) == 0) { n+=4; y<<=4; }
    if ((y & 0xC0000000) == 0) { n+=2; y<<=2; }
    if ((y & 0x80000000) == 0) { n+=1; y<<=1; }
    if ((y & 0x80000000) == 0) { n+=1;}
    return n;
    */
  }

  /** returns true if v is a power of two or zero */
  public static boolean isPowerOfTwo(int v) {
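A quick illustrative check (an assumption for illustration, not part of the commit): the new nlz helper should agree with the JDK intrinsic, which the LUCENE-3197 note in TestBitUtil below recommends preferring:

long x = 0x00000F0000000000L;
assert BitUtil.nlz(x) == Long.numberOfLeadingZeros(x); // both return 20 for this value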
@@ -659,7 +659,34 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
  }

  /** Returns the index of the first set bit starting downwards at
   *  the index specified.
   *  -1 is returned if there are no more set bits.
   */
  public int prevSetBit(int index) {
    if (index < 0) {
      return -1;
    }
    int i = index>>6;
    if (i >= wlen) {
      i = wlen - 1;
    }
    final int subIndex = index & 0x3f;  // index within the word
    long word = (bits[i] << (63-subIndex));  // skip all the bits to the left of index

    if (word != 0) {
      return (i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197
    }

    while (--i >= 0) {
      word = bits[i];
      if (word != 0) {
        return (i << 6) + 63 - Long.numberOfLeadingZeros(word);
      }
    }

    return -1;
  }

  @Override
  public Object clone() {
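A hedged sketch (a fragment, values chosen for illustration) of walking set bits from high to low with the new prevSetBit(), the mirror of nextSetBit():

OpenBitSet bits = new OpenBitSet(64);
bits.set(3);
bits.set(40);
for (int i = bits.prevSetBit(63); i >= 0; i = bits.prevSetBit(i - 1)) {
  System.out.println("set bit: " + i);  // prints 40, then 3; prevSetBit(-1) returns -1 and ends the loop
}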
@@ -947,6 +947,7 @@ public abstract class LuceneTestCase extends Assert {
    tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20));
    tmp.setUseCompoundFile(r.nextBoolean());
    tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
    tmp.setReclaimDeletesWeight(r.nextDouble()*4);
    return tmp;
  }
@@ -17,15 +17,11 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LuceneTestCase;

import java.io.IOException;

public class TestCachingCollector extends LuceneTestCase {

  private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB

@@ -76,7 +72,7 @@ public class TestCachingCollector extends LuceneTestCase {

  public void testBasic() throws Exception {
    for (boolean cacheScores : new boolean[] { false, true }) {
      CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
      CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1.0);
      cc.setScorer(new MockScorer());

      // collect 1000 docs
@@ -0,0 +1,133 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.util;

public class TestBitUtil extends LuceneTestCase {

  private static int slowNlz(long x) {
    if (x == 0L) return 64;
    int nlz = 0;
    while ( ((x << nlz) & (1L << 63)) == 0) {
      nlz++;
    }
    return nlz;
  }

  private void checkNlz(long x) {
    assertEquals(slowNlz(x), BitUtil.nlz(x));
    assertEquals(Long.numberOfLeadingZeros(x), BitUtil.nlz(x));
  }

  public void testNlz() {
    checkNlz(0L);
    checkNlz(1L);
    checkNlz(-1L);
    for (int i = 1; i <= 63; i++) {
      checkNlz(1L << i);
      checkNlz((1L << i) + (1L << (i>>1)));
    }
  }

  public void testBitUtils() {
    long num = 100000;
    assertEquals( 5, BitUtil.ntz(num) );
    assertEquals( 5, BitUtil.ntz2(num) );
    assertEquals( 5, BitUtil.ntz3(num) );

    num = 10;
    assertEquals( 1, BitUtil.ntz(num) );
    assertEquals( 1, BitUtil.ntz2(num) );
    assertEquals( 1, BitUtil.ntz3(num) );

    for (int i=0; i<64; i++) {
      num = 1L << i;
      assertEquals( i, BitUtil.ntz(num) );
      assertEquals( i, BitUtil.ntz2(num) );
      assertEquals( i, BitUtil.ntz3(num) );
    }
  }

  private long testArg(int shift) {
    return (1L << shift) + (1L << (shift>>1));
  }

  private long nlzBitUtilBasicLoop(int iters) {
    long sumRes = 0;
    while (iters-- >= 0) {
      for (int i = 1; i <= 63; i++) {
        long a = testArg(i);
        sumRes += BitUtil.nlz(a);
        sumRes += BitUtil.nlz(a+1);
        sumRes += BitUtil.nlz(a-1);
        sumRes += BitUtil.nlz(a+10);
        sumRes += BitUtil.nlz(a-10);
      }
    }
    return sumRes;
  }

  private long nlzLongBasicLoop(int iters) {
    long sumRes = 0;
    while (iters-- >= 0) {
      for (int i = 1; i <= 63; i++) {
        long a = testArg(i);
        sumRes += Long.numberOfLeadingZeros(a);
        sumRes += Long.numberOfLeadingZeros(a+1);
        sumRes += Long.numberOfLeadingZeros(a-1);
        sumRes += Long.numberOfLeadingZeros(a+10);
        sumRes += Long.numberOfLeadingZeros(a-10);
      }
    }
    return sumRes;
  }

  public void tstPerfNlz() { // See LUCENE-3197, prefer to use Long.numberOfLeadingZeros() over BitUtil.nlz().
    final long measureMilliSecs = 2000;
    final int basicIters = 100000;
    long startTime;
    long endTime;
    long curTime;
    long dummy = 0; // avoid optimizing away

    dummy = 0;
    int bitUtilLoops = 0;
    startTime = System.currentTimeMillis();
    endTime = startTime + measureMilliSecs;
    do {
      dummy += nlzBitUtilBasicLoop(basicIters);
      bitUtilLoops++;
      curTime = System.currentTimeMillis();
    } while (curTime < endTime);
    int bitUtilPsTime = (int) (1000000000 * (curTime - startTime) / (basicIters * 5 * 63 * (float) bitUtilLoops));
    System.out.println("BitUtil nlz time: " + (bitUtilPsTime/1) + " picosec/call, dummy: " + dummy);

    dummy = 0;
    int longLoops = 0;
    startTime = System.currentTimeMillis();
    endTime = startTime + measureMilliSecs;
    do {
      dummy += nlzLongBasicLoop(basicIters);
      longLoops++;
      curTime = System.currentTimeMillis();
    } while (curTime < endTime);
    int longPsTime = (int) (1000000000 * (curTime - startTime) / (basicIters * 5 * 63 * (float) longLoops));
    System.out.println("Long nlz time: " + longPsTime + " picosec/call, dummy: " + dummy);
  }
}
@@ -41,6 +41,20 @@ public class TestOpenBitSet extends LuceneTestCase {
    } while (aa>=0);
  }

  void doPrevSetBit(BitSet a, OpenBitSet b) {
    int aa=a.length();
    int bb=aa;
    do {
      // aa = a.prevSetBit(aa-1);
      aa--;
      while ((aa >= 0) && (! a.get(aa))) {
        aa--;
      }
      bb = b.prevSetBit(bb-1);
      assertEquals(aa,bb);
    } while (aa>=0);
  }

  // test interleaving different OpenBitSetIterator.next()/skipTo()
  void doIterate(BitSet a, OpenBitSet b, int mode) {
    if (mode==1) doIterate1(a, b);

@@ -123,6 +137,7 @@ public class TestOpenBitSet extends LuceneTestCase {
        bb = (OpenBitSet)b.clone(); bb.clear(fromIndex,toIndex);

        doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit
        doPrevSetBit(aa,bb);

        fromIndex = random.nextInt(sz+80);
        toIndex = fromIndex + random.nextInt((sz>>1)+1);

@@ -130,6 +145,7 @@ public class TestOpenBitSet extends LuceneTestCase {
        bb = (OpenBitSet)b.clone(); bb.set(fromIndex,toIndex);

        doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit
        doPrevSetBit(aa,bb);

        if (a0 != null) {

@@ -168,7 +184,7 @@ public class TestOpenBitSet extends LuceneTestCase {
      b0=b;
    }
  }

  // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
  // larger testsuite.
  public void testSmall() {

@@ -176,12 +192,13 @@ public class TestOpenBitSet extends LuceneTestCase {
    doRandomSets(atLeast(1200), atLeast(1000), 2);
  }

  // uncomment to run a bigger test (~2 minutes).
  /*
  public void testBig() {
    // uncomment to run a bigger test (~2 minutes).
    // rand = newRandom();
    // doRandomSets(2000,200000, 1);
    // doRandomSets(2000,200000, 2);
    doRandomSets(2000,200000, 1);
    doRandomSets(2000,200000, 2);
  }
  */

  public void testEquals() {
    OpenBitSet b1 = new OpenBitSet(1111);

@@ -205,26 +222,6 @@ public class TestOpenBitSet extends LuceneTestCase {
    assertFalse(b1.equals(new Object()));
  }

  public void testBitUtils()
  {
    long num = 100000;
    assertEquals( 5, BitUtil.ntz(num) );
    assertEquals( 5, BitUtil.ntz2(num) );
    assertEquals( 5, BitUtil.ntz3(num) );

    num = 10;
    assertEquals( 1, BitUtil.ntz(num) );
    assertEquals( 1, BitUtil.ntz2(num) );
    assertEquals( 1, BitUtil.ntz3(num) );

    for (int i=0; i<64; i++) {
      num = 1L << i;
      assertEquals( i, BitUtil.ntz(num) );
      assertEquals( i, BitUtil.ntz2(num) );
      assertEquals( i, BitUtil.ntz3(num) );
    }
  }

  public void testHashCodeEquals() {
    OpenBitSet bs1 = new OpenBitSet(200);
    OpenBitSet bs2 = new OpenBitSet(64);

@@ -233,6 +230,35 @@ public class TestOpenBitSet extends LuceneTestCase {
    assertEquals(bs1, bs2);
    assertEquals(bs1.hashCode(), bs2.hashCode());
  }

  private OpenBitSet makeOpenBitSet(int[] a) {
    OpenBitSet bs = new OpenBitSet();
    for (int e: a) {
      bs.set(e);
    }
    return bs;
  }

  private BitSet makeBitSet(int[] a) {
    BitSet bs = new BitSet();
    for (int e: a) {
      bs.set(e);
    }
    return bs;
  }

  private void checkPrevSetBitArray(int [] a) {
    OpenBitSet obs = makeOpenBitSet(a);
    BitSet bs = makeBitSet(a);
    doPrevSetBit(bs, obs);
  }

  public void testPrevSetBit() {
    checkPrevSetBitArray(new int[] {});
    checkPrevSetBitArray(new int[] {0});
    checkPrevSetBitArray(new int[] {0,2});
  }
}
@@ -78,6 +78,8 @@ New Features
  Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
  Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger,
  Harish Agarwal, yonik)

* SOLR-2564: Integrate grouping module into Solr. Also adds the ability to return the number of
  groups that match a query.

* SOLR-1665: Add debug component options for timings, results and query info only (gsingers, hossman, yonik)
@@ -49,7 +49,7 @@
  <target name="compile-core" depends="init, clover, compile-analyzers-common,
          compile-analyzers-phonetic, compile-suggest, compile-highlighter,
          compile-memory, compile-misc, compile-queries, compile-spatial,
          compile-solrj, common.compile-core, compile-webapp"
          compile-grouping, compile-solrj, common.compile-core, compile-webapp"
          unless="solr-core.compiled">
  </target>

@@ -150,6 +150,9 @@
  <target name="compile-suggest" unless="suggest.uptodate">
    <ant dir="${common.dir}/../modules/suggest" target="default" inheritAll="false"/>
  </target>
  <target name="compile-grouping" unless="grouping.uptodate">
    <ant dir="${common.dir}/../modules/grouping" target="default" inheritAll="false"/>
  </target>
  <target name="compile-highlighter" unless="highlighter.uptodate">
    <ant dir="${common.dir}/contrib/highlighter" target="default" inheritAll="false"/>
  </target>
@@ -60,6 +60,8 @@
      property="analyzers-phonetic.uptodate" classpath.property="analyzers-phonetic.jar"/>
  <module-uptodate name="suggest" jarfile="${common.dir}/../modules/suggest/build/lucene-suggest-${version}.jar"
      property="suggest.uptodate" classpath.property="suggest.jar"/>
  <module-uptodate name="grouping" jarfile="${common.dir}/../modules/grouping/build/lucene-grouping-${version}.jar"
      property="grouping.uptodate" classpath.property="grouping.jar"/>
  <contrib-uptodate name="highlighter" property="highlighter.uptodate" classpath.property="highlighter.jar"/>
  <contrib-uptodate name="memory" property="memory.uptodate" classpath.property="memory.jar"/>
  <contrib-uptodate name="misc" property="misc.uptodate" classpath.property="misc.jar"/>

@@ -75,6 +77,7 @@
    <pathelement path="${queries.jar}"/>
    <pathelement path="${spatial.jar}"/>
    <pathelement path="${suggest.jar}"/>
    <pathelement path="${grouping.jar}"/>
    <pathelement location="${common-solr.dir}/build/classes/solrj"/>
    <pathelement location="${common-solr.dir}/build/classes/webapp"/>
    <pathelement location="${common-solr.dir}/build/classes/java"/>
@@ -120,7 +120,7 @@ public class TestSqlEntityProcessorDelta extends AbstractDataImportHandlerTestCa
      runFullImport(dataConfig_delta);
      assertQ(req("id:1"), "//*[@numFound='0']");
    } finally {
      f.setWritable(true);
      f.delete();
    }
  }
}
@@ -38,5 +38,20 @@ public interface GroupParams {

  /** treat the first group result as the main result.  true/false */
  public static final String GROUP_FORMAT = GROUP + ".format";

  /**
   * Whether to cache the first pass search (doc ids and score) for the second pass search.
   * Also defines the maximum size of the group cache relative to maxdoc in a percentage.
   * Values can be a positive integer, from 0 till 100. A value of 0 will disable the group cache.
   * The default is 0. */
  public static final String GROUP_CACHE_PERCENTAGE = GROUP + ".cache.percent";

  // Note: Since you can supply multiple fields to group on, but only have a facets for the whole result. It only makes
  // sense to me to support these parameters for the first group.
  /** Whether the docSet (for example for faceting) should be based on plain documents (a.k.a UNGROUPED) or on the groups (a.k.a GROUPED). */
  public static final String GROUP_COLLAPSE = GROUP + ".collapse";

  /** Whether the group count should be included in the response. */
  public static final String GROUP_TOTAL_COUNT = GROUP + ".ngroups";
}
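A hedged illustration (a fragment, not part of the commit) of a request built from these parameters with SolrJ's ModifiableSolrParams; the query text and field name are assumptions, and string values are used throughout:

ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "title:title");
params.set(GroupParams.GROUP, "true");
params.set(GroupParams.GROUP_FIELD, "name");
params.set(GroupParams.GROUP_TOTAL_COUNT, "true");    // include an "ngroups" count in the grouped response
params.set(GroupParams.GROUP_CACHE_PERCENTAGE, "20"); // cache the first pass for up to 20% of maxdoc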
@@ -41,7 +41,9 @@ public class QuerySenderListener extends AbstractSolrEventListener {
  public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
    final SolrIndexSearcher searcher = newSearcher;
    log.info("QuerySenderListener sending requests to " + newSearcher);
    for (NamedList nlst : (List<NamedList>)args.get("queries")) {
    List<NamedList> allLists = (List<NamedList>)args.get("queries");
    if (allLists == null) return;
    for (NamedList nlst : allLists) {
      SolrQueryRequest req = null;

      try {
@@ -45,8 +45,6 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.*;
import org.apache.solr.search.function.FunctionQuery;
import org.apache.solr.search.function.QueryValueSource;
import org.apache.solr.util.SolrPluginUtils;

import java.io.IOException;

@@ -315,16 +313,25 @@ public class QueryComponent extends SearchComponent
      boolean doGroup = params.getBool(GroupParams.GROUP, false);
      if (doGroup) {
        try {
          Grouping grouping = new Grouping(searcher, result, cmd);

          int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
          boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
          String[] fields = params.getParams(GroupParams.GROUP_FIELD);
          String[] funcs = params.getParams(GroupParams.GROUP_FUNC);
          String[] queries = params.getParams(GroupParams.GROUP_QUERY);
          String groupSortStr = params.get(GroupParams.GROUP_SORT);
          boolean main = params.getBool(GroupParams.GROUP_MAIN, false);
          String format = params.get(GroupParams.GROUP_FORMAT);
          Grouping.Format defaultFormat = "simple".equals(format) ? Grouping.Format.Simple : Grouping.Format.Grouped;

          String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name());
          Grouping.Format defaultFormat;
          try {
            defaultFormat = Grouping.Format.valueOf(formatStr);
          } catch (IllegalArgumentException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT));
          }

          boolean includeTotalGroupCount = params.getBool(GroupParams.GROUP_TOTAL_COUNT, false);
          Grouping.TotalCount defaultTotalCount = includeTotalGroupCount ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
          Sort sort = cmd.getSort();
          // groupSort defaults to sort
          Sort groupSort = groupSortStr == null ? cmd.getSort() : QueryParsing.parseSort(groupSortStr, req);

@@ -332,95 +339,47 @@ public class QueryComponent extends SearchComponent
          int groupOffsetDefault = params.getInt(GroupParams.GROUP_OFFSET, 0);
          int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1);

          // temporary: implement all group-by-field as group-by-func
          if (funcs == null) {
            funcs = fields;
          } else if (fields != null) {
            // catenate functions and fields
            String[] both = new String[fields.length + funcs.length];
            System.arraycopy(fields, 0, both, 0, fields.length);
            System.arraycopy(funcs, 0, both, fields.length, funcs.length);
            funcs = both;
          }
          Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, main);
          grouping.setSort(sort)
              .setGroupSort(groupSort)
              .setDefaultFormat(defaultFormat)
              .setLimitDefault(limitDefault)
              .setDefaultTotalCount(defaultTotalCount)
              .setDocsPerGroupDefault(docsPerGroupDefault)
              .setGroupOffsetDefault(groupOffsetDefault);

          if (fields != null) {
            for (String field : fields) {
              grouping.addFieldCommand(field, rb.req);
            }
          }

          if (funcs != null) {
            for (String groupByStr : funcs) {
              QParser parser = QParser.getParser(groupByStr, "func", rb.req);
              Query q = parser.getQuery();
              Grouping.CommandFunc gc = grouping.new CommandFunc();
              gc.groupSort = groupSort;

              if (q instanceof FunctionQuery) {
                gc.groupBy = ((FunctionQuery)q).getValueSource();
              } else {
                gc.groupBy = new QueryValueSource(q, 0.0f);
              }
              gc.key = groupByStr;
              gc.numGroups = limitDefault;
              gc.docsPerGroup = docsPerGroupDefault;
              gc.groupOffset = groupOffsetDefault;
              gc.offset = cmd.getOffset();
              gc.sort = cmd.getSort();
              gc.format = defaultFormat;

              if (main) {
                gc.main = true;
                gc.format = Grouping.Format.Simple;
                main = false;
              }

              if (gc.format == Grouping.Format.Simple) {
                gc.groupOffset = 0;  // doesn't make sense
              }

              grouping.add(gc);
              grouping.addFunctionCommand(groupByStr, rb.req);
            }
          }

          if (queries != null) {
            for (String groupByStr : queries) {
              QParser parser = QParser.getParser(groupByStr, null, rb.req);
              Query gq = parser.getQuery();
              Grouping.CommandQuery gc = grouping.new CommandQuery();
              gc.query = gq;
              gc.groupSort = groupSort;
              gc.key = groupByStr;
              gc.numGroups = limitDefault;
              gc.docsPerGroup = docsPerGroupDefault;
              gc.groupOffset = groupOffsetDefault;

              // these two params will only be used if this is for the main result set
              gc.offset = cmd.getOffset();
              gc.numGroups = limitDefault;

              gc.format = defaultFormat;

              if (main) {
                gc.main = true;
                gc.format = Grouping.Format.Simple;
                main = false;
              }
              if (gc.format == Grouping.Format.Simple) {
                gc.docsPerGroup = gc.numGroups; // doesn't make sense to limit to one
                gc.groupOffset = gc.offset;
              }

              grouping.add(gc);
              grouping.addQueryCommand(groupByStr, rb.req);
            }
          }

          if (rb.doHighlights || rb.isDebug()) {
            // we need a single list of the returned docs
            cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
          }

          // searcher.search(result,cmd);
          grouping.execute();
          rb.setResult( result );
          if (grouping.isSignalCacheWarning()) {
            rsp.add(
                "cacheWarning",
                String.format("Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache)
            );
          }
          rb.setResult(result);
          rsp.add("grouped", result.groupedResults);
          // TODO: get "hits" a different way to log

          if (grouping.mainResult != null) {
            ResultContext ctx = new ResultContext();

@@ -428,10 +387,10 @@ public class QueryComponent extends SearchComponent
            ctx.query = null; // TODO? add the query?
            rsp.add("response", ctx);
            rsp.getToLog().add("hits", grouping.mainResult.matches());
          } else if (!grouping.getCommands().isEmpty()) { // Can never be empty since grouping.execute() checks for this.
            rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
          }

          return;

        } catch (ParseException e) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }
[File diff suppressed because it is too large.]
@@ -37,6 +37,10 @@ public abstract class FieldCacheSource extends ValueSource {
    return cache;
  }

  public String getField() {
    return field;
  }

  @Override
  public String description() {
    return field;
@@ -0,0 +1,79 @@
<?xml version="1.0" ?>

<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- $Id: solrconfig-querysender.xml 1048886 2010-12-14 01:10:52Z hossman $
     $Source$
     $Name$
-->

<config>
  <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
  <!-- The DirectoryFactory to use for indexes.
       solr.StandardDirectoryFactory, the default, is filesystem based.
       solr.RAMDirectoryFactory is memory based and not persistent. -->
  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>

  <updateHandler class="solr.DirectUpdateHandler2">
    <listener event="postCommit"
              class="org.apache.solr.core.MockEventListener" />
    <listener event="postOptimize"
              class="org.apache.solr.core.MockEventListener" />
  </updateHandler>

  <query>

    <!-- a newSearcher event is fired whenever a new searcher is being prepared
         and there is a current searcher handling requests (aka registered). -->
    <!-- QuerySenderListener takes an array of NamedList and executes a
         local query request for each NamedList in sequence. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <!--
      <arr name="queries">
        <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> <str name="qt">mock</str></lst>
        <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> <str name="qt">mock</str></lst>
      </arr>
      -->
    </listener>
    <listener event="newSearcher"
              class="org.apache.solr.core.MockEventListener" />

    <!-- a firstSearcher event is fired whenever a new searcher is being
         prepared but there is no current registered searcher to handle
         requests or to gain prewarming data from. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <!--
      <arr name="queries">
        <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str>
              <str name="qt">mock</str>
        </lst>
      </arr>
      -->
    </listener>
    <listener event="firstSearcher"
              class="org.apache.solr.core.MockEventListener" />

  </query>
  <requestHandler name="mock" class="org.apache.solr.core.MockQuerySenderListenerReqHandler" default="true">
    <!-- default values for query parameters -->

  </requestHandler>
</config>
@@ -20,6 +20,7 @@ package org.apache.solr;
import org.apache.lucene.search.FieldCache;
import org.apache.noggit.JSONUtil;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.junit.Before;

@@ -88,6 +89,49 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
    );
  }

  @Test
  public void testGroupingGroupSortingScore_withTotalGroupCount() {
    assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_si", "1")));
    assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_si", "2")));
    assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_si", "1")));
    assertU(add(doc("id", "4","name", "author2", "title", "title", "group_si", "2")));
    assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_si", "1")));
    assertU(commit());

    assertQ(req("q","title:title", "group", "true", "group.field","name", "group.ngroups", "true")
      ,"//lst[@name='grouped']/lst[@name='name']"
      ,"//lst[@name='grouped']/lst[@name='name']/int[@name='matches'][.='5']"
      ,"//lst[@name='grouped']/lst[@name='name']/int[@name='ngroups'][.='3']"
      ,"*[count(//arr[@name='groups']/lst) = 3]"

      ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
      ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
      ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"

      ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
      ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
      ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"

      ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
      ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
      ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
    );

    assertQ(req("q","title:title", "group", "true", "group.field","group_si", "group.ngroups", "true")
      ,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']"
      ,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']"
      ,"*[count(//arr[@name='groups']/lst) = 2]"

      ,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
      ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
      ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"

      ,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
      ,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
      ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
    );
  }

  @Test
  public void testGroupingGroupSortingScore_basicWithGroupSortEqualToSort() {
    assertU(add(doc("id", "1","name", "author1", "title", "a book title")));

@@ -353,7 +397,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
      , "/grouped/foo_i=={'matches':10,'doclist':"
        +"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
    );
  };
  }

@@ -476,14 +520,16 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
    List<Grp> sortedGroups = new ArrayList(groups.values());
    Collections.sort(sortedGroups,  groupComparator==sortComparator ? createFirstDocComparator(sortComparator) : createMaxDocComparator(sortComparator));

    Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit);
    boolean includeNGroups = random.nextBoolean();
    Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit, includeNGroups);

    int randomPercentage = random.nextInt(101);
    // TODO: create a random filter too

    SolrQueryRequest req = req("group","true","wt","json","indent","true", "echoParams","all", "q","{!func}score_f", "group.field",groupField
        ,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr
        ,(groupSortStr==null || groupSortStr==sortStr) ? "nosort":"group.sort", groupSortStr==null ? "": groupSortStr
        ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit
        ,(groupSortStr==null || groupSortStr==sortStr) ? "noGroupsort":"group.sort", groupSortStr==null ? "": groupSortStr
        ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit,
        GroupParams.GROUP_CACHE_PERCENTAGE, Integer.toString(randomPercentage), GroupParams.GROUP_TOTAL_COUNT, includeNGroups ? "true" : "false"
    );

    String strResponse = h.query(req);

@@ -508,7 +554,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {

  }

  public static Object buildGroupedResult(IndexSchema schema, List<Grp> sortedGroups, int start, int rows, int group_offset, int group_limit) {
  public static Object buildGroupedResult(IndexSchema schema, List<Grp> sortedGroups, int start, int rows, int group_offset, int group_limit, boolean includeNGroups) {
    Map<String,Object> result = new LinkedHashMap<String,Object>();

    long matches = 0;

@@ -516,6 +562,9 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
      matches += grp.docs.size();
    }
    result.put("matches", matches);
    if (includeNGroups) {
      result.put("ngroups", sortedGroups.size());
    }
    List groupList = new ArrayList();
    result.put("groups", groupList);
@@ -0,0 +1,90 @@
package org.apache.solr.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.Directory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.EventParams;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.TestExtendedDismaxParser;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestQuerySenderNoQuery extends SolrTestCaseJ4 {

  // number of instances configured in the solrconfig.xml
  private static final int EXPECTED_MOCK_LISTENER_INSTANCES = 4;

  private static int preInitMockListenerCount = 0;

  @BeforeClass
  public static void beforeClass() throws Exception {
    // record current value prior to core initialization
    // so we can verify the correct number of instances later
    // NOTE: this won't work properly if concurrent tests run
    // in the same VM
    preInitMockListenerCount = MockEventListener.getCreateCount();

    initCore("solrconfig-querysender-noquery.xml","schema.xml");
  }

  public void testListenerCreationCounts() {
    SolrCore core = h.getCore();

    assertEquals("Unexpected number of listeners created",
        EXPECTED_MOCK_LISTENER_INSTANCES,
        MockEventListener.getCreateCount() - preInitMockListenerCount);
  }

  @Test
  public void testRequestHandlerRegistry() {
    // property values defined in build.xml
    SolrCore core = h.getCore();

    assertEquals( 2, core.firstSearcherListeners.size() );
    assertEquals( 2, core.newSearcherListeners.size() );
  }

  // Determine that when the query lists are commented out of both new and
  // first searchers in the config, we don't throw an NPE
  @Test
  public void testSearcherEvents() throws Exception {
    SolrCore core = h.getCore();
    SolrEventListener newSearcherListener = core.newSearcherListeners.get(0);
    assertTrue("Not an instance of QuerySenderListener", newSearcherListener instanceof QuerySenderListener);
    QuerySenderListener qsl = (QuerySenderListener) newSearcherListener;

    RefCounted<SolrIndexSearcher> currentSearcherRef = core.getSearcher();
    SolrIndexSearcher currentSearcher = currentSearcherRef.get();
    SolrIndexSearcher dummy = null;
    qsl.newSearcher(currentSearcher, dummy); // test first Searcher (since param is null)
    MockQuerySenderListenerReqHandler mock = (MockQuerySenderListenerReqHandler) core.getRequestHandler("mock");
    assertNotNull("Mock is null", mock);
    assertNull("Req (firstsearcher) is not null", mock.req);

    Directory dir = currentSearcher.getIndexReader().directory();
    SolrIndexSearcher newSearcher = new SolrIndexSearcher(core, core.getSchema(), "testQuerySenderNoQuery", dir, true, false);

    qsl.newSearcher(newSearcher, currentSearcher); // get newSearcher.
    assertNull("Req (newsearcher) is not null", mock.req);
    newSearcher.close();
    currentSearcherRef.decref();
  }

}