SOLR-2452: Merged with trunk up to r1137526

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1137527 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2011-06-20 07:12:59 +00:00
commit 2e4fbd2baf
9 changed files with 67 additions and 62 deletions

View File

@ -450,12 +450,6 @@ Optimizations
MultiTermQuery now stores TermState per leaf reader during rewrite to re-
seek the term dictionary in TermQuery / TermWeight.
(Simon Willnauer, Mike McCandless, Robert Muir)
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
buffer deleted terms to later apply to past segments. (Mike McCandless)
* LUCENE-1736: DateTools.java general improvements.
(David Smiley via Steve Rowe)
Bug fixes
@ -478,6 +472,10 @@ Changes in backwards compatibility policy
of IndexInput) as its first argument. (Robert Muir, Dawid Weiss,
Mike McCandless)
* LUCENE-3191: FieldComparator.value now returns an Object not
Comparable; FieldDoc.fields also changed from Comparable[] to
Object[] (Uwe Schindler, Mike McCandless)
* LUCENE-3208: Made deprecated methods Query.weight(Searcher) and
Searcher.createWeight() final to prevent override. If you have
overridden one of these methods, cut over to the non-deprecated
@ -545,6 +543,11 @@ API Changes
argument, so the merge policy knows which segments were originally
present vs produced by an optimizing merge (Mike McCandless)
Optimizations
* LUCENE-1736: DateTools.java general improvements.
(David Smiley via Steve Rowe)
New Features
* LUCENE-3140: Added experimental FST implementation to Lucene.

View File

@ -34,6 +34,7 @@ import org.apache.lucene.util.Version;
/**
* Split an index based on a {@link Filter}.
*/
public class PKIndexSplitter {
private final Filter docsInFirstIndex;
private final Directory input;

View File

@ -1674,11 +1674,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
for(final MergePolicy.OneMerge merge : pendingMerges) {
merge.optimize = true;
merge.maxNumSegmentsOptimize = maxNumSegments;
segmentsToOptimize.put(merge.info, Boolean.TRUE);
}
for ( final MergePolicy.OneMerge merge: runningMerges ) {
merge.optimize = true;
merge.maxNumSegmentsOptimize = maxNumSegments;
segmentsToOptimize.put(merge.info, Boolean.TRUE);
}
}
@ -1891,7 +1893,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final MergePolicy.MergeSpecification spec;
if (optimize) {
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableMap(segmentsToOptimize));
if (spec != null) {
final int numMerges = spec.merges.size();
for(int i=0;i<numMerges;i++) {
@ -3044,7 +3045,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
if (merge.optimize) {
// cascade the optimize:
segmentsToOptimize.put(merge.info, Boolean.FALSE);
if (!segmentsToOptimize.containsKey(merge.info)) {
segmentsToOptimize.put(merge.info, Boolean.FALSE);
}
}
return true;

View File

@ -285,7 +285,7 @@ public abstract class LogMergePolicy extends MergePolicy {
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
@ -353,7 +353,7 @@ public abstract class LogMergePolicy extends MergePolicy {
assert maxNumSegments > 0;
if (verbose()) {
message("findMergesForOptimize: maxNumSegs=" + maxNumSegments + " segsToOptimize= "+ segmentsToOptimize);
message("findMergesForOptimize: maxNumSegs=" + maxNumSegments + " segsToOptimize="+ segmentsToOptimize);
}
// If the segments are already optimized (e.g. there's only 1 segment), or
@ -401,7 +401,7 @@ public abstract class LogMergePolicy extends MergePolicy {
break;
}
}
if (anyTooLarge) {
return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last);
} else {

View File

@ -101,9 +101,7 @@ public class UpgradeIndexMergePolicy extends MergePolicy {
// the resulting set contains all segments that are left over
// and will be merged to one additional segment:
for (final OneMerge om : spec.merges) {
for(SegmentInfo info : om.segments) {
oldSegments.remove(info);
}
oldSegments.keySet().removeAll(om.segments);
}
}

View File

@ -285,7 +285,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase {
for (Codec codec : codecs) {
provider.register(codec);
}
int num = atLeast(30);
int num = _TestUtil.nextInt(random, 30, 60);
for (int j = 0; j < num; j++) {
provider.setFieldCodec("" + j, codecs[random.nextInt(codecs.length)].name);
}
@ -296,7 +296,6 @@ public class TestPerFieldCodecSupport extends LuceneTestCase {
IndexWriter writer = newWriter(dir, config);
for (int j = 0; j < docsPerRound; j++) {
final Document doc = new Document();
num = atLeast(30);
for (int k = 0; k < num; k++) {
Field field = newField("" + k, _TestUtil
.randomRealisticUnicodeString(random, 128), indexValue[random

View File

@ -617,7 +617,7 @@ public class TestPhraseQuery extends LuceneTestCase {
int NUM_DOCS = atLeast(10);
for (int i = 0; i < NUM_DOCS; i++) {
// must be > 4096 so it spans multiple chunks
int termCount = atLeast(5000);
int termCount = _TestUtil.nextInt(random, 4097, 8200);
List<String> doc = new ArrayList<String>();

View File

@ -963,7 +963,7 @@ public class TestFSTs extends LuceneTestCase {
@Nightly
public void testBigSet() throws IOException {
testRandomWords(atLeast(50000), atLeast(1));
testRandomWords(_TestUtil.nextInt(random, 50000, 60000), atLeast(1));
}
private static String inputToString(int inputMode, IntsRef term) {

View File

@ -64,29 +64,12 @@ New Features
supports "percentages" which get evaluated relative the current size of
the cache when warming happens.
(Tomas Fernandez Lobbe and hossman)
* SOLR-1915: DebugComponent now supports using a NamedList to model
Explanation objects in it's responses instead of
Explanation.toString (hossman)
* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
norm, maxdoc, numdocs. (yonik)
* SOLR-1682: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Search grouping / Field collapsing.
(Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar,
Koji Sekiguchi, Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich,
Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger,
Harish Agarwal, yonik)
SOLR-2564: Integrate grouping module into Solr. Also adds the ability to return number of
groups that have match a query.
* SOLR-1665: Add debug component options for timings, results and query info only (gsingers, hossman, yonik)
* SOLR-2113: Add TermQParserPlugin, registered as "term". This is useful
when generating filter queries from terms returned from field faceting or
the terms component. Example: fq={!term f=weight}1.5 (hossman, yonik)
* SOLR-2001: The query component will substitute an empty query that matches
no documents if the query parser returns null. This also prevents an
exception from being thrown by the default parser if "q" is missing. (yonik)
@ -210,15 +193,6 @@ Bug Fixes
* SOLR-2275: fix DisMax 'mm' parsing to be tolerant of whitespace
(Erick Erickson via hossman)
* SOLR-309: Fix FieldType so setting an analyzer on a FieldType that
doesn't expect it will generate an error. Practically speaking this
means that Solr will now correctly generate an error on
initialization if the schema.xml contains an analyzer configuration
for a fieldType that does not use TextField. (hossman)
* SOLR-2467: Fix <analyzer class="..." /> initialization so any errors
are logged properly. (hossman)
Other Changes
----------------------
@ -251,19 +225,10 @@ Other Changes
* SOLR-2423: FieldType argument changed from String to Object
Conversion from SolrInputDocument > Object > Fieldable is now managed
by FieldType rather then DocumentBuilder. (ryan)
* SOLR-2061: Pull base tests out into a new Solr Test Framework module,
and publish binary, javadoc, and source test-framework jars.
(Drew Farris, Robert Muir, Steve Rowe)
* SOLR-2461: QuerySenderListener and AbstractSolrEventListener are
now public (hossman)
* SOLR-2451: Enhance assertJQ to allow individual tests to specify the
tolerance delta used in numeric equalities. This allows for slight
variance in asserting score comparisons in unit tests.
(David Smiley, Chris Hostetter)
* LUCENE-2995: Moved some spellchecker and suggest APIs to modules/suggest:
HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
suggester APIs and implementations. (rmuir)
@ -271,15 +236,6 @@ Other Changes
* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
(James Dyer via rmuir)
* LUCENE-3204: The maven-ant-tasks jar is now included in the source tree;
users of the generate-maven-artifacts target no longer have to manually
place this jar in the Ant classpath. NOTE: when Ant looks for the
maven-ant-tasks jar, it looks first in its pre-existing classpath, so
any copies it finds will be used instead of the copy included in the
Lucene/Solr source tree. For this reason, it is recommeded to remove
any copies of the maven-ant-tasks jar in the Ant classpath, e.g. under
~/.ant/lib/ or under the Ant installation's lib/ directory. (Steve Rowe)
Documentation
----------------------
@ -302,6 +258,14 @@ New Features
previous Tokenizers/TokenFilters (called "positionHistory").
(Uwe Schindler)
* SOLR-2524: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Grouping / Field collapsing
using the Lucene grouping contrib. The search result can be grouped by field and query.
(Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, Koji Sekiguchi,
Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, Bojan Smid,
Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger,
Harish Agarwal, yonik, Michael McCandless, Bill Bell)
Optimizations
----------------------
@ -331,6 +295,17 @@ Other Changes
* SOLR-2576: Deprecate SpellingResult.add(Token token, int docFreq), please use
SpellingResult.addFrequency(Token token, int docFreq) instead.
(James Dyer via rmuir)
* SOLR-2574: Upgrade slf4j to v1.6.1 (shalin)
* LUCENE-3204: The maven-ant-tasks jar is now included in the source tree;
users of the generate-maven-artifacts target no longer have to manually
place this jar in the Ant classpath. NOTE: when Ant looks for the
maven-ant-tasks jar, it looks first in its pre-existing classpath, so
any copies it finds will be used instead of the copy included in the
Lucene/Solr source tree. For this reason, it is recommeded to remove
any copies of the maven-ant-tasks jar in the Ant classpath, e.g. under
~/.ant/lib/ or under the Ant installation's lib/ directory. (Steve Rowe)
================== 3.2.0 ==================
Versions of Major Components
@ -359,6 +334,14 @@ New Features
Example: [{"id":"doc1"},{"id":"doc2"}]
(yonik)
* SOLR-2113: Add TermQParserPlugin, registered as "term". This is useful
when generating filter queries from terms returned from field faceting or
the terms component. Example: fq={!term f=weight}1.5 (hossman, yonik)
* SOLR-1915: DebugComponent now supports using a NamedList to model
Explanation objects in it's responses instead of
Explanation.toString (hossman)
Optimizations
----------------------
@ -401,6 +384,9 @@ Bug Fixes
did not clear all attributes so they displayed incorrect attribute values for tokens
in later filter stages. (uschindler, rmuir, yonik)
* SOLR-2467: Fix <analyzer class="..." /> initialization so any errors
are logged properly. (hossman)
* SOLR-2493: SolrQueryParser was fixed to not parse the SolrConfig DOM tree on each
instantiation which is a huge slowdown. (Stephane Bailliez via uschindler)
@ -418,12 +404,27 @@ Bug Fixes
* SOLR-2539: VectorValueSource.floatVal incorrectly used byteVal on sub-sources.
(Tom Liu via yonik)
* SOLR-2554: RandomSortField didn't work when used in a function query. (yonik)
Other Changes
----------------------
* SOLR-2061: Pull base tests out into a new Solr Test Framework module,
and publish binary, javadoc, and source test-framework jars.
(Drew Farris, Robert Muir, Steve Rowe)
* SOLR-2105: Rename RequestHandler param 'update.processor' to 'update.chain'.
(Jan Høydahl via Mark Miller)
* SOLR-2485: Deprecate BaseResponseWriter, GenericBinaryResponseWriter, and
GenericTextResponseWriter. These classes will be removed in 4.0. (ryan)
* SOLR-2451: Enhance assertJQ to allow individual tests to specify the
tolerance delta used in numeric equalities. This allows for slight
variance in asserting score comparisons in unit tests.
(David Smiley, Chris Hostetter)
* SOLR-2528: Remove default="true" from HtmlEncoder in example solrconfig.xml,
because html encoding confuses non-ascii users. (koji)