From 9c2a21ce29d190718446f03a4e77df6d1089f1c5 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 19 Jun 2011 10:15:32 +0000 Subject: [PATCH 1/6] LUCENE-3197: must also count the segment being produced by an in-flight merge as original (as of when optimize starts) to ensure all deletions against that segment are also merged away git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137330 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/index/PKIndexSplitter.java | 1 + lucene/src/java/org/apache/lucene/index/IndexWriter.java | 7 +++++-- .../src/java/org/apache/lucene/index/LogMergePolicy.java | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java index 398d99ddb7f..0ecc5fb784e 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.Version; /** * Split an index based on a {@link Filter}. 
*/ + public class PKIndexSplitter { private final Filter docsInFirstIndex; private final Directory input; diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index ce421ca68dd..8817c7601bd 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -1674,11 +1674,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { for(final MergePolicy.OneMerge merge : pendingMerges) { merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegments; + segmentsToOptimize.put(merge.info, Boolean.TRUE); } for ( final MergePolicy.OneMerge merge: runningMerges ) { merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegments; + segmentsToOptimize.put(merge.info, Boolean.TRUE); } } @@ -1891,7 +1893,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { final MergePolicy.MergeSpecification spec; if (optimize) { spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableMap(segmentsToOptimize)); - if (spec != null) { final int numMerges = spec.merges.size(); for(int i=0;i segments = infos.asList(); - + // First, enroll all "full" merges (size // mergeFactor) to potentially be run concurrently: while (last - maxNumSegments + 1 >= mergeFactor) { @@ -353,7 +353,7 @@ public abstract class LogMergePolicy extends MergePolicy { assert maxNumSegments > 0; if (verbose()) { - message("findMergesForOptimize: maxNumSegs=" + maxNumSegments + " segsToOptimize= "+ segmentsToOptimize); + message("findMergesForOptimize: maxNumSegs=" + maxNumSegments + " segsToOptimize="+ segmentsToOptimize); } // If the segments are already optimized (e.g. 
there's only 1 segment), or @@ -401,7 +401,7 @@ public abstract class LogMergePolicy extends MergePolicy { break; } } - + if (anyTooLarge) { return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last); } else { From 3eeef5bfbcbf94512ccd8b6b13d42c798528c8d7 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 19 Jun 2011 12:15:15 +0000 Subject: [PATCH 2/6] use keySet() view to remove merging segments git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137357 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/index/UpgradeIndexMergePolicy.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java b/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java index 6fa95c31806..9070b4924c1 100644 --- a/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java @@ -101,9 +101,7 @@ public class UpgradeIndexMergePolicy extends MergePolicy { // the resulting set contains all segments that are left over // and will be merged to one additional segment: for (final OneMerge om : spec.merges) { - for(SegmentInfo info : om.segments) { - oldSegments.remove(info); - } + oldSegments.keySet().removeAll(om.segments); } } From df18530cbda34f87fade6c84886c3156cfdd59c3 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 19 Jun 2011 12:27:22 +0000 Subject: [PATCH 3/6] sync CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137359 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 15 ++++---- solr/CHANGES.txt | 89 +++++++++++++++++++++++----------------------- 2 files changed, 54 insertions(+), 50 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 63060009486..d5f8328cf0b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -450,12 +450,6 @@ Optimizations MultiTermQuery now stores TermState per leaf reader 
during rewrite to re- seek the term dictionary in TermQuery / TermWeight. (Simon Willnauer, Mike McCandless, Robert Muir) - -* LUCENE-2897: Apply deleted terms while flushing a segment. We still - buffer deleted terms to later apply to past segments. (Mike McCandless) - -* LUCENE-1736: DateTools.java general improvements. - (David Smiley via Steve Rowe) Bug fixes @@ -478,6 +472,10 @@ Changes in backwards compatibility policy of IndexInput) as its first argument. (Robert Muir, Dawid Weiss, Mike McCandless) +* LUCENE-3191: FieldComparator.value now returns an Object not + Comparable; FieldDoc.fields also changed from Comparable[] to + Object[] (Uwe Schindler, Mike McCandless) + * LUCENE-3208: Made deprecated methods Query.weight(Searcher) and Searcher.createWeight() final to prevent override. If you have overridden one of these methods, cut over to the non-deprecated @@ -545,6 +543,11 @@ API Changes argument, so the merge policy knows which segments were originally present vs produced by an optimizing merge (Mike McCandless) +Optimizations + +* LUCENE-1736: DateTools.java general improvements. + (David Smiley via Steve Rowe) + New Features * LUCENE-3140: Added experimental FST implementation to Lucene. diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 230baa08fd3..3bbe0fbb4b4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -64,29 +64,12 @@ New Features supports "percentages" which get evaluated relative the current size of the cache when warming happens. (Tomas Fernandez Lobbe and hossman) - -* SOLR-1915: DebugComponent now supports using a NamedList to model - Explanation objects in it's responses instead of - Explanation.toString (hossman) * SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf norm, maxdoc, numdocs. (yonik) -* SOLR-1682: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Search grouping / Field collapsing. 
- (Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, - Koji Sekiguchi, Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, - Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald, - Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger, - Harish Agarwal, yonik) - SOLR-2564: Integrate grouping module into Solr. Also adds the ability to return number of - groups that have match a query. - * SOLR-1665: Add debug component options for timings, results and query info only (gsingers, hossman, yonik) -* SOLR-2113: Add TermQParserPlugin, registered as "term". This is useful - when generating filter queries from terms returned from field faceting or - the terms component. Example: fq={!term f=weight}1.5 (hossman, yonik) - * SOLR-2001: The query component will substitute an empty query that matches no documents if the query parser returns null. This also prevents an exception from being thrown by the default parser if "q" is missing. (yonik) @@ -210,15 +193,6 @@ Bug Fixes * SOLR-2275: fix DisMax 'mm' parsing to be tolerant of whitespace (Erick Erickson via hossman) -* SOLR-309: Fix FieldType so setting an analyzer on a FieldType that - doesn't expect it will generate an error. Practically speaking this - means that Solr will now correctly generate an error on - initialization if the schema.xml contains an analyzer configuration - for a fieldType that does not use TextField. (hossman) - -* SOLR-2467: Fix initialization so any errors - are logged properly. (hossman) - Other Changes ---------------------- @@ -251,19 +225,10 @@ Other Changes * SOLR-2423: FieldType argument changed from String to Object Conversion from SolrInputDocument > Object > Fieldable is now managed by FieldType rather then DocumentBuilder. (ryan) - -* SOLR-2061: Pull base tests out into a new Solr Test Framework module, - and publish binary, javadoc, and source test-framework jars. 
- (Drew Farris, Robert Muir, Steve Rowe) * SOLR-2461: QuerySenderListener and AbstractSolrEventListener are now public (hossman) -* SOLR-2451: Enhance assertJQ to allow individual tests to specify the - tolerance delta used in numeric equalities. This allows for slight - variance in asserting score comparisons in unit tests. - (David Smiley, Chris Hostetter) - * LUCENE-2995: Moved some spellchecker and suggest APIs to modules/suggest: HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the suggester APIs and implementations. (rmuir) @@ -271,15 +236,6 @@ Other Changes * SOLR-2576: Remove deprecated SpellingResult.add(Token, int). (James Dyer via rmuir) -* LUCENE-3204: The maven-ant-tasks jar is now included in the source tree; - users of the generate-maven-artifacts target no longer have to manually - place this jar in the Ant classpath. NOTE: when Ant looks for the - maven-ant-tasks jar, it looks first in its pre-existing classpath, so - any copies it finds will be used instead of the copy included in the - Lucene/Solr source tree. For this reason, it is recommeded to remove - any copies of the maven-ant-tasks jar in the Ant classpath, e.g. under - ~/.ant/lib/ or under the Ant installation's lib/ directory. (Steve Rowe) - Documentation ---------------------- @@ -302,6 +258,14 @@ New Features previous Tokenizers/TokenFilters (called "positionHistory"). (Uwe Schindler) +* SOLR-2524: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Grouping / Field collapsing + using the Lucene grouping contrib. The search result can be grouped by field and query. 
+ (Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, Koji Sekiguchi, + Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, Bojan Smid, + Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald, + Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger, + Harish Agarwal, yonik, Michael McCandless, Bill Bell) + Optimizations ---------------------- @@ -331,6 +295,17 @@ Other Changes * SOLR-2576: Deprecate SpellingResult.add(Token token, int docFreq), please use SpellingResult.addFrequency(Token token, int docFreq) instead. (James Dyer via rmuir) + +* SOLR-2574: Upgrade slf4j to v1.6.1 (shalin) + +* LUCENE-3204: The maven-ant-tasks jar is now included in the source tree; + users of the generate-maven-artifacts target no longer have to manually + place this jar in the Ant classpath. NOTE: when Ant looks for the + maven-ant-tasks jar, it looks first in its pre-existing classpath, so + any copies it finds will be used instead of the copy included in the + Lucene/Solr source tree. For this reason, it is recommended to remove + any copies of the maven-ant-tasks jar in the Ant classpath, e.g. under + ~/.ant/lib/ or under the Ant installation's lib/ directory. (Steve Rowe) ================== 3.2.0 ================== Versions of Major Components @@ -359,6 +334,14 @@ New Features Example: [{"id":"doc1"},{"id":"doc2"}] (yonik) +* SOLR-2113: Add TermQParserPlugin, registered as "term". This is useful + when generating filter queries from terms returned from field faceting or + the terms component. Example: fq={!term f=weight}1.5 (hossman, yonik) + +* SOLR-1915: DebugComponent now supports using a NamedList to model + Explanation objects in its responses instead of + Explanation.toString (hossman) + Optimizations ---------------------- @@ -401,6 +384,9 @@ Bug Fixes did not clear all attributes so they displayed incorrect attribute values for tokens in later filter stages.
(uschindler, rmuir, yonik) +* SOLR-2467: Fix initialization so any errors + are logged properly. (hossman) + * SOLR-2493: SolrQueryParser was fixed to not parse the SolrConfig DOM tree on each instantiation which is a huge slowdown. (Stephane Bailliez via uschindler) @@ -418,12 +404,27 @@ Bug Fixes * SOLR-2539: VectorValueSource.floatVal incorrectly used byteVal on sub-sources. (Tom Liu via yonik) +* SOLR-2554: RandomSortField didn't work when used in a function query. (yonik) + + Other Changes ---------------------- +* SOLR-2061: Pull base tests out into a new Solr Test Framework module, + and publish binary, javadoc, and source test-framework jars. + (Drew Farris, Robert Muir, Steve Rowe) + * SOLR-2105: Rename RequestHandler param 'update.processor' to 'update.chain'. (Jan Høydahl via Mark Miller) +* SOLR-2485: Deprecate BaseResponseWriter, GenericBinaryResponseWriter, and + GenericTextResponseWriter. These classes will be removed in 4.0. (ryan) + +* SOLR-2451: Enhance assertJQ to allow individual tests to specify the + tolerance delta used in numeric equalities. This allows for slight + variance in asserting score comparisons in unit tests. + (David Smiley, Chris Hostetter) + * SOLR-2528: Remove default="true" from HtmlEncoder in example solrconfig.xml, because html encoding confuses non-ascii users. 
(koji) From aaaaa42d54b311d4cc5ff53bd99f211652039e79 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 20 Jun 2011 02:06:16 +0000 Subject: [PATCH 4/6] fix test OOM problems git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137477 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java index 96238895821..3cf7df3626b 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java @@ -617,7 +617,7 @@ public class TestPhraseQuery extends LuceneTestCase { int NUM_DOCS = atLeast(10); for (int i = 0; i < NUM_DOCS; i++) { // must be > 4096 so it spans multiple chunks - int termCount = atLeast(5000); + int termCount = _TestUtil.nextInt(random, 4097, 8200); List doc = new ArrayList(); From 22c6d6e93ddec17efa6ecd731a99aa01cfb9d4b3 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 20 Jun 2011 02:10:57 +0000 Subject: [PATCH 5/6] fix test OOM git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137479 13f79535-47bb-0310-9956-ffa450edef68 --- .../test/org/apache/lucene/index/TestPerFieldCodecSupport.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java index c9efba94695..2108b687384 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java @@ -285,7 +285,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { for (Codec codec : codecs) { provider.register(codec); } - int num = atLeast(30); + int num = _TestUtil.nextInt(random, 30, 60); for (int j = 0; j < num; j++) { 
provider.setFieldCodec("" + j, codecs[random.nextInt(codecs.length)].name); } @@ -296,7 +296,6 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { IndexWriter writer = newWriter(dir, config); for (int j = 0; j < docsPerRound; j++) { final Document doc = new Document(); - num = atLeast(30); for (int k = 0; k < num; k++) { Field field = newField("" + k, _TestUtil .randomRealisticUnicodeString(random, 128), indexValue[random From 4d68c56a04ae93eae12cbc5c0fa27edcb3b91340 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 20 Jun 2011 02:13:39 +0000 Subject: [PATCH 6/6] fix test OOM problems git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137480 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java index d4aacaacaa9..d1abb262252 100644 --- a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -963,7 +963,7 @@ public class TestFSTs extends LuceneTestCase { @Nightly public void testBigSet() throws IOException { - testRandomWords(atLeast(50000), atLeast(1)); + testRandomWords(_TestUtil.nextInt(random, 50000, 60000), atLeast(1)); } private static String inputToString(int inputMode, IntsRef term) {